---
language: R
author: e99n09
author_url: http://github.com/e99n09
---

R is a statistical computing language.

```r

# Comments start with hash signs (#).

# You can't make a multi-line comment per se,
# but you can stack multiple comments like so.

# Protip: in RStudio, hit COMMAND-ENTER (CTRL-ENTER on Windows/Linux)
# to execute a line

###################################################################################
# The absolute basics
###################################################################################

# NUMERICS

# We've got numbers! Behold the "numeric" class
5 # => [1] 5
class(5) # => [1] "numeric"
# Try ?class for more information on the class() function
# In fact, you can look up the documentation on just about anything with ?

# Numerics are double-precision floating point numbers. A bare literal such
# as 5 is a double; R also has a separate "integer" class, written with an
# L suffix
class(5L) # => [1] "integer"
# Integers and doubles holding the same value compare equal
5L == 5 # => [1] TRUE
5 == 5.0 # => [1] TRUE
# R prints a whole-valued double without the decimal point
# whenever it's convenient:
5.0 # => [1] 5

# All the normal operations!
10 + 66 # => [1] 76
53.2 - 4 # => [1] 49.2
3.37 * 5.4 # => [1] 18.198
2 * 2.0 # => [1] 4
3 / 4 # => [1] 0.75
2.0 / 2 # => [1] 1
# %% is the modulo (remainder) operator
3 %% 2 # => [1] 1
4 %% 2 # => [1] 0

# Finally, we've got not-a-numbers! They're numerics too
class(NaN) # => [1] "numeric"

# CHARACTERS

# We've (sort of) got strings! Behold the "character" class
"plugh" # => [1] "plugh"
class("plugh") # => [1] "character"
# There's no difference between strings and characters in R

# LOGICALS

# We've got booleans!
# Behold the "logical" class
class(TRUE) # => [1] "logical"
class(FALSE) # => [1] "logical"
# Behavior is normal
TRUE == TRUE # => [1] TRUE
TRUE == FALSE # => [1] FALSE
FALSE != FALSE # => [1] FALSE
FALSE != TRUE # => [1] TRUE
# Missing data (NA) is logical, too
class(NA) # => [1] "logical"

# FACTORS

# The factor class is for categorical data
# It has an attribute called levels that describes all the possible categories
factor("dog")
# =>
# [1] dog
# Levels: dog
# (This will make more sense once we start talking about vectors)

# VARIABLES

# Lots of ways to assign stuff
x = 5 # this is possible
y <- "1" # this is preferred
TRUE -> z # this works but is weird

# We can coerce variables to different classes
as.numeric(y) # => [1] 1
as.character(x) # => [1] "5"

# LOOPS

# We've got for loops; this one prints 1, 2, 3, 4 on separate lines
for (i in 1:4) {
  print(i)
}

# We've got while loops; this one prints 10...9...8...7...6...5...
a <- 10
while (a > 4) {
  cat(a, "...", sep = "")
  a <- a - 1
}

# Keep in mind that for and while loops run slowly in R
# Operations on entire vectors (i.e.
# a whole row, a whole column)
# or apply()-type functions (we'll discuss later) are preferred

# FUNCTIONS

# Defined like so:
# myFunc takes a numeric vector x and returns 4 * x - 1, element by element
myFunc <- function(x) {
  x <- x * 4
  x <- x - 1
  return(x)
}

# Called like any other R function:
myFunc(5) # => [1] 19

###################################################################################
# Fun with data: vectors, matrices, data frames, and arrays
###################################################################################

# ONE-DIMENSIONAL

# You can vectorize anything, so long as all components have the same type
vec <- c(4, 5, 6, 7)
vec # => [1] 4 5 6 7
# The class of a vector is the class of its components
class(vec) # => [1] "numeric"
# If you vectorize items of different classes, weird coercions happen
c(TRUE, 4) # => [1] 1 4
c("dog", TRUE, 4) # => [1] "dog" "TRUE" "4"

# We ask for specific components like so (R starts counting from 1)
vec[1] # => [1] 4
# We can also search for the indices of specific components
which(vec %% 2 == 0) # => [1] 1 3
# If an index "goes over" you'll get NA:
vec[6] # => [1] NA

# You can perform operations on entire vectors or subsets of vectors
vec * 4 # => [1] 16 20 24 28
vec[2:3] * 5 # => [1] 25 30

# TWO-DIMENSIONAL (ALL ONE CLASS)

# You can make a matrix out of entries all of the same type like so
# (the data fill the matrix column by column):
mat <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 3, ncol = 2)
mat
# =>
#      [,1] [,2]
# [1,]    1    4
# [2,]    2    5
# [3,]    3    6
# Unlike a vector, the class of a matrix does not depend on what's in it
class(mat) # => [1] "matrix" "array"   (just "matrix" before R 4.0)
# Ask for the first row
mat[1, ] # => [1] 1 4
# Perform operation on the first column
3 * mat[, 1] # => [1] 3 6 9
# Ask for a specific cell
mat[3, 2] # => [1] 6
# Transpose the whole matrix
t(mat)
# =>
#      [,1] [,2] [,3]
# [1,]    1    2    3
# [2,]    4    5    6

# cbind() sticks vectors together column-wise to make a matrix
mat2 <- cbind(1:4, c("dog", "cat", "bird", "dog"))
mat2
# =>
#      [,1] [,2]
# [1,] "1"  "dog"
# [2,] "2"  "cat"
# [3,] "3"  "bird"
# [4,] "4"  "dog"
class(mat2) # => [1] "matrix" "array"   (just "matrix" before R 4.0)
# Again, note what happened!
# Because matrices must contain entries all of the same class,
# everything got converted to the character class
c(class(mat2[, 1]), class(mat2[, 2])) # => [1] "character" "character"

# rbind() sticks vectors together row-wise to make a matrix
mat3 <- rbind(c(1, 2, 4, 5), c(6, 7, 0, 4))
mat3
# =>
#      [,1] [,2] [,3] [,4]
# [1,]    1    2    4    5
# [2,]    6    7    0    4
# Aah, everything of the same class. No coercions. Much better.

# TWO-DIMENSIONAL (DIFFERENT CLASSES)

# For columns of different classes, use the data frame
# stringsAsFactors = TRUE is spelled out because its default changed from
# TRUE to FALSE in R 4.0; without it the species column stays "character"
dat <- data.frame(c(5, 2, 1, 4), c("dog", "cat", "bird", "dog"),
                  stringsAsFactors = TRUE)
names(dat) <- c("number", "species") # name the columns
class(dat) # => [1] "data.frame"
dat
# =>
#   number species
# 1      5     dog
# 2      2     cat
# 3      1    bird
# 4      4     dog
class(dat$number) # => [1] "numeric"
class(dat[, 2]) # => [1] "factor"
# With stringsAsFactors = TRUE, data.frame() converts character vectors
# to factor vectors

# There are many twisty ways to subset data frames, all subtly unalike
dat$number # => [1] 5 2 1 4
dat[, 1] # => [1] 5 2 1 4
dat[, "number"] # => [1] 5 2 1 4

# MULTI-DIMENSIONAL (ALL OF ONE CLASS)

# array() creates n-dimensional tables
# You can make a two-dimensional table (sort of like a matrix)
array(c(c(1, 2, 4, 5), c(8, 9, 3, 6)), dim = c(2, 4))
# =>
#      [,1] [,2] [,3] [,4]
# [1,]    1    4    8    3
# [2,]    2    5    9    6
# You can use array to make three-dimensional matrices too
array(c(c(c(2, 300, 4), c(8, 9, 0)), c(c(5, 60, 0), c(66, 7, 847))),
      dim = c(3, 2, 2))
# =>
# , , 1
#
#      [,1] [,2]
# [1,]    2    8
# [2,]  300    9
# [3,]    4    0
#
# , , 2
#
#      [,1] [,2]
# [1,]    5   66
# [2,]   60    7
# [3,]    0  847

# LISTS (MULTI-DIMENSIONAL, POSSIBLY RAGGED, OF DIFFERENT TYPES)

# Finally, R has lists (of vectors)
# The list is built in two steps: price depends on time, and list1$time
# cannot be read inside the list() call that creates list1
list1 <- list(time = 1:40)
list1$price <- rnorm(40, mean = 0.5 * list1$time, sd = 4) # generate random
list1

# You can get items in the list like so
list1$time
# You can subset list items like vectors
list1$price[4]

###################################################################################
# The apply() family of functions
###################################################################################

# Remember mat?
mat
# =>
#      [,1] [,2]
# [1,]    1    4
# [2,]    2    5
# [3,]    3    6
# Use apply(X, MARGIN, FUN) to apply function FUN to a matrix X
# over rows (MARGIN = 1) or columns (MARGIN = 2)
# That is, R does FUN to each row (or column) of X, without you having to
# write the for or while loop yourself
apply(mat, MARGIN = 2, myFunc)
# =>
#      [,1] [,2]
# [1,]    3   15
# [2,]    7   19
# [3,]   11   23
# Other functions: ?lapply, ?sapply

# Don't feel too intimidated; everyone agrees they are rather confusing

# The plyr package aims to replace (and improve upon!) the *apply() family.

install.packages("plyr")
# library() stops with an error if the package is missing, whereas
# require() would only return FALSE and let the script carry on
library(plyr)
?plyr

###################################################################################
# Loading data
###################################################################################

# "pets.csv" is a file on the internet
pets <- read.csv("http://learnxinyminutes.com/docs/pets.csv")
pets
head(pets, 2) # first two rows
tail(pets, 1) # last row

# To save a data frame or matrix as a .csv file
write.csv(pets, "pets2.csv") # to make a new .csv file in the working directory
# set working directory with setwd(), look it up with getwd()

# Try ?read.csv and ?write.csv for more information

###################################################################################
# Plots
###################################################################################

# Scatterplots!
plot(list1$time, list1$price, main = "fake data")
# Fit a linear model
myLm <- lm(price ~ time, data = list1)
myLm # outputs result of regression
# Plot regression line on existing plot
abline(myLm, col = "red")
# Get a variety of nice diagnostics
plot(myLm)

# Histograms!
hist(rpois(n = 10000, lambda = 5), col = "thistle")

# Barplots!
barplot(c(1, 4, 5, 1, 2), names.arg = c("red", "blue", "purple", "green", "yellow"))

# Try the ggplot2 package for more and better graphics

install.packages("ggplot2")
library(ggplot2)
?ggplot2

```