diff options
Diffstat (limited to 'r.html.markdown')
-rw-r--r-- | r.html.markdown | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/r.html.markdown b/r.html.markdown new file mode 100644 index 00000000..535b9065 --- /dev/null +++ b/r.html.markdown @@ -0,0 +1,350 @@ +--- +language: R +author: e99n09 +author_url: http://github.com/e99n09 +filename: learnr.r +--- + +R is a statistical computing language. It has lots of good built-in functions for uploading and cleaning data sets, running common statistical tests, and making graphs. You can also easily compile it within a LaTeX document. + +```python + +# Comments start with hashtags. + +# You can't make a multi-line comment per se, +# but you can stack multiple comments like so. + +# Hit COMMAND-ENTER to execute a line + +######################### +# The absolute basics +######################### + +# NUMBERS + +# We've got doubles! Behold the "numeric" class +5 # => [1] 5 +class(5) # => [1] "numeric" +# We've also got integers! They look suspiciously similar, +# but indeed are different +5L # => [1] 5 +class(5L) # => [1] "integer" +# Try ?class for more information on the class() function +# In fact, you can look up the documentation on just about anything with ? + +# All the normal operations! +10 + 66 # => [1] 76 +53.2 - 4 # => [1] 49.2 +2 * 2.0 # => [1] 4 +3L / 4 # => [1] 0.75 +3 %% 2 # => [1] 1 + +# Finally, we've got not-a-numbers! They're numerics too +class(NaN) # => [1] "numeric" + +# CHARACTERS + +# We've (sort of) got strings! Behold the "character" class +"plugh" # => [1] "plugh" +class("plugh") # "character" +# There's no difference between strings and characters in R + +# LOGICALS + +# We've got booleans! Behold the "logical" class +class(TRUE) # => [1] "logical" +class(FALSE) # => [1] "logical" +# Behavior is normal +TRUE == TRUE # => [1] TRUE +TRUE == FALSE # => [1] FALSE +FALSE != FALSE # => [1] FALSE +FALSE != TRUE # => [1] TRUE +# Missing data (NA) is logical, too +class(NA) # => [1] "logical" + +# FACTORS + +# The factor class is for categorical data +# It has an attribute called levels that describes all the possible categories +factor("dog") +# => +# [1] dog +# Levels: dog +# (This will make more sense once we start talking about vectors) + +# VARIABLES + +# Lots of way to assign stuff +x = 5 # this is possible +y <- "1" # this is preferred +TRUE -> z # this works but is weird + +# We can use coerce variables to different classes +as.numeric(y) # => [1] 1 +as.character(x) # => [1] "5" + +# LOOPS + +# We've got for loops +for (i in 1:4) { + print(i) +} + +# We've got while loops +a <- 10 +while (a > 4) { + cat(a, "...", sep = "") + a <- a - 1 +} + +# Keep in mind that for and while loops run slowly in R +# Operations on entire vectors (i.e. a whole row, a whole column) +# or apply()-type functions (we'll discuss later) are preferred + +# IF/ELSE + +# Again, pretty standard +if (4 > 3) { + print("Huzzah! It worked!") +} else { + print("Noooo! This is blatantly illogical!") +} +# => +# [1] "Huzzah! It worked!" + +# FUNCTIONS + +# Defined like so: +myFunc <- function(x) { + x <- x * 4 + x <- x - 1 + return(x) +} + +# Called like any other R function: +myFunc(5) # => [1] 19 + +######################### +# Fun with data: vectors, matrices, data frames, and arrays +######################### + +# ONE-DIMENSIONAL + +# You can vectorize anything, so long as all components have the same type +vec <- c(8, 9, 10, 11) +vec # => [1] 8 9 10 11 +# The class of a vector is the class of its components +class(vec) # => [1] "numeric" +# If you vectorize items of different classes, weird coercions happen +c(TRUE, 4) # => [1] 1 4 +c("dog", TRUE, 4) # => [1] "dog" "TRUE" "4" + +# We ask for specific components like so (R starts counting from 1) +vec[1] # => [1] 8 +# We can also search for the indices of specific components, +which(vec %% 2 == 0) # => [1] 1 3 +# or grab just the first or last entry in the vector +head(vec, 1) # => [1] 8 +tail(vec, 1) # => [1] 11 +# If an index "goes over" you'll get NA: +vec[6] # => [1] NA +# You can find the length of your vector with length() +length(vec) # => [1] 4 + +# You can perform operations on entire vectors or subsets of vectors +vec * 4 # => [1] 16 20 24 28 +vec[2:3] * 5 # => [1] 25 30 +# and there are many built-in functions to summarize vectors +mean(vec) # => [1] 9.5 +var(vec) # => [1] 1.666667 +sd(vec) # => [1] 1.290994 +max(vec) # => [1] 11 +min(vec) # => [1] 8 +sum(vec) # => [1] 38 + +# TWO-DIMENSIONAL (ALL ONE CLASS) + +# You can make a matrix out of entries all of the same type like so: +mat <- matrix(nrow = 3, ncol = 2, c(1,2,3,4,5,6)) +mat +# => +# [,1] [,2] +# [1,] 1 4 +# [2,] 2 5 +# [3,] 3 6 +# Unlike a vector, the class of a matrix is "matrix", no matter what's in it +class(mat) # => "matrix" +# Ask for the first row +mat[1,] # => [1] 1 4 +# Perform operation on the first column +3 * mat[,1] # => [1] 3 6 9 +# Ask for a specific cell +mat[3,2] # => [1] 6 +# Transpose the whole matrix +t(mat) +# => +# [,1] [,2] [,3] +# [1,] 1 2 3 +# [2,] 4 5 6 + +# cbind() sticks vectors together column-wise to make a matrix +mat2 <- cbind(1:4, c("dog", "cat", "bird", "dog")) +mat2 +# => +# [,1] [,2] +# [1,] "1" "dog" +# [2,] "2" "cat" +# [3,] "3" "bird" +# [4,] "4" "dog" +class(mat2) # => [1] matrix +# Again, note what happened! +# Because matrices must contain entries all of the same class, +# everything got converted to the character class +c(class(mat2[,1]), class(mat2[,2])) + +# rbind() sticks vectors together row-wise to make a matrix +mat3 <- rbind(c(1,2,4,5), c(6,7,0,4)) +mat3 +# => +# [,1] [,2] [,3] [,4] +# [1,] 1 2 4 5 +# [2,] 6 7 0 4 +# Aah, everything of the same class. No coercions. Much better. + +# TWO-DIMENSIONAL (DIFFERENT CLASSES) + +# For columns of different classes, use the data frame +dat <- data.frame(c(5,2,1,4), c("dog", "cat", "bird", "dog")) +names(dat) <- c("number", "species") # name the columns +class(dat) # => [1] "data.frame" +dat +# => +# number species +# 1 5 dog +# 2 2 cat +# 3 1 bird +# 4 4 dog +class(dat$number) # => [1] "numeric" +class(dat[,2]) # => [1] "factor" +# The data.frame() function converts character vectors to factor vectors + +# There are many twisty ways to subset data frames, all subtly unalike +dat$number # => [1] 5 2 1 4 +dat[,1] # => [1] 5 2 1 4 +dat[,"number"] # => [1] 5 2 1 4 + +# MULTI-DIMENSIONAL (ALL OF ONE CLASS) + +# Arrays creates n-dimensional tables +# You can make a two-dimensional table (sort of like a matrix) +array(c(c(1,2,4,5),c(8,9,3,6)), dim=c(2,4)) +# => +# [,1] [,2] [,3] [,4] +# [1,] 1 4 8 3 +# [2,] 2 5 9 6 +# You can use array to make three-dimensional matrices too +array(c(c(c(2,300,4),c(8,9,0)),c(c(5,60,0),c(66,7,847))), dim=c(3,2,2)) +# => +# , , 1 +# +# [,1] [,2] +# [1,] 1 4 +# [2,] 2 5 +# +# , , 2 +# +# [,1] [,2] +# [1,] 8 1 +# [2,] 9 2 + +# LISTS (MULTI-DIMENSIONAL, POSSIBLY RAGGED, OF DIFFERENT TYPES) + +# Finally, R has lists (of vectors) +list1 <- list(time = 1:40) +list1$price = c(rnorm(40,.5*list1$time,4)) # random +list1 + +# You can get items in the list like so +list1$time +# You can subset list items like vectors +list1$price[4] + +######################### +# The apply() family of functions +######################### + +# Remember mat? +mat +# => +# [,1] [,2] +# [1,] 1 4 +# [2,] 2 5 +# [3,] 3 6 +# Use apply(X, MARGIN, FUN) to apply function FUN to a matrix X +# over rows (MAR = 1) or columns (MAR = 2) +# That is, R does FUN to each row (or column) of X, much faster than a +# for or while loop would do +apply(mat, MAR = 2, myFunc) +# => +# [,1] [,2] +# [1,] 3 15 +# [2,] 7 19 +# [3,] 11 23 +# Other functions: ?lapply, ?sapply + +# Don't feel too intimidated; everyone agrees they are rather confusing + +# The plyr package aims to replace (and improve upon!) the *apply() family. + +install.packages("plyr") +require(plyr) +?plyr + +######################### +# Loading data +######################### + +# "pets.csv" is a file on the internet +pets <- read.csv("http://learnxinyminutes.com/docs/pets.csv") +pets +head(pets, 2) # first two rows +tail(pets, 1) # last row + +# To save a data frame or matrix as a .csv file +write.csv(pets, "pets2.csv") # to make a new .csv file +# set working directory with setwd(), look it up with getwd() + +# Try ?read.csv and ?write.csv for more information + +######################### +# Plots +######################### + +# Scatterplots! +plot(list1$time, list1$price, main = "fake data") +# Regressions! +linearModel <- lm(price ~ time, data = list1) +linearModel # outputs result of regression +# Plot regression line on existing plot +abline(linearModel, col = "red") +# Get a variety of nice diagnostics +plot(linearModel) + +# Histograms! +hist(rpois(n = 10000, lambda = 5), col = "thistle") + +# Barplots! +barplot(c(1,4,5,1,2), names.arg = c("red","blue","purple","green","yellow")) + +# Try the ggplot2 package for more and better graphics + +install.packages("ggplot2") +require(ggplot2) +?ggplot2 + +``` + +## How do I get R? + +* Get R and the R GUI from [http://www.r-project.org/](http://www.r-project.org/) +* [RStudio](http://www.rstudio.com/ide/) is another GUI |