From 9f4c2399d66f7eb35c87a2e36d848c69e5d9a508 Mon Sep 17 00:00:00 2001 From: e99n09 Date: Sat, 29 Jun 2013 03:37:14 -0400 Subject: Create r.html.markdown An executable R tutorial (with complementary .csv file) --- r.html.markdown | 328 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 r.html.markdown (limited to 'r.html.markdown') diff --git a/r.html.markdown b/r.html.markdown new file mode 100644 index 00000000..ad2a4559 --- /dev/null +++ b/r.html.markdown @@ -0,0 +1,328 @@ +--- +language: R +author: e99n09 +author_url: http://github.com/e99n09 + +--- + +R is a statistical computing language. + +```r + +# Comments start with hash signs (#). + +# You can't make a multi-line comment per se, +# but you can stack multiple comments like so. + +# Protip: hit COMMAND-ENTER to execute a line + +################################################################################### +# The absolute basics +################################################################################### + +# NUMERICS + +# We've got numbers! Behold the "numeric" class +5 # => [1] 5 +class(5) # => [1] "numeric" +# Try ?class for more information on the class() function +# In fact, you can look up the documentation on just about anything with ? + +# Numerics are doubles. R also has an "integer" class (written 5L), but a bare 5 is a double +5 == 5.0 # => [1] TRUE +# Because whole numbers typed without the L suffix are stored as doubles, +# R prints the "integer" form instead of the equivalent "double" form +# whenever it's convenient: +5.0 # => [1] 5 + +# All the normal operations! +10 + 66 # => [1] 76 +53.2 - 4 # => [1] 49.2 +3.37 * 5.4 # => [1] 18.198 +2 * 2.0 # => [1] 4 +3 / 4 # => [1] 0.75 +2.0 / 2 # => [1] 1 +3 %% 2 # => [1] 1 +4 %% 2 # => [1] 0 + +# Finally, we've got not-a-numbers! They're numerics too +class(NaN) # => [1] "numeric" + +# CHARACTERS + +# We've (sort of) got strings! 
Behold the "character" class +"plugh" # => [1] "plugh" +class("plugh") # => [1] "character" +# There's no difference between strings and characters in R + +# LOGICALS + +# We've got booleans! Behold the "logical" class +class(TRUE) # => [1] "logical" +class(FALSE) # => [1] "logical" +# Behavior is normal +TRUE == TRUE # => [1] TRUE +TRUE == FALSE # => [1] FALSE +FALSE != FALSE # => [1] FALSE +FALSE != TRUE # => [1] TRUE +# Missing data (NA) is logical, too +class(NA) # => [1] "logical" + +# FACTORS + +# The factor class is for categorical data +# It has an attribute called levels that describes all the possible categories +factor("dog") +# => +# [1] dog +# Levels: dog +# (This will make more sense once we start talking about vectors) + +# VARIABLES + +# Lots of ways to assign stuff +x = 5 # this is possible +y <- "1" # this is preferred +TRUE -> z # this works but is weird + +# We can coerce variables to different classes +as.numeric(y) # => [1] 1 +as.character(x) # => [1] "5" + +# LOOPS + +# We've got for loops +for (i in 1:4) { + print(i) +} + +# We've got while loops +a <- 10 +while (a > 4) { + cat(a, "...", sep = "") + a <- a - 1 +} + +# Keep in mind that for and while loops run slowly in R +# Operations on entire vectors (i.e. 
a whole row, a whole column) +# or apply()-type functions (we'll discuss later) are preferred + +# FUNCTIONS + +# Defined like so: +myFunc <- function(x) { + x <- x * 4 + x <- x - 1 + return(x) +} + +# Called like any other R function: +myFunc(5) # => [1] 19 + +################################################################################### +# Fun with data: vectors, matrices, data frames, and arrays +################################################################################### + +# ONE-DIMENSIONAL + +# You can vectorize anything, so long as all components have the same type +vec <- c(4, 5, 6, 7) +vec # => [1] 4 5 6 7 +# The class of a vector is the class of its components +class(vec) # => [1] "numeric" +# If you vectorize items of different classes, weird coercions happen +c(TRUE, 4) # => [1] 1 4 +c("dog", TRUE, 4) # => [1] "dog" "TRUE" "4" + +# We ask for specific components like so (R starts counting from 1) +vec[1] # => [1] 4 +# We can also search for the indices of specific components +which(vec %% 2 == 0) # => [1] 1 3 +# If an index "goes over" you'll get NA: +vec[6] # => [1] NA + +# You can perform operations on entire vectors or subsets of vectors +vec * 4 # => [1] 16 20 24 28 +vec[2:3] * 5 # => [1] 25 30 + +# TWO-DIMENSIONAL (ALL ONE CLASS) + +# You can make a matrix out of entries all of the same type like so: +mat <- matrix(nrow = 3, ncol = 2, c(1,2,3,4,5,6)) +mat +# => +# [,1] [,2] +# [1,] 1 4 +# [2,] 2 5 +# [3,] 3 6 +# Unlike a vector, the class of a matrix is "matrix", no matter what's in it +class(mat) # => [1] "matrix" "array" (just "matrix" before R 4.0) +# Ask for the first row +mat[1,] # => [1] 1 4 +# Perform operation on the first column +3 * mat[,1] # => [1] 3 6 9 +# Ask for a specific cell +mat[3,2] # => [1] 6 +# Transpose the whole matrix +t(mat) +# => +# [,1] [,2] [,3] +# [1,] 1 2 3 +# [2,] 4 5 6 + +# cbind() sticks vectors together column-wise to make a matrix +mat2 <- cbind(1:4, c("dog", "cat", "bird", "dog")) +mat2 +# => +# [,1] [,2] +# [1,] "1" "dog" +# [2,] "2" "cat" +# [3,] "3" 
"bird" +# [4,] "4" "dog" +class(mat2) # => [1] "matrix" "array" +# Again, note what happened! +# Because matrices must contain entries all of the same class, +# everything got converted to the character class +c(class(mat2[,1]), class(mat2[,2])) # => [1] "character" "character" + +# rbind() sticks vectors together row-wise to make a matrix +mat3 <- rbind(c(1,2,4,5), c(6,7,0,4)) +mat3 +# => +# [,1] [,2] [,3] [,4] +# [1,] 1 2 4 5 +# [2,] 6 7 0 4 +# Aah, everything of the same class. No coercions. Much better. + +# TWO-DIMENSIONAL (DIFFERENT CLASSES) + +# For columns of different classes, use the data frame +dat <- data.frame(c(5,2,1,4), c("dog", "cat", "bird", "dog")) +names(dat) <- c("number", "species") # name the columns +class(dat) # => [1] "data.frame" +dat +# => +# number species +# 1 5 dog +# 2 2 cat +# 3 1 bird +# 4 4 dog +class(dat$number) # => [1] "numeric" +class(dat[,2]) # => [1] "character" ("factor" before R 4.0) +# Since R 4.0, data.frame() keeps character vectors as-is; pass stringsAsFactors = TRUE for factors + +# There are many twisty ways to subset data frames, all subtly unalike +dat$number # => [1] 5 2 1 4 +dat[,1] # => [1] 5 2 1 4 +dat[,"number"] # => [1] 5 2 1 4 + +# MULTI-DIMENSIONAL (ALL OF ONE CLASS) + +# array() creates n-dimensional tables +# You can make a two-dimensional table (sort of like a matrix) +array(c(c(1,2,4,5),c(8,9,3,6)), dim=c(2,4)) +# => +# [,1] [,2] [,3] [,4] +# [1,] 1 4 8 3 +# [2,] 2 5 9 6 +# You can use array() to make three-dimensional arrays too +array(c(c(c(2,300,4),c(8,9,0)),c(c(5,60,0),c(66,7,847))), dim=c(3,2,2)) +# => +# , , 1 +# +# [,1] [,2] +# [1,] 2 8 +# [2,] 300 9 +# [3,] 4 0 +# +# , , 2 +# +# [,1] [,2] +# [1,] 5 66 +# [2,] 60 7 +# [3,] 0 847 + +# LISTS (MULTI-DIMENSIONAL, POSSIBLY RAGGED, OF DIFFERENT TYPES) + +# Finally, R has lists (of vectors) +list1 <- list(time = 1:40, price = rnorm(40, .5 * (1:40), 4)) # generate random prices +list1 + +# You can get items in the list like so +list1$time +# You can subset list items like vectors +list1$price[4] + +################################################################################### +# 
The apply() family of functions +################################################################################### + +# Remember mat? +mat +# => +# [,1] [,2] +# [1,] 1 4 +# [2,] 2 5 +# [3,] 3 6 +# Use apply(X, MARGIN, FUN) to apply function FUN to a matrix X +# over rows (MARGIN = 1) or columns (MARGIN = 2); MAR below is shorthand for MARGIN +# That is, R does FUN to each row (or column) of X, much faster than a +# for or while loop would do +apply(mat, MAR = 2, myFunc) +# => +# [,1] [,2] +# [1,] 3 15 +# [2,] 7 19 +# [3,] 11 23 +# Other functions: ?lapply, ?sapply +# Don't feel too intimidated; everyone agrees they are rather confusing + +# The plyr package aims to replace (and improve upon!) the *apply() family. + +install.packages("plyr") +require(plyr) +?plyr + +################################################################################### +# Loading data +################################################################################### + +# "pets.csv" is a file on the internet +pets <- read.csv("http://learnxinyminutes.com/docs/pets.csv") +pets +head(pets, 2) # first two rows +tail(pets, 1) # last row + +# To save a data frame or matrix as a .csv file +write.csv(pets, "pets2.csv") # to make a new .csv file in the working directory +# set working directory with setwd(), look it up with getwd() + +# Try ?read.csv and ?write.csv for more information + +################################################################################### +# Plots +################################################################################### + +# Scatterplots! +plot(list1$time, list1$price, main = "fake data") +# Fit a linear model +myLm <- lm(price ~ time, data = list1) +myLm # outputs result of regression +# Plot regression line on existing plot +abline(myLm, col = "red") +# Get a variety of nice diagnostics +plot(myLm) + +# Histograms! +hist(rpois(n = 10000, lambda = 5), col = "thistle") + +# Barplots! 
+barplot(c(1,4,5,1,2), names.arg = c("red","blue","purple","green","yellow")) + +# Try the ggplot2 package for more and better graphics + +install.packages("ggplot2") +require(ggplot2) +?ggplot2 + +``` + + -- cgit v1.2.3