diff options
| author | e99n09 <ysiioj81pcqu@lavabit.com> | 2014-05-24 08:54:39 -0400 | 
|---|---|---|
| committer | e99n09 <ysiioj81pcqu@lavabit.com> | 2014-05-24 08:54:39 -0400 | 
| commit | a8d8cee0d82a8f8be48f3a80c307223856764a31 (patch) | |
| tree | bd8efc75099207901dfb9e8cb29002ef86736a27 | |
| parent | 31c74615e6a492db39e1586c3d4aaa7c4ada5e56 (diff) | |
Update r.html.markdown
Minor changes to comments (fixing typos, etc.). Deleted the "weird types" section; broke out "NULL" into its own type category. Added instructions for dropping rows and columns in data.frame and data.table, and for making summary tables in data.table.
| -rw-r--r-- | r.html.markdown | 93 | 
1 files changed, 69 insertions, 24 deletions
| diff --git a/r.html.markdown b/r.html.markdown index dfc945c1..cd09e8da 100644 --- a/r.html.markdown +++ b/r.html.markdown @@ -188,7 +188,7 @@ class(-Inf)	# "numeric"  10L + 66L # 76      # integer plus integer gives integer  53.2 - 4  # 49.2    # numeric minus numeric gives numeric  2.0 * 2L  # 4       # numeric times integer gives numeric -3L / 4    # 0.75    # integer over integer gives numeric +3L / 4    # 0.75    # integer over numeric gives numeric  3 %% 2	  # 1       # the remainder of two numerics is another numeric  # Illegal arithmetic yeilds you a "not-a-number":  0 / 0 # NaN @@ -241,27 +241,29 @@ factor(c("female", "female", "male", "NA", "female"))  # Levels: female male NA  # The "levels" are the values the categorical data can take  levels(factor(c("male", "male", "female", "NA", "female"))) # "female" "male"   "NA"  -# If a factor has length 1, its levels will have length 1, too +# If a factor vector has length 1, its levels will have length 1, too  length(factor("male")) # 1  length(levels(factor("male"))) # 1  # Factors are commonly seen in data frames, a data structure we will cover later -# in this tutorial:  data(infert) # "Infertility after Spontaneous and Induced Abortion"  levels(infert$education) # "0-5yrs"  "6-11yrs" "12+ yrs" -# WEIRD TYPES -# A quick summary of some of the weirder types in R -class(Inf)	# "numeric" -class(-Inf)	# "numeric" -class(NaN)	# "numeric" -class(NA)	# "logical" +# NULL +# "NULL" is a weird one; use it to "blank out" a vector  class(NULL)	# NULL +parakeet +# => +# [1] "beak"     "feathers" "wings"    "eyes"     +parakeet <- NULL +parakeet +# => +# NULL  # TYPE COERCION  # Type-coercion is when you force a value to take on a different type  as.character(c(6, 8)) # "6" "8"  as.logical(c(1,0,1,1)) # TRUE FALSE  TRUE  TRUE -# If you put elements of different classes into a vector, weird coercions happen: +# If you put elements of different types into a vector, weird coercions happen:  c(TRUE, 4) # 1 4  c("dog", 
TRUE, 4) # "dog"  "TRUE" "4"  as.numeric("Bilbo") @@ -332,8 +334,6 @@ jiggle(5)	# 5±ε. After set.seed(2716057), jiggle(5)==5.005043  # ONE-DIMENSIONAL  # Let's start from the very beginning, and with something you already know: vectors. -# As explained above, every single element in R is already a vector -# Make sure the elements of long vectors all have the same type  vec <- c(8, 9, 10, 11)  vec	#  8  9 10 11  # We ask for specific elements by subsetting with square brackets @@ -345,9 +345,9 @@ month.name[9]	# "September"  c(6, 8, 7, 5, 3, 0, 9)[3]	# 7  # We can also search for the indices of specific components,  which(vec %% 2 == 0)	# 1 3 -# grab just the first or last entry in the vector, +# grab just the first or last few entries in the vector,  head(vec, 1)	# 8 -tail(vec, 1)	# 11 +tail(vec, w)	# 10 11  # or figure out if a certain value is in the vector  any(vec == 10) # TRUE  # If an index "goes over" you'll get NA: @@ -358,7 +358,7 @@ length(vec)	# 4  vec * 4	# 16 20 24 28  vec[2:3] * 5	# 25 30  any(vec[2:3] == 8) # FALSE -# and there are many built-in functions to summarize vectors +# and R has many built-in functions to summarize vectors  mean(vec)	# 9.5  var(vec)	# 1.666667  sd(vec)		# 1.290994 @@ -368,6 +368,7 @@ sum(vec)	# 38  # Some more nice built-ins:  5:15	# 5  6  7  8  9 10 11 12 13 14 15  seq(from=0, to=31337, by=1337) +# =>  #  [1]     0  1337  2674  4011  5348  6685  8022  9359 10696 12033 13370 14707  # [13] 16044 17381 18718 20055 21392 22729 24066 25403 26740 28077 29414 30751 @@ -427,11 +428,11 @@ mat3  #      [,1] [,2] [,3] [,4]  # [1,]    1    2    4    5  # [2,]    6    7    0    4 -# Aah, everything of the same class. No coercions. Much better. +# Ah, everything of the same class. No coercions. Much better.  
# TWO-DIMENSIONAL (DIFFERENT CLASSES) -# For columns of different classes, use the data frame +# For columns of different types, use a data frame  # This data structure is so useful for statistical programming,  # a version of it was added to Python in the package "pandas". @@ -465,11 +466,11 @@ students$year	# 3  2  2  1  0 -1  students[,2]	# 3  2  2  1  0 -1  students[,"year"]	# 3  2  2  1  0 -1 -# A popular replacement for the data.frame structure is the data.table +# An augmented version of the data.frame structure is the data.table  # If you're working with huge or panel data, or need to merge a few data  # sets, data.table can be a good choice. Here's a whirlwind tour: -install.packages("data.table") -require(data.table) +install.packages("data.table") # download the package from CRAN +require(data.table) # load it  students <- as.data.table(students)  students # note the slightly different print-out  # => @@ -480,15 +481,17 @@ students # note the slightly different print-out  # 4:    Cho    1     R  # 5:  Draco    0     S  # 6:  Ginny   -1     G -students[name=="Ginny"] +students[name=="Ginny"] # get rows with name == "Ginny"  # =>  #     name year house  # 1: Ginny   -1     G -students[year==2] +students[year==2] # get rows with year == 2  # =>  #      name year house  # 1:   Fred    2     G  # 2: George    2     G +# data.table makes merging two data sets easy +# let's make another data.table to merge with students  founders <- data.table(house=c("G","H","R","S"),                         founder=c("Godric","Helga","Rowena","Salazar"))  founders @@ -500,7 +503,7 @@ founders  # 4:     S Salazar  setkey(students, house)  setkey(founders, house) -students <- founders[students] # merge the two data sets +students <- founders[students] # merge the two data sets by matching "house"  setnames(students, c("house","houseFounderName","studentName","year"))  students[,order(c("name","year","house","houseFounderName")), with=F]  # => @@ -512,9 +515,51 @@ 
students[,order(c("name","year","house","houseFounderName")), with=F]  # 5:         Cho    1     R           Rowena  # 6:       Draco    0     S          Salazar -# MULTI-DIMENSIONAL (ALL OF ONE CLASS) +# data.table makes summary tables easy +# => +# students[,sum(year),by=house] +#    house V1 +# 1:     G  3 +# 2:     H  3 +# 3:     R  1 +# 4:     S  0 + +# To drop a column from a data.frame or data.table, +# assign it the NULL value +students$houseFounderName <- NULL +students +# => +#    studentName year house +# 1:        Fred    2     G +# 2:      George    2     G +# 3:       Ginny   -1     G +# 4:      Cedric    3     H +# 5:         Cho    1     R +# 6:       Draco    0     S + +# Drop a row by subsetting +# Using data.table: +students[studentName != "Draco"] +# => +#    house studentName year +# 1:     G        Fred    2 +# 2:     G      George    2 +# 3:     G       Ginny   -1 +# 4:     H      Cedric    3 +# 5:     R         Cho    1 +# Using data.frame: +students <- as.data.frame(students) +students[students$house != "G",] +# => +#   house houseFounderName studentName year +# 4     H            Helga      Cedric    3 +# 5     R           Rowena         Cho    1 +# 6     S          Salazar       Draco    0 + +# MULTI-DIMENSIONAL (ALL ELEMENTS OF ONE TYPE)  # Arrays creates n-dimensional tables +# All elements must be of the same type  # You can make a two-dimensional table (sort of like a matrix)  array(c(c(1,2,4,5),c(8,9,3,6)), dim=c(2,4))  # => | 
