From c351cab657897229e1d6b826a53399bca129b489 Mon Sep 17 00:00:00 2001 From: Hank Hester Date: Fri, 15 Jan 2021 02:30:42 -0800 Subject: Remove binary gender example from R (#4082) --- r.html.markdown | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'r.html.markdown') diff --git a/r.html.markdown b/r.html.markdown index 3e855602..79af40ce 100644 --- a/r.html.markdown +++ b/r.html.markdown @@ -255,16 +255,16 @@ c('Z', 'o', 'r', 'r', 'o') == "Z" # TRUE FALSE FALSE FALSE FALSE # FACTORS # The factor class is for categorical data -# Factors can be ordered (like childrens' grade levels) or unordered (like gender) -factor(c("female", "female", "male", NA, "female")) -# female female male female -# Levels: female male +# Factors can be ordered (like childrens' grade levels) or unordered (like colors) +factor(c("blue", "blue", "green", NA, "blue")) +# blue blue green blue +# Levels: blue green # The "levels" are the values the categorical data can take # Note that missing data does not enter the levels -levels(factor(c("male", "male", "female", NA, "female"))) # "female" "male" +levels(factor(c("green", "green", "blue", NA, "blue"))) # "blue" "green" # If a factor vector has length 1, its levels will have length 1, too -length(factor("male")) # 1 -length(levels(factor("male"))) # 1 +length(factor("green")) # 1 +length(levels(factor("green"))) # 1 # Factors are commonly seen in data frames, a data structure we will cover later data(infert) # "Infertility after Spontaneous and Induced Abortion" levels(infert$education) # "0-5yrs" "6-11yrs" "12+ yrs" -- cgit v1.2.3 From 5828962380f3a573b4e7a47b440d651890df628b Mon Sep 17 00:00:00 2001 From: Shivanshu <53912074+shivanshu-semwal@users.noreply.github.com> Date: Mon, 3 Jan 2022 21:53:08 +0530 Subject: Fixed output. (#4157) Fixed output provided in comments beside the command. --- r.html.markdown | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'r.html.markdown') diff --git a/r.html.markdown b/r.html.markdown index 79af40ce..e90d5a97 100644 --- a/r.html.markdown +++ b/r.html.markdown @@ -377,8 +377,8 @@ vec[6] # NA # You can find the length of your vector with length() length(vec) # 4 # You can perform operations on entire vectors or subsets of vectors -vec * 4 # 16 20 24 28 -vec[2:3] * 5 # 25 30 +vec * 4 # 32 36 40 44 +vec[2:3] * 5 # 45 50 any(vec[2:3] == 8) # FALSE # and R has many built-in functions to summarize vectors mean(vec) # 9.5 -- cgit v1.2.3 From 231388888838b8a00880a431bbc949f525293b88 Mon Sep 17 00:00:00 2001 From: Crystal-RainSlide <16851802+Crystal-RainSlide@users.noreply.github.com> Date: Tue, 8 Mar 2022 16:56:31 +0800 Subject: [R/en] Format R code --- r.html.markdown | 279 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 139 insertions(+), 140 deletions(-) (limited to 'r.html.markdown') diff --git a/r.html.markdown b/r.html.markdown index e90d5a97..66e9ba74 100644 --- a/r.html.markdown +++ b/r.html.markdown @@ -29,13 +29,13 @@ R is a statistical computing language. It has lots of libraries for uploading an # R without understanding anything about programming. Do not worry # about understanding everything the code does. Just enjoy! -data() # browse pre-loaded data sets -data(rivers) # get this one: "Lengths of Major North American Rivers" -ls() # notice that "rivers" now appears in the workspace -head(rivers) # peek at the data set +data() # browse pre-loaded data sets +data(rivers) # get this one: "Lengths of Major North American Rivers" +ls() # notice that "rivers" now appears in the workspace +head(rivers) # peek at the data set # 735 320 325 392 524 450 -length(rivers) # how many rivers were measured? +length(rivers) # how many rivers were measured? # 141 summary(rivers) # what are some summary statistics? # Min. 1st Qu. Median Mean 3rd Qu. Max. @@ -91,14 +91,15 @@ stem(log(rivers)) # Notice that the data are neither normal nor log-normal! # 82 | 2 # make a histogram: -hist(rivers, col="#333333", border="white", breaks=25) # play around with these parameters -hist(log(rivers), col="#333333", border="white", breaks=25) # you'll do more plotting later +hist(rivers, col = "#333333", border = "white", breaks = 25) +hist(log(rivers), col = "#333333", border = "white", breaks = 25) +# play around with these parameters, you'll do more plotting later # Here's another neat data set that comes pre-loaded. R has tons of these. data(discoveries) -plot(discoveries, col="#333333", lwd=3, xlab="Year", +plot(discoveries, col = "#333333", lwd = 3, xlab = "Year", main="Number of important discoveries per year") -plot(discoveries, col="#333333", lwd=3, type = "h", xlab="Year", +plot(discoveries, col = "#333333", lwd = 3, type = "h", xlab = "Year", main="Number of important discoveries per year") # Rather than leaving the default ordering (by year), @@ -109,7 +110,7 @@ sort(discoveries) # [51] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 # [76] 4 4 4 4 5 5 5 5 5 5 5 6 6 6 6 6 6 7 7 7 7 8 9 10 12 -stem(discoveries, scale=2) +stem(discoveries, scale = 2) # # The decimal point is at the | # @@ -134,7 +135,7 @@ summary(discoveries) # 0.0 2.0 3.0 3.1 4.0 12.0 # Roll a die a few times -round(runif(7, min=.5, max=6.5)) +round(runif(7, min = .5, max = 6.5)) # 1 4 6 1 4 6 4 # Your numbers will differ from mine unless we set the same random.seed(31337) @@ -157,69 +158,68 @@ rnorm(9) # INTEGERS # Long-storage integers are written with L -5L # 5 -class(5L) # "integer" +5L # 5 +class(5L) # "integer" # (Try ?class for more information on the class() function.) # In R, every single value, like 5L, is considered a vector of length 1 -length(5L) # 1 +length(5L) # 1 # You can have an integer vector with length > 1 too: -c(4L, 5L, 8L, 3L) # 4 5 8 3 -length(c(4L, 5L, 8L, 3L)) # 4 -class(c(4L, 5L, 8L, 3L)) # "integer" +c(4L, 5L, 8L, 3L) # 4 5 8 3 +length(c(4L, 5L, 8L, 3L)) # 4 +class(c(4L, 5L, 8L, 3L)) # "integer" # NUMERICS # A "numeric" is a double-precision floating-point number -5 # 5 -class(5) # "numeric" +5 # 5 +class(5) # "numeric" # Again, everything in R is a vector; # you can make a numeric vector with more than one element -c(3,3,3,2,2,1) # 3 3 3 2 2 1 +c(3, 3, 3, 2, 2, 1) # 3 3 3 2 2 1 # You can use scientific notation too -5e4 # 50000 -6.02e23 # Avogadro's number -1.6e-35 # Planck length +5e4 # 50000 +6.02e23 # Avogadro's number +1.6e-35 # Planck length # You can also have infinitely large or small numbers -class(Inf) # "numeric" -class(-Inf) # "numeric" +class(Inf) # "numeric" +class(-Inf) # "numeric" # You might use "Inf", for example, in integrate(dnorm, 3, Inf); # this obviates Z-score tables. # BASIC ARITHMETIC # You can do arithmetic with numbers # Doing arithmetic on a mix of integers and numerics gives you another numeric -10L + 66L # 76 # integer plus integer gives integer -53.2 - 4 # 49.2 # numeric minus numeric gives numeric -2.0 * 2L # 4 # numeric times integer gives numeric -3L / 4 # 0.75 # integer over numeric gives numeric -3 %% 2 # 1 # the remainder of two numerics is another numeric +10L + 66L # 76 # integer plus integer gives integer +53.2 - 4 # 49.2 # numeric minus numeric gives numeric +2.0 * 2L # 4 # numeric times integer gives numeric +3L / 4 # 0.75 # integer over numeric gives numeric +3 %% 2 # 1 # the remainder of two numerics is another numeric # Illegal arithmetic yields you a "not-a-number": -0 / 0 # NaN -class(NaN) # "numeric" +0 / 0 # NaN +class(NaN) # "numeric" # You can do arithmetic on two vectors with length greater than 1, # so long as the larger vector's length is an integer multiple of the smaller -c(1,2,3) + c(1,2,3) # 2 4 6 +c(1, 2, 3) + c(1, 2, 3) # 2 4 6 # Since a single number is a vector of length one, scalars are applied # elementwise to vectors -(4 * c(1,2,3) - 2) / 2 # 1 3 5 +(4 * c(1, 2, 3) - 2) / 2 # 1 3 5 # Except for scalars, use caution when performing arithmetic on vectors with # different lengths. Although it can be done, -c(1,2,3,1,2,3) * c(1,2) # 1 4 3 2 2 6 -# Matching lengths is better practice and easier to read -c(1,2,3,1,2,3) * c(1,2,1,2,1,2) +c(1, 2, 3, 1, 2, 3) * c(1, 2) # 1 4 3 2 2 6 +# Matching lengths is better practice and easier to read most times +c(1, 2, 3, 1, 2, 3) * c(1, 2, 1, 2, 1, 2) # 1 4 3 2 2 6 # CHARACTERS # There's no difference between strings and characters in R -"Horatio" # "Horatio" -class("Horatio") # "character" -class('H') # "character" +"Horatio" # "Horatio" +class("Horatio") # "character" +class('H') # "character" # Those were both character vectors of length 1 # Here is a longer one: c('alef', 'bet', 'gimmel', 'dalet', 'he') -# => -# "alef" "bet" "gimmel" "dalet" "he" +# => "alef" "bet" "gimmel" "dalet" "he" length(c("Call","me","Ishmael")) # 3 # You can do regex operations on character vectors: -substr("Fortuna multis dat nimis, nulli satis.", 9, 15) # "multis " +substr("Fortuna multis dat nimis, nulli satis.", 9, 15) # "multis " gsub('u', 'ø', "Fortuna multis dat nimis, nulli satis.") # "Fortøna møltis dat nimis, nølli satis." # R has several built-in character vectors: letters @@ -230,32 +230,33 @@ month.abb # "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "D # LOGICALS # In R, a "logical" is a boolean -class(TRUE) # "logical" -class(FALSE) # "logical" + +class(TRUE) # "logical" +class(FALSE) # "logical" # Their behavior is normal -TRUE == TRUE # TRUE -TRUE == FALSE # FALSE -FALSE != FALSE # FALSE -FALSE != TRUE # TRUE +TRUE == TRUE # TRUE +TRUE == FALSE # FALSE +FALSE != FALSE # FALSE +FALSE != TRUE # TRUE # Missing data (NA) is logical, too -class(NA) # "logical" +class(NA) # "logical" # Use | and & for logic operations. # OR -TRUE | FALSE # TRUE +TRUE | FALSE # TRUE # AND -TRUE & FALSE # FALSE +TRUE & FALSE # FALSE # Applying | and & to vectors returns elementwise logic operations -c(TRUE,FALSE,FALSE) | c(FALSE,TRUE,FALSE) # TRUE TRUE FALSE -c(TRUE,FALSE,TRUE) & c(FALSE,TRUE,TRUE) # FALSE FALSE TRUE +c(TRUE, FALSE, FALSE) | c(FALSE, TRUE, FALSE) # TRUE TRUE FALSE +c(TRUE, FALSE, TRUE) & c(FALSE, TRUE, TRUE) # FALSE FALSE TRUE # You can test if x is TRUE -isTRUE(TRUE) # TRUE +isTRUE(TRUE) # TRUE # Here we get a logical vector with many elements: -c('Z', 'o', 'r', 'r', 'o') == "Zorro" # FALSE FALSE FALSE FALSE FALSE -c('Z', 'o', 'r', 'r', 'o') == "Z" # TRUE FALSE FALSE FALSE FALSE +c('Z', 'o', 'r', 'r', 'o') == "Zorro" # FALSE FALSE FALSE FALSE FALSE +c('Z', 'o', 'r', 'r', 'o') == "Z" # TRUE FALSE FALSE FALSE FALSE # FACTORS # The factor class is for categorical data -# Factors can be ordered (like childrens' grade levels) or unordered (like colors) +# Factors can be ordered (like grade levels) or unordered (like colors) factor(c("blue", "blue", "green", NA, "blue")) # blue blue green blue # Levels: blue green @@ -263,31 +264,27 @@ factor(c("blue", "blue", "green", NA, "blue")) # Note that missing data does not enter the levels levels(factor(c("green", "green", "blue", NA, "blue"))) # "blue" "green" # If a factor vector has length 1, its levels will have length 1, too -length(factor("green")) # 1 +length(factor("green")) # 1 length(levels(factor("green"))) # 1 # Factors are commonly seen in data frames, a data structure we will cover later -data(infert) # "Infertility after Spontaneous and Induced Abortion" +data(infert) # "Infertility after Spontaneous and Induced Abortion" levels(infert$education) # "0-5yrs" "6-11yrs" "12+ yrs" # NULL # "NULL" is a weird one; use it to "blank out" a vector -class(NULL) # NULL +class(NULL) # NULL parakeet = c("beak", "feathers", "wings", "eyes") -parakeet -# => -# [1] "beak" "feathers" "wings" "eyes" +parakeet # "beak" "feathers" "wings" "eyes" parakeet <- NULL -parakeet -# => -# NULL +parakeet # NULL # TYPE COERCION # Type-coercion is when you force a value to take on a different type -as.character(c(6, 8)) # "6" "8" -as.logical(c(1,0,1,1)) # TRUE FALSE TRUE TRUE +as.character(c(6, 8)) # "6" "8" +as.logical(c(1,0,1,1)) # TRUE FALSE TRUE TRUE # If you put elements of different types into a vector, weird coercions happen: -c(TRUE, 4) # 1 4 -c("dog", TRUE, 4) # "dog" "TRUE" "4" +c(TRUE, 4) # 1 4 +c("dog", TRUE, 4) # "dog" "TRUE" "4" as.numeric("Bilbo") # => # [1] NA @@ -309,14 +306,15 @@ as.numeric("Bilbo") # VARIABLES # Lots of way to assign stuff: -x = 5 # this is possible -y <- "1" # this is preferred -TRUE -> z # this works but is weird +x = 5 # this is possible +y <- "1" # this is preferred traditionally +TRUE -> z # this works but is weird +# Refer to the Internet for the behaviors and preferences about them. # LOOPS # We've got for loops for (i in 1:4) { - print(i) + print(i) } # We've got while loops a <- 10 @@ -341,11 +339,11 @@ if (4 > 3) { # FUNCTIONS # Defined like so: jiggle <- function(x) { - x = x + rnorm(1, sd=.1) #add in a bit of (controlled) noise + x = x + rnorm(1, sd=.1) # add in a bit of (controlled) noise return(x) } # Called like any other R function: -jiggle(5) # 5±ε. After set.seed(2716057), jiggle(5)==5.005043 +jiggle(5) # 5±ε. After set.seed(2716057), jiggle(5)==5.005043 @@ -357,39 +355,39 @@ jiggle(5) # 5±ε. After set.seed(2716057), jiggle(5)==5.005043 # Let's start from the very beginning, and with something you already know: vectors. vec <- c(8, 9, 10, 11) -vec # 8 9 10 11 +vec # 8 9 10 11 # We ask for specific elements by subsetting with square brackets # (Note that R starts counting from 1) -vec[1] # 8 -letters[18] # "r" -LETTERS[13] # "M" -month.name[9] # "September" -c(6, 8, 7, 5, 3, 0, 9)[3] # 7 +vec[1] # 8 +letters[18] # "r" +LETTERS[13] # "M" +month.name[9] # "September" +c(6, 8, 7, 5, 3, 0, 9)[3] # 7 # We can also search for the indices of specific components, -which(vec %% 2 == 0) # 1 3 +which(vec %% 2 == 0) # 1 3 # grab just the first or last few entries in the vector, -head(vec, 1) # 8 -tail(vec, 2) # 10 11 +head(vec, 1) # 8 +tail(vec, 2) # 10 11 # or figure out if a certain value is in the vector -any(vec == 10) # TRUE +any(vec == 10) # TRUE # If an index "goes over" you'll get NA: -vec[6] # NA +vec[6] # NA # You can find the length of your vector with length() -length(vec) # 4 +length(vec) # 4 # You can perform operations on entire vectors or subsets of vectors -vec * 4 # 32 36 40 44 -vec[2:3] * 5 # 45 50 -any(vec[2:3] == 8) # FALSE +vec * 4 # 32 36 40 44 +vec[2:3] * 5 # 45 50 +any(vec[2:3] == 8) # FALSE # and R has many built-in functions to summarize vectors -mean(vec) # 9.5 -var(vec) # 1.666667 -sd(vec) # 1.290994 -max(vec) # 11 -min(vec) # 8 -sum(vec) # 38 +mean(vec) # 9.5 +var(vec) # 1.666667 +sd(vec) # 1.290994 +max(vec) # 11 +min(vec) # 8 +sum(vec) # 38 # Some more nice built-ins: -5:15 # 5 6 7 8 9 10 11 12 13 14 15 -seq(from=0, to=31337, by=1337) +5:15 # 5 6 7 8 9 10 11 12 13 14 15 +seq(from = 0, to = 31337, by = 1337) # => # [1] 0 1337 2674 4011 5348 6685 8022 9359 10696 12033 13370 14707 # [13] 16044 17381 18718 20055 21392 22729 24066 25403 26740 28077 29414 30751 @@ -397,7 +395,7 @@ seq(from=0, to=31337, by=1337) # TWO-DIMENSIONAL (ALL ONE CLASS) # You can make a matrix out of entries all of the same type like so: -mat <- matrix(nrow = 3, ncol = 2, c(1,2,3,4,5,6)) +mat <- matrix(nrow = 3, ncol = 2, c(1, 2, 3, 4, 5, 6)) mat # => # [,1] [,2] @@ -405,13 +403,13 @@ mat # [2,] 2 5 # [3,] 3 6 # Unlike a vector, the class of a matrix is "matrix", no matter what's in it -class(mat) # => "matrix" +class(mat) # "matrix" # Ask for the first row -mat[1,] # 1 4 +mat[1, ] # 1 4 # Perform operation on the first column -3 * mat[,1] # 3 6 9 +3 * mat[, 1] # 3 6 9 # Ask for a specific cell -mat[3,2] # 6 +mat[3, 2] # 6 # Transpose the whole matrix t(mat) @@ -437,14 +435,14 @@ mat2 # [2,] "2" "cat" # [3,] "3" "bird" # [4,] "4" "dog" -class(mat2) # matrix +class(mat2) # matrix # Again, note what happened! # Because matrices must contain entries all of the same class, # everything got converted to the character class -c(class(mat2[,1]), class(mat2[,2])) +c(class(mat2[, 1]), class(mat2[, 2])) # rbind() sticks vectors together row-wise to make a matrix -mat3 <- rbind(c(1,2,4,5), c(6,7,0,4)) +mat3 <- rbind(c(1, 2, 4, 5), c(6, 7, 0, 4)) mat3 # => # [,1] [,2] [,3] [,4] @@ -458,11 +456,11 @@ mat3 # This data structure is so useful for statistical programming, # a version of it was added to Python in the package "pandas". -students <- data.frame(c("Cedric","Fred","George","Cho","Draco","Ginny"), - c(3,2,2,1,0,-1), - c("H", "G", "G", "R", "S", "G")) +students <- data.frame(c("Cedric", "Fred", "George", "Cho", "Draco", "Ginny"), + c( 3, 2, 2, 1, 0, -1), + c( "H", "G", "G", "R", "S", "G")) names(students) <- c("name", "year", "house") # name the columns -class(students) # "data.frame" +class(students) # "data.frame" students # => # name year house @@ -472,21 +470,21 @@ students # 4 Cho 1 R # 5 Draco 0 S # 6 Ginny -1 G -class(students$year) # "numeric" -class(students[,3]) # "factor" +class(students$year) # "numeric" +class(students[,3]) # "factor" # find the dimensions -nrow(students) # 6 -ncol(students) # 3 -dim(students) # 6 3 +nrow(students) # 6 +ncol(students) # 3 +dim(students) # 6 3 # The data.frame() function converts character vectors to factor vectors # by default; turn this off by setting stringsAsFactors = FALSE when # you create the data.frame ?data.frame # There are many twisty ways to subset data frames, all subtly unalike -students$year # 3 2 2 1 0 -1 -students[,2] # 3 2 2 1 0 -1 -students[,"year"] # 3 2 2 1 0 -1 +students$year # 3 2 2 1 0 -1 +students[, 2] # 3 2 2 1 0 -1 +students[, "year"] # 3 2 2 1 0 -1 # An augmented version of the data.frame structure is the data.table # If you're working with huge or panel data, or need to merge a few data @@ -503,19 +501,19 @@ students # note the slightly different print-out # 4: Cho 1 R # 5: Draco 0 S # 6: Ginny -1 G -students[name=="Ginny"] # get rows with name == "Ginny" +students[name == "Ginny"] # get rows with name == "Ginny" # => # name year house # 1: Ginny -1 G -students[year==2] # get rows with year == 2 +students[year == 2] # get rows with year == 2 # => # name year house # 1: Fred 2 G # 2: George 2 G # data.table makes merging two data sets easy # let's make another data.table to merge with students -founders <- data.table(house=c("G","H","R","S"), - founder=c("Godric","Helga","Rowena","Salazar")) +founders <- data.table(house = c("G" , "H" , "R" , "S"), + founder = c("Godric", "Helga", "Rowena", "Salazar")) founders # => # house founder @@ -526,8 +524,8 @@ founders setkey(students, house) setkey(founders, house) students <- founders[students] # merge the two data sets by matching "house" -setnames(students, c("house","houseFounderName","studentName","year")) -students[,order(c("name","year","house","houseFounderName")), with=F] +setnames(students, c("house", "houseFounderName", "studentName", "year")) +students[, order(c("name", "year", "house", "houseFounderName")), with = F] # => # studentName year house houseFounderName # 1: Fred 2 G Godric @@ -538,7 +536,7 @@ students[,order(c("name","year","house","houseFounderName")), with=F] # 6: Draco 0 S Salazar # data.table makes summary tables easy -students[,sum(year),by=house] +students[, sum(year), by = house] # => # house V1 # 1: G 3 @@ -571,7 +569,7 @@ students[studentName != "Draco"] # 5: R Cho 1 # Using data.frame: students <- as.data.frame(students) -students[students$house != "G",] +students[students$house != "G", ] # => # house houseFounderName studentName year # 4 H Helga Cedric 3 @@ -583,13 +581,13 @@ students[students$house != "G",] # Arrays creates n-dimensional tables # All elements must be of the same type # You can make a two-dimensional table (sort of like a matrix) -array(c(c(1,2,4,5),c(8,9,3,6)), dim=c(2,4)) +array(c(c(1, 2, 4, 5), c(8, 9, 3, 6)), dim = c(2, 4)) # => # [,1] [,2] [,3] [,4] # [1,] 1 4 8 3 # [2,] 2 5 9 6 # You can use array to make three-dimensional matrices too -array(c(c(c(2,300,4),c(8,9,0)),c(c(5,60,0),c(66,7,847))), dim=c(3,2,2)) +array(c(c(c(2, 300, 4), c(8, 9, 0)), c(c(5, 60, 0), c(66, 7, 847))), dim = c(3, 2, 2)) # => # , , 1 # @@ -609,7 +607,7 @@ array(c(c(c(2,300,4),c(8,9,0)),c(c(5,60,0),c(66,7,847))), dim=c(3,2,2)) # Finally, R has lists (of vectors) list1 <- list(time = 1:40) -list1$price = c(rnorm(40,.5*list1$time,4)) # random +list1$price = c(rnorm(40, .5*list1$time, 4)) # random list1 # You can get items in the list like so list1$time # one way @@ -682,7 +680,7 @@ write.csv(pets, "pets2.csv") # to make a new .csv file ######################### # Linear regression! -linearModel <- lm(price ~ time, data = list1) +linearModel <- lm(price ~ time, data = list1) linearModel # outputs result of regression # => # Call: @@ -719,7 +717,7 @@ summary(linearModel)$coefficients # another way to extract results # Estimate Std. Error t value Pr(>|t|) # (Intercept) 0.1452662 1.50084246 0.09678975 9.234021e-01 # time 0.4943490 0.06379348 7.74920901 2.440008e-09 -summary(linearModel)$coefficients[,4] # the p-values +summary(linearModel)$coefficients[, 4] # the p-values # => # (Intercept) time # 9.234021e-01 2.440008e-09 @@ -728,8 +726,7 @@ summary(linearModel)$coefficients[,4] # the p-values # Logistic regression set.seed(1) list1$success = rbinom(length(list1$time), 1, .5) # random binary -glModel <- glm(success ~ time, data = list1, - family=binomial(link="logit")) +glModel <- glm(success ~ time, data = list1, family=binomial(link="logit")) glModel # outputs result of logistic regression # => # Call: glm(formula = success ~ time, @@ -745,8 +742,10 @@ glModel # outputs result of logistic regression summary(glModel) # more verbose output from the regression # => # Call: -# glm(formula = success ~ time, -# family = binomial(link = "logit"), data = list1) +# glm( +# formula = success ~ time, +# family = binomial(link = "logit"), +# data = list1) # Deviance Residuals: # Min 1Q Median 3Q Max @@ -780,7 +779,7 @@ plot(linearModel) # Histograms! hist(rpois(n = 10000, lambda = 5), col = "thistle") # Barplots! -barplot(c(1,4,5,1,2), names.arg = c("red","blue","purple","green","yellow")) +barplot(c(1, 4, 5, 1, 2), names.arg = c("red", "blue", "purple", "green", "yellow")) # GGPLOT2 # But these are not even the prettiest of R's plots @@ -788,10 +787,10 @@ barplot(c(1,4,5,1,2), names.arg = c("red","blue","purple","green","yellow")) install.packages("ggplot2") require(ggplot2) ?ggplot2 -pp <- ggplot(students, aes(x=house)) +pp <- ggplot(students, aes(x = house)) pp + geom_bar() ll <- as.data.table(list1) -pp <- ggplot(ll, aes(x=time,price)) +pp <- ggplot(ll, aes(x = time, price)) pp + geom_point() # ggplot2 has excellent documentation (available http://docs.ggplot2.org/current/) -- cgit v1.2.3 From fbdc0115e9d53cf1b57f0bacd85094402b5af608 Mon Sep 17 00:00:00 2001 From: Marcel Ribeiro Dantas Date: Sat, 9 Apr 2022 00:14:18 +0200 Subject: Update return of class(mat) according to R 4.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A matrix is the special case of a two-dimensional ‘array’. Since R 4.0.0, ‘inherits(m, "array")’ is true for a ‘matrix’ ‘m’, which makes class(mat) return "matrix" "array", not only "matrix" as before. --- r.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'r.html.markdown') diff --git a/r.html.markdown b/r.html.markdown index e90d5a97..c3422d2d 100644 --- a/r.html.markdown +++ b/r.html.markdown @@ -405,7 +405,7 @@ mat # [2,] 2 5 # [3,] 3 6 # Unlike a vector, the class of a matrix is "matrix", no matter what's in it -class(mat) # => "matrix" +class(mat) # => "matrix" "array" # Ask for the first row mat[1,] # 1 4 # Perform operation on the first column -- cgit v1.2.3 From c4c53cc95eeb792e25e04871ca7d7b13b11193e9 Mon Sep 17 00:00:00 2001 From: Marcel Ribeiro Dantas Date: Sat, 9 Apr 2022 00:21:01 +0200 Subject: Fix warning about stringsAsFactors In R 4.0.0, stringsAsFactors default value changed from TRUE to FALSE. This commit fixes the warning message in the current file to make this clear for readers. This commit also updates the list of contributors for this file. --- r.html.markdown | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'r.html.markdown') diff --git a/r.html.markdown b/r.html.markdown index c3422d2d..e42b1f5e 100644 --- a/r.html.markdown +++ b/r.html.markdown @@ -4,6 +4,7 @@ contributors: - ["e99n09", "http://github.com/e99n09"] - ["isomorphismes", "http://twitter.com/isomorphisms"] - ["kalinn", "http://github.com/kalinn"] + - ["mribeirodantas", "http://github.com/mribeirodantas"] filename: learnr.r --- @@ -478,9 +479,10 @@ class(students[,3]) # "factor" nrow(students) # 6 ncol(students) # 3 dim(students) # 6 3 -# The data.frame() function converts character vectors to factor vectors -# by default; turn this off by setting stringsAsFactors = FALSE when -# you create the data.frame +# The data.frame() function used to convert character vectors to factor +# vectors by default; This has changed in R 4.0.0. If your R version is +# older, turn this off by setting stringsAsFactors = FALSE when you +# create the data.frame ?data.frame # There are many twisty ways to subset data frames, all subtly unalike -- cgit v1.2.3