Intermediate R programming from Datacamp
In this chapter, we will learn about conditional statements, loops, and functions to power your own R scripts.
Conditional and control flow
# Equality and inequality operators (== and !=)
# Two logicals: TRUE and FALSE are different values
TRUE == FALSE
# Two numerics: both sides are evaluated before they are compared
(-6 * 14) != (17 - 101)
# Two character strings: the comparison is case-sensitive
"useR" == "user"
# A logical against a numeric: TRUE is coerced to 1 before comparing
TRUE == 1
[1] TRUE
# Greater and less than:
# Numerics: -28 on the left against -9 on the right
(-6 * 5 + 2) >= (-10 + 1)
# Character strings are ordered alphabetically; a prefix sorts first
"raining" <= "raining dogs"
# Logicals: TRUE counts as 1 and FALSE as 0
TRUE > FALSE
[1] TRUE
# Compare vectors:
# Profile views per day on each network (seven values each)
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)
# Popular days: LinkedIn views above 15
linkedin > 15
# Quiet days: LinkedIn views of 5 or fewer
linkedin <= 5
# Days on which Facebook got fewer views than LinkedIn
facebook < linkedin
[1] FALSE TRUE TRUE FALSE FALSE TRUE FALSE
# Compare matrices:
# The social data: one vector of daily views per network
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)
# Stack the two vectors as rows: row 1 is linkedin, row 2 is facebook
# (deparse.level = 0 keeps the matrix free of row names)
views <- rbind(linkedin, facebook, deparse.level = 0)
# Which cells of views are exactly 13?
views == 13
# Which cells of views are at most 14?
views <= 14
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] FALSE TRUE TRUE TRUE TRUE FALSE TRUE
[2,] FALSE TRUE TRUE FALSE TRUE TRUE TRUE
# & and | :
# The linkedin and last variables are already defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
last <- tail(linkedin, 1)
# Is last under 5 or above 10?
# (the two comparisons must point away from the 5-10 band; writing
# last > 5 | last < 10 instead would be TRUE for every number)
last < 5 | last > 10
# Is last between 15 (exclusive) and 20 (inclusive)?
last > 15 & last <= 20
[1] FALSE
# & and | 2 :
# The social data (linkedin, facebook, views) has been created for you
# Days with facebook below 10 while linkedin exceeds 10
facebook < 10 & linkedin > 10
# Days on which at least one network was visited 12 times or more
facebook >= 12 | linkedin >= 12
# Cells of views in the range (11, 14]: above 11 and at most 14
views <= 14 & views > 11
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] FALSE FALSE TRUE FALSE FALSE FALSE TRUE
[2,] FALSE FALSE FALSE FALSE FALSE TRUE TRUE
# Blend it all together:
# li_df is pre-loaded in your workspace
# Select the second column, named day2, from li_df: second
second <- li_df[, "day2"]
# Build a logical vector, TRUE if value in second is extreme: extremes
# (the comparison already yields a logical vector, so comparing it
# with TRUE again adds nothing)
extremes <- second > 25 | second < 5
# Count the number of TRUEs in extremes
sum(extremes)
[1] 16
# The if statement:
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14
# Examine the if statement for medium
if (medium == "LinkedIn") {
print("Showing LinkedIn information")
}
[1] "Showing LinkedIn information"
# Write the if statement for num_views
if (num_views > 15) {
print("You are popular!")
}
# Add an else:
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14
# Control structure for medium
if (medium == "LinkedIn") {
print("Showing LinkedIn information")
} else {
print("Unknown medium")
}
[1] "Showing LinkedIn information"
# Control structure for num_views
if (num_views > 15) {
print("You're popular!")
} else{
print("Try to be more visible!")
}
[1] "Try to be more visible!"
# Customize further else if:
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14
# Control structure for medium
if (medium == "LinkedIn") {
print("Showing LinkedIn information")
} else if (medium == "Facebook") {
# Add code to print correct string when condition is TRUE
print("Showing Facebook information")
} else {
print("Unknown medium")
}
[1] "Showing LinkedIn information"
# Control structure for num_views
# num_views is a single number, so the scalar, short-circuiting && is used
# inside if(); the element-wise & is meant for building vector masks.
if (num_views > 15) {
  print("You're popular!")
} else if (num_views <= 15 && num_views > 10) {
  # Add code to print correct string when condition is TRUE
  print("Your number of views is average")
} else {
  print("Try to be more visible!")
}
[1] "Your number of views is average"
# Else if 2.0:
# Variables related to your last day of recordings
li <- 15
fb <- 9
# Code the control-flow construct
# sms is the combined view count: doubled when both networks reach 15,
# halved when both stay below 10, unchanged otherwise.
# li and fb are single numbers, so the scalar && is used inside if().
if (li >= 15 && fb >= 15) {
  sms <- 2 * (li + fb)
} else if (li < 10 && fb < 10) {
  sms <- 0.5 * (li + fb)
} else {
  sms <- li + fb
}
print(sms)
[1] 24
Loops
While loop
# Initialize the speed variable
speed <- 64
# Keep braking in steps of 7 for as long as speed is above 30
while (speed > 30) {
  # The message is already one complete string, so it is printed directly
  print("Slow down!")
  speed <- speed - 7
}
# Print out the speed variable
speed
[1] 29
Throw in more conditionals
# Initialize the speed variable
speed <- 64
# Extend/adapt the while loop
# Above 48 the speed drops by 11 per pass, otherwise by 6
while (speed > 30) {
  print(paste("Your speed is", speed))
  if (speed > 48) {
    # Fixed messages need no paste(); only the line above joins pieces
    print("Slow down big time!")
    speed <- speed - 11
  } else {
    print("Slow down!")
    speed <- speed - 6
  }
}
[1] "Your speed is 64"
[1] "Slow down big time!"
[1] "Your speed is 53"
[1] "Slow down big time!"
[1] "Your speed is 42"
[1] "Slow down!"
[1] "Your speed is 36"
[1] "Slow down!"
Stop the while: break
# Start from a speed that is already beyond the break threshold
speed <- 88
while (speed > 30) {
  print(paste("Your speed is", speed))
  # Leave the loop at once when speed exceeds 80
  if (speed > 80) {
    break
  }
  # Otherwise pick the warning and the braking step from the current speed
  braking_hard <- speed > 48
  print(if (braking_hard) "Slow down big time!" else "Slow down!")
  step <- if (braking_hard) 11 else 6
  speed <- speed - step
}
[1] "Your speed is 88"
Build a while loop from scratch
# Start counting at 1
i <- 1
# Print three times i for i up to 10, but stop right after the first
# value of i that is divisible by 8
while (i <= 10) {
  print(i * 3)
  if (i %% 8 != 0) {
    i <- i + 1
  } else {
    break
  }
}
[1] 3
[1] 6
[1] 9
[1] 12
[1] 15
[1] 18
[1] 21
[1] 24
For loop
primes <- c(2, 3, 5, 7, 11, 13)
# loop version 1
for (p in primes) {
print(p)
}
[1] 2
[1] 3
[1] 5
[1] 7
[1] 11
[1] 13
# loop version 2
# seq_along() yields the valid positions of primes and, unlike
# 1:length(primes), gives no iterations at all for an empty vector
for (i in seq_along(primes)) {
  print(primes[i])
}
[1] 2
[1] 3
[1] 5
[1] 7
[1] 11
[1] 13
Loop over a vector
# The linkedin vector has already been defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
# Loop version 1: iterate over the values themselves
for (lin in linkedin) {
  print(lin)
}
# Loop version 2: iterate over the positions; seq_along() stays correct
# (zero iterations) for an empty vector, where 1:length(x) would be c(1, 0)
for (i in seq_along(linkedin)) {
  print(linkedin[i])
}
[1] 16
[1] 9
[1] 13
[1] 5
[1] 2
[1] 17
[1] 14
Loop over a list
# The nyc list is already specified
nyc <- list(pop = 8405837,
boroughs = c("Manhattan", "Bronx", "Brooklyn",
"Queens", "Staten Island"),
capital = FALSE)
# Loop version 1
for( n in nyc){
print(n)
}
[1] 8405837
[1] "Manhattan" "Bronx" "Brooklyn" "Queens"
[5] "Staten Island"
[1] FALSE
# Loop version 2
# Walk the positions of the list; [[i]] extracts the element itself rather
# than a one-element list. seq_along() is safe even for an empty list.
for (i in seq_along(nyc)) {
  print(nyc[[i]])
}
[1] 8405837
[1] "Manhattan" "Bronx" "Brooklyn" "Queens"
[5] "Staten Island"
[1] FALSE
Loop over a matrix
# The tic-tac-toe matrix ttt has already been defined for you
[,1] [,2] [,3]
[1,] "O" NA "X"
[2,] NA "O" "O"
[3,] "X" NA "X"
# define the double for loop
# Index the single cell ttt[i, j]: pasting the whole matrix ttt would recycle
# the sentence over all nine cells and print nine strings per iteration.
for (i in seq_len(nrow(ttt))) {
  for (j in seq_len(ncol(ttt))) {
    print(paste("On row", i, "and column", j,
                "the board contains", ttt[i, j]))
  }
}
# Expected output:
# [1] "On row 1 and column 1 the board contains O"
# [1] "On row 1 and column 2 the board contains NA"
# [1] "On row 1 and column 3 the board contains X"
# [1] "On row 2 and column 1 the board contains NA"
# [1] "On row 2 and column 2 the board contains O"
# [1] "On row 2 and column 3 the board contains O"
# [1] "On row 3 and column 1 the board contains X"
# [1] "On row 3 and column 2 the board contains NA"
# [1] "On row 3 and column 3 the board contains X"
Mix it up with control flow
# Daily LinkedIn profile views
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
# For every day: print a verdict (more than 10 views counts as popular),
# then the number of views itself
for (li in linkedin) {
  print(if (li > 10) "You're popular!" else "Be more visible!")
  print(li)
}
[1] "You're popular!"
[1] 16
[1] "Be more visible!"
[1] 9
[1] "You're popular!"
[1] 13
[1] "Be more visible!"
[1] 5
[1] "Be more visible!"
[1] 2
[1] "You're popular!"
[1] 17
[1] "You're popular!"
[1] 14
Next, you break it
# Daily LinkedIn profile views
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
# Same verdict loop as before, extended with break and next
for (li in linkedin) {
  print(if (li > 10) "You're popular!" else "Be more visible!")
  # More than 16 views: give up on the remaining days entirely
  if (li > 16) {
    print("This is ridiculous, I'm outta here!")
    break
  }
  # Fewer than 5 views: skip printing the number and move to the next day
  if (li < 5) {
    print("This is too embarrassing!")
    next
  }
  print(li)
}
[1] "You're popular!"
[1] 16
[1] "Be more visible!"
[1] 9
[1] "You're popular!"
[1] 13
[1] "Be more visible!"
[1] 5
[1] "Be more visible!"
[1] "This is too embarrassing!"
[1] "You're popular!"
[1] "This is ridiculous, I'm outta here!"
Build a for loop from scratch
# Pre-defined variables
rquote <- "r's internals are irrefutably intriguing"
# Splitting on "" breaks the sentence into single characters
chars <- strsplit(rquote, split = "")[[1]]
# Initialize rcount
rcount <- 0
# Count the r's that occur before the first u; stop scanning at that u
for (char in chars) {
  if (char == "r") {
    rcount <- rcount + 1
  } else if (char == "u") {
    break
  }
}
# Print out rcount
print(rcount)
[1] 5
Functions
Function documentation
# Consult the documentation on the mean() function
?mean
# Inspect the arguments of the mean() function
args(mean)
function (x, ...)
NULL
Use a function
# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)
# Calculate average number of views
avg_li <- mean(linkedin)
avg_fb <- mean(facebook)
# Inspect avg_li and avg_fb
print(avg_li)
[1] 10.85714
print(avg_fb)
# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)
# Calculate the mean of the sum
avg_sum <- mean(linkedin+facebook)
# Calculate the trimmed mean of the sum
avg_sum_trimmed <- mean(linkedin+facebook, trim = 0.2)
# Inspect both new variables
print(avg_sum)
print(avg_sum_trimmed)
# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)
# Basic average of linkedin
print(mean(linkedin))
# Advanced average of linkedin
print(mean(linkedin, na.rm = TRUE))
Functions inside functions
# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)
# Calculate the mean absolute deviation
mean(abs(linkedin - facebook), na.rm = TRUE)
Write your own function
# pow_two(): multiply a by itself and return the product
pow_two <- function(a) {
  squared <- a * a
  squared
}
# Try it on 12
pow_two(12)
# sum_abs(): add the absolute value of a to the absolute value of b
sum_abs <- function(a, b) {
  abs_a <- abs(a)
  abs_b <- abs(b)
  abs_a + abs_b
}
# Try it on a negative and a positive number
sum_abs(-2, 3)
[1] 5
# hello(): takes no arguments, prints a greeting and hands back TRUE
hello <- function() {
  print("Hi there!")
  TRUE
}
# Run it once
hello()
# Finish the pow_two() function
# Squares x. When print_info is TRUE (the default) a sentence describing
# the calculation is printed as well; the square is returned either way.
pow_two <- function(x, print_info = TRUE) {
  y <- x^2
  # print_info is already a logical flag, so it is tested directly
  if (print_info) {
    print(paste(x, "to the power two equals", y))
  }
  y
}
pow_two(5)
[1] "5 to the power two equals 25"
R passes arguments by value
# triple() reassigns x only inside the function body; the variable that
# the caller passed in is not modified by the call
triple <- function(x) {
  x <- 3 * x
  x
}
a <- 5
triple(a)
# [1] 15
# a still holds its original value after the call
a
# [1] 5
R you functional?
# The linkedin and facebook vectors are defined here so that the example
# runs on its own and does not pick up the NA-containing versions used above
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)
# Define the interpret function
# num_views must be a single number because if() needs a length-one condition.
# Prints a verdict and returns num_views when it is above 15, otherwise 0.
interpret <- function(num_views) {
  if (num_views > 15) {
    print("You're popular!")
    return(num_views)
  } else {
    print("Try to be more visible!")
    return(0)
  }
}
# Call the interpret function twice, each time on one element
interpret(linkedin[1])
interpret(facebook[2])
[1] "Try to be more visible!"
# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)
# interpret() judges one day's views; interpret_all() calls it per element
# Prints a verdict, then returns num_views if it is above 15 and 0 otherwise
interpret <- function(num_views) {
  if (num_views > 15) {
    print("You're popular!")
    return(num_views)
  }
  print("Try to be more visible!")
  0
}
# interpret_all(): run interpret() on every element of views
# views: vector with data to interpret
# return_sum: return total number of views on popular days?
# Returns the accumulated interpret() results, or NULL when return_sum
# is not TRUE.
interpret_all <- function(views, return_sum = TRUE) {
  popular_total <- 0
  for (day_views in views) {
    popular_total <- popular_total + interpret(day_views)
  }
  if (return_sum != TRUE) {
    return(NULL)
  }
  popular_total
}
# Call the interpret_all() function on both linkedin and facebook
interpret_all(linkedin)
[1] "You're popular!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "You're popular!"
[1] "Try to be more visible!"
interpret_all(facebook)
[1] "You're popular!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "You're popular!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
Load R Packages
# Load the ggplot2 package
library(ggplot2)
# Retry the qplot() function
# Draws weight (wt) against horsepower (hp) from the built-in mtcars data.
# NOTE(review): qplot() is reported as deprecated in recent ggplot2 releases
# (3.4.0 onwards) - confirm against the installed version;
# ggplot(mtcars, aes(wt, hp)) + geom_point() is the long-form equivalent.
qplot(mtcars$wt, mtcars$hp)
# Check out the currently attached packages again
search()
[1] ".GlobalEnv" "package:ggplot2" "package:RBackend"
[4] "package:stats" "package:graphics" "package:grDevices"
[7] "package:utils" "package:datasets" "package:methods"
[10] "Autoloads" "package:base"
The apply family
Use lapply with a built-in R function
# The vector pioneers has already been created for you
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
# Split names from birth year
split_math <- strsplit(pioneers, split = ":")
# Convert to lowercase strings: split_low
split_low <- lapply(split_math,tolower)
# Take a look at the structure of split_low
str(split_low)
List of 4
$ : chr [1:2] "gauss" "1777"
$ : chr [1:2] "bayes" "1702"
$ : chr [1:2] "pascal" "1623"
$ : chr [1:2] "pearson" "1857"
Use lapply with your own function
# Code from previous exercise: split "NAME:year" strings and lowercase them
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
split <- strsplit(pioneers, split = ":")
split_low <- lapply(split, tolower)
# select_first(): keep only the first element of x
select_first <- function(x) {
  x[1]
}
# select_second(): keep only the second element of x
select_second <- function(x) {
  x[2]
}
# Apply select_first() over split_low: names
names <- lapply(X = split_low, FUN = select_first)
# Apply select_second() over split_low: years
years <- lapply(X = split_low, FUN = select_second)
lapply and anonymous functions
# split_low has been created for you
split_low
# Transform: use anonymous function inside lapply
names <- lapply(split_low, function(x){x[1]})
# Transform: use anonymous function inside lapply
years <- lapply(split_low, function(x){x[2]})
Use lapply with additional arguments
# Definition of split_low: split "NAME:year" strings and lowercase them
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
split <- strsplit(pioneers, split = ":")
split_low <- lapply(split, tolower)
# select_el(): generic selector returning the element(s) of x at index
select_el <- function(x, index) {
  x[index]
}
# Use lapply() twice on split_low; index is forwarded to select_el() by name
names <- lapply(split_low, select_el, index = 1)
years <- lapply(split_low, select_el, index = 2)
How to use sapply
# temp has already been defined in the workspace
# Use lapply() to find each day's minimum temperature
lapply(temp, min)
[[1]]
[1] -1
[[2]]
[1] 5
[[3]]
[1] -3
[[4]]
[1] -2
[[5]]
[1] 2
[[6]]
[1] -3
[[7]]
[1] 1
# Use sapply() to find each day's minimum temperature
sapply(temp, min)
[1] -1 5 -3 -2 2 -3 1
# Use lapply() to find each day's maximum temperature
lapply(temp, max)
[[1]]
[1] 9
[[2]]
[1] 13
[[3]]
[1] 8
[[4]]
[1] 7
[[5]]
[1] 9
[[6]]
[1] 9
[[7]]
[1] 9
# Use sapply() to find each day's maximum temperature
sapply(temp, max)
[1] 9 13 8 7 9 9 9
sapply with your own function
# temp is already defined in the workspace
# extremes_avg(): midpoint between the smallest and the largest value of x
extremes_avg <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  (lowest + highest) / 2
}
# Apply extremes_avg() over temp using sapply()
sapply(temp, extremes_avg)
[1] 4.0 9.0 2.5 2.5 5.5 3.0 5.0
# Apply extremes_avg() over temp using lapply()
lapply(temp, extremes_avg)
[[1]]
[1] 4
[[2]]
[1] 9
[[3]]
[1] 2.5
[[4]]
[1] 2.5
[[5]]
[1] 5.5
[[6]]
[1] 3
[[7]]
[1] 5
sapply with function returning vector
# temp is already available in the workspace
# extremes(): named vector holding the minimum ("min") and maximum ("max") of x
extremes <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  c(min = lowest, max = highest)
}
# Apply extremes() over temp with sapply()
sapply(temp, extremes)
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
min -1 5 -3 -2 2 -3 1
max 9 13 8 7 9 9 9
# Apply extremes() over temp with lapply()
lapply(temp, extremes)
[[1]]
min max
-1 9
[[2]]
min max
5 13
[[3]]
min max
-3 8
[[4]]
min max
-2 7
[[5]]
min max
2 9
[[6]]
min max
-3 9
[[7]]
min max
1 9
sapply can't simplify, now what?
# temp is already prepared for you in the workspace
# below_zero(): keep only the elements of x that are smaller than zero
below_zero <- function(x) {
  is_negative <- x < 0
  x[is_negative]
}
# Apply below_zero over temp using sapply(): freezing_s
freezing_s <- sapply(temp,below_zero)
# Apply below_zero over temp using lapply(): freezing_l
freezing_l <- lapply(temp, below_zero)
# Are freezing_s and freezing_l identical?
identical(freezing_l,freezing_s)
[1] TRUE
sapply with functions that return NULL
# temp is already available in the workspace
# print_info(): write the mean of x to the console as a sentence.
# The function's value is whatever cat() returns, which is NULL.
print_info <- function(x) {
  avg_temp <- mean(x)
  cat("The average temperature is", avg_temp, "\n")
}
# Apply print_info() over temp using sapply()
sapply(temp, print_info)
The average temperature is 4.8
The average temperature is 9
The average temperature is 2.2
The average temperature is 2.4
The average temperature is 5.4
The average temperature is 4.6
The average temperature is 4.6
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
[[5]]
NULL
[[6]]
NULL
[[7]]
NULL
# Apply print_info() over temp using lapply()
lapply(temp, print_info)
The average temperature is 4.8
The average temperature is 9
The average temperature is 2.2
The average temperature is 2.4
The average temperature is 5.4
The average temperature is 4.6
The average temperature is 4.6
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
[[5]]
NULL
[[6]]
NULL
[[7]]
NULL
Use vapply
# temp is already available in the workspace
# basics(): named vector with the minimum, mean and maximum of x
basics <- function(x) {
  stats <- c(min(x), mean(x), max(x))
  names(stats) <- c("min", "mean", "max")
  stats
}
# Apply basics() over temp using vapply()
vapply(temp, basics,numeric(3))
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
min -1.0 5 -3.0 -2.0 2.0 -3.0 1.0
mean 4.8 9 2.2 2.4 5.4 4.6 4.6
max 9.0 13 8.0 7.0 9.0 9.0 9.0
# temp is already available in the workspace
# basics(): named vector with the minimum, mean, median and maximum of x
basics <- function(x) {
  c(
    min = min(x),
    mean = mean(x),
    median = median(x),
    max = max(x)
  )
}
# Fix the error:
vapply(temp, basics, numeric(4))
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
min -1.0 5 -3.0 -2.0 2.0 -3.0 1.0
mean 4.8 9 2.2 2.4 5.4 4.6 4.6
median 6.0 9 3.0 2.0 5.0 5.0 4.0
max 9.0 13 8.0 7.0 9.0 9.0 9.0
From sapply to vapply
# temp is already defined in the workspace
# Convert to vapply() expression
sapply(temp, max)
[1] 9 13 8 7 9 9 9
vapply(temp, max, numeric(1))
[1] 9 13 8 7 9 9 9
# Convert to vapply() expression
sapply(temp, function(x, y) { mean(x) > y }, y = 5)
[1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE
vapply(temp, function(x, y) mean(x) > y, FUN.VALUE = logical(1), y = 5)
[1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE
Utilities
Mathematical utilities
# The errors vector has already been defined for you
errors <- c(1.9, -2.6, 4.0, -9.5, -3.4, 7.3)
# Sum of absolute rounded values of errors
sum(round(abs(errors)))
[1] 29
Find the error
# Don't edit these two lines
vec1 <- c(1.5, 2.5, 8.4, 3.7, 6.3)
vec2 <- rev(vec1)
# Fix the error
mean(c(abs(vec1), abs(vec2)))
[1] 4.48
Data utilities
# The linkedin and facebook lists have already been created for you
linkedin <- list(16, 9, 13, 5, 2, 17, 14)
facebook <- list(17, 7, 5, 16, 8, 13, 14)
# Convert linkedin and facebook to a vector: li_vec and fb_vec
li_vec <- unlist(linkedin)
fb_vec <- unlist(facebook)
# Append fb_vec to li_vec: social_vec
social_vec <- append(li_vec,fb_vec)
# Sort social_vec
sort(social_vec, decreasing = TRUE)
[1] 17 17 16 16 14 14 13 13 9 8 7 5 5 2
Find the error 2
# Fix me
rep(seq(1, 7, by = 2), times = 7)
[1] 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7
Beat Gauss using R
# Create first sequence: seq1 (1, 4, 7, ... up to at most 500)
seq1 <- seq(1, 500, by = 3)
# Create second sequence: seq2 (1200, 1193, ... down to at least 900)
seq2 <- seq(1200, 900, by = -7)
# Calculate total sum of the sequences
# (each sum() already yields a single number, so no outer sum() is needed)
sum(seq1) + sum(seq2)
[1] 87029
grepl and grep
# The emails vector has already been defined for you
emails <- c("john.doe@ivyleague.edu", "education@world.gov",
"dalai.lama@peace.org","invalid.edu", "quant@bigdatacollege.edu",
"cookie.monster@sesame.tv")
# Use grepl() to match for "edu"
grepl("edu", emails)
# Use grep() to match for "edu", save result to hits
hits <- grep("edu", emails)
# Subset emails using hits
emails[hits]
[1] "john.doe@ivyleague.edu" "education@world.gov"
[3] "invalid.edu" "quant@bigdatacollege.edu"
grepl and grep 2
# The emails vector has already been defined for you
emails <- c("john.doe@ivyleague.edu", "education@world.gov",
"dalai.lama@peace.org","invalid.edu", "quant@bigdatacollege.edu",
"cookie.monster@sesame.tv")
# Use grepl() to match for .edu addresses more robustly
grepl("@.*\\.edu$", emails)
# Use grep() to match for .edu addresses more robustly, save result to hits
hits <- grep("@.*\\.edu$", emails)
# Subset emails using hits
emails[hits]
[1] "john.doe@ivyleague.edu" "quant@bigdatacollege.edu"
sub and gsub
# The emails vector has already been defined for you
emails <- c("john.doe@ivyleague.edu", "education@world.gov",
"global@peace.org","invalid.edu", "quant@bigdatacollege.edu",
"cookie.monster@sesame.tv")
# Use sub() to convert the email domains to datacamp.edu
sub("@.*\\.edu$", "@datacamp.edu",emails)
[1] "john.doe@datacamp.edu" "education@world.gov"
[3] "global@peace.org" "invalid.edu"
[5] "quant@datacamp.edu" "cookie.monster@sesame.tv"
sub and gsub 2
awards <- c("Won 1 Oscar.",
"Won 1 Oscar. Another 9 wins & 24 nominations.",
"1 win and 2 nominations.",
"2 wins & 3 nominations.",
"Nominated for 2 Golden Globes. 1 more win & 2 nominations.",
"4 wins & 1 nomination.")
sub(".*\\s([0-9]+)\\snomination.*$", "\\1", awards)
[1] "Won 1 Oscar." "24" "2" "3" "2"
[6] "1"
Times and Dates (Right here, right now)
# Get the current date: today
today <- Sys.Date()
# See what today looks like under the hood
unclass(today)
# Get the current time: now
now <- Sys.time()
# See what now looks like under the hood
unclass(now)
[1] 1657256530
Create and format dates
%Y: 4-digit year (1982)
%y: 2-digit year (82)
%m: 2-digit month (01)
%d: 2-digit day of the month (13)
%A: weekday (Wednesday)
%a: abbreviated weekday (Wed)
%B: month (January)
%b: abbreviated month (Jan)
# Definition of character strings representing dates
str1 <- "May 23, '96"
str2 <- "2012-03-15"
str3 <- "30/January/2006"
# Convert the strings to dates: date1, date2, date3
date1 <- as.Date(str1, format = "%b %d, '%y")
date2 <- as.Date(str2)
date3 <- as.Date(str3, format = "%d/%B/%Y")
# Convert dates to formatted strings
format(date1, "%A")
[1] "Thursday"
format(date2, "%d")
[1] "15"
format(date3, "%b %Y")
[1] "Jan 2006"
Create and format times
# Definition of character strings representing times
str1 <- "May 23, '96 hours:23 minutes:01 seconds:45"
str2 <- "2012-3-12 14:23:08"
# Convert the strings to POSIXct objects: time1, time2
# The format string is written on a single line so that it mirrors str1
# exactly and no line break ends up inside the string literal
time1 <- as.POSIXct(
  str1,
  format = "%B %d, '%y hours:%H minutes:%M seconds:%S"
)
time2 <- as.POSIXct(str2, format = "%Y-%m-%d %H:%M:%S")
# Convert times to formatted strings
format(time1, "%M")
[1] "01"
format(time2, "%I:%M %p")
[1] "02:23 PM"
Calculations with Dates
# day1, day2, day3, day4 and day5 are already available in the workspace
# Difference between last and first pizza day
day5 - day1
Time difference of 18 days
# Create vector pizza
pizza <- c(day1, day2, day3, day4, day5)
# Create differences between consecutive pizza days: day_diff
day_diff <- diff(pizza)
# Average period between two consecutive pizza days
mean(day_diff)
Time difference of 4.5 days
Calculations with Times
# login and logout are already defined in the workspace
# Calculate the difference between login and logout: time_online
time_online <- logout - login
# Inspect the variable time_online
time_online
Time differences in secs
[1] 2305.11818 34.18472 837.18182 2397.90153 1851.30411
# Calculate the total time online
print(sum(time_online))
Time difference of 7425.69 secs
# Calculate the average time online
print(mean(time_online))
Time difference of 1485.138 secs
Time is of the essence
# Convert astro to vector of Date objects: astro_dates
astro_dates <- as.Date(astro, format = "%d-%b-%Y")
# Convert meteo to vector of Date objects: meteo_dates
meteo_dates <- as.Date(meteo, format = "%B %d, %y")
# Calculate the maximum absolute difference between astro_dates
# and meteo_dates
max(abs(astro_dates - meteo_dates))
Time difference of 24 days
Thank You.
For the slides from datacamp, check this:
https://github.com/DataSaramsh/Data-Science/tree/main/Datacamp%20R%20programming
Comments
Post a Comment