Intermediate R programming | DataCamp - Intermediate R programming | R programming for data scientists

Intermediate R programming from Datacamp

In this course, we will learn about conditional statements, loops, functions, the apply family, and utilities (regular expressions, dates and times) to power your R scripts.

Conditionals and control flow

# Equality:

# Comparison of logicals

TRUE == FALSE

# Comparison of numerics
-6 * 14 != 17 - 101

# Comparison of character strings
"useR" == "user"

# Compare a logical with a numeric
TRUE == 1
[1] TRUE

# Greater and less than:
# Comparison of numerics
-6 * 5 + 2 >= -10 + 1

# Comparison of character strings
"raining" <= "raining dogs"

# Comparison of logicals
TRUE > FALSE
[1] TRUE

# Compare vectors:
# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Popular days
linkedin > 15

# Quiet days
linkedin <= 5

# LinkedIn more popular than Facebook
linkedin > facebook
[1] FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE

# Compare matrices:
# The social data has been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)
views <- matrix(c(linkedin, facebook), nrow = 2, byrow = TRUE)

# When does views equal 13?
views == 13

# When is views less than or equal to 14?
views <= 14
      [,1] [,2] [,3]  [,4] [,5]  [,6] [,7]
[1,] FALSE TRUE TRUE  TRUE TRUE FALSE TRUE
[2,] FALSE TRUE TRUE FALSE TRUE  TRUE TRUE

# & and | :
# The linkedin and last variables are already defined for you;
# last is the final element of linkedin
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
last <- tail(linkedin, 1)

# Is last under 5 or above 10?
last < 5 | last > 10

# Is last between 15 (exclusive) and 20 (inclusive)?
last > 15 & last <= 20
[1] FALSE

# & and |  2 :
# The social data (linkedin, facebook, views) has been created for you

# linkedin exceeds 10 but facebook below 10
linkedin > 10 & facebook <10

# When were one or both visited at least 12 times?
linkedin >= 12 | facebook >=12

# When is views between 11 (exclusive) and 14 (inclusive)?
views > 11 & views <= 14
      [,1]  [,2]  [,3]  [,4]  [,5]  [,6] [,7]
[1,] FALSE FALSE  TRUE FALSE FALSE FALSE TRUE
[2,] FALSE FALSE FALSE FALSE FALSE  TRUE TRUE


# Blend it all together:
# li_df is pre-loaded in your workspace

# Select the second column, named day2, from li_df: second
second <- li_df[, "day2"]

# Build a logical vector, TRUE if value in second is extreme
# (above 25 or below 5): extremes
# The comparison already yields a logical vector, so no `== TRUE` is needed
extremes <- second > 25 | second < 5

# Count the number of TRUEs in extremes (each TRUE counts as 1 when summed)
sum(extremes)
[1] 16

# The if statement:
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Examine the if statement for medium
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
}
[1] "Showing LinkedIn information"

# Write the if statement for num_views
if (num_views > 15) {
  print("You are popular!")
}

# Add an else:
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Control structure for medium
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
} else {
  print("Unknown medium")
}
[1] "Showing LinkedIn information"

# Control structure for num_views
if (num_views > 15) {
  print("You're popular!")
} else{
  print("Try to be more visible!")
}
[1] "Try to be more visible!"


# Customize further else if:
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Control structure for medium
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
} else if (medium == "Facebook") {
  # Add code to print correct string when condition is TRUE
  print("Showing Facebook information")
} else {
  print("Unknown medium")
}
[1] "Showing LinkedIn information"

# Control structure for num_views
# if() needs a single TRUE/FALSE, so the scalar, short-circuiting `&&`
# is used here rather than the element-wise `&`
if (num_views > 15) {
  print("You're popular!")
} else if (num_views <= 15 && num_views > 10) {
  # Add code to print correct string when condition is TRUE
  print("Your number of views is average")
} else {
  print("Try to be more visible!")
}
[1] "Your number of views is average"


# Else if 2.0:
# Variables related to your last day of recordings
li <- 15
fb <- 9

# Code the control-flow construct
# sms is twice the combined views when both sites reach 15, half the
# combined views when both stay under 10, and the plain total otherwise.
# `&&` is the scalar operator intended for if() conditions.
if (li >= 15 && fb >= 15) {
  sms <- 2 * (li + fb)
} else if (li < 10 && fb < 10) {
  sms <- 0.5 * (li + fb)
} else {
  sms <- li + fb
}
print(sms)
[1] 24

Loops

While loop

# Initialize the speed variable
speed <- 64

# Code the while loop
while (speed > 30) {
  print(paste("Slow down!"))
  speed <- speed - 7
}

# Print out the speed variable
speed
[1] 29

Throw in more conditionals

# Initialize the speed variable
speed <- 64

# Extend/adapt the while loop
while (speed > 30) {
  print(paste("Your speed is",speed))
  if (speed > 48) {
    print(paste("Slow down big time!"))
    speed <- speed - 11
  } else {
    print(paste("Slow down!"))
    speed <- speed - 6
  }
}
[1] "Your speed is 64"
[1] "Slow down big time!"
[1] "Your speed is 53"
[1] "Slow down big time!"
[1] "Your speed is 42"
[1] "Slow down!"
[1] "Your speed is 36"
[1] "Slow down!"

Stop the while: break

# Initialize the speed variable
speed <- 88

while (speed > 30) {
  print(paste("Your speed is", speed))
  
  # Break the while loop when speed exceeds 80
  if (speed > 80 ) {
    break
  }
  
  if (speed > 48) {
    print("Slow down big time!")
    speed <- speed - 11
  } else {
    print("Slow down!")
    speed <- speed - 6
  }
}
[1] "Your speed is 88"

Build a while loop from scratch

# Initialize i as 1 
i <- 1

# Code the while loop
while (i <= 10) {
  print(3 * i)
  if (i %% 8 == 0) {
    break
  }
  i <- i + 1
}
[1] 3
[1] 6
[1] 9
[1] 12
[1] 15
[1] 18
[1] 21
[1] 24

For loop

primes <- c(2, 3, 5, 7, 11, 13)

# loop version 1
for (p in primes) {
  print(p)
}
[1] 2
[1] 3
[1] 5
[1] 7
[1] 11
[1] 13

# loop version 2: iterate over the index positions of primes
# seq_along() is used instead of 1:length() so that an empty vector
# produces zero iterations rather than the sequence 1, 0
for (i in seq_along(primes)) {
  print(primes[i])
}
[1] 2
[1] 3
[1] 5
[1] 7
[1] 11
[1] 13

Loop over a vector

# The linkedin vector has already been defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)

# Loop version 1: iterate over the values directly
for (lin in linkedin) {
  print(lin)
}

# Loop version 2: iterate over the index positions
# seq_along() avoids the 1:0 trap that 1:length() hits on an empty vector
for (i in seq_along(linkedin)) {
  print(linkedin[i])
}
[1] 16
[1] 9
[1] 13
[1] 5
[1] 2
[1] 17
[1] 14

Loop over a list

# The nyc list is already specified
nyc <- list(pop = 8405837, 
            boroughs = c("Manhattan", "Bronx", "Brooklyn", 
            "Queens", "Staten Island"), 
            capital = FALSE)

# Loop version 1
for( n in nyc){
    print(n)
}
[1] 8405837
[1] "Manhattan"     "Bronx"         "Brooklyn"      "Queens"       
[5] "Staten Island"
[1] FALSE

# Loop version 2: iterate over the index positions of the list
# [[i]] extracts the element itself; seq_along() is safe for empty lists
for (i in seq_along(nyc)) {
  print(nyc[[i]])
}
[1] 8405837
[1] "Manhattan"     "Bronx"         "Brooklyn"      "Queens"       
[5] "Staten Island"
[1] FALSE

Loop over a matrix

# The tic-tac-toe matrix ttt has already been defined for you
>ttt
     [,1] [,2] [,3]
[1,] "O"  NA   "X" 
[2,] NA   "O"  "O" 
[3,] "X"  NA   "X" 

# define the double for loop
# i walks the rows and j the columns; ttt[i, j] selects the single cell at
# that position. Passing the whole matrix ttt to paste() would instead
# print all nine cells on every iteration.
for (i in seq_len(nrow(ttt))) {
  for (j in seq_len(ncol(ttt))) {
    print(paste("On row", i, "and column", j, "the board contains", ttt[i, j]))
  }
}
[1] "On row 1 and column 1 the board contains O"
[1] "On row 1 and column 2 the board contains NA"
[1] "On row 1 and column 3 the board contains X"
[1] "On row 2 and column 1 the board contains NA"
[1] "On row 2 and column 2 the board contains O"
[1] "On row 2 and column 3 the board contains O"
[1] "On row 3 and column 1 the board contains X"
[1] "On row 3 and column 2 the board contains NA"
[1] "On row 3 and column 3 the board contains X"

Mix it up with control flow

# The linkedin vector has already been defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)

# Code the for loop with conditionals
for (li in linkedin) {
  if (li > 10 ) {
    print("You're popular!")    
  } else {
    print("Be more visible!")
  }
  print(li)
}
[1] "You're popular!"
[1] 16
[1] "Be more visible!"
[1] 9
[1] "You're popular!"
[1] 13
[1] "Be more visible!"
[1] 5
[1] "Be more visible!"
[1] 2
[1] "You're popular!"
[1] 17
[1] "You're popular!"
[1] 14


Next, you break it
# The linkedin vector has already been defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)

# Adapt/extend the for loop
for (li in linkedin) {
  if (li > 10) {
    print("You're popular!")
  } else {
    print("Be more visible!")
  }
  
  # Add if statement with break
  if(li > 16){
    print( "This is ridiculous, I'm outta here!")
    break
  }

  # Add if statement with next
  if(li < 5){
    print("This is too embarrassing!")
    next
  }
  print(li)
}
[1] "You're popular!"
[1] 16
[1] "Be more visible!"
[1] 9
[1] "You're popular!"
[1] 13
[1] "Be more visible!"
[1] 5
[1] "Be more visible!"
[1] "This is too embarrassing!"
[1] "You're popular!"
[1] "This is ridiculous, I'm outta here!"

Build a for loop from scratch

# Pre-defined variables
rquote <- "r's internals are irrefutably intriguing"
# Splitting on "" breaks the quote into its individual characters
chars <- strsplit(rquote, split = "")[[1]]

# Initialize rcount
rcount <- 0

# Finish the for loop: count every "r" that appears before the first "u"
for (char in chars) {
  if (char == "r") {
    rcount <- rcount + 1
  } else if (char == "u") {
    # Stop scanning as soon as the first "u" is reached
    break
  }
}

# Print out rcount
print(rcount)
[1] 5

Functions

Function documentation

# Consult the documentation on the mean() function

?mean

# Inspect the arguments of the mean() function
args(mean)
function (x, ...) 
NULL

Use a function

# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Calculate average number of views
avg_li <- mean(linkedin)
avg_fb <- mean(facebook)

# Inspect avg_li and avg_fb
print(avg_li)
[1] 10.85714

print(avg_fb)
[1] 11.42857








# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Calculate the mean of the sum
avg_sum <- mean(linkedin+facebook)

# Calculate the trimmed mean of the sum
avg_sum_trimmed <- mean(linkedin+facebook, trim = 0.2)

# Inspect both new variables
print(avg_sum)
[1] 22.28571

print(avg_sum_trimmed)
[1] 22.6
# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)

# Basic average of linkedin
print(mean(linkedin))
[1] NA

# Advanced average of linkedin
print(mean(linkedin, na.rm = TRUE))
[1] 12.33333

Functions inside functions

# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)

# Calculate the mean absolute deviation
mean(abs(linkedin - facebook), na.rm = TRUE)
[1] 4.8

Write your own function

# Create a function pow_two(): returns its argument multiplied by itself
pow_two <- function(a) {
  squared <- a * a
  squared
}

# Use the function

pow_two(12)
# Create a function sum_abs(): adds the absolute values of a and b
sum_abs <- function(a, b) {
  abs_a <- abs(a)
  abs_b <- abs(b)
  abs_a + abs_b
}

# Use the function
sum_abs(-2, 3)
[1] 5

# Define the function hello(): prints a greeting, then yields TRUE
hello <- function() {
  print("Hi there!")
  TRUE
}

# Call the function hello()
hello()
[1] "Hi there!"
[1] TRUE

# Finish the pow_two() function
# x: number to square
# print_info: print a sentence describing the result before returning it?
# Returns x squared
pow_two <- function(x, print_info = TRUE) {
  y <- x^2
  # print_info is already a logical flag, so it can be tested directly
  if (print_info) {
    print(paste(x, "to the power two equals", y))
  }
  y
}
pow_two(5)
[1] "5 to the power two equals 25"
[1] 25

R passes arguments by value

# triple() overwrites only its own local copy of x and returns it; the
# variable passed in by the caller keeps its original value
triple <- function(x) {
  x <- x * 3
  x
}
a <- 5
triple(a)
[1] 15
a
[1] 5

R you functional?

# The linkedin and facebook vectors have already been created for you

# Define the interpret function
# num_views: a single number of views
# Prints a verdict and returns num_views when it exceeds 15, otherwise 0
interpret <- function(num_views) {
  if (!(num_views > 15)) {
    print("Try to be more visible!")
    return(0)
  }
  print("You're popular!")
  num_views
}
# Call the interpret function twice
# interpret() tests its argument inside if(), which needs a single value:
# pass the first LinkedIn day rather than the whole vector
interpret(linkedin[1])
[1] "You're popular!"
[1] 16
interpret(facebook[2])
[1] "Try to be more visible!"
[1] 0

# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# interpret() can be used inside interpret_all()
# It prints a verdict for one day's view count and yields that count when
# it is above 15, or 0 otherwise
interpret <- function(num_views) {
  popular <- num_views > 15
  if (popular) {
    print("You're popular!")
    num_views
  } else {
    print("Try to be more visible!")
    0
  }
}

# Define the interpret_all() function
# views: vector with data to interpret
# return_sum: return total number of views on popular days?
# Returns the accumulated interpret() results when return_sum is TRUE,
# and NULL otherwise
interpret_all <- function(views, return_sum = TRUE) {
  count <- 0

  # interpret() is called once per element, so each value is tested on
  # its own and its result is added to the running total
  for (v in views) {
    count <- count + interpret(v)
  }

  # return_sum is already a logical flag, so it can be tested directly
  if (return_sum) {
    count
  } else {
    NULL
  }
}

# Call the interpret_all() function on both linkedin and facebook
interpret_all(linkedin)
[1] "You're popular!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "You're popular!"
[1] "Try to be more visible!"
[1] 33
interpret_all(facebook)
[1] "You're popular!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "You're popular!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] "Try to be more visible!"
[1] 33

Load R Packages

# Load the ggplot2 package
library(ggplot2)

# Retry the qplot() function
qplot(mtcars$wt, mtcars$hp)

# Check out the currently attached packages again
search()
 [1] ".GlobalEnv"        "package:ggplot2"   "package:RBackend"
 [4] "package:stats"     "package:graphics"  "package:grDevices"
 [7] "package:utils"     "package:datasets"  "package:methods"
[10] "Autoloads"         "package:base"

The apply family

Use lapply with a built-in R function

# The vector pioneers has already been created for you
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")

# Split names from birth year
split_math <- strsplit(pioneers, split = ":")

# Convert to lowercase strings: split_low
split_low <- lapply(split_math,tolower)

# Take a look at the structure of split_low
str(split_low)

List of 4
 $ : chr [1:2] "gauss" "1777"
 $ : chr [1:2] "bayes" "1702"
 $ : chr [1:2] "pascal" "1623"
 $ : chr [1:2] "pearson" "1857"

Use lapply with your own function

# Code from previous exercise:
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
split <- strsplit(pioneers, split = ":")
split_low <- lapply(split, tolower)

# Write function select_first(): keeps only the first element of x
select_first <- function(x) x[1L]

# Apply select_first() over split_low: names
names <- lapply(split_low, select_first)

# Write function select_second(): keeps only the second element of x
select_second <- function(x) x[2L]
# Apply select_second() over split_low: years
years <- lapply(split_low,select_second)

lapply and anonymous functions

# split_low has been created for you
split_low

# Transform: use anonymous function inside lapply
names <- lapply(split_low, function(x){x[1]})

# Transform: use anonymous function inside lapply
years <- lapply(split_low, function(x){x[2]})

Use lapply with additional arguments

# Definition of split_low
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
split <- strsplit(pioneers, split = ":")
split_low <- lapply(split, tolower)

# Generic select function
# x: vector to pick from
# index: position(s) of the element(s) to keep
select_el <- function(x, index) x[index]

# Use lapply() twice on split_low: names and years
names <- lapply(split_low,select_el, 1)
years <- lapply(split_low,select_el, 2)

How to use sapply

# temp has already been defined in the workspace

# Use lapply() to find each day's minimum temperature
lapply(temp, min)
[[1]]
[1] -1

[[2]]
[1] 5

[[3]]
[1] -3

[[4]]
[1] -2

[[5]]
[1] 2

[[6]]
[1] -3

[[7]]
[1] 1


# Use sapply() to find each day's minimum temperature
sapply(temp, min)
[1] -1  5 -3 -2  2 -3  1

# Use lapply() to find each day's maximum temperature
lapply(temp, max)
[[1]]
[1] 9

[[2]]
[1] 13

[[3]]
[1] 8

[[4]]
[1] 7

[[5]]
[1] 9

[[6]]
[1] 9

[[7]]
[1] 9

# Use sapply() to find each day's maximum temperature
sapply(temp, max)
[1]  9 13  8  7  9  9  9

sapply with your own function

# temp is already defined in the workspace

# extremes_avg(): the midpoint between the smallest and largest value of x
extremes_avg <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  (lowest + highest) / 2
}

# Apply extremes_avg() over temp using sapply()
sapply(temp, extremes_avg)
[1] 4.0 9.0 2.5 2.5 5.5 3.0 5.0

# Apply extremes_avg() over temp using lapply()
lapply(temp, extremes_avg)
[[1]]
[1] 4

[[2]]
[1] 9

[[3]]
[1] 2.5

[[4]]
[1] 2.5

[[5]]
[1] 5.5

[[6]]
[1] 3

[[7]]
[1] 5

sapply with function returning vector

# temp is already available in the workspace

# extremes(): named vector holding the minimum and the maximum of x
extremes <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  c(min = lowest, max = highest)
}

# Apply extremes() over temp with sapply()
sapply(temp, extremes)
    [,1] [,2] [,3] [,4] [,5] [,6] [,7]
min   -1    5   -3   -2    2   -3    1
max    9   13    8    7    9    9    9

# Apply extremes() over temp with lapply()
lapply(temp, extremes)
[[1]]
min max 
 -1   9 

[[2]]
min max 
  5  13 

[[3]]
min max 
 -3   8 

[[4]]
min max 
 -2   7 

[[5]]
min max 
  2   9 

[[6]]
min max 
 -3   9 

[[7]]
min max 
  1   9

sapply can't simplify, now what?

# temp is already prepared for you in the workspace

# below_zero(): keeps only the elements of x that are strictly negative
# The result has a different length per input, possibly zero
below_zero <- function(x) {
  is_negative <- x < 0
  x[is_negative]
}

# Apply below_zero over temp using sapply(): freezing_s
freezing_s <- sapply(temp,below_zero)

# Apply below_zero over temp using lapply(): freezing_l
freezing_l <- lapply(temp, below_zero)

# Are freezing_s and freezing_l identical?
identical(freezing_l,freezing_s)
[1] TRUE

sapply with functions that return NULL

# temp is already available in the workspace

# print_info(): writes the mean of x to the console with cat()
# cat() is the last expression, so the function's value is cat()'s NULL
print_info <- function(x) {
  avg_temp <- mean(x)
  cat("The average temperature is", avg_temp, "\n")
}

# Apply print_info() over temp using sapply()
sapply(temp, print_info)
The average temperature is 4.8 
The average temperature is 9 
The average temperature is 2.2 
The average temperature is 2.4 
The average temperature is 5.4 
The average temperature is 4.6 
The average temperature is 4.6 
[[1]]
NULL

[[2]]
NULL

[[3]]
NULL

[[4]]
NULL

[[5]]
NULL

[[6]]
NULL

[[7]]
NULL

# Apply print_info() over temp using lapply()
lapply(temp, print_info)
The average temperature is 4.8 
The average temperature is 9 
The average temperature is 2.2 
The average temperature is 2.4 
The average temperature is 5.4 
The average temperature is 4.6 
The average temperature is 4.6 
[[1]]
NULL

[[2]]
NULL

[[3]]
NULL

[[4]]
NULL

[[5]]
NULL

[[6]]
NULL

[[7]]
NULL

Use vapply

# temp is already available in the workspace

# basics(): named vector with the minimum, mean and maximum of x
basics <- function(x) {
  stats <- c(min(x), mean(x), max(x))
  names(stats) <- c("min", "mean", "max")
  stats
}

# Apply basics() over temp using vapply()
vapply(temp, basics,numeric(3))

     [,1] [,2] [,3] [,4] [,5] [,6] [,7]
min  -1.0    5 -3.0 -2.0  2.0 -3.0  1.0
mean  4.8    9  2.2  2.4  5.4  4.6  4.6
max   9.0   13  8.0  7.0  9.0  9.0  9.0

# temp is already available in the workspace

# basics(): named vector with the minimum, mean, median and maximum of x
# It returns four values, so vapply() must be given numeric(4)
basics <- function(x) {
  stats <- c(min(x), mean(x), median(x), max(x))
  names(stats) <- c("min", "mean", "median", "max")
  stats
}

# Fix the error:
vapply(temp, basics, numeric(4))

       [,1] [,2] [,3] [,4] [,5] [,6] [,7]
min    -1.0    5 -3.0 -2.0  2.0 -3.0  1.0
mean    4.8    9  2.2  2.4  5.4  4.6  4.6
median  6.0    9  3.0  2.0  5.0  5.0  4.0
max     9.0   13  8.0  7.0  9.0  9.0  9.0

From sapply to vapply

# temp is already defined in the workspace

# Convert to vapply() expression
sapply(temp, max)
[1]  9 13  8  7  9  9  9

vapply(temp, max, numeric(1))
[1]  9 13  8  7  9  9  9

# Convert to vapply() expression
sapply(temp, function(x, y) { mean(x) > y }, y = 5)
[1] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE

vapply(temp, function(x, y) { mean(x) > y }, y = 5, logical(1) )
[1] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE

Utilities

Mathematical utilities

# The errors vector has already been defined for you
errors <- c(1.9, -2.6, 4.0, -9.5, -3.4, 7.3)

# Sum of absolute rounded values of errors
sum(round(abs(errors)))
[1] 29

Find the error

# Don't edit these two lines
vec1 <- c(1.5, 2.5, 8.4, 3.7, 6.3)
vec2 <- rev(vec1)

# Fix the error
mean(c(abs(vec1), abs(vec2)))
[1] 4.48

Data utilities

# The linkedin and facebook lists have already been created for you
linkedin <- list(16, 9, 13, 5, 2, 17, 14)
facebook <- list(17, 7, 5, 16, 8, 13, 14)

# Convert linkedin and facebook to a vector: li_vec and fb_vec
li_vec <- unlist(linkedin)
fb_vec <- unlist(facebook)

# Append fb_vec to li_vec: social_vec
social_vec <- append(li_vec,fb_vec)

# Sort social_vec
sort(social_vec, decreasing = TRUE)
[1] 17 17 16 16 14 14 13 13  9  8  7  5  5  2

Find the error 2

# Fix me
rep(seq(1, 7, by = 2), times = 7)
[1] 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7

Beat Gauss using R

# Create first sequence: seq1 (1, 4, 7, ... up to at most 500)
seq1 <- seq(1, 500, by = 3)

# Create second sequence: seq2 (1200, 1193, ... down to at least 900)
seq2 <- seq(1200, 900, by = -7)

# Calculate total sum of the sequences
# sum() accepts several vectors and adds up all of their elements, so no
# nested sum() calls are needed
sum(seq1, seq2)
[1] 87029

grepl and grep

# The emails vector has already been defined for you
emails <- c("john.doe@ivyleague.edu", "education@world.gov", 
"dalai.lama@peace.org","invalid.edu", "quant@bigdatacollege.edu",
"cookie.monster@sesame.tv")

# Use grepl() to match for "edu"
grepl("edu", emails)

# Use grep() to match for "edu", save result to hits
hits <- grep("edu", emails)

# Subset emails using hits
emails[hits]
[1] "john.doe@ivyleague.edu"   "education@world.gov"     
[3] "invalid.edu"              "quant@bigdatacollege.edu"

grepl and grep 2

# The emails vector has already been defined for you
emails <- c("john.doe@ivyleague.edu", "education@world.gov",
"dalai.lama@peace.org","invalid.edu", "quant@bigdatacollege.edu", 
"cookie.monster@sesame.tv")

# Use grepl() to match for .edu addresses more robustly
grepl("@.*\\.edu$", emails)

# Use grep() to match for .edu addresses more robustly, save result to hits
hits <- grep("@.*\\.edu$", emails)

# Subset emails using hits
emails[hits]
[1] "john.doe@ivyleague.edu"   "quant@bigdatacollege.edu"

sub and gsub

# The emails vector has already been defined for you
emails <- c("john.doe@ivyleague.edu", "education@world.gov", 
"global@peace.org","invalid.edu", "quant@bigdatacollege.edu",
"cookie.monster@sesame.tv")

# Use sub() to convert the email domains to datacamp.edu
sub("@.*\\.edu$", "@datacamp.edu",emails)
[1] "john.doe@datacamp.edu"    "education@world.gov"     
[3] "global@peace.org"         "invalid.edu"             
[5] "quant@datacamp.edu"       "cookie.monster@sesame.tv"

sub and gsub 2

awards <- c("Won 1 Oscar.",
  "Won 1 Oscar. Another 9 wins & 24 nominations.",
  "1 win and 2 nominations.",
  "2 wins & 3 nominations.",
  "Nominated for 2 Golden Globes. 1 more win & 2 nominations.",
  "4 wins & 1 nomination.")

sub(".*\\s([0-9]+)\\snomination.*$", "\\1", awards)
[1] "Won 1 Oscar." "24"           "2"            "3"            "2"           
[6] "1"

Times and Dates (Right here, right now)

# Get the current date: today
today <- Sys.Date()

# See what today looks like under the hood
unclass(today)

# Get the current time: now
now <- Sys.time()

# See what now looks like under the hood
unclass(now)
[1] 1657256530

Create and format dates

%Y: 4-digit year (1982)
%y: 2-digit year (82)
%m: 2-digit month (01)
%d: 2-digit day of the month (13)
%A: weekday (Wednesday)
%a: abbreviated weekday (Wed)
%B: month (January)
%b: abbreviated month (Jan)

# Definition of character strings representing dates
str1 <- "May 23, '96"
str2 <- "2012-03-15"
str3 <- "30/January/2006"

# Convert the strings to dates: date1, date2, date3
date1 <- as.Date(str1, format = "%b %d, '%y")
date2 <- as.Date(str2)
date3 <- as.Date(str3, format = "%d/%B/%Y")

# Convert dates to formatted strings
format(date1, "%A")
[1] "Thursday"

format(date2, "%d")
[1] "15"

format(date3, "%b %Y")
[1] "Jan 2006"

Create and format times

# Definition of character strings representing times
str1 <- "May 23, '96 hours:23 minutes:01 seconds:45"
str2 <- "2012-3-12 14:23:08"

# Convert the strings to POSIXct objects: time1, time2
# The format string is kept on one line so that no line break ends up
# inside the string literal
time1 <- as.POSIXct(
  str1,
  format = "%B %d, '%y hours:%H minutes:%M seconds:%S"
)
time2 <- as.POSIXct(str2, format = "%Y-%m-%d %H:%M:%S")

# Convert times to formatted strings
format(time1, "%M")
[1] "01"

format(time2, "%I:%M %p")
[1] "02:23 PM"

Calculations with Dates

# day1, day2, day3, day4 and day5 are already available in the workspace 
# Difference between last and first pizza day
day5 - day1
Time difference of 18 days

# Create vector pizza
pizza <- c(day1, day2, day3, day4, day5)

# Create differences between consecutive pizza days: day_diff
day_diff <- diff(pizza) 

# Average period between two consecutive pizza days
mean(day_diff)
Time difference of 4.5 days

Calculations with Times

# login and logout are already defined in the workspace
# Calculate the difference between login and logout: time_online
time_online <- logout - login

# Inspect the variable time_online
time_online
Time differences in secs
[1] 2305.11818   34.18472  837.18182 2397.90153 1851.30411

# Calculate the total time online
print(sum(time_online))
Time difference of 7425.69 secs
# Calculate the average time online
print(mean(time_online))
Time difference of 1485.138 secs

Time is of the essence

# Convert astro to vector of Date objects: astro_dates
astro_dates <- as.Date(astro, format = "%d-%b-%Y")

# Convert meteo to vector of Date objects: meteo_dates
meteo_dates <- as.Date(meteo, format = "%B %d, %y")

# Calculate the maximum absolute difference between astro_dates
# and meteo_dates
max(abs(astro_dates - meteo_dates))
Time difference of 24 days



Thank You.

For the slides from DataCamp, see:

https://github.com/DataSaramsh/Data-Science/tree/main/Datacamp%20R%20programming