Intermediate R programming | Datacamp- Intermediate R programming | R programming for data scientist | programming language for Data Scientist

 


Intermediate R programming from Datacamp

In this chapter, we will learn about conditional statements, loops, functions, the apply family, and utility functions to power your R scripts.

Conditional and control flow

# Equality:

# Comparison of logicals
TRUE == FALSE

# Comparison of numerics
-6 * 14 != 17 - 101

# Comparison of character strings
"useR" == "user"

# Compare a logical with a numeric
TRUE == 1
[1] TRUE

# Greater and less than:

# Comparison of numerics
-6 * 5 + 2 >= -10 + 1

# Comparison of character strings
"raining" <= "raining dogs"

# Comparison of logicals
TRUE > FALSE
[1] TRUE

# Compare vectors:

# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Comparisons on vectors are element-wise: each one returns one logical
# per element.

# Popular days
linkedin > 15

# Quiet days
linkedin <= 5

# LinkedIn more popular than Facebook
linkedin > facebook
[1] FALSE TRUE TRUE FALSE FALSE TRUE FALSE

# Compare matrices:

# The social data has been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)
# byrow = TRUE fills row by row: linkedin becomes row 1, facebook row 2
views <- matrix(c(linkedin, facebook), nrow = 2, byrow = TRUE)

# When does views equal 13?
views == 13

# When is views less than or equal to 14?
views <= 14
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [1,] FALSE TRUE TRUE TRUE TRUE FALSE TRUE [2,] FALSE TRUE TRUE FALSE TRUE TRUE TRUE

# & and | :

# The linkedin and last variable are already defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
# tail(x, 1) keeps only the final element
last <- tail(linkedin, 1)

# Is last under 5 or above 10?
last < 5 | last > 10

# Is last between 15 (exclusive) and 20 (inclusive)?
last > 15 & last <= 20
[1] FALSE

# & and |  2 :
# The social data (linkedin, facebook, views) has been created for you

# linkedin exceeds 10 but facebook below 10
linkedin > 10 & facebook <10

# When were one or both visited at least 12 times?
linkedin >= 12 | facebook >=12

# When is views between 11 (exclusive) and 14 (inclusive)?
views > 11 & views <= 14
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [1,] FALSE FALSE TRUE FALSE FALSE FALSE TRUE [2,] FALSE FALSE FALSE FALSE FALSE TRUE TRUE


# Blend it all together:

# li_df is pre-loaded in your workspace

# Pull the column named "day2" out of li_df: second
second <- li_df[, "day2"]

# Logical vector, TRUE where a value is below 5 or above 25: extremes
extremes <- second < 5 | second > 25

# sum() on a logical vector counts its TRUE values
sum(extremes)
[1] 16

# The if statement:

# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Examine the if statement for medium
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
}
[1] "Showing LinkedIn information"

# Write the if statement for num_views
if (num_views > 15) {
  print("You are popular!")
}

# Add an else:

# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Control structure for medium
# The closing brace of the if block must come before `else`, on the same line.
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
} else {
  print("Unknown medium")
}
[1] "Showing LinkedIn information"

# Control structure for num_views
# Prints one of two messages depending on whether num_views exceeds 15.
if (num_views > 15) {
  print("You're popular!")
} else {
  print("Try to be more visible!")
}
[1] "Try to be more visible!"


# Customize further else if:

# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Control structure for medium
# Conditions are tried top to bottom; the first TRUE branch runs and the
# rest are skipped. Each branch's closing brace precedes the next keyword.
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
} else if (medium == "Facebook") {
  print("Showing Facebook information")
} else {
  print("Unknown medium")
}
[1] "Showing LinkedIn information"

# Control structure for num_views
# && is the scalar, short-circuiting AND meant for if() conditions.
if (num_views > 15) {
  print("You're popular!")
} else if (num_views <= 15 && num_views > 10) {
  print("Your number of views is average")
} else {
  print("Try to be more visible!")
}
[1] "Your number of views is average"


# Else if 2.0:

# Variables related to your last day of recordings
li <- 15
fb <- 9

# Code the control-flow construct
# sms is double the combined views when both are at least 15, half the
# combined views when both are below 10, and the plain sum otherwise.
if (li >= 15 && fb >= 15) {
  sms <- 2 * (li + fb)
} else if (li < 10 && fb < 10) {
  sms <- 0.5 * (li + fb)
} else {
  sms <- li + fb
}
print(sms)
[1] 24


Loops

While loop

# Starting speed
speed <- 64

# Warn and drop the speed by 7 for as long as it is above 30
while (speed > 30) {
  print("Slow down!")
  speed <- speed - 7
}

# Show where the speed ended up
speed
[1] 29

Throw in more conditionals

# Starting speed
speed <- 64

# Report the speed on every pass; brake by 11 above 48, otherwise by 6
while (speed > 30) {
  print(paste("Your speed is", speed))
  if (speed <= 48) {
    print("Slow down!")
    speed <- speed - 6
  } else {
    print("Slow down big time!")
    speed <- speed - 11
  }
}
[1] "Your speed is 64" [1] "Slow down big time!" [1] "Your speed is 53" [1] "Slow down big time!" [1] "Your speed is 42" [1] "Slow down!" [1] "Your speed is 36" [1] "Slow down!"

Stop the while: break

# Starting speed
speed <- 88

while (speed > 30) {
  print(paste("Your speed is", speed))

  # Leave the loop straight away once the speed is above 80
  if (speed > 80) break

  # Otherwise brake: by 6 up to 48, by 11 above that
  if (speed <= 48) {
    print("Slow down!")
    speed <- speed - 6
  } else {
    print("Slow down big time!")
    speed <- speed - 11
  }
}
[1] "Your speed is 88"

Build a while loop from scratch

# Counter starts at 1
i <- 1

# Print triples of i for i up to 10, but stop once i is a multiple of 8
while (i <= 10) {
  print(3 * i)
  if (i %% 8 == 0) break
  i <- i + 1
}
[1] 3 [1] 6 [1] 9 [1] 12 [1] 15 [1] 18 [1] 21 [1] 24


For loop

primes <- c(2, 3, 5, 7, 11, 13)

# loop version 1
for (p in primes) {
print(p)
}
[1] 2 [1] 3 [1] 5 [1] 7 [1] 11 [1] 13

# loop version 2
# Loop over positions and index into primes. seq_along() gives an empty
# sequence for an empty vector, whereas 1:length(x) would give c(1, 0).
for (i in seq_along(primes)) {
  print(primes[i])
}
[1] 2 [1] 3 [1] 5 [1] 7 [1] 11 [1] 13

Loop over a vector

# The linkedin vector has already been defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)

# Loop version 1: iterate over the values directly
for (lin in linkedin) {
  print(lin)
}

# Loop version 2: iterate over the positions and index into the vector.
# seq_along() gives an empty sequence for an empty vector, whereas
# 1:length(x) would give c(1, 0).
for (i in seq_along(linkedin)) {
  print(linkedin[i])
}
[1] 16 [1] 9 [1] 13 [1] 5 [1] 2 [1] 17 [1] 14


Loop over a list

# The nyc list is already specified
nyc <- list(
  pop = 8405837,
  boroughs = c("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island"),
  capital = FALSE
)

# Loop version 1
# Looping over a list hands each element's content to n in turn.
for (n in nyc) {
  print(n)
}
[1] 8405837 [1] "Manhattan" "Bronx" "Brooklyn" "Queens" [5] "Staten Island" [1] FALSE

# Loop version 2
# Double brackets extract the element itself rather than a one-element list.
# seq_along() stays empty for an empty list, unlike 1:length(x).
for (i in seq_along(nyc)) {
  print(nyc[[i]])
}
[1] 8405837 [1] "Manhattan" "Bronx" "Brooklyn" "Queens" [5] "Staten Island" [1] FALSE


Loop over a matrix

# The tic-tac-toe matrix ttt has already been defined for you
>
ttt
[,1] [,2] [,3] [1,] "O" NA "X" [2,] NA "O" "O" [3,] "X" NA "X"

# define the double for loop
# ttt[i, j] picks the single cell at row i, column j. Pasting the whole
# matrix instead would print all nine cells on every iteration.
for (i in seq_len(nrow(ttt))) {
  for (j in seq_len(ncol(ttt))) {
    print(paste(
      "On row", i, "and column", j, "the board contains", ttt[i, j]
    ))
  }
}
#> [1] "On row 1 and column 1 the board contains O"
#> [1] "On row 1 and column 2 the board contains NA"
#> [1] "On row 1 and column 3 the board contains X"
#> [1] "On row 2 and column 1 the board contains NA"
#> [1] "On row 2 and column 2 the board contains O"
#> [1] "On row 2 and column 3 the board contains O"
#> [1] "On row 3 and column 1 the board contains X"
#> [1] "On row 3 and column 2 the board contains NA"
#> [1] "On row 3 and column 3 the board contains X"

Mix it up with control flow

# The linkedin vector has already been defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)

# For each value: print a verdict (above 10 or not), then the value itself
for (li in linkedin) {
  if (li <= 10) {
    print("Be more visible!")
  } else {
    print("You're popular!")
  }
  print(li)
}
[1] "You're popular!" [1] 16 [1] "Be more visible!" [1] 9 [1] "You're popular!" [1] 13 [1] "Be more visible!" [1] 5 [1] "Be more visible!" [1] 2 [1] "You're popular!" [1] 17 [1] "You're popular!" [1] 14


Next, you break it

# The linkedin vector has already been defined for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)

for (li in linkedin) {
  # Verdict for this value
  if (li <= 10) {
    print("Be more visible!")
  } else {
    print("You're popular!")
  }

  # A value above 16 ends the whole loop
  if (li > 16) {
    print("This is ridiculous, I'm outta here!")
    break
  }

  # A value below 5 skips the final print and moves to the next value
  if (li < 5) {
    print("This is too embarrassing!")
    next
  }

  print(li)
}
[1] "You're popular!" [1] 16 [1] "Be more visible!" [1] 9 [1] "You're popular!" [1] 13 [1] "Be more visible!" [1] 5 [1] "Be more visible!" [1] "This is too embarrassing!" [1] "You're popular!" [1] "This is ridiculous, I'm outta here!"

Build a for loop from scratch

# Pre-defined variables
rquote <- "r's internals are irrefutably intriguing"
# split = "" breaks the string into single characters; [[1]] unwraps the
# one-element list that strsplit() returns
chars <- strsplit(rquote, split = "")[[1]]

# Initialize rcount
rcount <- 0

# Count each "r" seen before the first "u"; stop at that "u"
for (char in chars) {
  if (char == "r") {
    rcount <- rcount + 1
  } else if (char == "u") {
    break
  }
}

# Print out rcount
print(rcount)
[1] 5


Functions

Function documentation

# Consult the documentation on the mean() function

?mean

# Inspect the arguments of the mean() function
args(mean)
function (x, ...) NULL

Use a function

# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Calculate average number of views
avg_li <- mean(linkedin)
avg_fb <- mean(facebook)

# Inspect avg_li and avg_fb
print(avg_li)
[1] 10.85714

print(avg_fb)
[1] 11.42857








# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Calculate the mean of the sum
avg_sum <- mean(linkedin+facebook)

# Calculate the trimmed mean of the sum
avg_sum_trimmed <- mean(linkedin+facebook, trim = 0.2)

# Inspect both new variables
print(avg_sum)
[1] 22.28571

print(avg_sum_trimmed)
[1] 22.6
# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)

# Basic average of linkedin
print(mean(linkedin))
[1] NA

# Advanced average of linkedin
print(mean(linkedin, na.rm = TRUE))
[1] 12.33333

Functions inside functions

# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)

# Calculate the mean absolute deviation
mean(abs(linkedin - facebook), na.rm = TRUE)
[1] 4.8


Write your own function

# Create a function pow_two()
# pow_two(): multiply a by itself and return the product.
pow_two <- function(a) {
  squared <- a * a
  squared
}

# Try it out

pow_two(12)
# sum_abs(): add the absolute value of a to the absolute value of b.
sum_abs <- function(a, b) {
  abs_a <- abs(a)
  abs_b <- abs(b)
  abs_a + abs_b
}

# Try it out
sum_abs(-2, 3)
[1] 5

# hello(): takes no arguments, prints a greeting, and returns TRUE.
hello <- function() {
  print("Hi there!")
  TRUE
}

# Run it
hello()
[1] "Hi there!"
[1] TRUE

# pow_two(): square a number, optionally announcing the result.
#   x          - value to square
#   print_info - when TRUE (the default), print a sentence describing the
#                calculation before returning
# Returns x raised to the power two.
pow_two <- function(x, print_info = TRUE) {
  y <- x^2
  # print_info is already a logical flag, so it is tested directly
  if (print_info) {
    print(paste(x, "to the power two equals", y))
  }
  y
}
pow_two(5)
[1] "5 to the power two equals 25"
[1] 25

R passes arguments by value

# triple(): return three times x.
# The assignment inside the body rebinds x within the function only; the
# variable passed in by the caller keeps its value.
triple <- function(x) {
  x <- 3 * x
  x
}

a <- 5
triple(a)
#> [1] 15
a
#> [1] 5

R you functional?

# The linkedin and facebook vectors have already been created for you

# Define the interpret function
# interpret(): comment on a single view count.
#   num_views - one number (it is used as an if() condition)
# Prints "You're popular!" and returns num_views when it is above 15;
# otherwise prints "Try to be more visible!" and returns 0.
interpret <- function(num_views) {
  if (num_views <= 15) {
    print("Try to be more visible!")
    return(0)
  }
  print("You're popular!")
  num_views
}
# Call the interpret function twice
# interpret() tests its argument in an if(), which needs a single value,
# so pass one element rather than the whole vector.
interpret(linkedin[1])
#> [1] "You're popular!"
#> [1] 16
interpret(facebook[2])
#> [1] "Try to be more visible!"
#> [1] 0

# The linkedin and facebook vectors have already been created for you
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# The interpret() can be used inside interpret_all()
# interpret(): comment on a single view count.
#   num_views - one number (it is used as an if() condition)
# Above 15: prints "You're popular!" and returns num_views.
# Otherwise: prints "Try to be more visible!" and returns 0.
interpret <- function(num_views) {
  is_popular <- num_views > 15
  if (is_popular) {
    print("You're popular!")
    num_views
  } else {
    print("Try to be more visible!")
    0
  }
}

# interpret_all(): run interpret() on every element of views.
#   views      - vector with data to interpret
#   return_sum - return total number of views on popular days?
# Returns the sum of the interpret() results when return_sum is TRUE
# (the default), and NULL otherwise. interpret() prints one message per
# element in both cases.
interpret_all <- function(views, return_sum = TRUE) {
  count <- 0

  for (v in views) {
    count <- count + interpret(v)
  }

  # return_sum is already a logical flag, so it is tested directly
  if (return_sum) {
    count
  } else {
    NULL
  }
}

# Call the interpret_all() function on both linkedin and facebook
interpret_all(linkedin)
[1] "You're popular!" [1] "Try to be more visible!" [1] "Try to be more visible!" [1] "Try to be more visible!" [1] "Try to be more visible!" [1] "You're popular!" [1] "Try to be more visible!"
[1] 33
interpret_all(facebook)
[1] "You're popular!" [1] "Try to be more visible!" [1] "Try to be more visible!" [1] "You're popular!" [1] "Try to be more visible!" [1] "Try to be more visible!" [1] "Try to be more visible!"
[1] 33

Load R Packages

# Load the ggplot2 package
library(ggplot2)

# Retry the qplot() function
qplot(mtcars$wt, mtcars$hp)

# Check out the currently attached packages again
search()
[1] ".GlobalEnv" "package:ggplot2" "package:RBackend" [4] "package:stats" "package:graphics" "package:grDevices" [7] "package:utils" "package:datasets" "package:methods" [10] "Autoloads" "package:base"


The apply family

Use lapply with a built-in R function

# The vector pioneers has already been created for you
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")

# Split names from birth year
split_math <- strsplit(pioneers, split = ":")

# Convert to lowercase strings: split_low
split_low <- lapply(split_math,tolower)

# Take a look at the structure of split_low
str(split_low)

List of 4 $ : chr [1:2] "gauss" "1777" $ : chr [1:2] "bayes" "1702" $ : chr [1:2] "pascal" "1623" $ : chr [1:2] "pearson" "1857"

Use lapply with your own function

# Code from previous exercise:
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
split <- strsplit(pioneers, split = ":")
split_low <- lapply(split, tolower)

# select_first(): return the first element of x
select_first <- function(x) x[1]

# select_second(): return the second element of x
select_second <- function(x) x[2]

# Apply each selector to every element of split_low
names <- lapply(split_low, select_first)
years <- lapply(split_low, select_second)

lapply and anonymous functions

# split_low has been created for you
split_low

# Transform: use anonymous function inside lapply
names <- lapply(split_low, function(x){x[1]})

# Transform: use anonymous function inside lapply
years <- lapply(split_low, function(x){x[2]})


Use lapply with additional arguments

# Definition of split_low
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
split <- strsplit(pioneers, split = ":")
split_low <- lapply(split, tolower)

# select_el(): return the element(s) of x at position index
select_el <- function(x, index) x[index]

# Arguments given to lapply() after the function are passed on to it,
# so index is supplied once for every element of split_low
names <- lapply(split_low, select_el, index = 1)
years <- lapply(split_low, select_el, index = 2)

How to use sapply

# temp has already been defined in the workspace

# Use lapply() to find each day's minimum temperature
lapply(temp, min)
[[1]] [1] -1 [[2]] [1] 5 [[3]] [1] -3 [[4]] [1] -2 [[5]] [1] 2 [[6]] [1] -3 [[7]] [1] 1


# Use sapply() to find each day's minimum temperature
sapply(temp, min)
[1] -1 5 -3 -2 2 -3 1

# Use lapply() to find each day's maximum temperature
lapply(temp, max)
[[1]] [1] 9 [[2]] [1] 13 [[3]] [1] 8 [[4]] [1] 7 [[5]] [1] 9 [[6]] [1] 9 [[7]] [1] 9

# Use sapply() to find each day's maximum temperature
sapply(temp, max)
[1] 9 13 8 7 9 9 9

sapply with your own function

# temp is already defined in the workspace

# Finish function definition of extremes_avg
# extremes_avg(): halfway point between the smallest and largest value of x.
extremes_avg <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  (lowest + highest) / 2
}

# Apply extremes_avg() over temp using sapply()
sapply(temp, extremes_avg)
[1] 4.0 9.0 2.5 2.5 5.5 3.0 5.0

# Apply extremes_avg() over temp using lapply()
lapply(temp, extremes_avg)
[[1]] [1] 4 [[2]] [1] 9 [[3]] [1] 2.5 [[4]] [1] 2.5 [[5]] [1] 5.5 [[6]] [1] 3 [[7]] [1] 5

sapply with function returning vector

# temp is already available in the workspace

# Create a function that returns min and max of a vector: extremes
# extremes(): named vector holding the minimum ("min") and maximum ("max")
# of x.
extremes <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  c(min = lowest, max = highest)
}

# Apply extremes() over temp with sapply()
sapply(temp, extremes)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] min -1 5 -3 -2 2 -3 1 max 9 13 8 7 9 9 9

# Apply extremes() over temp with lapply()
lapply(temp, extremes)
[[1]] min max -1 9 [[2]] min max 5 13 [[3]] min max -3 8 [[4]] min max -2 7 [[5]] min max 2 9 [[6]] min max -3 9 [[7]] min max 1 9

sapply can't simplify, now what?

# temp is already prepared for you in the workspace

# Definition of below_zero()
# below_zero(): keep only the elements of x that are less than 0.
# The result can have any length, including zero.
below_zero <- function(x) {
  is_negative <- x < 0
  x[is_negative]
}

# Apply below_zero over temp using sapply(): freezing_s
freezing_s <- sapply(temp,below_zero)

# Apply below_zero over temp using lapply(): freezing_l
freezing_l <- lapply(temp, below_zero)

# Are freezing_s and freezing_l identical?
identical(freezing_l,freezing_s)
[1] TRUE

sapply with functions that return NULL

# temp is already available in the workspace

# Definition of print_info()
# print_info(): write the mean of x to the console as a sentence.
# The function's value is whatever cat() returns, which is NULL.
print_info <- function(x) {
  average <- mean(x)
  cat("The average temperature is", average, "\n")
}

# Apply print_info() over temp using sapply()
sapply(temp, print_info)
The average temperature is 4.8 The average temperature is 9 The average temperature is 2.2 The average temperature is 2.4 The average temperature is 5.4 The average temperature is 4.6 The average temperature is 4.6
[[1]] NULL [[2]] NULL [[3]] NULL [[4]] NULL [[5]] NULL [[6]] NULL [[7]] NULL

# Apply print_info() over temp using lapply()
lapply(temp, print_info)
The average temperature is 4.8 The average temperature is 9 The average temperature is 2.2 The average temperature is 2.4 The average temperature is 5.4 The average temperature is 4.6 The average temperature is 4.6
[[1]] NULL [[2]] NULL [[3]] NULL [[4]] NULL [[5]] NULL [[6]] NULL [[7]] NULL

Use vapply

# temp is already available in the workspace

# Definition of basics()
# basics(): named vector with the minimum, mean and maximum of x,
# in that order.
basics <- function(x) {
  lowest <- min(x)
  average <- mean(x)
  highest <- max(x)
  c(min = lowest, mean = average, max = highest)
}

# Apply basics() over temp using vapply()
vapply(temp, basics,numeric(3))

[,1] [,2] [,3] [,4] [,5] [,6] [,7] min -1.0 5 -3.0 -2.0 2.0 -3.0 1.0 mean 4.8 9 2.2 2.4 5.4 4.6 4.6 max 9.0 13 8.0 7.0 9.0 9.0 9.0

# temp is already available in the workspace

# Definition of the basics() function
# basics(): named vector with the minimum, mean, median and maximum of x,
# in that order (four values, so vapply() needs a length-4 template).
basics <- function(x) {
  lowest <- min(x)
  average <- mean(x)
  middle <- median(x)
  highest <- max(x)
  c(min = lowest, mean = average, median = middle, max = highest)
}

# Fix the error:
vapply(temp, basics, numeric(4))

[,1] [,2] [,3] [,4] [,5] [,6] [,7] min -1.0 5 -3.0 -2.0 2.0 -3.0 1.0 mean 4.8 9 2.2 2.4 5.4 4.6 4.6 median 6.0 9 3.0 2.0 5.0 5.0 4.0 max 9.0 13 8.0 7.0 9.0 9.0 9.0

From sapply to vapply

# temp is already defined in the workspace

# Convert to vapply() expression
sapply(temp, max)
[1] 9 13 8 7 9 9 9

vapply(temp, max, numeric(1))
[1] 9 13 8 7 9 9 9

# Convert to vapply() expression
sapply(temp, function(x, y) { mean(x) > y }, y = 5)
[1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE

vapply(temp, function(x, y) { mean(x) > y }, y = 5, logical(1) )
[1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE


Utilities

Mathematical utilities

# The errors vector has already been defined for you
errors <- c(1.9, -2.6, 4.0, -9.5, -3.4, 7.3)

# Sum of absolute rounded values of errors
sum(round(abs(errors)))
[1] 29

Find the error

# Don't edit these two lines
vec1 <- c(1.5, 2.5, 8.4, 3.7, 6.3)
vec2 <- rev(vec1)

# Fix the error
mean(c(abs(vec1), abs(vec2)))
[1] 4.48

Data utilities

# The linkedin and facebook lists have already been created for you
linkedin <- list(16, 9, 13, 5, 2, 17, 14)
facebook <- list(17, 7, 5, 16, 8, 13, 14)

# Convert linkedin and facebook to a vector: li_vec and fb_vec
li_vec <- unlist(linkedin)
fb_vec <- unlist(facebook)

# Append fb_vec to li_vec: social_vec
social_vec <- append(li_vec,fb_vec)

# Sort social_vec
sort(social_vec, decreasing = TRUE)
[1] 17 17 16 16 14 14 13 13 9 8 7 5 5 2

Find the error 2

# Fix me
rep(seq(1, 7, by = 2), times = 7)
[1] 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7 1 3 5 7

Beat Gauss using R

# First sequence, counting up from 1 to at most 500 in steps of 3: seq1
seq1 <- seq(from = 1, to = 500, by = 3)

# Second sequence, counting down from 1200 to at least 900 in steps of 7: seq2
seq2 <- seq(from = 1200, to = 900, by = -7)

# Grand total of both sequences
sum(seq1) + sum(seq2)
[1] 87029

grepl and grep

# The emails vector has already been defined for you
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov",
  "dalai.lama@peace.org", "invalid.edu", "quant@bigdatacollege.edu",
  "cookie.monster@sesame.tv"
)

# Use grepl() to match for "edu"
# grepl() returns one TRUE/FALSE per element
grepl("edu", emails)

# Use grep() to match for "edu", save result to hits
# grep() returns the positions of the matching elements
hits <- grep("edu", emails)

# Subset emails using hits
emails[hits]
[1] "john.doe@ivyleague.edu" "education@world.gov" [3] "invalid.edu" "quant@bigdatacollege.edu"

grepl and grep 2

# The emails vector has already been defined for you
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov",
  "dalai.lama@peace.org", "invalid.edu", "quant@bigdatacollege.edu",
  "cookie.monster@sesame.tv"
)

# Use grepl() to match for .edu addresses more robustly
# Pattern: an "@", then any characters, then a literal ".edu" at the end
grepl("@.*\\.edu$", emails)

# Use grep() to match for .edu addresses more robustly, save result to hits
hits <- grep("@.*\\.edu$", emails)

# Subset emails using hits
emails[hits]
[1] "john.doe@ivyleague.edu" "quant@bigdatacollege.edu"

sub and gsub

# The emails vector has already been defined for you
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov",
  "global@peace.org", "invalid.edu", "quant@bigdatacollege.edu",
  "cookie.monster@sesame.tv"
)

# Use sub() to convert the email domains to datacamp.edu
# Only elements matching "@ ... .edu" at the end are changed; the rest are
# returned as they are.
sub("@.*\\.edu$", "@datacamp.edu", emails)
[1] "john.doe@datacamp.edu" "education@world.gov" [3] "global@peace.org" "invalid.edu" [5] "quant@datacamp.edu" "cookie.monster@sesame.tv"

sub and gsub 2

awards <- c("Won 1 Oscar.",
"Won 1 Oscar. Another 9 wins & 24 nominations.",
"1 win and 2 nominations.",
"2 wins & 3 nominations.",
"Nominated for 2 Golden Globes. 1 more win & 2 nominations.",
"4 wins & 1 nomination.")

sub(".*\\s([0-9]+)\\snomination.*$", "\\1", awards)
[1] "Won 1 Oscar." "24" "2" "3" "2" [6] "1"

Times and Dates (Right here, right now)

# Get the current date: today
today <- Sys.Date()

# See what today looks like under the hood
unclass(today)

# Get the current time: now
now <- Sys.time()

# See what now looks like under the hood
unclass(now)
[1] 1657256530

Create and format dates

  • %Y: 4-digit year (1982)
  • %y: 2-digit year (82)
  • %m: 2-digit month (01)
  • %d: 2-digit day of the month (13)
  • %A: weekday (Wednesday)
  • %a: abbreviated weekday (Wed)
  • %B: month (January)
  • %b: abbreviated month (Jan)
# Definition of character strings representing dates
str1 <- "May 23, '96"
str2 <- "2012-03-15"
str3 <- "30/January/2006"

# Convert the strings to dates: date1, date2, date3
date1 <- as.Date(str1, format = "%b %d, '%y")
date2 <- as.Date(str2)
date3 <- as.Date(str3, format = "%d/%B/%Y")

# Convert dates to formatted strings
format(date1, "%A")
[1] "Thursday"

format(date2, "%d")
[1] "15"

format(date3, "%b %Y")
[1] "Jan 2006"


Create and format times

# Definition of character strings representing times
str1 <- "May 23, '96 hours:23 minutes:01 seconds:45"
str2 <- "2012-3-12 14:23:08"

# Convert the strings to POSIXct objects: time1, time2
# Each format string is kept on one line so that no line break ends up
# inside the pattern.
time1 <- as.POSIXct(
  str1,
  format = "%B %d, '%y hours:%H minutes:%M seconds:%S"
)
time2 <- as.POSIXct(str2, format = "%Y-%m-%d %H:%M:%S")

# Convert times to formatted strings
format(time1, "%M")
[1] "01"

format(time2, "%I:%M %p")
[1] "02:23 PM"

Calculations with Dates

# day1, day2, day3, day4 and day5 are already available in the workspace
# Difference between last and first pizza day
day5 - day1
Time difference of 18 days

# Create vector pizza
pizza <- c(day1, day2, day3, day4, day5)

# Create differences between consecutive pizza days: day_diff
day_diff <- diff(pizza)

# Average period between two consecutive pizza days
mean(day_diff)
Time difference of 4.5 days


Calculations with Times

# login and logout are already defined in the workspace
# Calculate the difference between login and logout: time_online
time_online <- logout - login

# Inspect the variable time_online
time_online
Time differences in secs [1] 2305.11818 34.18472 837.18182 2397.90153 1851.30411

# Calculate the total time online
print(sum(time_online))
Time difference of 7425.69 secs
# Calculate the average time online
print(mean(time_online))
Time difference of 1485.138 secs

Time is of the essence

# Convert astro to vector of Date objects: astro_dates
astro_dates <- as.Date(astro, format = "%d-%b-%Y")
# Convert meteo to vector of Date objects: meteo_dates
meteo_dates <- as.Date(meteo, format = "%B %d, %y")

# Calculate the maximum absolute difference between astro_dates
# and meteo_dates
max(abs(astro_dates - meteo_dates))
Time difference of 24 days



Thank You.

For the slides from datacamp, check this:
https://github.com/DataSaramsh/Data-Science/tree/main/Datacamp%20R%20programming

Comments

Popular posts

Introduction to R programming | Datacamp - Introduction to R programming | R programming for data scientist | Programming language for Data Scientist