The course provides a brief overview of R data structures followed by the following topics:
Loops in R
Vectorized functions (apply family of functions)
How R functions work
Function writing
The course provides a brief overview of R data structures followed by the following topics:
Loops in R
Vectorized functions (apply family of functions)
How R functions work
Function writing
Knowing how to write functions vital to custom analyses!
Function writing syntax
nameOfFunction <- function(input1, input2, ...) { #code #return something with return() #or returns last value }
var
## function (x, y = NULL, na.rm = FALSE, use) ## { ## if (missing(use)) ## use <- if (na.rm) ## "na.or.complete" ## else "everything" ## na.method <- pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs", ## "everything", "na.or.complete")) ## if (is.na(na.method)) ## stop("invalid 'use' argument") ## if (is.data.frame(x)) ## x <- as.matrix(x) ## else stopifnot(is.atomic(x)) ## if (is.data.frame(y)) ## y <- as.matrix(y) ## else stopifnot(is.atomic(y)) ## .Call(C_cov, x, y, na.method, FALSE) ## } ## <bytecode: 0x0000000019558700> ## <environment: namespace:stats>
colMeans
## function (x, na.rm = FALSE, dims = 1L) ## { ## if (is.data.frame(x)) ## x <- as.matrix(x) ## if (!is.array(x) || length(dn <- dim(x)) < 2L) ## stop("'x' must be an array of at least two dimensions") ## if (dims < 1L || dims > length(dn) - 1L) ## stop("invalid 'dims'") ## n <- prod(dn[id <- seq_len(dims)]) ## dn <- dn[-id] ## z <- if (is.complex(x)) ## .Internal(colMeans(Re(x), n, prod(dn), na.rm)) + (0+1i) * ## .Internal(colMeans(Im(x), n, prod(dn), na.rm)) ## else .Internal(colMeans(x, n, prod(dn), na.rm)) ## if (length(dn) > 1L) { ## dim(z) <- dn ## dimnames(z) <- dimnames(x)[-id] ## } ## else names(z) <- dimnames(x)[[dims + 1L]] ## z ## } ## <bytecode: 0x0000000018fe4f28> ## <environment: namespace:base>
mean
## function (x, ...) ## UseMethod("mean") ## <bytecode: 0x0000000017e89428> ## <environment: namespace:base>
mean.default
## function (x, trim = 0, na.rm = FALSE, ...) ## { ## if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) { ## warning("argument is not numeric or logical: returning NA") ## return(NA_real_) ## } ## if (na.rm) ## x <- x[!is.na(x)] ## if (!is.numeric(trim) || length(trim) != 1L) ## stop("'trim' must be numeric of length one") ## n <- length(x) ## if (trim > 0 && n) { ## if (is.complex(x)) ## stop("trimmed means are not defined for complex data") ## if (anyNA(x)) ## return(NA_real_) ## if (trim >= 0.5) ## return(stats::median(x, na.rm = FALSE)) ## lo <- floor(n * trim) + 1 ## hi <- n + 1 - lo ## x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi] ## } ## .Internal(mean(x)) ## } ## <bytecode: 0x0000000014e129e8> ## <environment: namespace:base>
Goal: Create a standardize()
function
Take vector of values
z-score idea
Formula: For value i,
\[\frac{(value[i]-mean(value))}{sd(value)}\]
nameOfFunction <- function(input1, input2, ...) { #code #return something with return() #or returns last value }
standardize <- function(vector) { return((vector - mean(vector)) / sd(vector)) }
data <- runif(5) data
## [1] 0.1985510 0.9055358 0.1268306 0.4333906 0.1967265
result <- standardize(data) result
## [1] -0.5428453 1.6671771 -0.7670422 0.1912591 -0.5485487
mean(result)
## [1] 2.218278e-17
sd(result)
## [1] 1
standardize <- function(vector, center, scale) { if (center) { vector <- vector - mean(vector) } if (scale) { vector <- vector / sd(vector) } return(vector) }
result <- standardize(data, center = TRUE, scale = TRUE) result
## [1] -0.5428453 1.6671771 -0.7670422 0.1912591 -0.5485487
result <- standardize(data, center = FALSE, scale = TRUE) result
## [1] 0.6206673 2.8306897 0.3964704 1.3547717 0.6149638
standardize <- function(vector, center = TRUE, scale = TRUE) { if (center) { vector <- vector - mean(vector) } if (scale) { vector <- vector / sd(vector) } return(vector) }
result <- standardize(data, center = TRUE, scale = TRUE) result
## [1] -0.5428453 1.6671771 -0.7670422 0.1912591 -0.5485487
result <- standardize(data) result
## [1] -0.5428453 1.6671771 -0.7670422 0.1912591 -0.5485487
Return more than 1 object by returning a list
Goal: Also return
mean()
of original datasd()
of original datastandardize <- function(vector, center = TRUE, scale = TRUE) { mean <- mean(vector) stdev <- sd(vector) if (center) { vector <- vector - mean } if (scale) { vector <- vector / stdev } return(list(vector, mean, stdev)) }
result <- standardize(data) result
## [[1]] ## [1] -0.5428453 1.6671771 -0.7670422 0.1912591 -0.5485487 ## ## [[2]] ## [1] 0.3722069 ## ## [[3]] ## [1] 0.3198993
result[[2]]
## [1] 0.3722069
standardize <- function(vector, center = TRUE, scale = TRUE) { mean <- mean(vector) stdev <- sd(vector) if (center) { vector <- vector - mean } if (scale) { vector <- vector / stdev } return(list(result = vector, mean = mean, sd = stdev)) }
result <- standardize(data, center = TRUE, scale = TRUE) result
## $result ## [1] -0.5428453 1.6671771 -0.7670422 0.1912591 -0.5485487 ## ## $mean ## [1] 0.3722069 ## ## $sd ## [1] 0.3198993
result$sd
## [1] 0.3198993
mean
## function (x, ...) ## UseMethod("mean") ## <bytecode: 0x0000000017e89428> ## <environment: namespace:base>
x <- c(rnorm(5), NA) mean(x)
## [1] NA
mean(x, na.rm = TRUE)
## [1] 0.4093447
plot
## function (x, y, ...) ## UseMethod("plot") ## <bytecode: 0x00000000161dd960> ## <environment: namespace:graphics>
plot(x = 1:5, y = (1:5)^2, type = "l", lty = "dashed")
plot(x = 1:5, y = (1:5)^2, type = "l", lty = "dashed")
plot.default
## function (x, y = NULL, type = "p", xlim = NULL, ylim = NULL, ## log = "", main = NULL, sub = NULL, xlab = NULL, ylab = NULL, ## ann = par("ann"), axes = TRUE, frame.plot = axes, panel.first = NULL, ## panel.last = NULL, asp = NA, xgap.axis = NA, ygap.axis = NA, ## ...) ## { ## localAxis <- function(..., col, bg, pch, cex, lty, lwd) Axis(...) ## localBox <- function(..., col, bg, pch, cex, lty, lwd) box(...) ## localWindow <- function(..., col, bg, pch, cex, lty, lwd) plot.window(...) ## localTitle <- function(..., col, bg, pch, cex, lty, lwd) title(...) ## xlabel <- if (!missing(x)) ## deparse(substitute(x)) ## ylabel <- if (!missing(y)) ## deparse(substitute(y)) ## xy <- xy.coords(x, y, xlabel, ylabel, log) ## xlab <- if (is.null(xlab)) ## xy$xlab ## else xlab ## ylab <- if (is.null(ylab)) ## xy$ylab ## else ylab ## xlim <- if (is.null(xlim)) ## range(xy$x[is.finite(xy$x)]) ## else xlim ## ylim <- if (is.null(ylim)) ## range(xy$y[is.finite(xy$y)]) ## else ylim ## dev.hold() ## on.exit(dev.flush()) ## plot.new() ## localWindow(xlim, ylim, log, asp, ...) ## panel.first ## plot.xy(xy, type, ...) ## panel.last ## if (axes) { ## localAxis(if (is.null(y)) ## xy$x ## else x, side = 1, gap.axis = xgap.axis, ...) ## localAxis(if (is.null(y)) ## x ## else y, side = 2, gap.axis = ygap.axis, ...) ## } ## if (frame.plot) ## localBox(...) ## if (ann) ## localTitle(main = main, sub = sub, xlab = xlab, ylab = ylab, ## ...) ## invisible() ## } ## <bytecode: 0x000000001c885160> ## <environment: namespace:graphics>
sd()
and mean()
sd
## function (x, na.rm = FALSE) ## sqrt(var(if (is.vector(x) || is.factor(x)) x else as.double(x), ## na.rm = na.rm)) ## <bytecode: 0x0000000018230908> ## <environment: namespace:stats>
mean.default
## function (x, trim = 0, na.rm = FALSE, ...) ## { ## if (!is.numeric(x) && !is.complex(x) && !is.logical(x)) { ## warning("argument is not numeric or logical: returning NA") ## return(NA_real_) ## } ## if (na.rm) ## x <- x[!is.na(x)] ## if (!is.numeric(trim) || length(trim) != 1L) ## stop("'trim' must be numeric of length one") ## n <- length(x) ## if (trim > 0 && n) { ## if (is.complex(x)) ## stop("trimmed means are not defined for complex data") ## if (anyNA(x)) ## return(NA_real_) ## if (trim >= 0.5) ## return(stats::median(x, na.rm = FALSE)) ## lo <- floor(n * trim) + 1 ## hi <- n + 1 - lo ## x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi] ## } ## .Internal(mean(x)) ## } ## <bytecode: 0x0000000014e129e8> ## <environment: namespace:base>
sd()
and mean()
standardize <- function(vector, center = TRUE, scale = TRUE, ...) { mean <- mean(vector, ...) stdev <- sd(vector, ...) if (center) { vector <- vector - mean } if (scale) { vector <- vector / stdev } return(list(result = vector, mean = mean, sd = stdev)) }
sData <- standardize(airquality$Ozone, na.rm = TRUE) sData$sd
## [1] 32.98788
sData$result
## [1] -0.03423409 -0.18580489 -0.91334473 -0.73145977 NA -0.42831817 ## [7] -0.57988897 -0.70114561 -1.03460136 NA -1.06491552 -0.79208809 ## [13] -0.94365889 -0.85271641 -0.73145977 -0.85271641 -0.24643321 -1.09522968 ## [19] -0.36768985 -0.94365889 -1.24680048 -0.94365889 -1.15585800 -0.30706153 ## [25] NA NA NA -0.57988897 0.08702254 2.20901373 ## [31] -0.15549073 NA NA NA NA NA ## [37] NA -0.39800401 NA 0.87519070 -0.09486241 NA ## [43] NA -0.57988897 NA NA -0.64051729 -0.15549073 ## [49] -0.67083145 -0.91334473 -0.88303057 NA NA NA ## [55] NA NA NA NA NA NA ## [61] NA 2.81529692 0.20827918 -0.30706153 NA 0.66299158 ## [67] -0.06454825 1.05707566 1.66335885 1.66335885 1.29958893 NA ## [73] -0.97397305 -0.45863233 NA -1.06491552 0.17796502 -0.21611905 ## [79] 0.57204910 1.11770398 0.63267742 -0.79208809 NA NA ## [85] 1.14801813 1.99681461 -0.67083145 0.29922166 1.20864645 0.23859334 ## [91] 0.66299158 0.51142078 -0.09486241 -1.00428721 -0.79208809 1.08738982 ## [97] -0.21611905 0.72361990 2.42121284 1.42084557 2.05744293 NA ## [103] NA 0.05670838 -0.42831817 0.69330574 NA -0.61020313 ## [109] 0.51142078 -0.57988897 -0.33737569 0.05670838 -0.64051729 -1.00428721 ## [115] NA 0.08702254 3.81566419 0.93581902 NA 1.02676150 ## [121] 2.29995620 1.26927477 1.29958893 1.63304469 1.08738982 0.93581902 ## [127] 1.48147389 0.14765086 -0.30706153 -0.67083145 -0.57988897 -0.64051729 ## [133] -0.54957481 0.05670838 -0.64051729 -0.42831817 -1.00428721 -0.88303057 ## [139] 0.11733670 -0.73145977 -0.88303057 -0.54957481 -0.79208809 -0.88303057 ## [145] -0.57988897 -0.18580489 -1.06491552 -0.85271641 -0.36768985 NA ## [151] -0.85271641 -0.73145977 -0.67083145
list(...)
f <- function(x, ...){ unnamed <- names(list(...)) unnamedVals <- list(...) modifyX <- x^2 return(list(newX = modifyX, elipNames = unnamed, elipValues = unnamedVals)) }
f(x = 10, a = 1, b = "hey there", num = 1:3)
## $newX ## [1] 100 ## ## $elipNames ## [1] "a" "b" "num" ## ## $elipValues ## $elipValues$a ## [1] 1 ## ## $elipValues$b ## [1] "hey there" ## ## $elipValues$num ## [1] 1 2 3
Function writing opens R up!
Syntax
nameOfFunction <- function(input1, input2, ...) { #code #return something with return() #or returns last value }
Can set defaults in function definition
Can return a named list
Can give unnamed arguments for use
Use of consistent naming schemes is important!
Objects
_
, and .
Use of consistent naming schemes is important!
Objects
_
, and .
Functions usually verbs, data objects usually nouns
Naming schemes
Also need to name inputs! (From R for Data Science)
Otherwise, consider matching names of arguments in existing R functions. For example, use na.rm
to determine if missing values should be removed.
cor()
functioncor
## function (x, y = NULL, use = "everything", method = c("pearson", ## "kendall", "spearman")) ## { ## na.method <- pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs", ## "everything", "na.or.complete")) ## if (is.na(na.method)) ## stop("invalid 'use' argument") ## method <- match.arg(method) ## if (is.data.frame(y)) ## y <- as.matrix(y) ## if (is.data.frame(x)) ## x <- as.matrix(x) ## if (!is.matrix(x) && is.null(y)) ## stop("supply both 'x' and 'y' or a matrix-like 'x'") ## if (!(is.numeric(x) || is.logical(x))) ## stop("'x' must be numeric") ## stopifnot(is.atomic(x)) ## if (!is.null(y)) { ## if (!(is.numeric(y) || is.logical(y))) ## stop("'y' must be numeric") ## stopifnot(is.atomic(y)) ## } ## Rank <- function(u) { ## if (length(u) == 0L) ## u ## else if (is.matrix(u)) { ## if (nrow(u) > 1L) ## apply(u, 2L, rank, na.last = "keep") ## else row(u) ## } ## else rank(u, na.last = "keep") ## } ## if (method == "pearson") ## .Call(C_cor, x, y, na.method, FALSE) ## else if (na.method %in% c(2L, 5L)) { ## if (is.null(y)) { ## .Call(C_cor, Rank(na.omit(x)), NULL, na.method, method == ## "kendall") ## } ## else { ## nas <- attr(na.omit(cbind(x, y)), "na.action") ## dropNA <- function(x, nas) { ## if (length(nas)) { ## if (is.matrix(x)) ## x[-nas, , drop = FALSE] ## else x[-nas] ## } ## else x ## } ## .Call(C_cor, Rank(dropNA(x, nas)), Rank(dropNA(y, ## nas)), na.method, method == "kendall") ## } ## } ## else if (na.method != 3L) { ## x <- Rank(x) ## if (!is.null(y)) ## y <- Rank(y) ## .Call(C_cor, x, y, na.method, method == "kendall") ## } ## else { ## if (is.null(y)) { ## ncy <- ncx <- ncol(x) ## if (ncx == 0) ## stop("'x' is empty") ## r <- matrix(0, nrow = ncx, ncol = ncy) ## for (i in seq_len(ncx)) { ## for (j in seq_len(i)) { ## x2 <- x[, i] ## y2 <- x[, j] ## ok <- complete.cases(x2, y2) ## x2 <- rank(x2[ok]) ## y2 <- rank(y2[ok]) ## r[i, j] <- if (any(ok)) ## .Call(C_cor, x2, y2, 1L, method == "kendall") ## else NA ## } ## } ## r <- r + t(r) - diag(diag(r)) ## rownames(r) <- colnames(x) ## colnames(r) <- colnames(x) ## r ## } ## else { ## if (length(x) == 0L || length(y) == 0L) ## stop("both 'x' and 'y' must be non-empty") ## matrix_result <- is.matrix(x) || is.matrix(y) ## if (!is.matrix(x)) ## x <- matrix(x, ncol = 1L) ## if (!is.matrix(y)) ## y <- matrix(y, ncol = 1L) ## ncx <- ncol(x) ## ncy <- ncol(y) ## r <- matrix(0, nrow = ncx, ncol = ncy) ## for (i in seq_len(ncx)) { ## for (j in seq_len(ncy)) { ## x2 <- x[, i] ## y2 <- y[, j] ## ok <- complete.cases(x2, y2) ## x2 <- rank(x2[ok]) ## y2 <- rank(y2[ok]) ## r[i, j] <- if (any(ok)) ## .Call(C_cor, x2, y2, 1L, method == "kendall") ## else NA ## } ## } ## rownames(r) <- colnames(x) ## colnames(r) <- colnames(y) ## if (matrix_result) ## r ## else drop(r) ## } ## } ## } ## <bytecode: 0x0000000020688328> ## <environment: namespace:stats>
Consider the inputs of the cor()
function
Apply it to iris
data…
cor(iris$Sepal.Length, iris$Sepal.Width)
## [1] -0.1175698
function (x, y = NULL, use = "everything", method = c("pearson", "kendall", "spearman"))
Consider the inputs of the cor()
function
Apply it to iris
data…
cor(x = iris$Sepal.Length, method = "spearman", iris$Sepal.Width)
## [1] -0.1667777
function (x, y = NULL, use = "everything", method = c("pearson", "kendall", "spearman"))
Consider the inputs of the cor()
function
Apply it to iris
data…
cor(x = iris$Sepal.Length, met = "spearman", iris$Sepal.Width)
## [1] -0.1667777
function (x, y = NULL, use = "everything", method = c("pearson", "kendall", "spearman"))
stop()
and switch()
To kick out of a function, you can use stop()
transposeDF <- function(df) { if(!is.data.frame(df)){ stop("I want a data frame only!") } t(df) } transposeDF(iris)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] ## Sepal.Length "5.1" "4.9" "4.7" "4.6" "5.0" "5.4" "4.6" ## Sepal.Width "3.5" "3.0" "3.2" "3.1" "3.6" "3.9" "3.4" ## Petal.Length "1.4" "1.4" "1.3" "1.5" "1.4" "1.7" "1.4" ## Petal.Width "0.2" "0.2" "0.2" "0.2" "0.2" "0.4" "0.3" ## Species "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" ## [,8] [,9] [,10] [,11] [,12] [,13] [,14] ## Sepal.Length "5.0" "4.4" "4.9" "5.4" "4.8" "4.8" "4.3" ## Sepal.Width "3.4" "2.9" "3.1" "3.7" "3.4" "3.0" "3.0" ## Petal.Length "1.5" "1.4" "1.5" "1.5" "1.6" "1.4" "1.1" ## Petal.Width "0.2" "0.2" "0.1" "0.2" "0.2" "0.1" "0.1" ## Species "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" ## [,15] [,16] [,17] [,18] [,19] [,20] [,21] ## Sepal.Length "5.8" "5.7" "5.4" "5.1" "5.7" "5.1" "5.4" ## Sepal.Width "4.0" "4.4" "3.9" "3.5" "3.8" "3.8" "3.4" ## Petal.Length "1.2" "1.5" "1.3" "1.4" "1.7" "1.5" "1.7" ## Petal.Width "0.2" "0.4" "0.4" "0.3" "0.3" "0.3" "0.2" ## Species "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" ## [,22] [,23] [,24] [,25] [,26] [,27] [,28] ## Sepal.Length "5.1" "4.6" "5.1" "4.8" "5.0" "5.0" "5.2" ## Sepal.Width "3.7" "3.6" "3.3" "3.4" "3.0" "3.4" "3.5" ## Petal.Length "1.5" "1.0" "1.7" "1.9" "1.6" "1.6" "1.5" ## Petal.Width "0.4" "0.2" "0.5" "0.2" "0.2" "0.4" "0.2" ## Species "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" ## [,29] [,30] [,31] [,32] [,33] [,34] [,35] ## Sepal.Length "5.2" "4.7" "4.8" "5.4" "5.2" "5.5" "4.9" ## Sepal.Width "3.4" "3.2" "3.1" "3.4" "4.1" "4.2" "3.1" ## Petal.Length "1.4" "1.6" "1.6" "1.5" "1.5" "1.4" "1.5" ## Petal.Width "0.2" "0.2" "0.2" "0.4" "0.1" "0.2" "0.2" ## Species "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" ## [,36] [,37] [,38] [,39] [,40] [,41] [,42] ## Sepal.Length "5.0" "5.5" "4.9" "4.4" "5.1" "5.0" "4.5" ## Sepal.Width "3.2" "3.5" "3.6" "3.0" "3.4" "3.5" "2.3" ## Petal.Length "1.2" "1.3" "1.4" "1.3" "1.5" "1.3" "1.3" ## Petal.Width "0.2" "0.2" "0.1" "0.2" "0.2" "0.3" "0.3" ## Species "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" ## [,43] [,44] [,45] [,46] [,47] [,48] [,49] ## Sepal.Length "4.4" "5.0" "5.1" "4.8" "5.1" "4.6" "5.3" ## Sepal.Width "3.2" "3.5" "3.8" "3.0" "3.8" "3.2" "3.7" ## Petal.Length "1.3" "1.6" "1.9" "1.4" "1.6" "1.4" "1.5" ## Petal.Width "0.2" "0.6" "0.4" "0.3" "0.2" "0.2" "0.2" ## Species "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" ## [,50] [,51] [,52] [,53] [,54] ## Sepal.Length "5.0" "7.0" "6.4" "6.9" "5.5" ## Sepal.Width "3.3" "3.2" "3.2" "3.1" "2.3" ## Petal.Length "1.4" "4.7" "4.5" "4.9" "4.0" ## Petal.Width "0.2" "1.4" "1.5" "1.5" "1.3" ## Species "setosa" "versicolor" "versicolor" "versicolor" "versicolor" ## [,55] [,56] [,57] [,58] [,59] ## Sepal.Length "6.5" "5.7" "6.3" "4.9" "6.6" ## Sepal.Width "2.8" "2.8" "3.3" "2.4" "2.9" ## Petal.Length "4.6" "4.5" "4.7" "3.3" "4.6" ## Petal.Width "1.5" "1.3" "1.6" "1.0" "1.3" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,60] [,61] [,62] [,63] [,64] ## Sepal.Length "5.2" "5.0" "5.9" "6.0" "6.1" ## Sepal.Width "2.7" "2.0" "3.0" "2.2" "2.9" ## Petal.Length "3.9" "3.5" "4.2" "4.0" "4.7" ## Petal.Width "1.4" "1.0" "1.5" "1.0" "1.4" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,65] [,66] [,67] [,68] [,69] ## Sepal.Length "5.6" "6.7" "5.6" "5.8" "6.2" ## Sepal.Width "2.9" "3.1" "3.0" "2.7" "2.2" ## Petal.Length "3.6" "4.4" "4.5" "4.1" "4.5" ## Petal.Width "1.3" "1.4" "1.5" "1.0" "1.5" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,70] [,71] [,72] [,73] [,74] ## Sepal.Length "5.6" "5.9" "6.1" "6.3" "6.1" ## Sepal.Width "2.5" "3.2" "2.8" "2.5" "2.8" ## Petal.Length "3.9" "4.8" "4.0" "4.9" "4.7" ## Petal.Width "1.1" "1.8" "1.3" "1.5" "1.2" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,75] [,76] [,77] [,78] [,79] ## Sepal.Length "6.4" "6.6" "6.8" "6.7" "6.0" ## Sepal.Width "2.9" "3.0" "2.8" "3.0" "2.9" ## Petal.Length "4.3" "4.4" "4.8" "5.0" "4.5" ## Petal.Width "1.3" "1.4" "1.4" "1.7" "1.5" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,80] [,81] [,82] [,83] [,84] ## Sepal.Length "5.7" "5.5" "5.5" "5.8" "6.0" ## Sepal.Width "2.6" "2.4" "2.4" "2.7" "2.7" ## Petal.Length "3.5" "3.8" "3.7" "3.9" "5.1" ## Petal.Width "1.0" "1.1" "1.0" "1.2" "1.6" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,85] [,86] [,87] [,88] [,89] ## Sepal.Length "5.4" "6.0" "6.7" "6.3" "5.6" ## Sepal.Width "3.0" "3.4" "3.1" "2.3" "3.0" ## Petal.Length "4.5" "4.5" "4.7" "4.4" "4.1" ## Petal.Width "1.5" "1.6" "1.5" "1.3" "1.3" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,90] [,91] [,92] [,93] [,94] ## Sepal.Length "5.5" "5.5" "6.1" "5.8" "5.0" ## Sepal.Width "2.5" "2.6" "3.0" "2.6" "2.3" ## Petal.Length "4.0" "4.4" "4.6" "4.0" "3.3" ## Petal.Width "1.3" "1.2" "1.4" "1.2" "1.0" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,95] [,96] [,97] [,98] [,99] ## Sepal.Length "5.6" "5.7" "5.7" "6.2" "5.1" ## Sepal.Width "2.7" "3.0" "2.9" "2.9" "2.5" ## Petal.Length "4.2" "4.2" "4.2" "4.3" "3.0" ## Petal.Width "1.3" "1.2" "1.3" "1.3" "1.1" ## Species "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" ## [,100] [,101] [,102] [,103] [,104] ## Sepal.Length "5.7" "6.3" "5.8" "7.1" "6.3" ## Sepal.Width "2.8" "3.3" "2.7" "3.0" "2.9" ## Petal.Length "4.1" "6.0" "5.1" "5.9" "5.6" ## Petal.Width "1.3" "2.5" "1.9" "2.1" "1.8" ## Species "versicolor" "virginica" "virginica" "virginica" "virginica" ## [,105] [,106] [,107] [,108] [,109] ## Sepal.Length "6.5" "7.6" "4.9" "7.3" "6.7" ## Sepal.Width "3.0" "3.0" "2.5" "2.9" "2.5" ## Petal.Length "5.8" "6.6" "4.5" "6.3" "5.8" ## Petal.Width "2.2" "2.1" "1.7" "1.8" "1.8" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,110] [,111] [,112] [,113] [,114] ## Sepal.Length "7.2" "6.5" "6.4" "6.8" "5.7" ## Sepal.Width "3.6" "3.2" "2.7" "3.0" "2.5" ## Petal.Length "6.1" "5.1" "5.3" "5.5" "5.0" ## Petal.Width "2.5" "2.0" "1.9" "2.1" "2.0" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,115] [,116] [,117] [,118] [,119] ## Sepal.Length "5.8" "6.4" "6.5" "7.7" "7.7" ## Sepal.Width "2.8" "3.2" "3.0" "3.8" "2.6" ## Petal.Length "5.1" "5.3" "5.5" "6.7" "6.9" ## Petal.Width "2.4" "2.3" "1.8" "2.2" "2.3" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,120] [,121] [,122] [,123] [,124] ## Sepal.Length "6.0" "6.9" "5.6" "7.7" "6.3" ## Sepal.Width "2.2" "3.2" "2.8" "2.8" "2.7" ## Petal.Length "5.0" "5.7" "4.9" "6.7" "4.9" ## Petal.Width "1.5" "2.3" "2.0" "2.0" "1.8" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,125] [,126] [,127] [,128] [,129] ## Sepal.Length "6.7" "7.2" "6.2" "6.1" "6.4" ## Sepal.Width "3.3" "3.2" "2.8" "3.0" "2.8" ## Petal.Length "5.7" "6.0" "4.8" "4.9" "5.6" ## Petal.Width "2.1" "1.8" "1.8" "1.8" "2.1" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,130] [,131] [,132] [,133] [,134] ## Sepal.Length "7.2" "7.4" "7.9" "6.4" "6.3" ## Sepal.Width "3.0" "2.8" "3.8" "2.8" "2.8" ## Petal.Length "5.8" "6.1" "6.4" "5.6" "5.1" ## Petal.Width "1.6" "1.9" "2.0" "2.2" "1.5" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,135] [,136] [,137] [,138] [,139] ## Sepal.Length "6.1" "7.7" "6.3" "6.4" "6.0" ## Sepal.Width "2.6" "3.0" "3.4" "3.1" "3.0" ## Petal.Length "5.6" "6.1" "5.6" "5.5" "4.8" ## Petal.Width "1.4" "2.3" "2.4" "1.8" "1.8" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,140] [,141] [,142] [,143] [,144] ## Sepal.Length "6.9" "6.7" "6.9" "5.8" "6.8" ## Sepal.Width "3.1" "3.1" "3.1" "2.7" "3.2" ## Petal.Length "5.4" "5.6" "5.1" "5.1" "5.9" ## Petal.Width "2.1" "2.4" "2.3" "1.9" "2.3" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,145] [,146] [,147] [,148] [,149] ## Sepal.Length "6.7" "6.7" "6.3" "6.5" "6.2" ## Sepal.Width "3.3" "3.0" "2.5" "3.0" "3.4" ## Petal.Length "5.7" "5.2" "5.0" "5.2" "5.4" ## Petal.Width "2.5" "2.3" "1.9" "2.0" "2.3" ## Species "virginica" "virginica" "virginica" "virginica" "virginica" ## [,150] ## Sepal.Length "5.9" ## Sepal.Width "3.0" ## Petal.Length "5.1" ## Petal.Width "1.8" ## Species "virginica"
stop()
and switch()
To kick out of a function, you can use stop()
transposeDF <- function(df) { if(!is.data.frame(df)){ stop("I want a data frame only!") } t(df) } transposeDF(as.matrix(iris))
## Error in transposeDF(as.matrix(iris)): I want a data frame only!
stop()
and switch()
Often you want to check on inputs, can use if()
or switch()
center <- function(vec, type, ...) { if(!is.vector(vec)){ stop("Not a vector my friend.") } switch(type, mean = vec - mean(vec), median = vec - median(vec), trimmed = vec - mean(vec, ...), stop("Mistake!") ) } center(c(1,1,1,6,10), "mean")
## [1] -2.8 -2.8 -2.8 2.2 6.2
stop()
and switch()
Often you want to check on inputs, can use if()
or switch()
center <- function(vec, type, ...) { if(!is.vector(vec)){ stop("Not a vector my friend.") } switch(type, mean = vec - mean(vec), median = vec - median(vec), trimmed = vec - mean(vec, ...), stop("Mistake!") ) } center(c(1,1,1,6,10), "median")
## [1] 0 0 0 5 9
stop()
and switch()
Often you want to check on inputs, can use if()
or switch()
center <- function(vec, type, ...) { if(!is.vector(vec)){ stop("Not a vector my friend.") } switch(type, mean = vec - mean(vec), median = vec - median(vec), trimmed = vec - mean(vec, ...), stop("Mistake!") ) } center(c(1,1,1,6,10), "trimmed", trim = 0.2)
## [1] -1.666667 -1.666667 -1.666667 3.333333 7.333333
stop()
and switch()
Often you want to check on inputs, can use if()
or switch()
center <- function(vec, type, ...) { if(!is.vector(vec)){ stop("Not a vector my friend.") } if(type == "mean"){ vec - mean(vec) } else if (type =="median"){ vec - median(vec) } else if (type =="trimmed") { vec - mean(vec, ...) } else { stop("Mistake!") } } center(c(1,1,1,6,10), "trimmed", trim = 0.2)
## [1] -1.666667 -1.666667 -1.666667 3.333333 7.333333
Two types of pipeable functions:
Two types of pipeable functions:
transformations naturally return the modified argument (df)
invisible()
Two types of pipeable functions:
printNumObs <- function(df) { cat("The number of observations in the data set is ", nrow(df), "\n", sep = "") } iris %>% printNumObs %>% summarize(mean = mean(Sepal.Length))
## The number of observations in the data set is 150
## Error in UseMethod("summarise"): no applicable method for 'summarise' applied to an object of class "NULL"
Two types of pipeable functions:
printNumObs <- function(df) { cat("The number of observations in the data set is ", nrow(df), "\n", sep = "") invisible(df) } iris %>% printNumObs %>% summarize(mean = mean(Sepal.Length))
## The number of observations in the data set is 150
## mean ## 1 5.843333
printNumObs <- function(df) { cat("The number of observations in the data set is ", nrow(df), "\n", sep = "") invisible(df) } temp <- printNumObs(iris)
## The number of observations in the data set is 150
str(temp)
## 'data.frame': 150 obs. of 5 variables: ## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... ## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... ## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... ## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... ## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
Speaking of pipes…
Infix function - a function that goes between arguments (as opposed to prefix)
mean(3:5) #prefix
## [1] 4
3 + 5 #+ is infix
## [1] 8
`+`(3, 5) #used as a prefix function
## [1] 8
Infix function - a function that goes between arguments (as opposed to prefix)
Common built-in infix functions include: ::
, $
, ^
, *
, /
, +
, -
, >
, >=
, <
, <=
, ==
, !=
, &
, |
, <-
Others use %symbol%
syntax: %*%
(matrix multiplication), %in%
(check if LHS value(s) is(are) in RHS value(s)
Can call like prefix functions
cars <- as.matrix(cars) t(cars) %*% cars
## speed dist ## speed 13228 38482 ## dist 38482 124903
`%*%`(t(cars), cars)
## speed dist ## speed 13228 38482 ## dist 38482 124903
Infix function - a function that goes between arguments (as opposed to prefix)
Can write your own!
`%+%` <- function(a, b) paste0(a, b) "new" %+% " string"
## [1] "new string"
+
and other operators: just don’t do that…Infix function - a function that goes between arguments (as opposed to prefix)
Can use precendence rules to save typing
x <- y <- 2 `<-`(x, `<-`(y, 2)) #interpretation of above code! x <- y = 2# error! <- has higher precedence `=`(`<-`(x, y), 2) #interpretation of above code! x = y <- 2 # this will work! `=`(x, `<-`(y, 2)) #interpretation of above code!
Infix function - a function that goes between arguments (as opposed to prefix)
Can use precendence rules to save typing
`%-%` <- function(a, b) { paste0("(", a, " %-% ", b, ")") } "a" %-% "b" %-% "c" #user defined infix are evaluated left to right!
## [1] "((a %-% b) %-% c)"
`%-%`(`%-%`("a", "b"), "c") #interpretation of above code!
## [1] "((a %-% b) %-% c)"
R objects live in an environment
You can think of it as a “bag of names” that point to things in memory
Like a list but with no ordering (and other things)
Environments have ‘parents’ and ‘children’
Global environment is where our created function objects live
Search path has all packages loaded in (most recent package is the parent of the global environment)
base environment is the child of the ultimate ancestor, the empty environment
Environments have ‘parents’ and ‘children’
Global environment is where our created function objects live
Search path has all packages loaded in (most recent package is the parent of the global environment)
base environment is the child of the ultimate ancestor, the empty environment
## [1] ".GlobalEnv" "package:knitr" "package:forcats" ## [4] "package:stringr" "package:dplyr" "package:purrr" ## [7] "package:readr" "package:tidyr" "package:tibble" ## [10] "package:ggplot2" "package:tidyverse" "package:stats" ## [13] "package:graphics" "package:grDevices" "package:utils" ## [16] "package:datasets" "package:methods" "Autoloads" ## [19] "package:base"
library(pryr) #install if needed x <- "hey" where("x")
## <environment: R_GlobalEnv>
where("mean")
## <environment: base>
When you call a function, it creates temporary function environments
This is why variables in functions don’t overwrite things (mean still exists as is!)
f <- function(x){ mean <- paste0(x, " is a value") mean } f(1:3)
## [1] "1 is a value" "2 is a value" "3 is a value"
mean
## function (x, ...) ## UseMethod("mean") ## <bytecode: 0x0000000017e89428> ## <environment: namespace:base>
g <- function(x) { if (!exists("a", inherits = FALSE)) { message("Defining a") a <- 1 } else { a <- a + 1 } a }
#Running the function doesn't create #the a object in our global environment! g(10)
## Defining a
## [1] 1
g(10)
## Defining a
## [1] 1
When you call a function, it creates temporary function environments
This is why variables can have the same name in a function and in your global environment
y <- 10 f <- function(x){ y <- 1 x + y } f(15)
## [1] 16
y <- 1 f <- function(x){ x + y } f(10)
## [1] 11