Pass a data.frame column name to a function
You can just use the column name directly:
df <- data.frame(A=1:10, B=2:11, C=3:12)
# Return the largest value found in the requested column(s) of `x`.
# `column` is a character vector of column names used directly for indexing.
fun1 <- function(x, column) {
  picked <- x[, column]
  max(picked)
}
fun1(df, "B")
fun1(df, c("B","A"))
There's no need to use substitute, eval, etc.
You can even pass the desired function as a parameter:
# Apply an arbitrary summary function `fn` to the requested column(s) of `x`.
# `column` is a character vector of column names; `fn` is any function that
# accepts the extracted column(s), e.g. max or min.
fun1 <- function(x, column, fn) {
  picked <- x[, column]
  fn(picked)
}
fun1(df, "B", max)
Alternatively, using [[
also works for selecting a single column at a time:
df <- data.frame(A=1:10, B=2:11, C=3:12)
# Return the maximum of a single column of `x`.
# `column` is one column name (or position); `[[` extracts it as a vector.
fun1 <- function(x, column) {
  col_values <- x[[column]]
  max(col_values)
}
fun1(df, "B")
Pass column names into a function using apply or map
The first issue is that you pass the name of the dataframe but not the dataframe itself. That's why you get the first error: you are trying to select
from a character string. To solve this issue, add the dataframe to the list you are looping over.
The second issue is that when you pass the column names as character string you have to tell dplyr
that these characters refer to columns in your data. This could be achieved by e.g. making use of the .data
pronoun.
Finally, instead of select + unlist + as.vector
you could simply use dplyr::pull
:
library(purrr)
library(dplyr)
# Return the row numbers (assigned before filtering) of the rows of `x` in
# which column `col1` equals "Jason" and column `col2` is missing.
# `col1` and `col2` are column names given as strings.
check_1 <- function(x, col1, col2) {
  indexed <- dplyr::mutate(
    dplyr::select(x, all_of(c(col1, col2))),
    row.index = row_number()
  )
  # Separate filter() conditions are combined with a logical AND.
  hits <- dplyr::filter(
    indexed,
    .data[[col1]] == "Jason",
    is.na(.data[[col2]])
  )
  dplyr::pull(hits, row.index)
}
# Return the row numbers (assigned before filtering) of the rows of `x` in
# which column `col1` lies in the closed range [3, 5] and column `col2` is
# missing. `col1` and `col2` are column names given as strings.
check_2 <- function(x, col1, col2) {
  indexed <- dplyr::mutate(
    dplyr::select(x, all_of(c(col1, col2))),
    row.index = row_number()
  )
  # Separate filter() conditions are combined with a logical AND.
  hits <- dplyr::filter(
    indexed,
    .data[[col1]] >= 3,
    .data[[col1]] <= 5,
    is.na(.data[[col2]])
  )
  dplyr::pull(hits, row.index)
}
# Each entry bundles a data frame (`df`), the check function to run on it
# (`fn`), and the column-name arguments for that function (`pars`).
# NOTE(review): both entries are named `df`, so the mapped result also has two
# elements called `df`; `$df` only reaches the first one. Confirm whether
# distinct names are wanted.
checks <-
list(df = list(df = df, fn = check_1, pars = list(col1 = "1.1.", col2 = "1.2.")),
df = list(df = df, fn = check_2, pars = list(col1 = "1.2.", col2 = "1.2.1.")))
# For every entry, call `fn` with `x` set to the data frame and the elements
# of `pars` spliced in as named arguments via `!!!`.
purrr::map(checks, ~ exec(.x$fn, x = .x$df, !!!.x$pars))
#> $df
#> [1] 8
#>
#> $df
#> [1] 5 6
dplyr - using column names as function arguments
This can work using the latest dplyr
syntax (as can be seen on github):
library(dplyr)
library(rlang)
# Sum one column of `df` within each group of column `a`.
#
# df      A data frame that contains a column `a` and the column named by
#         `colName`.
# colName Name of the column to sum, as a string.
#
# Returns a data frame with one row per value of `a` and the total in `tot`.
sumByColumn <- function(df, colName) {
  df %>%
    group_by(a) %>%
    # `.data[[colName]]` looks the name up in the data only, so a missing
    # column raises an error instead of silently picking up a variable of the
    # same name from the calling environment (which `!!sym(colName)` would do).
    summarize(tot = sum(.data[[colName]]))
}
sumByColumn(data, "b")
## A tibble: 2 x 2
# a tot
# <int> <int>
#1 1 24
#2 2 27
And an alternative way of specifying b
as a variable:
library(dplyr)
# Sum one column of `df` within each group of column `a`.
# `colName` is given as a bare (unquoted) column name; the embrace operator
# captures it and injects it into the summarize() call.
sumByColumn <- function(df, colName) {
  by_a <- group_by(df, a)
  summarize(by_a, tot = sum({{ colName }}))
}
sumByColumn(data, b)
## A tibble: 2 x 2
# a tot
# <int> <int>
#1 1 24
#2 2 27
Pass a data.frame column name to a function that uses purrr::map
This should work for you, it uses the tidy eval framework. This assumes col_name is a string.
# Apply min() to every element of the column named by the string `col_name`
# and return the results as a one-column tibble (`min2`, a list column).
testfunc2 <- function(df, col_name) {
  target <- rlang::sym(col_name)
  with_min <- mutate(df, out = map(!!target, min))
  tibble(min2 = with_min$out)
}
EDIT:
If you'd rather pass a bare column name to the function, instead of a string, use enquo
instead of sym
.
# Apply min() to every element of a column given as a bare (unquoted) name
# and return the results as a one-column tibble (`min2`, a list column).
testfunc2 <- function(df, col_name) {
  # enquo() captures the bare column name together with its environment.
  target <- enquo(col_name)
  with_min <- mutate(df, out = map(!!target, min))
  tibble(min2 = with_min$out)
}
passing data.frame column names to a function
As we pass strings as input, instead of using group_by we can use group_by_at, which takes strings for column names. The column to summarize can be converted to a symbol (sym) and then evaluated (!!):
# Summarise `value` per group of `hourcolumn`: 5% and 95% quantiles, median
# and row count, then tag every row with the quantile type and a metadata
# label.
#
# df               Data frame to summarise.
# hourcolumn       Name(s) of the grouping column(s), as a character vector.
# value            Name of the column to summarise, as a string.
# qtype            Quantile algorithm, passed to quantile(type = ).
# metadata_to_add  Value stored in the `metadata_to_add` column of the result.
createhrly_0595quants <- function(df, hourcolumn,
                                  value, qtype, metadata_to_add) {
  value <- rlang::sym(value)
  df %>%
    # Pass the character vector directly: `vars(hourcolumn)` would select a
    # column literally called "hourcolumn" if `df` happened to have one.
    group_by_at(hourcolumn) %>%
    summarize(
      `05%` = quantile(!!value, probs = 0.05, type = qtype),
      `95%` = quantile(!!value, probs = 0.95, type = qtype),
      median = median(!!value),
      n = n()
    ) %>%
    mutate(
      qtype = qtype,
      metadata_to_add = metadata_to_add
    )
}
createhrly_0595quants(df_x, "hrly_gmt",
"myvalues", 4, "version x.2")
Pass a data.table column name to a function using :=
set
is the data.table-idiomatic way. If you need to do other stuff like use by
, rlang
has a generic way to delay evaluation, which is to enexpr
the args (or enquo if you want them evaluated in the original environment) and !!
them inside an inject
with the expression you'd normally use.
library(rlang)
#> Warning: package 'rlang' was built under R version 4.1.2
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following object is masked from 'package:rlang':
#>
#> :=
dat <- data.table(x = 1:4,
y = 5:8)
# Add a column to the data.table `df`; the example call that follows shows
# the table modified without reassignment.
# `col_name` (bare name of the new column) and `expr` (expression computing
# it) are captured unevaluated with enexpr(), spliced into the `[` call with
# `!!`, and inject() then evaluates the rebuilt call.
new_column <- function(df, col_name, expr) {
col_name <- enexpr(col_name)
expr <- enexpr(expr)
inject(df[, !!col_name := !!expr])
}
new_column(dat, z, x + y)
dat
#> x y z
#> <int> <int> <int>
#> 1: 1 5 6
#> 2: 2 6 8
#> 3: 3 7 10
#> 4: 4 8 12
Created on 2022-02-25 by the reprex package (v2.0.1)
Or, similarly without rlang
library(data.table)
dat <- data.table(x = 1:4,
y = 5:8)
# Add a column to the data.table `df` using base R capture only; the example
# call that follows shows the table modified without reassignment.
# `col_name` is given as a bare name and `expr` as an unevaluated expression.
new_column <- function(df, col_name, expr) {
# Turn the bare column name into a string so it can be used as `(col_name)`.
col_name <- deparse(substitute(col_name))
# Keep the expression unevaluated; it is evaluated inside `[` via eval().
expr <- substitute(expr)
df[, (col_name) := eval(expr)]
}
new_column(dat, z, x + y)
dat
#> x y z
#> <int> <int> <int>
#> 1: 1 5 6
#> 2: 2 6 8
#> 3: 3 7 10
#> 4: 4 8 12
Created on 2022-02-25 by the reprex package (v2.0.1)
Using the `env` argument (the "programming on data.table" interface), first available in the dev version.
https://rdatatable.gitlab.io/data.table/news/index.html
library(data.table)
dat <- data.table(x = 1:4,
y = 5:8)
# Add a column to the data.table `df` through the `env` argument of `[`.
# `col_name` (bare name) and `expr` (unevaluated expression) are captured with
# substitute() and substituted for the placeholders of the same names in `j`.
new_column <- function(df, col_name, expr) {
  placeholders <- list(
    col_name = substitute(col_name),
    expr = substitute(expr)
  )
  df[, col_name := expr, env = placeholders]
}
new_column(dat, z, x + y)
dat
#> x y z
#> <int> <int> <int>
#> 1: 1 5 6
#> 2: 2 6 8
#> 3: 3 7 10
#> 4: 4 8 12
Created on 2022-03-01 by the reprex package (v2.0.1)
Or using match.call
library(data.table)
dat <- data.table(x = 1:4,
y = 5:8)
# Add a column to the data.table `df`, taking the unevaluated `col_name` and
# `expr` arguments from the matched call and handing them to `[` via `env`.
new_column <- function(df, col_name, expr) {
# Capture the call up front, in this function's own frame.
fun_call <- match.call()
df[, col_name := expr,
env = as.list(fun_call)[c('col_name', 'expr')]]
}
new_column(dat, z, x + y)
dat
#> x y z
#> <int> <int> <int>
#> 1: 1 5 6
#> 2: 2 6 8
#> 3: 3 7 10
#> 4: 4 8 12
Created on 2022-03-01 by the reprex package (v2.0.1)
How to rename a column of a dataframe within a function in R?
Pass the column names as strings if you want to use them. It is not possible to get the string "var1" back if you pass df$var1 as input to the function. You can use [[ (and [) to subset the data by column name inside the function.
# Demonstrate selecting columns of `data` by name inside a function.
#
# X, Y  Single column names (strings); each is extracted as a vector with `[[`.
# M     Character vector of column names, extracted as a data frame with `[`.
#       Defaults to no columns. The previous default `c(...)` was invalid
#       because the function has no `...` argument, so omitting `M` errored.
# data  Data frame to subset.
#
# Prints X, Y and M, and invisibly returns `data[M]`.
myfunction <- function(X, Y, M = character(0), data) {
  print(X)
  print(Y)
  print(M)
  val1 <- data[[X]] # column X as a vector
  val2 <- data[[Y]] # column Y as a vector
  val3 <- data[M]   # columns M as a data frame
  invisible(val3)
}
# Test function
myfunction("var1", "var2", c("var3", "var4"), df)
How to pass a dataframe column as an argument in a function using piping?
We can use non-standard evaluation with curly-curly ({{}}
)
library(dplyr)
library(rlang)
# Return the `number` top rows of `df` ranked by `column`.
# `column` is a bare (unquoted) column name, forwarded with the embrace
# operator.
fxtop <- function(df, number, column) {
  top_n(df, number, {{ column }})
}
and pass unquoted variable names
fxtop(df=df_econ, number=5, pop)
# date pce pop psavert uempmed unemploy
# <date> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 2014-12-01 12062 319746. 7.6 12.9 8717
#2 2015-01-01 12046 319929. 7.7 13.2 8903
#3 2015-02-01 12082. 320075. 7.9 12.9 8610
#4 2015-03-01 12158. 320231. 7.4 12 8504
#5 2015-04-01 12194. 320402. 7.6 11.5 8526
If you want to pass column name as string (quoted), we can use sym
with !!
# Return the `number` top rows of `df` ranked by `column`.
# `column` is a column name given as a string; it is converted to a symbol
# and unquoted into the top_n() call.
fxtop <- function(df, number, column) {
  ranking_col <- sym(column)
  top_n(df, number, !!ranking_col)
}
fxtop(df=df_econ, number=5, 'pop')
Passing Pandas dataframe columns as function arguments
fillna()
by default does not update the dataframe in place, instead expecting you to assign it back, such as
def Funct(colname):
    """Return a forward-filled copy of a pandas object.

    ``colname`` is the Series or DataFrame to fill (the example call that
    follows passes a whole DataFrame). The input is not modified; assign the
    result back to keep it.
    """
    # .ffill() is the equivalent of fillna(method='ffill'); the ``method``
    # argument of fillna() is deprecated in recent pandas releases.
    return colname.ffill()
df = Funct(df)
It's worth mentioning that fillna()
does have an argument inplace=
which you could set to True
if you need to update in place.
Related Topics
Extract Rows for the First Occurrence of a Variable in a Data Frame
How to Convert a Factor to Integer\Numeric Without Loss of Information
Combine a List of Data Frames into One Data Frame by Row
Unique Combination of All Elements from Two (Or More) Vectors
Replace Missing Values (Na) With Most Recent Non-Na by Group
Replace Specific Characters Within Strings
Convert Date-Time String to Class Date
Specify Custom Date Format For Colclasses Argument in Read.Table/Read.Csv
Choose the Top Five Values from Each Group in R
Reshaping Multiple Sets of Measurement Columns (Wide Format) into Single Columns (Long Format)
Add Regression Line Equation and R^2 on Graph
Strptime, As.Posixct and As.Date Return Unexpected Na
Shading a Kernel Density Plot Between Two Points.