Pass a Data.Frame Column Name to a Function

Pass a data.frame column name to a function

You can just use the column name directly:

df <- data.frame(A=1:10, B=2:11, C=3:12)
# Return the maximum over the requested column(s) of a data frame.
#
# x:      a data frame.
# column: a column name, or a character vector of column names.
fun1 <- function(x, column) {
  selected <- x[, column]
  max(selected)
}
fun1(df, "B")
fun1(df, c("B","A"))

There's no need to use substitute, eval, etc.

You can even pass the desired function as a parameter:

# Apply a caller-supplied function to the selected column(s) of a data frame.
#
# x:      a data frame.
# column: column name(s) used to index x.
# fn:     the function to call on the selection (e.g. max).
fun1 <- function(x, column, fn) {
  selected <- x[, column]
  fn(selected)
}
fun1(df, "B", max)

Alternatively, using [[ also works for selecting a single column at a time:

df <- data.frame(A=1:10, B=2:11, C=3:12)
# Return the maximum of a single column, extracted with `[[`.
#
# x:      a data frame.
# column: the name of one column.
fun1 <- function(x, column) {
  values <- x[[column]]
  max(values)
}
fun1(df, "B")

Pass column names into a function using apply or map

The first issue is that you pass the name of the dataframe but not the dataframe itself. That's why you get the first error: you are trying to select from a character string. To solve this issue, add the dataframe itself to the list you are looping over.

The second issue is that when you pass the column names as character string you have to tell dplyr that these characters refer to columns in your data. This could be achieved by e.g. making use of the .data pronoun.

Finally, instead of select + unlist + as.vector you could simply use dplyr::pull:

library(purrr)
library(dplyr)

# Return the row positions of `x` where column `col1` equals "Jason" and
# column `col2` is missing.
#
# x:    a data frame.
# col1: name (string) of the column compared against "Jason".
# col2: name (string) of the column that must be NA.
check_1 <- function(x, col1, col2) {
  x %>%
    dplyr::select(dplyr::all_of(c(col1, col2))) %>%
    # Record the original row position before any rows are dropped.
    dplyr::mutate(row.index = dplyr::row_number()) %>%
    # .data[[...]] tells dplyr the strings name columns; conditions separated
    # by commas are combined with &. is.na() is already logical, so no
    # `== TRUE` is needed.
    dplyr::filter(.data[[col1]] == "Jason", is.na(.data[[col2]])) %>%
    dplyr::pull(row.index)
}

# Return the row positions of `x` where column `col1` lies between 3 and 5
# (inclusive) and column `col2` is missing.
#
# x:    a data frame.
# col1: name (string) of the column tested against the 3..5 range.
# col2: name (string) of the column that must be NA.
check_2 <- function(x, col1, col2) {
  x %>%
    dplyr::select(dplyr::all_of(c(col1, col2))) %>%
    # Record the original row position before any rows are dropped.
    dplyr::mutate(row.index = dplyr::row_number()) %>%
    # Comma-separated conditions are combined with &; is.na() is already
    # logical, so no `== TRUE` is needed.
    dplyr::filter(
      .data[[col1]] >= 3,
      .data[[col1]] <= 5,
      is.na(.data[[col2]])
    ) %>%
    dplyr::pull(row.index)
}

# Each entry bundles the data frame to check, the check function to run, and
# the column names handed to that function.
checks <-
list(df = list(df = df, fn = check_1, pars = list(col1 = "1.1.", col2 = "1.2.")),
df = list(df = df, fn = check_2, pars = list(col1 = "1.2.", col2 = "1.2.1.")))

# Run every check: exec() calls .x$fn on .x$df and splices .x$pars in as
# named arguments via !!!.
purrr::map(checks, ~ exec(.x$fn, x = .x$df, !!!.x$pars))
#> $df
#> [1] 8
#>
#> $df
#> [1] 5 6

dplyr - using column names as function arguments

This can work using the latest dplyr syntax (as can be seen on github):

library(dplyr)
library(rlang)
# Sum the column named by the string `colName` within each group of `a`.
#
# df:      a data frame containing a column `a` and the column to sum.
# colName: the name of the column to sum, as a string.
sumByColumn <- function(df, colName) {
  # Convert the string to a symbol so it can be unquoted inside summarize().
  target <- sym(colName)
  grouped <- group_by(df, a)
  summarize(grouped, tot = sum(!!target))
}

sumByColumn(data, "b")
## A tibble: 2 x 2
# a tot
# <int> <int>
#1 1 24
#2 2 27

And an alternative way of specifying b as a variable:

library(dplyr)
# Sum the column given as a bare (unquoted) name within each group of `a`.
#
# df:      a data frame containing a column `a` and the column to sum.
# colName: the column to sum, passed unquoted (e.g. b).
sumByColumn <- function(df, colName) {
  grouped <- group_by(df, a)
  # {{ }} captures the caller's expression and unquotes it in one step.
  summarize(grouped, tot = sum({{ colName }}))
}

sumByColumn(data, b)
## A tibble: 2 x 2
# a tot
# <int> <int>
#1 1 24
#2 2 27

Pass a data.frame column name to a function that uses purrr::map

This should work for you; it uses the tidy eval framework and assumes col_name is a string.

# Map min() over each element of the column named by the string `col_name`
# and return the results as a one-column tibble called `min2`.
testfunc2 <- function(df, col_name) {
  # Convert the string to a symbol so it can be unquoted inside mutate().
  column <- rlang::sym(col_name)
  with_out <- mutate(df, out = map(!!column, min))
  tibble(min2 = with_out$out)
}

EDIT:

If you'd rather pass a bare column name to the function, instead of a string, use enquo instead of sym.

# Map min() over each element of the column passed as a bare (unquoted) name
# and return the results as a one-column tibble called `min2`.
testfunc2 <- function(df, col_name) {
  # enquo() captures the caller's unquoted column expression.
  column <- enquo(col_name)
  with_out <- mutate(df, out = map(!!column, min))
  tibble(min2 = with_out$out)
}

passing data.frame column names to a function

As we pass strings as input, instead of group_by we can use group_by_at, which takes strings for column names; the column to summarize can be converted to a symbol (sym) and unquoted (!!).

# Summarize `value` per group of `hourcolumn`: 5% and 95% quantiles, median
# and row count, then tag every result row with `qtype` and `metadata_to_add`.
#
# df:              a data frame.
# hourcolumn:      name(s) of the grouping column(s), as a character vector.
# value:           name of the column to summarize, as a string.
# qtype:           passed to quantile() as its `type` argument.
# metadata_to_add: value stored in the `metadata_to_add` output column.
createhrly_0595quants <- function(df, hourcolumn,
                                  value, qtype, metadata_to_add) {
  # Convert the string to a symbol so it can be unquoted with !! below.
  value <- rlang::sym(value)

  df %>%
    # Pass the character vector directly: group_by_at() accepts column names
    # as strings. Wrapping it in vars() would make `hourcolumn` a bare symbol,
    # which silently picks a column literally named "hourcolumn" if one exists.
    group_by_at(hourcolumn) %>%
    summarize(
      `05%` = quantile(!!value, probs = 0.05, type = qtype),
      `95%` = quantile(!!value, probs = 0.95, type = qtype),
      median = median(!!value),
      n = n()
    ) %>%
    mutate(
      qtype = qtype,
      metadata_to_add = metadata_to_add
    )
}



createhrly_0595quants(df_x, "hrly_gmt",
"myvalues", 4, "version x.2")

Pass a data.table column name to a function using :=

set is the data.table-idiomatic way. If you need to do other stuff like use by, rlang has a generic way to delay evaluation, which is to enexpr the args (or enquo if you want them evaluated in the original environment) and !! them inside an inject with the expression you'd normally use.

library(rlang)
#> Warning: package 'rlang' was built under R version 4.1.2
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following object is masked from 'package:rlang':
#>
#> :=

dat <- data.table(x = 1:4,
y = 5:8)

# Add a column to a data.table using `:=`, with the column name and the
# defining expression both passed unquoted, e.g. new_column(dat, z, x + y).
new_column <- function(df, col_name, expr) {
  # Capture both arguments as unevaluated expressions.
  target <- rlang::enexpr(col_name)
  rhs <- rlang::enexpr(expr)
  # Splice them into the data.table call before it is evaluated.
  rlang::inject(df[, !!target := !!rhs])
}

new_column(dat, z, x + y)

dat
#> x y z
#> <int> <int> <int>
#> 1: 1 5 6
#> 2: 2 6 8
#> 3: 3 7 10
#> 4: 4 8 12

Created on 2022-02-25 by the reprex package (v2.0.1)

Or, similarly without rlang

library(data.table)

dat <- data.table(x = 1:4,
y = 5:8)

# Add a column to a data.table by reference, without rlang.
# `col_name` and `expr` are passed unquoted, e.g. new_column(dat, z, x + y).
new_column <- function(df, col_name, expr) {
# Turn the unquoted column name into a character string.
col_name <- deparse(substitute(col_name))
# Capture the unevaluated expression supplied by the caller.
expr <- substitute(expr)
# Parentheses make `:=` use the string stored in col_name as the target name;
# eval(expr) computes the new values from the captured expression.
df[, (col_name) := eval(expr)]
}

new_column(dat, z, x + y)

dat
#> x y z
#> <int> <int> <int>
#> 1: 1 5 6
#> 2: 2 6 8
#> 3: 3 7 10
#> 4: 4 8 12

Created on 2022-02-25 by the reprex package (v2.0.1)

Using the "programming on data.table" interface (the env argument), which was only available in the development version at the time of writing.

https://rdatatable.gitlab.io/data.table/news/index.html

library(data.table)
dat <- data.table(x = 1:4,
y = 5:8)

# Add a column to a data.table through the `env` argument of `[`, with the
# column name and expression passed unquoted, e.g. new_column(dat, z, x + y).
new_column <- function(df, col_name, expr) {
  # Map the placeholder names used in the call below to what the caller typed.
  replacements <- list(
    col_name = substitute(col_name),
    expr = substitute(expr)
  )
  df[, col_name := expr, env = replacements]
}

new_column(dat, z, x + y)

dat
#> x y z
#> <int> <int> <int>
#> 1: 1 5 6
#> 2: 2 6 8
#> 3: 3 7 10
#> 4: 4 8 12

Created on 2022-03-01 by the reprex package (v2.0.1)

Or using match.call

library(data.table)
dat <- data.table(x = 1:4,
y = 5:8)

# Add a column to a data.table through the `env` argument of `[`, taking the
# unevaluated arguments from the matched call,
# e.g. new_column(dat, z, x + y).
new_column <- function(df, col_name, expr) {
  this_call <- match.call()
  # Keep only the two arguments that are substituted into the expression.
  replacements <- as.list(this_call)[c("col_name", "expr")]
  df[, col_name := expr, env = replacements]
}

new_column(dat, z, x + y)

dat
#> x y z
#> <int> <int> <int>
#> 1: 1 5 6
#> 2: 2 6 8
#> 3: 3 7 10
#> 4: 4 8 12

Created on 2022-03-01 by the reprex package (v2.0.1)

How to rename a column of a dataframe within a function in R?

Pass the column names as strings if you want to use them inside the function. It is not possible to recover the name "var1" if you pass df$var1 as input, because only the column's values reach the function. You can use [[ (and [) to subset the data by column name inside the function.

# Demonstrate looking up columns by name inside a function.
#
# X, Y: single column names (strings), extracted as vectors with `[[`.
# M:    character vector of column names, extracted as a data frame with `[`.
#       Defaults to NULL (no columns); the previous default `c(...)` referred
#       to a `...` the function does not have, so omitting M raised an error.
# data: the data frame to read from.
#
# Prints X, Y and M, and invisibly returns the `data[M]` subset.
myfunction <- function(X, Y, M = NULL, data) {
  print(X)
  print(Y)
  print(M)
  val1 <- data[[X]]
  val2 <- data[[Y]]
  val3 <- data[M]
  # The result stays invisible, as it was when the function ended on an
  # assignment.
  invisible(val3)
}

# Test function
myfunction("var1", "var2", c("var3", "var4"), df)

How to pass a dataframe column as an argument in a function using piping?

We can use non-standard evaluation with curly-curly ({{}})

library(dplyr)
library(rlang)

# Return the top `number` rows of `df`, ranked by a column passed as a bare
# (unquoted) name.
fxtop <- function(df, number, column) {
  # {{ }} forwards the caller's unquoted column to top_n().
  top_n(df, number, {{ column }})
}

and pass unquoted variable names

fxtop(df=df_econ, number=5, pop)

# date pce pop psavert uempmed unemploy
# <date> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 2014-12-01 12062 319746. 7.6 12.9 8717
#2 2015-01-01 12046 319929. 7.7 13.2 8903
#3 2015-02-01 12082. 320075. 7.9 12.9 8610
#4 2015-03-01 12158. 320231. 7.4 12 8504
#5 2015-04-01 12194. 320402. 7.6 11.5 8526

If you want to pass column name as string (quoted), we can use sym with !!

# Return the top `number` rows of `df`, ranked by the column named by the
# string `column`.
fxtop <- function(df, number, column) {
  # Convert the string to a symbol so it can be unquoted with !!.
  weight <- sym(column)
  top_n(df, number, !!weight)
}
fxtop(df=df_econ, number=5, 'pop')

Passing Pandas dataframe columns as function arguments

fillna() by default does not update the dataframe in place, instead expecting you to assign it back, such as

def Funct(colname):
    """Return a forward-filled copy of ``colname``.

    Uses ``ffill()``, the replacement for the deprecated
    ``fillna(method='ffill')``. Like ``fillna()``, it does not modify its
    input in place, so the caller must assign the result back.
    """
    return colname.ffill()

df = Funct(df)

It's worth mentioning that fillna() does have an argument inplace= which you could set to True if you need to update in place.



Related Topics



Leave a reply



Submit