Dplyr - Using Column Names as Function Arguments

dplyr - using column names as function arguments

This can work using the latest dplyr syntax (as can be seen on github):

library(dplyr)
library(rlang)
# Sum the column named by the string `colName` within each group of `a`.
#
# df:      data frame holding a grouping column `a` and the column to total.
# colName: name of the column to total, given as a character string ("b").
# Returns the summarised table with one `tot` value per group of `a`.
sumByColumn <- function(df, colName) {
  # sym() turns the string into a symbol; `!!` then splices it into sum().
  target <- sym(colName)
  by_a <- group_by(df, a)
  summarize(by_a, tot = sum(!!target))
}

sumByColumn(data, "b")
## A tibble: 2 x 2
# a tot
# <int> <int>
#1 1 24
#2 2 27

And an alternative way of specifying b as a variable:

library(dplyr)
# Sum a bare (unquoted) column within each group of `a`.
#
# df:      data frame holding a grouping column `a` and the column to total.
# colName: the column to total, written as a bare name (b, not "b").
# Returns the summarised table with one `tot` value per group of `a`.
sumByColumn <- function(df, colName) {
  # Capture what the caller typed as a quosure so it can be unquoted below.
  col_quo <- enquo(colName)
  by_a <- group_by(df, a)
  summarize(by_a, tot = sum(!!col_quo))
}

sumByColumn(data, b)
## A tibble: 2 x 2
# a tot
# <int> <int>
#1 1 24
#2 2 27

Passing column name as argument in function within pipes

You need to make use of non-standard evaluation, which is worth a quick read. In this case you most likely need to convert var to a symbol with sym() and unquote it with !! in the mutate line.

Here's the line:

mutate(new_variable = !!sym(var) * 100)

Using column names as arguments in existing functions in dplyr mutate()?

The str_detect solution in the comments is good. You could also use purrr::map2_lgl to make the grepl act across rows.

library(dplyr)
library(purrr)

df %>%
mutate(Present = map2_lgl(ID, Text, grepl))

ID Text Present
1 A A/B TRUE
2 B C/D FALSE
3 C B/C TRUE

How to pass a column argument in a dplyr function in select?

We can use enquo to convert it to a quosure and then evaluate with !!

# Total columns C, D and E within each level of a caller-chosen column.
#
# df:          data frame containing columns C, D, E and the grouping column.
# column_name: the grouping column, written as a bare (unquoted) name.
# Returns the summarised table: one row per level of `column_name` with the
# sums of C, D and E.
slicedata <- function(df, column_name) {
  # Capture the bare column name as a quosure; `!!` unquotes it below.
  column_name <- enquo(column_name)
  df %>%
    select(!!column_name, C, D, E) %>%
    group_by(!!column_name) %>%
    # The summarise() call was missing its closing parenthesis.
    summarise(C = sum(C), D = sum(D), E = sum(E))
}

slicedata(df, B)

How to pass a column name as an argument in the function

This is basically a question related to programming in dplyr. To achieve your desired result and get rid of the hard-coded column names in your function, using x, y and z instead, you could make use of the curly-curly operator {{ }}, as you did in the ggplot code, together with the special assignment operator :=. Additionally, instead of wrapping all your code inside ggplotly, you could proceed in steps: do the data wrangling, make your ggplot, and finally pass it to ggplotly:

library(plotly)
library(dplyr)
library(stringr)

# Build a stacked bar chart of `y` by `x`, filled by `z`, and convert it to a
# plotly widget.
#
# data:    data frame holding the three columns.
# x, y, z: bare column names for the x axis, the summed value and the fill.
# Relies on x_labels, y_labels, integer_breaks() and Removestring() being
# defined elsewhere; the result is whatever Removestring() returns.
ggplot_common_function <- function(data, x, y, z) {
  # Step 1: sum `y` per (x, z) pair, then add the sum over the groups that
  # remain after summarise() (used as the tooltip text).
  totals <- data %>%
    group_by({{ x }}, {{ z }}) %>%
    summarise({{ y }} := sum({{ y }})) %>%
    mutate(total_sum = sum({{ y }}))

  # Step 2: the static ggplot.
  plain_number <- function(value) format(value, scientific = FALSE)
  bar_plot <- ggplot(
    totals,
    mapping = aes({{ x }}, {{ y }}, text = paste(total_sum))
  ) +
    geom_col(aes(fill = {{ z }})) +
    theme_classic() +
    theme(
      axis.line.y = element_blank(),
      axis.ticks = element_blank(),
      legend.position = "bottom",
      axis.title.y = element_text(size = 8)
    ) +
    labs(x = "", y = "Agreements Values (In Lakhs)", fill = "") +
    scale_fill_manual(values = c("#1F7A3F", "#70B821")) +
    scale_y_continuous(
      labels = plain_number,
      expand = expansion(mult = c(0, .3)),
      breaks = integer_breaks()
    )

  # Step 3: hand the plot to plotly and tidy up the legend and mode bar.
  legend_style <- list(
    orientation = "h",
    x = 0.1,
    y = -0.2,
    font = list(family = "Arial", size = 10, color = "black")
  )
  hidden_buttons <- list(
    "sendDataToCloud", "autoScale2d", "resetScale2d", "toggleSpikelines",
    "hoverClosestCartesian", "hoverCompareCartesian", "zoom2d", "pan2d",
    "select2d", "lasso2d", "zoomIn2d", "zoomOut2d"
  )
  widget <- ggplotly(bar_plot, tooltip = "text") %>%
    layout(legend = legend_style, xaxis = x_labels, yaxis = y_labels) %>%
    config(displaylogo = FALSE, modeBarButtonsToRemove = hidden_buttons)

  Removestring(widget)
}

ggplot_common_function(data, m_year, Applications, status)
#> `summarise()` has grouped output by 'm_year'. You can override using the
#> `.groups` argument.

Sample Image

Pass column names as function arguments in formula

Just make a formula instead of wrapping them in sym:

library(dplyr)
library(rstatix)
# Run t_test() of `column` against `category` within each level of `subset`.
#
# table:    data frame with a `subset` column plus the two named columns.
# column:   name of the response column, as a string.
# category: name of the grouping column, as a string.
# Returns the result of t_test() on the grouped table.
do.function <- function(table, column, category) {
  # Assemble the "response~group" text first, then turn it into a formula.
  formula_text <- paste0(column, "~", category)
  test_formula <- as.formula(formula_text)

  by_subset <- group_by(table, subset)
  t_test(by_subset, test_formula)
}
tmp = data.frame(id=seq(1:100), value = rnorm(100), subset = rep(c("Set1", "Set2"),each=50,2),categorical_value= rep(c("A", "B"),each=25,4))
do.function(table= tmp, column = "value", category = "categorical_value")
# A tibble: 2 x 9
subset .y. group1 group2 n1 n2 statistic df p
* <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
1 Set1 value A B 50 50 0.484 94.3 0.63
2 Set2 value A B 50 50 -2.15 97.1 0.034

How to pass column names into a function dplyr

We can use the new quosures from the devel version of dplyr (soon to be released in 0.6.0)

# Count rows for every combination of two columns.
#
# var1:  bare name of the categorical column.
# t_var: bare name of the second grouping column.
# dt:    data frame containing both columns.
# Returns the grouped counts in a `count` column.
summarise_data_categorical <- function(var1, t_var, dt) {
  # Quosures for the tidy-eval verbs, plain strings for one_of().
  var1_quo <- enquo(var1)
  t_var_quo <- enquo(t_var)
  var1_name <- quo_name(var1_quo)
  t_var_name <- quo_name(t_var_quo)

  kept <- select(dt, one_of(var1_name, t_var_name))
  grouped <- group_by(kept, !!t_var_quo, !!var1_quo)
  summarise(grouped, count = n())
}
summarise_data_categorical(lets, quartertype, fr)
#Source: local data frame [65 x 3]
#Groups: quartertype [?]

# quartertype lets count
# <int> <fctr> <int>
#1 1 A 1
#2 1 F 2
#3 1 G 2
#4 1 H 1
#5 1 I 1
#6 1 J 4
#7 1 M 3
#8 1 N 1
#9 1 P 1
#10 1 S 5
# ... with 55 more rows

enquo does a job similar to substitute from base R: it takes the input arguments and converts them to quosures. one_of takes string arguments, so the quosures are converted to strings with quo_name. Inside group_by/summarise/mutate etc., we can evaluate a quosure by unquoting it (UQ or !!).


Quosures seem to work fine with dplyr, though we had some difficulty implementing the same with tidyr functions. The following should work for the full code:

 # Build a count table, a percentage table and a line chart for two columns.
 #
 # var1:  bare name of the categorical column.
 # t_var: bare name of the column whose values become the spread-out columns
 #        and the chart's x axis.
 # dt:    data frame containing both columns.
 # Returns an unnamed list: count table, frequency table, frequency chart.
 summarise_data_categorical <- function(var1, t_var, dt) {
   # Quosures for the tidy-eval verbs, plain strings for the string-based
   # helpers (one_of, spread_, aes_string).
   var1_quo <- enquo(var1)
   t_var_quo <- enquo(t_var)
   var1_name <- quo_name(var1_quo)
   t_var_name <- quo_name(t_var_quo)

   # Row counts per (t_var, var1) combination.
   counts <- dt %>%
     select(one_of(var1_name, t_var_name)) %>%
     group_by(!!t_var_quo, !!var1_quo) %>%
     summarise(count = n())

   # Counts as a percentage of the total within the remaining grouping.
   shares <- counts %>%
     mutate(freq = round(count / sum(count), 3) * 100) %>%
     select(-count)

   # Wide versions: one column per value of t_var.
   count_table <- spread_(counts, t_var_name, "count")
   freq_table <- spread_(shares, t_var_name, "freq")

   freq_chart <- ggplot(shares) +
     geom_line(
       mapping = aes_string(x = t_var_name, y = "freq", colour = var1_name)
     )

   list(count_table, freq_table, freq_chart)
 }
summarise_data_categorical(lets, quartertype, fr)
#[[1]]
# A tibble: 24 × 5
# lets `1` `2` `3` `4`
#* <fctr> <int> <int> <int> <int>
#1 A NA NA 1 2
#2 B 2 NA NA 1
#3 C 1 5 1 2
#4 E 1 1 NA NA
#5 G NA 1 2 2
#6 H 1 NA 1 1
#7 I NA 1 1 2
#8 J 2 1 1 1
#9 K 1 1 2 1
#10 L NA 2 NA NA
# ... with 14 more rows

#[[2]]
# A tibble: 24 × 5
# lets `1` `2` `3` `4`
#* <fctr> <dbl> <dbl> <dbl> <dbl>
#1 A NA NA 3.1 9.5
#2 B 8.7 NA NA 4.8
#3 C 4.3 20.8 3.1 9.5
#4 E 4.3 4.2 NA NA
#5 G NA 4.2 6.2 9.5
#6 H 4.3 NA 3.1 4.8
#7 I NA 4.2 3.1 9.5
#8 J 8.7 4.2 3.1 4.8
#9 K 4.3 4.2 6.2 4.8
#10 L NA 8.3 NA NA
## ... with 14 more rows

#[[3]]

Sample Image

Include column names as function input with dplyr

I've slightly updated your code to dplyr 1.0.0 and tidyr. Then you can make use of the new dplyr programming feature {{}} to specify variables that are arguments of a function.

# Example data frame
df <- data.frame("ID" = rep(1:5, each = 4), "score" = runif(20, 0, 100), "location" = rep(c("a", "b", "c", "d"), 5))
library(dplyr)
# Reshape a long table to wide form, one column per value of `key_name`.
#
# .data:      data frame in long form.
# key_name:   bare name of the column whose values become column names.
# value_name: bare name of the column whose values fill the new columns.
# Returns the widened table without the helper `row_id` column.
wide_fun <- function(.data, key_name, value_name) {
  # Number the rows inside every combination of the non-value columns.
  indexed <- .data %>%
    group_by(across(-{{ value_name }})) %>%
    mutate(row_id = seq_len(n())) %>%
    ungroup()

  widened <- tidyr::pivot_wider(
    indexed,
    names_from = {{ key_name }},
    values_from = {{ value_name }}
  )

  # The row index was only needed for the reshape.
  select(widened, -row_id)
}

wide_fun(df, location, score)
#> # A tibble: 5 x 5
#> ID a b c d
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 90.8 38.9 28.7 39.0
#> 2 2 94.5 24.9 84.6 54.6
#> 3 3 61.1 97.2 12.2 57.7
#> 4 4 52.7 85.6 41.4 100.
#> 5 5 17.8 86.1 92.3 33.7

Created on 2020-09-11 by the reprex package (v0.3.0)

Edit

This function should also work with older versions of dplyr:

library(dplyr)
# Variant of the long-to-wide reshape that uses ensym() and `!!` instead of
# the curly-curly operator.
#
# .data:      data frame in long form.
# key_name:   bare name of the column whose values become column names.
# value_name: bare name of the column whose values fill the new columns.
# Returns the widened table without the helper `row_id` column.
wide_fun_2 <- function(.data, key_name, value_name) {
  # Capture both arguments as symbols once, then unquote them where needed.
  key_sym <- ensym(key_name)
  value_sym <- ensym(value_name)

  # Number the rows inside every combination of the non-value columns.
  indexed <- .data %>%
    group_by_at(vars(-!!value_sym)) %>%
    mutate(row_id = seq_len(n())) %>%
    ungroup()

  widened <- tidyr::pivot_wider(
    indexed,
    names_from = !!key_sym,
    values_from = !!value_sym
  )

  # The row index was only needed for the reshape.
  select(widened, -row_id)
}

df %>%
wide_fun_2(location, score)
A tibble: 5 x 5
ID a b c d
<int> <dbl> <dbl> <dbl> <dbl>
1 1 72.2 81.4 52.5 48.8
2 2 36.1 27.5 82.2 73.0
3 3 83.9 68.2 80.9 15.7
4 4 0.451 70.0 18.5 43.2
5 5 82.6 68.2 22.8 63.0

If you just provide the argument that specifies the column, you only need to deal with symbols and not quosures, therefore you need to use ensym.



Related Topics



Leave a reply



Submit