dplyr - using column names as function arguments
This can work using the latest dplyr
syntax (as can be seen on github):
library(dplyr)
library(rlang)
#' Sum one column within each level of `a`.
#'
#' @param df A data frame that has a grouping column named `a`.
#' @param colName Name of the column to sum, given as a string.
#' @return One row per value of `a`, with the sum in `tot`.
sumByColumn <- function(df, colName) {
  # Convert the string to a symbol so it can be unquoted inside summarize().
  target <- sym(colName)
  grouped <- group_by(df, a)
  summarize(grouped, tot = sum(!!target))
}
sumByColumn(data, "b")
## A tibble: 2 x 2
# a tot
# <int> <int>
#1 1 24
#2 2 27
And an alternative way of specifying b
as a variable:
library(dplyr)
#' Sum one column within each level of `a`.
#'
#' @param df A data frame that has a grouping column named `a`.
#' @param colName Bare (unquoted) name of the column to sum.
#' @return One row per value of `a`, with the sum in `tot`.
sumByColumn <- function(df, colName) {
  # Capture the caller's expression as a quosure; it is unquoted with !! below.
  col_quo <- enquo(colName)
  grouped <- group_by(df, a)
  summarize(grouped, tot = sum(!!col_quo))
}
sumByColumn(data, b)
## A tibble: 2 x 2
# a tot
# <int> <int>
#1 1 24
#2 2 27
Passing column name as argument in function within pipes
You need to make use of non-standard evaluation, which is worth a quick read. In this case you most likely need to put `!!` in front of `var` (converted to a symbol with `sym()`) in the mutate line.
Here's the line:
# sym() turns var into a symbol and !! unquotes it, so mutate() reads that column
# (assumes var holds the column name as a string -- confirm against the caller).
mutate(new_variable = !!sym(var) * 100)
Using column names as arguments in existing functions in dplyr mutate()?
The str_detect
solution in the comments is good. You could also use purrr::map2_lgl
to make the grepl
act across rows.
library(dplyr)
library(purrr)
# Row by row, use ID as the pattern and Text as the string searched by grepl().
df %>%
  mutate(
    Present = map2_lgl(ID, Text, function(id, text) grepl(id, text))
  )
ID Text Present
1 A A/B TRUE
2 B C/D FALSE
3 C B/C TRUE
How to pass a column argument in a dplyr function in select?
We can use enquo
to convert it to a quosure and then evaluate with !!
#' Sum columns C, D and E within each level of a caller-chosen column.
#'
#' @param df A data frame containing the grouping column plus columns
#'   `C`, `D` and `E`.
#' @param column_name Bare (unquoted) name of the grouping column.
#' @return One row per group with the summed `C`, `D` and `E`.
slicedata <- function(df, column_name) {
  # Capture the bare column name as a quosure so it can be unquoted with !!.
  column_name <- enquo(column_name)
  df %>%
    select(!!column_name, C, D, E) %>%
    group_by(!!column_name) %>%
    # The original snippet was missing the closing parenthesis of summarise().
    summarise(C = sum(C), D = sum(D), E = sum(E))
}
slicedata(df, B)
How to pass column name as an arguments in the function
This is basically a question about programming with dplyr. To get rid of the hard-coded column names in your function and use x, y, z instead, you can make use of the `{{` curly-curly operator, as you did in the ggplot code, together with the special assignment operator `:=`. Additionally, instead of wrapping all your code inside ggplotly, you can proceed in steps: do the data wrangling, make your ggplot, and finally pass it to ggplotly:
library(plotly)
library(dplyr)
library(stringr)
#' Build an interactive stacked column chart of `y` by `x`, filled by `z`.
#'
#' The work is done in three steps: summarise the data, build the ggplot,
#' then convert it with ggplotly().
#'
#' @param data Input data frame.
#' @param x Bare column name used for the x axis and as first grouping column.
#' @param y Bare column name that is summed per group and drawn as bar height.
#' @param z Bare column name used as second grouping column and as fill.
#' @return Whatever `Removestring()` returns for the configured plotly object.
ggplot_common_function <- function(data, x, y, z) {
  # Step 1: sum y per (x, z); summarise() drops the last grouping level, so
  # total_sum is then computed across the rows that share the same x.
  summarised <- data %>%
    group_by({{ x }}, {{ z }}) %>%
    summarise({{ y }} := sum({{ y }})) %>%
    mutate(total_sum = sum({{ y }}))

  # Step 2: static ggplot. The text aesthetic carries the value that is
  # requested as the tooltip in step 3.
  plain_number <- function(value) format(value, scientific = FALSE)
  static_plot <- ggplot(
    summarised,
    mapping = aes({{ x }}, {{ y }}, text = paste(total_sum))
  ) +
    geom_col(aes(fill = {{ z }})) +
    theme_classic() +
    theme(
      axis.line.y = element_blank(),
      axis.ticks = element_blank(),
      legend.position = "bottom"
    ) +
    labs(x = "", y = "Agreements Values (In Lakhs)", fill = "") +
    theme(axis.title.y = element_text(size = 8)) +
    scale_fill_manual(values = c("#1F7A3F", "#70B821")) +
    scale_y_continuous(
      labels = plain_number,
      expand = expansion(mult = c(0, .3)),
      breaks = integer_breaks()
    )

  # Step 3: convert to plotly, then set the legend, axes and mode bar.
  legend_spec <- list(
    orientation = "h",
    x = 0.1,
    y = -0.2,
    font = list(family = "Arial", size = 10, color = "black")
  )
  hidden_buttons <- list(
    "sendDataToCloud", "autoScale2d", "resetScale2d", "toggleSpikelines",
    "hoverClosestCartesian", "hoverCompareCartesian", "zoom2d", "pan2d",
    "select2d", "lasso2d", "zoomIn2d", "zoomOut2d"
  )
  interactive_plot <- ggplotly(static_plot, tooltip = "text") %>%
    layout(legend = legend_spec, xaxis = x_labels, yaxis = y_labels) %>%
    config(displaylogo = FALSE, modeBarButtonsToRemove = hidden_buttons)

  Removestring(interactive_plot)
}
ggplot_common_function(data, m_year, Applications, status)
#> `summarise()` has grouped output by 'm_year'. You can override using the
#> `.groups` argument.
Pass column names as function arguments in formula
Just make a formula instead of wrapping them in sym
:
library(dplyr)
library(rstatix)
#' Run a t-test of `column ~ category` within each level of `subset`.
#'
#' @param table A data frame with a `subset` column plus the two columns
#'   named by `column` and `category`.
#' @param column Name of the response column, as a string.
#' @param category Name of the grouping column for the test, as a string.
#' @return The result of `t_test()` on the data grouped by `subset`.
do.function <- function(table, column, category) {
  # Paste the two names into "<column>~<category>" and parse it as a formula.
  test_formula <- as.formula(paste0(column, "~", category))
  by_subset <- group_by(table, subset)
  t_test(by_subset, test_formula)
}
# Example data: id and value have 100 entries and are recycled by data.frame()
# to match the 200-entry subset / categorical_value columns.
tmp <- data.frame(
  id = seq_len(100),
  value = rnorm(100),
  subset = rep(c("Set1", "Set2"), times = 2, each = 50),
  categorical_value = rep(c("A", "B"), times = 4, each = 25)
)
do.function(table= tmp, column = "value", category = "categorical_value")
# A tibble: 2 x 9
subset .y. group1 group2 n1 n2 statistic df p
* <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
1 Set1 value A B 50 50 0.484 94.3 0.63
2 Set2 value A B 50 50 -2.15 97.1 0.034
How to pass column names into a function dplyr
We can use the new quosures from the devel version of dplyr
(soon to be released in 0.6.0)
#' Count rows for every combination of two columns.
#'
#' @param var1 Bare name of the first column.
#' @param t_var Bare name of the second column; it is the outer grouping level.
#' @param dt A data frame containing both columns.
#' @return One row per (`t_var`, `var1`) combination with its row count in
#'   `count`.
summarise_data_categorical <- function(var1, t_var, dt) {
  # Capture both arguments as quosures.
  var1_quo <- enquo(var1)
  t_var_quo <- enquo(t_var)

  # one_of() selects by string, so turn the quosures into column-name strings.
  keep_cols <- c(quo_name(var1_quo), quo_name(t_var_quo))

  selected <- select(dt, one_of(keep_cols))
  grouped <- group_by(selected, !!t_var_quo, !!var1_quo)
  summarise(grouped, count = n())
}
summarise_data_categorical(lets, quartertype, fr)
#Source: local data frame [65 x 3]
#Groups: quartertype [?]
# quartertype lets count
# <int> <fctr> <int>
#1 1 A 1
#2 1 F 2
#3 1 G 2
#4 1 H 1
#5 1 I 1
#6 1 J 4
#7 1 M 3
#8 1 N 1
#9 1 P 1
#10 1 S 5
# ... with 55 more rows
`enquo` works similarly to `substitute` from base R: it takes the input arguments and converts them to quosures. `one_of` takes string arguments, so the quosures are converted to strings with `quo_name`. Inside `group_by`/`summarise`/`mutate` etc., we can evaluate a quosure by unquoting it (`UQ` or `!!`).
Quosures seem to work fine with `dplyr`, though we have some difficulty implementing the same with `tidyr` functions. The following code should work for the full example:
#' Count and frequency tables, plus a frequency line chart, for two columns.
#'
#' @param var1 Bare name of the column that ends up as the rows of the tables.
#' @param t_var Bare name of the column that is spread into the table columns.
#' @param dt A data frame containing both columns.
#' @return An unnamed list: count table, frequency table (percent within each
#'   `t_var` level), and a ggplot line chart of the frequencies.
summarise_data_categorical <- function(var1, t_var, dt) {
  var1_quo <- enquo(var1)
  t_var_quo <- enquo(t_var)
  # String versions of the column names for the helpers that take strings.
  var1_name <- quo_name(var1_quo)
  t_var_name <- quo_name(t_var_quo)

  # Row counts per (t_var, var1) combination.
  counts <- dt %>%
    select(one_of(var1_name, t_var_name)) %>%
    group_by(!!t_var_quo, !!var1_quo) %>%
    summarise(count = n())

  # summarise() leaves the data grouped by t_var, so sum(count) is the total
  # within each t_var level.
  freq_long <- counts %>%
    mutate(freq = round(count / sum(count), 3) * 100) %>%
    select(-count)

  freq_chart <- ggplot(freq_long) +
    geom_line(
      mapping = aes_string(x = t_var_name, y = "freq", colour = var1_name)
    )

  # NOTE(review): spread_() and aes_string() are deprecated in current
  # tidyr / ggplot2 -- consider pivot_wider() and aes() when updating.
  list(
    spread_(counts, t_var_name, "count"),
    spread_(freq_long, t_var_name, "freq"),
    freq_chart
  )
}
summarise_data_categorical(lets, quartertype, fr)
#[[1]]
# A tibble: 24 × 5
# lets `1` `2` `3` `4`
#* <fctr> <int> <int> <int> <int>
#1 A NA NA 1 2
#2 B 2 NA NA 1
#3 C 1 5 1 2
#4 E 1 1 NA NA
#5 G NA 1 2 2
#6 H 1 NA 1 1
#7 I NA 1 1 2
#8 J 2 1 1 1
#9 K 1 1 2 1
#10 L NA 2 NA NA
# ... with 14 more rows
#[[2]]
# A tibble: 24 × 5
# lets `1` `2` `3` `4`
#* <fctr> <dbl> <dbl> <dbl> <dbl>
#1 A NA NA 3.1 9.5
#2 B 8.7 NA NA 4.8
#3 C 4.3 20.8 3.1 9.5
#4 E 4.3 4.2 NA NA
#5 G NA 4.2 6.2 9.5
#6 H 4.3 NA 3.1 4.8
#7 I NA 4.2 3.1 9.5
#8 J 8.7 4.2 3.1 4.8
#9 K 4.3 4.2 6.2 4.8
#10 L NA 8.3 NA NA
## ... with 14 more rows
#[[3]]
Include column names as function input with dplyr
I've slightly updated your code to dplyr 1.0.0
and tidyr
. Then you can make use of the new dplyr
programming feature {{}}
to specify variables that are arguments of a function.
# Example data frame
df <- data.frame(
  ID = rep(1:5, each = 4),
  score = runif(20, min = 0, max = 100),
  location = rep(c("a", "b", "c", "d"), times = 5)
)
library(dplyr)
#' Spread a key/value column pair into wide format.
#'
#' @param .data A data frame in long format.
#' @param key_name Bare name of the column whose values become new column
#'   names.
#' @param value_name Bare name of the column whose values fill the new columns.
#' @return A tibble with one column per distinct value of `key_name`.
wide_fun <- function(.data, key_name, value_name) {
  .data %>%
    # Group by everything other than the value column.
    group_by(across(-{{ value_name }})) %>%
    # Index rows inside each group so repeated combinations stay on separate
    # rows when widened. row_number() is used instead of 1:n() because 1:n()
    # yields c(1, 0) for a zero-row input and makes mutate() fail.
    mutate(row_id = row_number()) %>%
    ungroup() %>%
    tidyr::pivot_wider(
      names_from = {{ key_name }},
      values_from = {{ value_name }}
    ) %>%
    # The helper index is only needed while pivoting.
    select(-row_id)
}
wide_fun(df, location, score)
#> # A tibble: 5 x 5
#> ID a b c d
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 90.8 38.9 28.7 39.0
#> 2 2 94.5 24.9 84.6 54.6
#> 3 3 61.1 97.2 12.2 57.7
#> 4 4 52.7 85.6 41.4 100.
#> 5 5 17.8 86.1 92.3 33.7
Created on 2020-09-11 by the reprex package (v0.3.0)
Edit
This function should also work with older versions of dplyr
:
library(dplyr)
#' Spread a key/value column pair into wide format (pre-`across()` syntax).
#'
#' @param .data A data frame in long format.
#' @param key_name Bare name of the column whose values become new column
#'   names.
#' @param value_name Bare name of the column whose values fill the new columns.
#' @return A tibble with one column per distinct value of `key_name`.
wide_fun_2 <- function(.data, key_name, value_name) {
  .data %>%
    # Group by everything other than the value column; ensym() captures the
    # argument as a symbol, which is then unquoted with !!.
    group_by_at(vars(-!!ensym(value_name))) %>%
    # Index rows inside each group so repeated combinations stay on separate
    # rows when widened. row_number() is used instead of 1:n() because 1:n()
    # yields c(1, 0) for a zero-row input and makes mutate() fail.
    mutate(row_id = row_number()) %>%
    ungroup() %>%
    tidyr::pivot_wider(
      names_from = !!ensym(key_name),
      values_from = !!ensym(value_name)
    ) %>%
    # The helper index is only needed while pivoting.
    select(-row_id)
}
df %>%
wide_fun_2(location, score)
A tibble: 5 x 5
ID a b c d
<int> <dbl> <dbl> <dbl> <dbl>
1 1 72.2 81.4 52.5 48.8
2 2 36.1 27.5 82.2 73.0
3 3 83.9 68.2 80.9 15.7
4 4 0.451 70.0 18.5 43.2
5 5 82.6 68.2 22.8 63.0
If you just provide the argument that specifies the column, you only need to deal with symbols and not quosures; that is why `ensym` is used here.
Related Topics
Check for Installed Packages Before Running Install.Packages()
Get Values and Positions to Label a Ggplot Histogram
Concatenate Unique Strings After Groupby in R
R - Group by Variable and Then Assign a Unique Id
Issue with Geom_Text When Using Position_Dodge
How to Access the Last Value in a Vector
How to Use the Strsplit Function with a Period
How to Increase the Space Between the Bars in a Bar Plot in Ggplot2
R Convert Zipcode or Lat/Long to County
R + Ggplot2 => Add Labels on Facet Pie Chart
How to Wait for a Keypress in R
Twitter, Roauth and Windows: Register Ok, But Certificate Verify Failed
Reduce PDF File Size of Plots by Filtering Hidden Objects
How to Create a Loop That Includes Both a Code Chunk and Text with Knitr in R
Add "Filename" Column to Table as Multiple Files Are Read and Bound
How to Get a Reversed, Log10 Scale in Ggplot2