Dplyr Issues When Using Group_By(Multiple Variables)

dplyr issues when using group_by(multiple variables)

Taking Dickoa's answer one step further -- as Hadley says, "summarise peels off a single layer of grouping". It peels off the grouping levels in the reverse of the order in which you applied them, so you can just use

# First roll-up: total wt for every cyl/gear combination. summarise() removes
# only the last grouping level (gear), so the result is still grouped by cyl.
wt_by_cyl_gear <- summarise(group_by(mtcars, cyl, gear), newvar = sum(wt))

# Second roll-up works on the remaining cyl grouping: per-cyl total plus 5.
summarise(wt_by_cyl_gear, newvar2 = sum(newvar) + 5)

Note that this will give a different answer if you use group_by(gear, cyl) in the second line.

And to get your first attempt working:

# Step 1: total wt within each cyl/gear combination.
df1 <- summarise(group_by(mtcars, cyl, gear), newvar = sum(wt))

# Step 2: regroup explicitly by cyl, then add 5 to each per-cyl total.
df2 <- summarise(group_by(df1, cyl), newvar2 = sum(newvar) + 5)

dplyr group by multiple variables summarise by multiple variables

library(dplyr)
# Going through as.character() first means that, if Inv_Total is a factor,
# its labels (not its internal level codes) are what get turned into numbers.
vmp_numeric <- mutate(vmp, Inv_Total = as.numeric(as.character(Inv_Total)))

# One row per Priority/LOS pair: the row count and the summed Inv_Total.
summarise(
  group_by(vmp_numeric, Priority, LOS),
  sr_count = n(),
  inv_total = sum(Inv_Total)
)

dplyr: Handing over multiple variables to group_by in a function

If we need to pass multiple grouping variables, pass them as a character vector of column names and make use of group_by_at

#' Sum a column within groups given as column-name strings.
#'
#' @param mydf A data frame.
#' @param grp Character vector naming the grouping column(s).
#' @param xvar Unquoted column to sum.
#' @return The result of `summarise()`: one row per group, with the total in
#'   a column named `sum`.
myfunction <- function(mydf, grp, xvar) {
  grouped <- group_by_at(mydf, grp)
  summarise(grouped, sum = sum({{ xvar }}))
}



# One grouping column, supplied as a string.
myfunction(mydf = mtcars, grp = "am", xvar = mpg)
# A tibble: 2 x 2
#     am   sum
#  <dbl> <dbl>
#1     0  326.
#2     1  317.
# Several grouping columns, supplied as a character vector.
myfunction(mydf = mtcars, grp = c("am", "gear"), xvar = mpg)
# A tibble: 4 x 3
# Groups:   am [2]
#     am  gear   sum
#  <dbl> <dbl> <dbl>
#1     0     3 242. 
#2     0     4  84.2
#3     1     4 210. 
#4     1     5 107.

In case we want to pass the groups as shown in the OP's post, one way is to capture them with enexpr and splice them into group_by (!!!)

#' Sum a column within groups given as bare column names.
#'
#' @param mydf A data frame.
#' @param grp Grouping specification, captured unevaluated: either a single
#'   bare column name (`am`) or several wrapped in `c()` (`c(am, gear)`).
#' @param xvar Unquoted column to sum.
#' @return The result of `summarise()`: one row per group, with the total in
#'   a column named `sum`.
myfunction <- function(mydf, grp, xvar) {
  grp <- rlang::enexpr(grp)

  # Only a literal c(...) wrapper is unpacked into its arguments. Blindly
  # dropping the first element of any call would turn e.g. `cyl > 4` into the
  # two groupings `cyl` and `4`; every other expression is kept whole instead.
  grp <- if (rlang::is_call(grp, "c")) rlang::call_args(grp) else list(grp)

  mydf %>%
    group_by(!!!grp) %>%
    summarise(sum = sum({{ xvar }}))
}

# One grouping column, supplied as a bare name.
myfunction(mydf = mtcars, grp = am, xvar = mpg)
# A tibble: 2 x 2
#     am   sum
#  <dbl> <dbl>
#1     0  326.
#2     1  317.
# Several grouping columns, supplied as bare names wrapped in c().
myfunction(mydf = mtcars, grp = c(am, gear), xvar = mpg)
# A tibble: 4 x 3
# Groups:   am [2]
#     am  gear   sum
#  <dbl> <dbl> <dbl>
#1     0     3 242. 
#2     0     4  84.2
#3     1     4 210. 
#4     1     5 107.

dplyr summarise : Group by multiple variables in a loop and add results in the same dataframe

library(questionr)
library(tidyverse)
data(hdv2003)

# Build the same summary once per grouping column, label each result with the
# column it was grouped on, and stack everything into a single table.
list("trav.satisf", "cuisine", "sexe") %>%
  map(function(grouping_col) {
    by_col <- group_by_at(hdv2003, grouping_col)

    stats <- summarise(
      by_col,
      n = n(),
      percent = round((n() / nrow(hdv2003)) * 100, digits = 1),
      femmes = round((sum(sexe == "Femme", na.rm = TRUE) / sum(!is.na(sexe))) * 100, digits = 1),
      age = round(mean(age, na.rm = TRUE), digits = 1)
    )

    # The first column carries the grouping column's own name; give it a
    # common name so the per-column results line up when stacked.
    stats <- rename_at(stats, 1, ~"group")
    mutate(stats, grouping = grouping_col)
  }) %>%
  bind_rows() %>%
  select(grouping, group, everything())
#> # A tibble: 8 x 6
#>   grouping    group              n percent femmes   age
#>   <chr>       <fct>          <int>   <dbl>  <dbl> <dbl>
#> 1 trav.satisf Satisfaction     480    24     51.5  41.4
#> 2 trav.satisf Insatisfaction   117     5.9   47.9  40.3
#> 3 trav.satisf Equilibre        451    22.6   49.9  40.9
#> 4 trav.satisf <NA>             952    47.6   60.2  56  
#> 5 cuisine     Non             1119    56     43.8  50.1
#> 6 cuisine     Oui              881    44     69.4  45.6
#> 7 sexe        Homme            899    45      0    48.2
#> 8 sexe        Femme           1101    55    100    48.2

Created on 2021-11-12 by the reprex package (v2.0.1)

dplyr::group_by() with multiple variables but NOT intersection

I'm guessing what you're looking for is the tidyr package...

gather first stacks the dataset so that every original row appears once for each factor by which grouping will occur (here cyl and am); mutate then creates the grouping variable from the factor's name and its value.

library(dplyr)
library(tidyr)

# Stack cyl and am into key/value pairs: each car now appears once per factor.
stacked_cars <- gather(mtcars, col, value, cyl, am)

# Label every row with "<factor>_<level>", e.g. "cyl_4" or "am_1".
labelled_cars <- mutate(stacked_cars, group = paste(col, value, sep = "_"))

# Mean disp for each factor level, without crossing the two factors.
summarise(group_by(labelled_cars, group), est = mean(disp))

How to use dplyr::group_by with multiple groups when programming

Instead of pasting and using group_by_ (deprecated - and it would not work because it is expecting NSE), we can directly use the vector in group_by_at

library(dplyr)
# Grouping columns held as plain strings.
group_vars <- c("factor1", "factor2")

# group_by_at() takes the character vector of column names directly.
group_by_at(test, group_vars)
# A tibble: 10 x 3
# Groups:   factor1, factor2 [10]
#   factor1 factor2 variable
#     <int> <fct>      <int>
# 1       1 d            145
# 2       5 e            119
# 3       4 a            181
# 4       3 e            155
# 5       3 d            164
# 6       3 b            135
# 7       4 e            137
# 8       4 d            197
# 9       2 d            142
#10       2 c            110

Or another option is to convert to symbols (syms from rlang) and evaluate (!!!) within group_by

# Turn each column name into a symbol, then splice the symbols into group_by().
group_by(test, !!!rlang::syms(group_vars))

If we go by the route of paste, then one option is parse_exprs (from rlang)

# Collapse the column names into a single ";"-separated string.
group_vars <- paste(c("factor1", "factor2"), collapse = ";")

# parse_exprs() turns that string back into one expression per column name,
# and !!! splices those expressions into group_by().
group_by(test, !!!rlang::parse_exprs(group_vars))
# A tibble: 10 x 3
# Groups:   factor1, factor2 [10]
#   factor1 factor2 variable
#     <int> <fct>      <int>
# 1       1 d            145
# 2       5 e            119
# 3       4 a            181
# 4       3 e            155
# 5       3 d            164
# 6       3 b            135
# 7       4 e            137
# 8       4 d            197
# 9       2 d            142
#10       2 c            110

Using multiple different group_by variables (dplyr) to summarise a dataframe

I am by no means a dplyr expert but this seems to accomplish what you are trying to do:

# For each position i in num_of_years, group my_data by its i-th column and
# average the three price columns within each group. The result is stored in
# the global environment as PriceMeans<i>.
# seq_along() (rather than 1:length()) makes the loop a no-op on empty input
# instead of iterating over c(1, 0).
for (i in seq_along(num_of_years)) {
  # Name of the i-th column of my_data, used as the grouping column.
  group_col <- names(my_data)[[i]]

  price_means <- my_data %>%
    # all_of() selects by the name held in group_col, not a column literally
    # called "group_col".
    select(all_of(group_col), price, price.2, price.3) %>%
    group_by(across(all_of(group_col))) %>%
    # across() skips the grouping column, so only the price columns are
    # averaged; .groups = "drop" returns an ungrouped result.
    summarise(
      across(everything(), ~ mean(.x, na.rm = TRUE)),
      .groups = "drop"
    )

  # Kept for compatibility with code that looks up PriceMeans1, PriceMeans2,
  # ... by name; collecting the results in a list would be the cleaner design.
  assign(paste0("PriceMeans", i), price_means, envir = .GlobalEnv)
}

Dplyr Issues When Using Group_By(Multiple Variables)