Dplyr - Mean for Multiple Columns

Dplyr - Mean for multiple columns

You don't need to group; just select() the relevant columns and then mutate():

library(dplyr)
mutate(df, IVMean = rowMeans(select(df, starts_with("IV")), na.rm = TRUE))

How to get the average of two columns using dplyr?

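If you want to use dplyr to achieve this, I would suggest using the function rowwise() (the example data below is built with data.table(), so the data.table package must be loaded as well):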

    R> library(dplyr)
    R> dt <- data.table(A=1:5, B=c(1,4,NA,6,8))
    R> j <- dt %>% rowwise() %>% mutate(Avg=mean(c(A, B), na.rm=T)) 
    R> j
Source: local data frame [5 x 3]
Groups: <by row>

      A     B   Avg
  (int) (dbl) (dbl)
1     1     1   1.0
2     2     4   3.0
3     3    NA   3.0
4     4     6   5.0
5     5     8   6.5

Calculate the mean of some columns using dplyr::mutate

You can use rowMeans() with select(., BL1:BL9). Here, select(., BL1:BL9) selects the columns from BL1 to BL9, and rowMeans() calculates the average of each row. You can't directly use a character vector of column names inside mutate(), because it will be treated as a literal character vector rather than as columns:

test %>% mutate(ave = rowMeans(select(., BL1:BL9)))

#   BL1 BL2 BL3 BL4 BL5 BL6 BL7 BL8 BL9 BL10 BL11 BL12      ave
#1    5  11   1   1  12   5  10  12   6   11   12    9 7.000000
#2    1  10   5  11   7   6   5   9   9    1    8    4 7.000000
#3    8  10   1   2   7  12   5   9   5    3    3   11 6.555556
#4    5   2   5   4   9   5   5   3   5    2    8    1 4.777778
#5    9   1   1  10   3   5   1   9   9    6    3   12 5.333333
#6    9   7   9   6   3   2   5   4   9    5    1    2 6.000000
#7    3   3   1   9   7   8   7   9   9   11   12    9 6.222222
#8   12   9   3   3   9  11   4   2   5   12   12   12 6.444444
#9    1   7   7  12   6   6   5   3  10   12    5   10 6.333333
#10  12   7   7   1   2   8   5   8  11    9    1    5 6.777778
#11   9   1   5   8  12   6   6  11   3   12    3    9 6.777778
#12   5   6   1  11  10  12   6   7   8    7    8    2 7.333333

How to calculate mean by row for multiple groups using dplyr in R?

After grouping by 'Age' and 'Location', we may use == or %in% to subset 'Value' based on the 'Distance' values (this assumes the floating-point 'Distance' values match exactly):

library(dplyr)
df1 %>%
     group_by(Age, Location) %>% 
     summarise(Mean_0.5 = mean(Value[Distance == 0.5]), 
        Mean_1.5_and_2.5 = mean(Value[Distance %in% c(1.5, 2.5)]),
        .groups = 'drop')

-output

# A tibble: 4 × 4
    Age Location Mean_0.5 Mean_1.5_and_2.5
  <dbl> <chr>       <dbl>            <dbl>
1     1 Central      206.             202.
2     1 North        210.             201.
3     2 Central      193              186.
4     2 North        202.             214.

Getting rolling average of multiple column by multiple condition, with dplyr and apply family

Is this what you want? (This uses mean_run() from the runner package.)

You can automate this process for as many variables as you want: just list their names in the .cols argument of mutate(across(...)).
To change the rolling window size, change k in mean_run().

df %>% pivot_longer(!gmID, names_to = c("H_T", ".value"),
                    names_pattern = "(.+)\\.(.+)") %>%
  group_by(Team) %>%
  mutate(across(.cols = c(PTS, AST), 
                ~ runner::mean_run(x = ., k = 3, lag = 1), 
                .names = '{.col}_av')) %>%
  pivot_wider(id_cols = gmID, 
              names_from = H_T, 
              names_glue = "{H_T}_{.value}", 
              values_from = -c(gmID, H_T))

# A tibble: 20 x 11
    gmID H_Team A_Team H_PTS A_PTS H_AST A_AST H_PTS_av A_PTS_av H_AST_av A_AST_av
   <int> <chr>  <chr>  <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
 1     1 CLE    WAS       94    84    22    26     NA       NA       NA       NA  
 2     2 MIA    BOS      120   107    25    24     NA       NA       NA       NA  
 3     3 LAL    DAL       91    99    24    22     NA       NA       NA       NA  
 4     4 PHI    DEN       84    75    18    19     NA       NA       NA       NA  
 5     5 CLE    IND       88    90    18    22     94       NA       22       NA  
 6     6 DET    HOU       96   105    21    28     NA       NA       NA       NA  
 7     7 CHI    SAC       93    87    21    14     NA       NA       NA       NA  
 8     8 DAL    WAS       95    99    26    22     99       84       22       26  
 9     9 UTA    DAL      113    94    24    20     NA       97       NA       24  
10    10 PHO    CLE       85    87    16    19     NA       91       NA       20  
11    11 POR    LAL      116   106    19    21     NA       91       NA       24  
12    12 WAS    OKC       86    84    27    18     91.5     NA       24       NA  
13    13 ORL    DEN      102    89    24    22     NA       75       NA       19  
14    14 CHA    IND       90    89    18    19     NA       90       NA       22  
15    15 BOS    MIL       88    99    22    26    107       NA       24       NA  
16    16 CHI    CLE       86   115    23    34     93       89.7     21       19.7
17    17 ATL    HOU      102   109    23    22     NA      105       NA       28  
18    18 DAL    MIA      104    84    27    18     96      120       22.7     25  
19    19 CLE    UTA       88    86    23    19     96.7    113       23.7     24  
20    20 WAS    DEN      111    88    25    16     89.7     82       25       20.5

Mutate across multiple columns using dplyr

Two possibilities using dplyr:

library(dplyr)

mtcars %>% 
  rowwise() %>% 
  mutate(varmean = mean(c_across(mpg:vs)))

This returns

# A tibble: 32 x 12
# Rowwise: 
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb varmean
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4    40.0
 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4    40.1
 3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1    31.7
 4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1    52.8
 5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2    73.2
 6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1    47.7
 7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4    81.2
 8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2    33.1
 9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2    36.7
10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4    42.8
# ... with 22 more rows

and without rowwise(), using base R's rowMeans():

mtcars %>% 
  mutate(varmean = rowMeans(across(mpg:vs)))

returns

                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb  varmean
Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4 39.99750
Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4 40.09938
Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1 31.69750
Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1 52.76687
Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2 73.16375
Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1 47.69250
Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4 81.24000
Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2 33.12250
Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2 36.69625
Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4 42.80750

Find mean of multiple columns in R

We can use colMeans() on the selected columns, take the mean of the resulting column means, and assign that single value to a new column (no packages are needed):

df$values_acceptance<- mean(colMeans(df[c('values', 'acceptance')], na.rm = TRUE))

-output

> df
  values acceptance diffusion attitudes values_acceptance
1      9          8         9         7          7.833333
2      8          8         8         7          7.833333
3     NA         NA         7         6          7.833333
4      8          6        NA        NA          7.833333

Or if we need dplyr

library(dplyr)
df %>%
    mutate(values_acceptance = mean(unlist(across(c(values,
         acceptance), mean, na.rm = TRUE))))

-output

values acceptance diffusion attitudes values_acceptance
1      9          8         9         7          7.833333
2      8          8         8         7          7.833333
3     NA         NA         7         6          7.833333
4      8          6        NA        NA          7.833333

Summarizing multiple columns with dplyr?

In dplyr (>= 1.0.0) you may use across(everything()) in summarise() to apply a function to all variables:

library(dplyr)

df %>% group_by(grp) %>% summarise(across(everything(), list(mean)))
#> # A tibble: 3 x 5
#>     grp     a     b     c     d
#>   <int> <dbl> <dbl> <dbl> <dbl>
#> 1     1  3.08  2.98  2.98  2.91
#> 2     2  3.03  3.04  2.97  2.87
#> 3     3  2.85  2.95  2.95  3.06

Alternatively, the purrrlyr package provides the same functionality:

library(purrrlyr)
df %>% slice_rows("grp") %>% dmap(mean)
#> # A tibble: 3 x 5
#>     grp     a     b     c     d
#>   <int> <dbl> <dbl> <dbl> <dbl>
#> 1     1  3.08  2.98  2.98  2.91
#> 2     2  3.03  3.04  2.97  2.87
#> 3     3  2.85  2.95  2.95  3.06

Also don't forget about data.table (use keyby to sort the groups):

library(data.table)
setDT(df)[, lapply(.SD, mean), keyby = grp]
#>    grp        a        b        c        d
#> 1:   1 3.079412 2.979412 2.979412 2.914706
#> 2:   2 3.029126 3.038835 2.967638 2.873786
#> 3:   3 2.854701 2.948718 2.951567 3.062678

Let's try to compare performance.

library(dplyr)
library(purrrlyr)
library(data.table)
library(bench)
set.seed(123)
n <- 10000
df <- data.frame(
  a = sample(1:5, n, replace = TRUE), 
  b = sample(1:5, n, replace = TRUE), 
  c = sample(1:5, n, replace = TRUE), 
  d = sample(1:5, n, replace = TRUE), 
  grp = sample(1:3, n, replace = TRUE)
)
dt <- setDT(df)
mark(
  dplyr = df %>% group_by(grp) %>% summarise(across(everything(), list(mean))),
  purrrlyr = df %>% slice_rows("grp") %>% dmap(mean),
  data.table = dt[, lapply(.SD, mean), keyby = grp],
  check = FALSE
)
#> # A tibble: 3 x 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 dplyr        2.81ms   2.85ms      328.        NA     17.3
#> 2 purrrlyr     7.96ms   8.04ms      123.        NA     24.5
#> 3 data.table 596.33µs 707.91µs     1409.        NA     10.3

Means multiple columns by multiple groups

We can use dplyr's summarise_at() to get the mean of the columns of interest after grouping by the relevant columns:

library(dplyr)
airquality %>%
   group_by(City, year) %>% 
   summarise_at(vars("PM25", "Ozone", "CO2"), mean)

Or using across(), available in dplyr 1.0.0 and later (at the time of writing, the devel version ‘0.8.99.9000’):

airquality %>%
     group_by(City, year) %>%
     summarise(across(PM25:CO2, mean))

Dplyr - Mean for Multiple Columns