Dplyr - Mean for Multiple Columns

Dplyr - Mean for multiple columns

You don't need to group; just use select() and then mutate():

library(dplyr)
mutate(df, IVMean = rowMeans(select(df, starts_with("IV")), na.rm = TRUE))

How to get the average of two columns using dplyr?

If you want to use dplyr to achieve this, I would suggest using the function rowwise():

    R> library(dplyr)
R> dt <- data.table(A=1:5, B=c(1,4,NA,6,8))
R> j <- dt %>% rowwise() %>% mutate(Avg=mean(c(A, B), na.rm=T))
R> j
Source: local data frame [5 x 3]
Groups: <by row>

A B Avg
(int) (dbl) (dbl)
1 1 1 1.0
2 2 4 3.0
3 3 NA 3.0
4 4 6 5.0
5 5 8 6.5

Calculate the mean of some columns using dplyr::mutate

You can use rowMeans with select(., BL1:BL9). Here, select(., BL1:BL9) selects the columns from BL1 to BL9, and rowMeans calculates the row average. You can't directly use a character vector of column names in mutate, because it will be treated as a literal character vector rather than as columns:

test %>% mutate(ave = rowMeans(select(., BL1:BL9)))

# BL1 BL2 BL3 BL4 BL5 BL6 BL7 BL8 BL9 BL10 BL11 BL12 ave
#1 5 11 1 1 12 5 10 12 6 11 12 9 7.000000
#2 1 10 5 11 7 6 5 9 9 1 8 4 7.000000
#3 8 10 1 2 7 12 5 9 5 3 3 11 6.555556
#4 5 2 5 4 9 5 5 3 5 2 8 1 4.777778
#5 9 1 1 10 3 5 1 9 9 6 3 12 5.333333
#6 9 7 9 6 3 2 5 4 9 5 1 2 6.000000
#7 3 3 1 9 7 8 7 9 9 11 12 9 6.222222
#8 12 9 3 3 9 11 4 2 5 12 12 12 6.444444
#9 1 7 7 12 6 6 5 3 10 12 5 10 6.333333
#10 12 7 7 1 2 8 5 8 11 9 1 5 6.777778
#11 9 1 5 8 12 6 6 11 3 12 3 9 6.777778
#12 5 6 1 11 10 12 6 7 8 7 8 2 7.333333

How to calculate mean by row for multiple groups using dplyr in R?

We may use %in% or == to subset 'Value' based on the 'Distance' values (assuming the stored values match exactly, i.e. there are no floating-point precision issues) after grouping by 'Age' and 'Location':

library(dplyr)
df1 %>%
group_by(Age, Location) %>%
summarise(Mean_0.5 = mean(Value[Distance == 0.5]),
Mean_1.5_and_2.5 = mean(Value[Distance %in% c(1.5, 2.5)]),
.groups = 'drop')

-output

# A tibble: 4 × 4
Age Location Mean_0.5 Mean_1.5_and_2.5
<dbl> <chr> <dbl> <dbl>
1 1 Central 206. 202.
2 1 North 210. 201.
3 2 Central 193 186.
4 2 North 202. 214.

Getting rolling average of multiple column by multiple condition, with dplyr and apply family

Do you want this? (This uses mean_run from the runner package.)

  • You can automate this process for as many variables as you want. Just use their names in the .cols argument of mutate(across(...)).
  • To change the rolling window size, just change k in mean_run to the value you want.
df %>% pivot_longer(!gmID, names_to = c("H_T", ".value"),
names_pattern = "(.+)\\.(.+)") %>%
group_by(Team) %>%
mutate(across(.cols = c(PTS, AST),
~ runner::mean_run(x = ., k = 3, lag = 1),
.names = '{.col}_av')) %>%
pivot_wider(id_cols = gmID,
names_from = H_T,
names_glue = "{H_T}_{.value}",
values_from = -c(gmID, H_T))

# A tibble: 20 x 11
gmID H_Team A_Team H_PTS A_PTS H_AST A_AST H_PTS_av A_PTS_av H_AST_av A_AST_av
<int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 CLE WAS 94 84 22 26 NA NA NA NA
2 2 MIA BOS 120 107 25 24 NA NA NA NA
3 3 LAL DAL 91 99 24 22 NA NA NA NA
4 4 PHI DEN 84 75 18 19 NA NA NA NA
5 5 CLE IND 88 90 18 22 94 NA 22 NA
6 6 DET HOU 96 105 21 28 NA NA NA NA
7 7 CHI SAC 93 87 21 14 NA NA NA NA
8 8 DAL WAS 95 99 26 22 99 84 22 26
9 9 UTA DAL 113 94 24 20 NA 97 NA 24
10 10 PHO CLE 85 87 16 19 NA 91 NA 20
11 11 POR LAL 116 106 19 21 NA 91 NA 24
12 12 WAS OKC 86 84 27 18 91.5 NA 24 NA
13 13 ORL DEN 102 89 24 22 NA 75 NA 19
14 14 CHA IND 90 89 18 19 NA 90 NA 22
15 15 BOS MIL 88 99 22 26 107 NA 24 NA
16 16 CHI CLE 86 115 23 34 93 89.7 21 19.7
17 17 ATL HOU 102 109 23 22 NA 105 NA 28
18 18 DAL MIA 104 84 27 18 96 120 22.7 25
19 19 CLE UTA 88 86 23 19 96.7 113 23.7 24
20 20 WAS DEN 111 88 25 16 89.7 82 25 20.5

Mutate across multiple columns using dplyr

Two possibilities using dplyr:

library(dplyr)

mtcars %>%
rowwise() %>%
mutate(varmean = mean(c_across(mpg:vs)))

This returns

# A tibble: 32 x 12
# Rowwise:
mpg cyl disp hp drat wt qsec vs am gear carb varmean
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 40.0
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 40.1
3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 31.7
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 52.8
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 73.2
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 47.7
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 81.2
8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 33.1
9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 36.7
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 42.8
# ... with 22 more rows

and without rowwise(), using base R's rowMeans():

mtcars %>% 
mutate(varmean = rowMeans(across(mpg:vs)))

returns

                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb  varmean
Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 39.99750
Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 40.09938
Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 31.69750
Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 52.76687
Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 73.16375
Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 47.69250
Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 81.24000
Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 33.12250
Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 36.69625
Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 42.80750

Find mean of multiple columns in R

We can use colMeans on the selected columns, take the mean of the result, and then assign the output to create a new column (no packages are needed):

df$values_acceptance<- mean(colMeans(df[c('values', 'acceptance')], na.rm = TRUE))

-output

> df
values acceptance diffusion attitudes values_acceptance
1 9 8 9 7 7.833333
2 8 8 8 7 7.833333
3 NA NA 7 6 7.833333
4 8 6 NA NA 7.833333

Or if we need dplyr

library(dplyr)
df %>%
mutate(values_acceptance = mean(unlist(across(c(values,
acceptance), mean, na.rm = TRUE))))

-output

values acceptance diffusion attitudes values_acceptance
1 9 8 9 7 7.833333
2 8 8 8 7 7.833333
3 NA NA 7 6 7.833333
4 8 6 NA NA 7.833333

Summarizing multiple columns with dplyr?

In dplyr (>= 1.0.0) you may use across(everything(), ...) in summarise() to apply a function to all variables:

library(dplyr)

df %>% group_by(grp) %>% summarise(across(everything(), list(mean)))
#> # A tibble: 3 x 5
#> grp a b c d
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 3.08 2.98 2.98 2.91
#> 2 2 3.03 3.04 2.97 2.87
#> 3 3 2.85 2.95 2.95 3.06

Alternatively, the purrrlyr package provides the same functionality:

library(purrrlyr)
df %>% slice_rows("grp") %>% dmap(mean)
#> # A tibble: 3 x 5
#> grp a b c d
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 3.08 2.98 2.98 2.91
#> 2 2 3.03 3.04 2.97 2.87
#> 3 3 2.85 2.95 2.95 3.06

Also don't forget about data.table (use keyby to sort the groups):

library(data.table)
setDT(df)[, lapply(.SD, mean), keyby = grp]
#> grp a b c d
#> 1: 1 3.079412 2.979412 2.979412 2.914706
#> 2: 2 3.029126 3.038835 2.967638 2.873786
#> 3: 3 2.854701 2.948718 2.951567 3.062678

Let's try to compare performance.

library(dplyr)
library(purrrlyr)
library(data.table)
library(bench)
set.seed(123)
n <- 10000
df <- data.frame(
a = sample(1:5, n, replace = TRUE),
b = sample(1:5, n, replace = TRUE),
c = sample(1:5, n, replace = TRUE),
d = sample(1:5, n, replace = TRUE),
grp = sample(1:3, n, replace = TRUE)
)
dt <- setDT(df)
mark(
dplyr = df %>% group_by(grp) %>% summarise(across(everything(), list(mean))),
purrrlyr = df %>% slice_rows("grp") %>% dmap(mean),
data.table = dt[, lapply(.SD, mean), keyby = grp],
check = FALSE
)
#> # A tibble: 3 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 dplyr 2.81ms 2.85ms 328. NA 17.3
#> 2 purrrlyr 7.96ms 8.04ms 123. NA 24.5
#> 3 data.table 596.33µs 707.91µs 1409. NA 10.3

Means multiple columns by multiple groups

We can use dplyr with summarise_at to get the mean of the columns of interest after grouping by the relevant columns:

library(dplyr)
airquality %>%
group_by(City, year) %>%
summarise_at(vars("PM25", "Ozone", "CO2"), mean)

Or, using across() from the devel version of dplyr (version ‘0.8.99.9000’, released as dplyr 1.0.0):

airquality %>%
group_by(City, year) %>%
summarise(across(PM25:CO2, mean))


Related Topics



Leave a reply



Submit