Summarise Over All Columns

Summarizing multiple columns with dplyr?

In dplyr (>=1.00) you may use across(everything() in summarise to apply a function to all variables:

library(dplyr)

df %>% group_by(grp) %>% summarise(across(everything(), list(mean)))
#> # A tibble: 3 x 5
#>     grp     a     b     c     d
#>   <int> <dbl> <dbl> <dbl> <dbl>
#> 1     1  3.08  2.98  2.98  2.91
#> 2     2  3.03  3.04  2.97  2.87
#> 3     3  2.85  2.95  2.95  3.06

Alternatively, the purrrlyr package provides the same functionality:

library(purrrlyr)
df %>% slice_rows("grp") %>% dmap(mean)
#> # A tibble: 3 x 5
#>     grp     a     b     c     d
#>   <int> <dbl> <dbl> <dbl> <dbl>
#> 1     1  3.08  2.98  2.98  2.91
#> 2     2  3.03  3.04  2.97  2.87
#> 3     3  2.85  2.95  2.95  3.06

Also don't forget about data.table (use keyby to sort sort groups):

library(data.table)
setDT(df)[, lapply(.SD, mean), keyby = grp]
#>    grp        a        b        c        d
#> 1:   1 3.079412 2.979412 2.979412 2.914706
#> 2:   2 3.029126 3.038835 2.967638 2.873786
#> 3:   3 2.854701 2.948718 2.951567 3.062678

Let's try to compare performance.

library(dplyr)
library(purrrlyr)
library(data.table)
library(bench)
set.seed(123)
n <- 10000
df <- data.frame(
  a = sample(1:5, n, replace = TRUE), 
  b = sample(1:5, n, replace = TRUE), 
  c = sample(1:5, n, replace = TRUE), 
  d = sample(1:5, n, replace = TRUE), 
  grp = sample(1:3, n, replace = TRUE)
)
dt <- setDT(df)
mark(
  dplyr = df %>% group_by(grp) %>% summarise(across(everything(), list(mean))),
  purrrlyr = df %>% slice_rows("grp") %>% dmap(mean),
  data.table = dt[, lapply(.SD, mean), keyby = grp],
  check = FALSE
)
#> # A tibble: 3 x 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 dplyr        2.81ms   2.85ms      328.        NA     17.3
#> 2 purrrlyr     7.96ms   8.04ms      123.        NA     24.5
#> 3 data.table 596.33µs 707.91µs     1409.        NA     10.3

How to summarise all columns using group_by and summarise?

It's hard to try and answer your question without a better example (ie, you can dput() your data to give us a sample). But here is a solution to your last issue: "For the first problem, I expect to get a table with the sum of repeated rows for all columns. Moreover, if it was possible, I would expect to get a better code for the sum of different activities on Saturday."

# create toy data of 3 different IDs, 3 different types, and repeated days
df <- data.frame(id=sample(c(1:3),100,T),
                 type=sample(letters[1:3],100,T),
                 day=sample(c(1:7),100,T),
                 matrix(runif(300),nrow=100),
                 stringsAsFactors = F)

# gather data, summarize each activity column by ID, type and day
# and select Saturday==6
df %>% gather(k,v,-id,-type,-day) %>% 
  group_by(id,type,day,k) %>% 
  summarise(sum=sum(v)) %>% 
  filter(day==6) %>% 
  spread(k,sum)

# A tibble: 8 x 6
# Groups:   id, type, day [8]
     id type    day    X1    X2    X3
  <int> <chr> <int> <dbl> <dbl> <dbl>
1     1 a         6 1.85  3.26  2.09 
2     1 b         6 0.604 0.583 0.586
3     1 c         6 0.163 0.663 0.624
4     2 a         6 0.185 0.952 0.349
5     2 b         6 1.16  0.832 0.974
6     2 c         6 0.906 1.62  0.853
7     3 b         6 0.671 1.39  0.887
8     3 c         6 0.449 0.150 0.647

UPDATE
Here is an updated solution with the new data provided.

df %>% group_by(LbNr,Type,Weekday) %>% summarise_all(.,sum)

# A tibble: 20 x 14
# Groups:   LbNr, Type [5]
    LbNr Type  Weekday   Time    lie     sit   stand    move    walk     run  stairs   cycle
   <dbl> <fct>   <dbl>  <dbl>  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
 1 22002 A1. ~       1  6.33  0.386  4.52e+0 0.726   0.499   0.189   0.00111 0.0075  0.00556
 2 22002 A1. ~       2  7.9   0.766  4.74e+0 1.28    0.611   0.489   0.00194 0.0111  0      
 3 22002 A1. ~       3  7.33  0.262  3.63e+0 2.04    0.941   0.449   0.00083 0.0114  0      
 4 22002 A1. ~       4 11.7   0.761  5.91e+0 2.54    1.19    1.25    0.00416 0.0394  0.00778
 5 22002 A1. ~       5  6.57  0.140  4.51e+0 1.12    0.51    0.254   0.00139 0.0183  0.01   
 6 22002 A1. ~       6  0.433 0.0169 3.02e-1 0.0589  0.0378  0.0175  0       0       0      
 7 22002 A2. ~       1  7.5   0.0792 5.90e+0 0.546   0.326   0.611   0.00111 0.0392  0      
 8 22002 A2. ~       2  9.83  0.0597 6.64e+0 1.64    0.595   0.842   0.00167 0.0575  0      
 9 22002 A2. ~       3  9.83  0.653  5.79e+0 1.82    0.525   1.01    0.00083 0.0333  0      
10 22002 A2. ~       4  5     0.383  2.80e+0 0.886   0.392   0.514   0.0025  0.0247  0      
11 22002 A2. ~       5 11.0   0.0103 6.77e+0 1.83    1.05    1.29    0.00472 0.0672  0      
12 22002 A4. ~       2  6.27  4.86   1.41e+0 0       0       0       0       0       0      
13 22002 A4. ~       3  6.83  5.69   1.15e+0 0       0       0       0       0       0      
14 22002 A4. ~       4  7.3   7.28   4.72e-3 0.00667 0.00667 0       0       0.00194 0      
15 22002 A4. ~       5  6.42  5.49   9.30e-1 0       0       0       0       0       0      
16 22002 C0. ~       6 15.7   0.245  9.78e+0 2.34    2.45    0.800   0.00194 0.0581  0      
17 22002 C0. ~       7 15.6   0.122  1.20e+1 1.80    0.940   0.656   0.0869  0.0164  0      
18 22002 C4. ~       1  6.33  5.75   5.84e-1 0       0       0       0       0       0      
19 22002 C4. ~       6  7.9   6.96   9.22e-1 0.00667 0.00806 0.00306 0       0       0      
20 22002 C4. ~       7  8.35  7.36   9.33e-1 0.0364  0.0208  0.00472 0       0       0      
# ... with 2 more variables: WalkSlow <dbl>, WalkFast <dbl>

I think this answers your first question about wanting a 'small code'. I don't understand your second question still about "I would expect to get a better code for the sum of different activities on Saturday." Does this mean that you want to sum across the different activities (lie, sit, etc.) for Saturday only? Or do you want to sum across different types (A2, C0, etc) of activities?

df %>% group_by(LbNr,Type,Weekday) %>% summarise_all(.,sum) %>% 
  filter(Weekday==6)

# A tibble: 3 x 14
# Groups:   LbNr, Type [3]
   LbNr Type  Weekday   Time    lie   sit   stand    move    walk     run stairs cycle WalkSlow
  <dbl> <fct>   <dbl>  <dbl>  <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>  <dbl> <dbl>    <dbl>
1 22002 A1. ~       6  0.433 0.0169 0.302 0.0589  0.0378  0.0175  0       0          0  0.00417
2 22002 C0. ~       6 15.7   0.245  9.78  2.34    2.45    0.800   0.00194 0.0581     0  0.14   
3 22002 C4. ~       6  7.9   6.96   0.922 0.00667 0.00806 0.00306 0       0          0  0      
# ... with 1 more variable: WalkFast <dbl>

# summarise across different activities, for each column, on Saturday only
df %>% group_by(LbNr,Type,Weekday) %>% summarise_all(.,sum) %>% 
  filter(Weekday==6) %>% group_by(LbNr) %>% select(-Type,-Weekday) %>% 
  summarise_all(.,sum)

# A tibble: 1 x 12
   LbNr  Time   lie   sit stand  move  walk     run stairs cycle WalkSlow WalkFast
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>  <dbl> <dbl>    <dbl>    <dbl>
1 22002    24  7.22  11.0  2.41  2.49 0.820 0.00194 0.0581     0    0.144    0.670

How do I summarise all columns except one(s) I specify?

Edit:

Modified versions of the two methods below for dplyr version >= 1, since summarise_at is superseded

df %>% 
  summarise(across(where(is.numeric) & !Registered, sum))

df %>% 
  summarise(across(-Registered, sum))

Original Answer:

I would use summarise_at, and just make a logical vector which is FALSE for non-numeric columns and Registered and TRUE otherwise, i.e.

df %>% 
  summarise_at(which(sapply(df, is.numeric) & names(df) != 'Registered'), sum)

If you wanted to just summarise all but one column you could do

df %>% 
  summarise_at(vars(-Registered), sum)

but in this case you have to check if it's numeric also.

Notes:

factors are technically numeric, so if you want to exclude non-numeric columns and factors, replace sapply(df, is.numeric) with sapply(df, function(x) is.numeric(x) & !is.factor(x))
If your data is big I think it is faster to use sapply(df[1,], is.numeric) instead of sapply(df, is.numeric). (Someone please correct me if I'm wrong)

Group by two column and summarize multiple columns

We can use summarise with across from dplyr version > = 1.00

library(dplyr)
df %>%
   group_by(State, Date) %>%
   summarise(across(everything(), sum, na.rm = TRUE), .groups = 'drop')
# A tibble: 6 x 4
#  State Date       Female  Male
#  <chr> <chr>       <int> <int>
#1 Cali  05/06/2005      3     2
#2 Cali  10/06/2005      4     3
#3 NY    11/06/2005     10     5
#4 NY    12/06/2005     11     6
#5 Texas 01/01/2004      5     3
#6 Texas 02/01/2004      5     4

Or using aggregate from base R

aggregate(.~ State + Date, df, sum, na.rm = TRUE)

data

df <-  structure(list(State = c("Texas", "Texas", "Texas", "Cali", "Cali", 
"Cali", "Cali", "NY", "NY"), Female = c(2L, 3L, 5L, 1L, 2L, 3L, 
1L, 10L, 11L), Male = c(2L, 1L, 4L, 1L, 1L, 1L, 2L, 5L, 6L), 
    Date = c("01/01/2004", "01/01/2004", "02/01/2004", "05/06/2005", 
    "05/06/2005", "10/06/2005", "10/06/2005", "11/06/2005", "12/06/2005"
    )), class = "data.frame", row.names = c(NA, -9L))

Summarise multiple columns using dplyr R

Try this:

df %>%
  group_by(County) %>%
  summarise(across(c(Submissions, Population), sum))

tidyverse summarize multiple columns but show result as rows

You can skip the pivot_wider step by using ".value" in names_to.

library(dplyr)

dat %>%
  summarise_all(list(mean = mean,sum  = sum)) %>%
  tidyr::pivot_longer(cols = everything(),
               names_sep = "_",
               names_to  = c("variable", ".value"))

# A tibble: 5 x 3
#  variable  mean   sum
#  <chr>    <dbl> <int>
#1 V1        10.5   210
#2 V2        30.5   610
#3 V3        50.5  1010
#4 V4        70.5  1410
#5 V5        90.5  1810

Summarise multiple columns that have to be grouped tidyverse

Get the data in long format and count :

library(dplyr)
library(tidyr)

df %>% pivot_longer(cols = one:three) %>% count(group1, group2, value)

#  group1 group2 value     n
#  <chr>  <chr>  <chr> <int>
#1 High   female no        1
#2 High   female yes       2
#3 High   male   no        3
#4 High   male   yes       3
#5 Low    female no        2
#6 Low    female yes       4
#7 Low    male   no        1
#8 Low    male   yes       2