Efficiently Sum Across Multiple Columns in R

summing multiple columns in an R data-frame quickly

Here's an alternative approach using tidyverse:

library(tidyverse)

# input columns of interest
cols <- c("mpg", "cyl", "disp", "hp", "drat")

# Build one nested 1 x 5 tibble per row and sum its values.
# nest() needs a named column specification (`data = ...`) since tidyr 1.0.0,
# and all_of() makes it explicit that `cols` is an external character vector.
# The other columns are dropped first so the result is exactly id, data, SUM.
mtcars %>%
  mutate(id = row_number()) %>%       # one id per row
  select(id, all_of(cols)) %>%        # keep only the id and the columns to sum
  nest(data = all_of(cols)) %>%       # nest selected columns
  mutate(SUM = map_dbl(data, sum))    # calculate the sum of those columns

# # A tibble: 32 x 3
#      id data               SUM
#   <int> <list>           <dbl>
# 1     1 <tibble [1 x 5]>  301.
# 2     2 <tibble [1 x 5]>  301.
# 3     3 <tibble [1 x 5]>  232.
# 4     4 <tibble [1 x 5]>  398.
# 5     5 <tibble [1 x 5]>  565.
# 6     6 <tibble [1 x 5]>  357.
# 7     7 <tibble [1 x 5]>  631.
# 8     8 <tibble [1 x 5]>  241.
# 9     9 <tibble [1 x 5]>  267.
# 10    10 <tibble [1 x 5]>  320.
# # ... with 22 more rows

The output here is a data frame containing the row id (id), the data used at each row (data) and the calculated sum (SUM).

You can get a vector of the calculated SUM if you add ... %>% pull(SUM).

Sum across multiple columns with dplyr

dplyr >= 1.0.0 using across

sum up each row using rowSums (rowwise works for any aggregation, but is slower)

# Add a per-row total over the numeric columns.
# na.rm = TRUE makes rowSums() skip missing values (so they count as zero in
# the total) without touching the data itself; a blanket replace(is.na(.), 0)
# would also write 0 into character, factor or date columns.
df %>%
  mutate(sum = rowSums(across(where(is.numeric)), na.rm = TRUE))

sum down each column

# Total of every column, ignoring missing values.
# sum(., is.na(.), 0) does not skip NAs: it adds the is.na() flags as extra
# summands and still returns NA for any column that has a missing value.
df %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE)))

dplyr < 1.0.0

sum up each row

# Add a per-row total over the first five columns.
# na.rm = TRUE skips missing values inside rowSums() instead of zero-filling
# the whole data frame, which would also overwrite NAs in non-numeric columns.
df %>%
  mutate(sum = rowSums(.[1:5], na.rm = TRUE))

sum down each column using the superseded summarise_all:

# Total of every column, ignoring missing values.
# The function is passed directly (funs() is deprecated since dplyr 0.8.0);
# extra arguments such as na.rm are forwarded to it by summarise_all().
df %>%
  summarise_all(sum, na.rm = TRUE)

R : How to iterate sum across multiple columns?

We may reshape to 'long' format with pivot_longer and do a group by sum

library(dplyr)
library(tidyr)
# Reshape to long form (one row per ID/item, one column per suffix), keep the
# items of interest, total each suffix column per ID, and attach the totals
# back onto the original wide data.
df1 <- df %>%
  pivot_longer(
    cols = -ID,
    names_to = c("item", ".value"),
    names_sep = "_"
  ) %>%
  filter(item %in% c("itemA", "itemC", "itemD")) %>%
  group_by(ID) %>%
  summarise(
    # A lambda carries na.rm; passing extra arguments through across()'s
    # `...` is deprecated as of dplyr 1.1.0.
    across(
      where(is.numeric),
      ~ sum(.x, na.rm = TRUE),
      .names = "total_{.col}"
    )
  ) %>%
  # Explicit key: avoids the "Joining with `by = ...`" message and guards
  # against accidentally joining on any other shared column.
  left_join(df, ., by = "ID")

-output

> df1
# A tibble: 5 × 19
     ID itemA_1 itemB_1 itemC_1 itemD_1 itemx_1 itemA_3 itemB_3 itemC_3 itemD_3 itemx_3 itemA_n itemB_n itemC_n itemD_n itemx_n total_1
  <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>
1     1      69      27      56      44      54      53      66      28      67      19      65      38      12      45      33     250
2     2      31      65       7      34      84      19      64      70      27      23      98      65      94      71     100     221
3     3      58      34      68      18      69     100      24      47      54      60      47      48      81      61      22     247
4     4      95      16      85      34       9      28      73      57      79      60      57      31      16      24      84     239
5     5      19      66      43      25      35      31      39      17      15      84      10      23     100       6      74     188
# … with 2 more variables: total_3 <int>, total_n <int>

If we want to use a for loop instead, build each new column name by pasting the prefix with i, then unquote it (!!) on the left-hand side of the assignment operator (:=)

library(stringr)
# For every suffix, add a total_<suffix> column holding the row-wise sum of
# the itemA_, itemC_ and itemD_ columns that carry the same suffix.
for (suffix in c(1, 3, "n")) {
  total_name <- str_c("total_", suffix)
  item_cols <- str_c(c("itemA_", "itemC_", "itemD_"), suffix)
  df <- df %>%
    mutate(!!total_name := rowSums(across(all_of(item_cols))))
}

But note that this is not dynamic: the suffixes (1, 3, n) have to be listed manually in the loop

-checking the output from for loop and reshaping

> all.equal(df1$total_1, df$total_1)
[1] TRUE
> all.equal(df1$total_3, df$total_3)
[1] TRUE
> all.equal(df1$total_n, df$total_n)
[1] TRUE

How to summarize the top n values across multiple columns row wise?

You do not have to do pivot_wider. Note that the longer format is the tidy format. Just do pivot_longer and left_join:

# Attach to each student the sum of their two highest quiz scores.
df %>%
  left_join(
    pivot_longer(., -c(Student, ID)) %>%
      group_by(Student, ID) %>%
      summarise(
        # head(..., 2) takes at most two values, so a student with fewer than
        # two scores gets the sum of what exists instead of NA from `[1:2]`.
        Total = sum(head(sort(value, decreasing = TRUE), 2)),
        .groups = "drop"
      ),
    # Explicit join keys instead of relying on the shared column names.
    by = c("Student", "ID")
  )

# A tibble: 10 x 7
   Student      ID Quiz1 Quiz2 Quiz3 Quiz4 Total
   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Aaron     30016    31    42    36    36    78
 2 James     87311    25    33    36    43    79
 3 Charlotte 61755    41    34    34    39    80
 4 Katie     55323    10    22    32    46    78
 5 Olivia    94839    35    23    43    40    83
 6 Timothy   38209    19    38    38    38    76
 7 Grant     34096    27    48    44    43    92
 8 Chloe     98432    42    49    42    35    91
 9 Judy      19487    15    23    42    41    83
10 Justin    94029    20    30    37    41    78

Summing multiple columns across a single row, for multiple rows

I'm not sure if this is part of a larger data frame and how you intend to apply this there, but I might use the map function in purrr.

library(purrr)
# Example data: four numeric columns with five rows each.
df <- data.frame(
  X0.61 = c(1, 2, 3, 4, 5),
  X0.225 = c(3, 4, 5, 6, 7),
  X0.329 = c(4, 5, 6, 7, 8),
  X0.553 = c(5, 6, 7, 8, 9)
)

# Take rows 1 and 3 of columns 1, 2 and 4, then total each of those columns;
# map() returns a named list with one sum per column.
map(.x = df[c(1, 3), c(1, 2, 4)], .f = sum)


$X0.61
4
$X0.225
8
$X0.553
12

Sum multiple columns that have specific name in columns

Another dplyr way is to use helper functions starts_with to select columns and then use rowSums to sum those columns.

library(dplyr)

# Row-wise totals of the columns whose names start with "Var" and with "Col",
# stored as two new columns on df.
df$Vars <- rowSums(select(df, starts_with("Var")))
df$Cols <- rowSums(select(df, starts_with("Col")))

df
#  ID Var1 Var2 Col1 Col2 Vars Cols
#1  1   34   22   34   24   56   58
#2  2    3   25   54   65   28  119
#3  3   87   68   14   78  155   92
#4  4   66   98   98  100  164  198
#5  5   55   13   77    2   68   79