Sum Multiple Variables by Group

Aggregate / summarize multiple variables per group (e.g. sum, mean)

Where is this year() function from?

You could also use the reshape2 package for this task:

# Reshape to long form, then cast back while summing within each year/month.
# library() stops with an error if reshape2 is missing; require() would only
# return FALSE and let the next line fail with a less helpful message.
library(reshape2)
# id.vars is spelled out in full instead of relying on partial matching of `id`.
df_melt <- melt(df1, id.vars = c("date", "year", "month"))
# fun.aggregate collapses all values that land in the same output cell.
dcast(df_melt, year + month ~ variable, fun.aggregate = sum)
#    year month         x1           x2
# 1  2000     1  -80.83405 -224.9540159
# 2  2000     2 -223.76331 -288.2418017
# 3  2000     3 -188.83930 -481.5601913
# 4  2000     4 -197.47797 -473.7137420
# 5  2000     5 -259.07928 -372.4563522

Sum multiple variables by group and create new column with their sum

You can use mutate after summarize:

# Sum every non-grouping column within each group, then add the two totals.
# across(everything(), sum) is the current form of the superseded
# summarise_all(sum); grouping columns are excluded automatically.
data %>%
  group_by(group) %>%
  summarise(across(everything(), sum)) %>%
  mutate(tt1 = n1 + n2)

# A tibble: 3 x 4
#   group    n1    n2   tt1
#  <fctr> <int> <int> <int>
#1      a     3     5     8
#2      b     3     4     7
#3      c     9    11    20

If you need to sum all numeric columns, you can use rowSums with select_if (to select numeric columns) to sum the columns up:

# Sum every non-grouping column within each group, then total the numeric
# columns row by row.
data %>%
  group_by(group) %>%
  summarise(across(everything(), sum)) %>%
  # `.` is the summarised table coming through the pipe.
  # select(., where(is.numeric)) is the current form of the superseded
  # select_if(., is.numeric).
  mutate(tt1 = rowSums(select(., where(is.numeric))))

# A tibble: 3 x 4
#   group    n1    n2   tt1
#  <fctr> <int> <int> <dbl>
#1      a     3     5     8
#2      b     3     4     7
#3      c     9    11    20

How to sum the values of multiple variables by the same group in R

Use across() to loop over the columns whose names start with 'score' and take the sum of each:

library(dplyr)
# Sum every column whose name starts with "score" within each
# region/department pair; .groups = "drop" returns an ungrouped result.
out1 <- df %>%
  group_by(region, department) %>%
  summarise(across(starts_with("score"), sum), .groups = "drop")

In the for loop, the issue is that df is reassigned (df <- ...) on each iteration, and summarise() returns only the grouping columns plus the summarised output. After the first iteration, df therefore no longer contains the other 'score' columns. If we want to use a for loop, store each result in a list and then reduce the list with a join:

library(purrr) 
# Summarise one variable per iteration into its own list slot, leaving df
# untouched, then join the per-variable results back on the grouping keys.
out_list <- setNames(vector("list", length(vlist)), vlist)
for (var in vlist) {
  out_list[[var]] <- df %>%
    group_by(region, department) %>%
    # .data[[var]] looks the column up by its string name within the current
    # group; it replaces cur_data()[[var]], because cur_data() is deprecated
    # as of dplyr 1.1.0. "{var}" := names the output column after the variable.
    summarise("{var}" := sum(.data[[var]]), .groups = "drop")
}
out2 <- reduce(out_list, full_join, by = c("region", "department"))

Checking the outputs:

> identical(out1, out2)
[1] TRUE

How to sum a variable by group

Using aggregate:

# Sum Frequency within each Category. The name given inside list() becomes the
# grouping column's header; the summed column is labelled "x".
aggregate(x$Frequency, by = list(Category = x$Category), FUN = sum)
  Category  x
1    First 30
2   Second  5
3    Third 34

In the example above, multiple dimensions can be specified in the list. Multiple aggregated metrics of the same data type can be incorporated via cbind:

aggregate(cbind(x$Frequency, x$Metric2, x$Metric3) ...

As @thelatemail points out in a comment, aggregate also has a formula interface:

# Formula interface: summed variable ~ grouping variable, looked up in x.
aggregate(Frequency ~ Category, data = x, FUN = sum)

Or if you want to aggregate multiple columns, you could use the . notation (works for one column too)

# `.` stands for every column of x that is not named on the right-hand side.
aggregate(. ~ Category, data = x, FUN = sum)

or tapply:

# Returns the sums labelled by Category level rather than as a data frame.
tapply(x$Frequency, x$Category, FUN = sum)
 First Second  Third 
    30      5     34

Using this data:

# Example data: seven observations, each tagged with one of three categories.
x <- data.frame(
  Category = factor(
    c("First", "First", "First", "Second", "Third", "Third", "Second")
  ),
  Frequency = c(10, 15, 5, 2, 14, 20, 3)
)

R sum a variable by two groups

You can group_by ID and Year then use sum within summarise

library(dplyr)

# Inline example data: whitespace-separated, first line holds the column names.
txt <- "ID Year Amount
3 2000 45
3 2000 55
3 2002 10
3 2002 10
3 2004 30
4 2000 25
4 2002 40
4 2002 15
4 2004 45
4 2004 50"

# Parse the literal text as if it were a file.
df <- read.table(header = TRUE, text = txt)

# Collapse each (ID, Year) pair to a single row holding its total Amount.
# na.rm = TRUE keeps a missing Amount from turning a group's total into NA.
df %>%
  group_by(ID, Year) %>%
  summarise(Total = sum(Amount, na.rm = TRUE))
#> # A tibble: 6 x 3
#> # Groups:   ID [?]
#>      ID  Year Total
#>   <int> <int> <int>
#> 1     3  2000   100
#> 2     3  2002    20
#> 3     3  2004    30
#> 4     4  2000    25
#> 5     4  2002    55
#> 6     4  2004    95

If you have more than one Amount column & want to apply more than one function, you can use either summarise_if or summarise_all

# Apply several functions to every numeric non-grouping column.
# funs() is deprecated since dplyr 0.8.0; a named list of functions is its
# replacement, and the list names are used to label the output columns.
df %>%
  group_by(ID, Year) %>%
  summarise_if(is.numeric, list(sum = sum, mean = mean))
#> # A tibble: 6 x 4
#> # Groups:   ID [?]
#>      ID  Year   sum  mean
#>   <int> <int> <int> <dbl>
#> 1     3  2000   100  50  
#> 2     3  2002    20  10  
#> 3     3  2004    30  30  
#> 4     4  2000    25  25  
#> 5     4  2002    55  27.5
#> 6     4  2004    95  47.5

# Apply several functions to every non-grouping column.
# funs() is deprecated since dplyr 0.8.0; a named list of functions is its
# replacement, and the list names are used to label the output columns.
df %>%
  group_by(ID, Year) %>%
  summarise_all(list(sum = sum, mean = mean, max = max, min = min))
#> # A tibble: 6 x 6
#> # Groups:   ID [?]
#>      ID  Year   sum  mean   max   min
#>   <int> <int> <int> <dbl> <dbl> <dbl>
#> 1     3  2000   100  50      55    45
#> 2     3  2002    20  10      10    10
#> 3     3  2004    30  30      30    30
#> 4     4  2000    25  25      25    25
#> 5     4  2002    55  27.5    40    15
#> 6     4  2004    95  47.5    50    45

Created on 2018-09-19 by the reprex package (v0.2.1.9000)