Sum by Two Variables

R sum a variable by two groups

You can group_by ID and Year then use sum within summarise

library(dplyr)

# Example data: one Amount per row, identified by ID and Year.
txt <- paste(
  "ID Year Amount",
  "3 2000 45",
  "3 2000 55",
  "3 2002 10",
  "3 2002 10",
  "3 2004 30",
  "4 2000 25",
  "4 2002 40",
  "4 2002 15",
  "4 2004 45",
  "4 2004 50",
  sep = "\n"
)

# Parse the whitespace-delimited text; the first line supplies the column names.
df <- read.table(header = TRUE, text = txt)

# Sum Amount within each ID/Year combination, ignoring missing values.
df %>% 
  group_by(ID, Year) %>% 
  summarise(Total = sum(Amount, na.rm = TRUE))
#> # A tibble: 6 x 3
#> # Groups:   ID [?]
#>      ID  Year Total
#>   <int> <int> <int>
#> 1     3  2000   100
#> 2     3  2002    20
#> 3     3  2004    30
#> 4     4  2000    25
#> 5     4  2002    55
#> 6     4  2004    95

If you have more than one Amount column & want to apply more than one function, you can use either summarise_if or summarise_all

# Apply several summary functions to every numeric, non-grouping column.
# A named list is used instead of funs(), which is deprecated since
# dplyr 0.8.0. With a single summarised column the results are still named
# after the functions (`sum`, `mean`).
df %>%
  group_by(ID, Year) %>%
  summarise_if(is.numeric, list(sum = sum, mean = mean))
#> # A tibble: 6 x 4
#> # Groups:   ID [?]
#>      ID  Year   sum  mean
#>   <int> <int> <int> <dbl>
#> 1     3  2000   100  50  
#> 2     3  2002    20  10  
#> 3     3  2004    30  30  
#> 4     4  2000    25  25  
#> 5     4  2002    55  27.5
#> 6     4  2004    95  47.5

# Apply four summary functions to every non-grouping column.
# A named list is used instead of funs(), which is deprecated since
# dplyr 0.8.0. With a single summarised column the results are still named
# after the functions (`sum`, `mean`, `max`, `min`).
df %>%
  group_by(ID, Year) %>%
  summarise_all(list(sum = sum, mean = mean, max = max, min = min))
#> # A tibble: 6 x 6
#> # Groups:   ID [?]
#>      ID  Year   sum  mean   max   min
#>   <int> <int> <int> <dbl> <dbl> <dbl>
#> 1     3  2000   100  50      55    45
#> 2     3  2002    20  10      10    10
#> 3     3  2004    30  30      30    30
#> 4     4  2000    25  25      25    25
#> 5     4  2002    55  27.5    40    15
#> 6     4  2004    95  47.5    50    45

^{Created on 2018-09-19 by the reprex package (v0.2.1.9000)}

Sum multiple variables by group and create new column with their sum

You can use mutate after summarize:

# Sum every non-grouping column per group, then add the n1 and n2 sums
# together into a new column tt1.
data %>% 
    group_by(group) %>%
    summarise_all(sum) %>% 
    mutate(tt1 = n1 + n2)

# A tibble: 3 x 4
#   group    n1    n2   tt1
#  <fctr> <int> <int> <int>
#1      a     3     5     8
#2      b     3     4     7
#3      c     9    11    20

If you need to sum all numeric columns, you can use rowSums with select_if (to select the numeric columns) to sum the columns up:

# Same per-group sums, but tt1 adds up all numeric columns of the summarised
# table row by row instead of naming n1 and n2 explicitly.
data %>% 
    group_by(group) %>%
    summarise_all(sum) %>% 
    mutate(tt1 = rowSums(select_if(., is.numeric)))

# A tibble: 3 x 4
#   group    n1    n2   tt1
#  <fctr> <int> <int> <dbl>
#1      a     3     5     8
#2      b     3     4     7
#3      c     9    11    20

data.table calculate sums by two variables and add observations for empty groups

One way of going about this is to do a keyed cross-join with the CJ() function and then use by = .EACHI to indicate that j should be evaluated for every row in i.

library(data.table)

# Fix the random seed so the simulated example data is reproducible.
set.seed(1)
a <- sample(1:5, 10, replace = TRUE)
b <- sample(1:3, 10, replace = TRUE)
y <- rnorm(10)

dt <- data.table(a = a, b = b, y = y)
# Key the table on a and b so the join below needs no `on` argument.
setkeyv(dt, c("a", "b"))

# CJ(..., unique = TRUE) builds every combination of the distinct a and b
# values; by = .EACHI evaluates j once per row of that cross-join, so
# combinations with no matching rows in dt appear with NA.
dt[CJ(a, b, unique = TRUE), lapply(.SD, sum), by = .EACHI]
#>     a b          y
#>  1: 1 1 -0.7702614
#>  2: 1 2 -0.2992151
#>  3: 1 3         NA
#>  4: 2 1         NA
#>  5: 2 2 -0.4115108
#>  6: 2 3  0.4356833
#>  7: 3 1 -1.2375384
#>  8: 3 2 -0.8919211
#>  9: 3 3 -0.2242679
#> 10: 4 1 -0.2894616
#> 11: 4 2         NA
#> 12: 4 3         NA
#> 13: 5 1         NA
#> 14: 5 2  0.2522234
#> 15: 5 3         NA

^{Created on 2020-10-03 by the reprex package (v0.3.0)}

If you want to skip the key-setting step you could alternatively set the on argument:

dt <- data.table(a = a, b = b, y = y) # Set no key
# `on` names the join columns directly, so the table does not need a key.
dt[CJ(a, b, unique = TRUE), lapply(.SD, sum), by = .EACHI, on = c("a", "b")]

sum count across multiple variables

We can use mutate after grouping by 'id', 'date'

library(dplyr)
# Add each id/date group's total sales to every row of that group, then drop
# the grouping so the stored data frame is not left grouped for later steps.
eg_data <- eg_data %>%
  group_by(id, date) %>%
  mutate(TotalSum = sum(sales)) %>%
  ungroup()

Or with ave

# Base R: ave() returns the id/date group total aligned with each row.
eg_data$TotalSum <- with(eg_data, ave(sales, id, date, FUN = sum))

SQL Server : summing two variables

Remove all the GO words. GO ends a batch, so you have three separate batches here, and a variable is only in scope within the batch that declares it.

-- Declare all three variables in a single batch (no GO separators): a local
-- variable is only visible inside the batch that declares it.
DECLARE @RentsSum MONEY, @SalesSum MONEY, @SalesAndRentsSum MONEY

-- Total price of rents that have a start date.
SET @RentsSum = (SELECT SUM(Price)
                FROM Rents
                WHERE StartDate IS NOT NULL)

-- Total price of purchases that have a purchase date.
SET @SalesSum = (SELECT SUM(Price)
                FROM Purchases
                WHERE DateBought IS NOT NULL)

-- Combine the two totals computed above.
SET @SalesAndRentsSum = @SalesSum + @RentsSum

SELECT @SalesAndRentsSum

In R, take sum of multiple variables if combination of values in two other columns are unique

You can use dplyr::summarise and across after group_by.

library(dplyr)

# Sum every column whose name starts with "ani" within each
# Locations/seasons combination, ignoring missing values.
# `.groups = "drop"` returns an ungrouped tibble directly, so no separate
# ungroup() step is needed and no regrouping message is printed.
df %>%
  group_by(Locations, seasons) %>%
  summarise(
    across(starts_with("ani"), ~ sum(.x, na.rm = TRUE)),
    .groups = "drop"
  )

Another option is to reshape the data to long format using functions from the tidyr package. This avoids the issue of having to select columns 3 onwards.

library(dplyr)
library(tidyr)

# Reshape every column except Locations and seasons to long format
# (columns `name` and `value`), sum the values per Locations/seasons/name,
# then spread the sums back out to one column per original variable.
# `.groups = "drop"` ungroups in the same step, so no separate ungroup()
# is needed and no regrouping message is printed.
df %>%
  pivot_longer(cols = -c(Locations, seasons)) %>%
  group_by(Locations, seasons, name) %>%
  summarise(Sum = sum(value, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = "name", values_from = "Sum")

Result:

# A tibble: 9 x 4
  Locations seasons  ani1  ani2
  <chr>       <int> <int> <int>
1 A               2     2     0
2 A               3     1     1
3 A               4     1     1
4 B               2     0     1
5 B               3     1     1
6 C               1     1     0
7 C               2     1     1
8 D               2     0     0
9 D               4     1     2

How to sum a variable on other aggregated variables, whilst keeping remaining variables in R?

It works for me when literally specifying that you want the first value, i.e.:

library(tidyverse)
# Sum y per set1/set2 group and keep the first row and set3 value of each
# group, so those columns survive the aggregation.
df %>%
  group_by(set1, set2) %>%
  summarize(y = sum(y),
            row = row[1],
            set3 = set3[1])

# A tibble: 5 x 5
# Groups:   set1 [3]
   set1  set2     y   row  set3
  <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     1     3     1     1
2     1     2     6     4     2
3     2     1     6     7     4
4     2     2     3     9     5
5     3     1     4    10     5

Edit: To keep every other column without specifying, you can make use of across() and indicate that you want to apply this aggregation to every column except one.

# Keep the first value of every non-grouping column except y, and sum y
# within each set1/set2 group.
df %>%
  group_by(set1, set2) %>%
  summarize(
    across(!y, ~ .x[1]), 
    y = sum(y)
  )

# A tibble: 5 x 5
# Groups:   set1 [3]
   set1  set2   row  set3     y
  <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     1     1     1     3
2     1     2     4     2     6
3     2     1     7     4     6
4     2     2     9     5     3
5     3     1    10     5     4

Aggregate / summarize multiple variables per group (e.g. sum, mean)

Where is this year() function from?

You could also use the reshape2 package for this task:

# library() fails loudly if reshape2 is missing; require() would only
# return FALSE and let the calls below fail with a less helpful error.
library(reshape2)
# Stack all measure columns into variable/value pairs, keeping date, year and
# month as identifiers.
df_melt <- melt(df1, id.vars = c("date", "year", "month"))
# Spread back to one column per variable, summing values per year and month.
dcast(df_melt, year + month ~ variable, fun.aggregate = sum)
#   year month         x1           x2
#1  2000     1  -80.83405 -224.9540159
#2  2000     2 -223.76331 -288.2418017
#3  2000     3 -188.83930 -481.5601913
#4  2000     4 -197.47797 -473.7137420
#5  2000     5 -259.07928 -372.4563522

How to get the sum of combinations of variables of 2 columns in a tibble in r

Group those two variables and summarise. Easy to do with tidyverse, although I'd change the names of the columns to text first.

library(tidyverse)

# Count the rows for each col2/col3 combination.
df %>%
 group_by(col2, col3) %>%
 summarise(count = n())