Sum by Two Variables

R sum a variable by two groups

You can group_by ID and Year then use sum within summarise

library(dplyr)

# Example data: one Amount per row, with repeated ID/Year combinations.
txt <- "ID Year Amount
3 2000 45
3 2000 55
3 2002 10
3 2002 10
3 2004 30
4 2000 25
4 2002 40
4 2002 15
4 2004 45
4 2004 50"

# Parse the whitespace-delimited text; its first line supplies the column names.
df <- utils::read.table(header = TRUE, text = txt)

# Sum Amount within each ID/Year pair; missing values are ignored.
df %>%
  group_by(ID, Year) %>%
  summarise(Total = sum(Amount, na.rm = TRUE))
#> # A tibble: 6 x 3
#> # Groups: ID [?]
#> ID Year Total
#> <int> <int> <int>
#> 1 3 2000 100
#> 2 3 2002 20
#> 3 3 2004 30
#> 4 4 2000 25
#> 5 4 2002 55
#> 6 4 2004 95

If you have more than one Amount column & want to apply more than one function, you can use either summarise_if or summarise_all

# Apply several summaries to every numeric non-grouping column. A named list
# is used instead of funs(), which is deprecated since dplyr 0.8.0; with a
# single summarised column the outputs are still named after the functions.
df %>%
  group_by(ID, Year) %>%
  summarise_if(is.numeric, list(sum = sum, mean = mean))
#> # A tibble: 6 x 4
#> # Groups: ID [?]
#> ID Year sum mean
#> <int> <int> <int> <dbl>
#> 1 3 2000 100 50
#> 2 3 2002 20 10
#> 3 3 2004 30 30
#> 4 4 2000 25 25
#> 5 4 2002 55 27.5
#> 6 4 2004 95 47.5

# Apply four summaries to every non-grouping column. A named list is used
# instead of funs(), which is deprecated since dplyr 0.8.0; with a single
# summarised column the outputs are still named after the functions.
df %>%
  group_by(ID, Year) %>%
  summarise_all(list(sum = sum, mean = mean, max = max, min = min))
#> # A tibble: 6 x 6
#> # Groups: ID [?]
#> ID Year sum mean max min
#> <int> <int> <int> <dbl> <dbl> <dbl>
#> 1 3 2000 100 50 55 45
#> 2 3 2002 20 10 10 10
#> 3 3 2004 30 30 30 30
#> 4 4 2000 25 25 25 25
#> 5 4 2002 55 27.5 40 15
#> 6 4 2004 95 47.5 50 45

Created on 2018-09-19 by the reprex package (v0.2.1.9000)

Sum multiple variables by group and create new column with their sum

You can use mutate after summarize:

# Sum every non-grouping column per group, then add the two sums together.
data %>%
  group_by(group) %>%
  summarise_all(.funs = sum) %>%
  mutate(tt1 = n1 + n2)

# A tibble: 3 x 4
# group n1 n2 tt1
# <fctr> <int> <int> <int>
#1 a 3 5 8
#2 b 3 4 7
#3 c 9 11 20

If you need to sum all numeric columns, you can use rowSums with select_if (to select the numeric columns) to add the columns up:

# Sum every non-grouping column per group, then total all numeric columns of
# the summarised table row by row (`.` is the table piped into mutate()).
data %>%
  group_by(group) %>%
  summarise_all(.funs = sum) %>%
  mutate(tt1 = rowSums(select_if(., .predicate = is.numeric)))

# A tibble: 3 x 4
# group n1 n2 tt1
# <fctr> <int> <int> <dbl>
#1 a 3 5 8
#2 b 3 4 7
#3 c 9 11 20

data.table calculate sums by two variables and add observations for empty groups

One way of going about this is to do a keyed cross-join with the CJ() function and then use by = .EACHI so that j (here, the sum of y) is evaluated for every row in i.

library(data.table)

# Reproducible toy data: two grouping vectors and one numeric value.
set.seed(1)
a <- sample(seq_len(5), size = 10, replace = TRUE)
b <- sample(seq_len(3), size = 10, replace = TRUE)
y <- rnorm(n = 10)

# Key the table on both grouping columns so the cross join can match on them.
dt <- data.table(a = a, b = b, y = y)
setkey(dt, a, b)

# Join onto every unique a/b combination and evaluate j once per row of i.
dt[CJ(a, b, unique = TRUE), lapply(.SD, sum), by = .EACHI]
#> a b y
#> 1: 1 1 -0.7702614
#> 2: 1 2 -0.2992151
#> 3: 1 3 NA
#> 4: 2 1 NA
#> 5: 2 2 -0.4115108
#> 6: 2 3 0.4356833
#> 7: 3 1 -1.2375384
#> 8: 3 2 -0.8919211
#> 9: 3 3 -0.2242679
#> 10: 4 1 -0.2894616
#> 11: 4 2 NA
#> 12: 4 3 NA
#> 13: 5 1 NA
#> 14: 5 2 0.2522234
#> 15: 5 3 NA

Created on 2020-10-03 by the reprex package (v0.3.0)

If you want to skip the key-setting step you could alternatively set the on argument:

# Same cross join on an unkeyed table: the join columns are named via `on`.
dt <- data.table(a = a, b = b, y = y)
dt[CJ(a, b, unique = TRUE), lapply(.SD, sum), on = c("a", "b"), by = .EACHI]

sum count across multiple variables

We can use mutate after grouping by 'id', 'date'

library(dplyr)
# Attach the per-(id, date) sales total to every row. The grouping is dropped
# afterwards so later operations on eg_data are not silently done per group.
eg_data <- eg_data %>%
  group_by(id, date) %>%
  mutate(TotalSum = sum(sales)) %>%
  ungroup()

Or with ave

# Base R variant: ave() applies FUN within each id/date combination and
# returns a vector aligned with the original rows.
eg_data$TotalSum <- with(eg_data, ave(sales, id, date, FUN = sum))

SQL Server : summing two variables

Remove all the GO words. You have three separate batches here and the variable must be declared within the scope of that batch.

-- All three variables are declared together so they share a single batch scope.
DECLARE @RentsSum MONEY, @SalesSum MONEY, @SalesAndRentsSum MONEY

-- Total price of rents that have a start date.
SET @RentsSum = (SELECT SUM(Price)
FROM Rents
WHERE StartDate IS NOT NULL)

-- Total price of purchases that have a purchase date.
SET @SalesSum = (SELECT SUM(Price)
FROM Purchases
WHERE DateBought IS NOT NULL)

-- Combine the two declared totals (@RentsSum, not an undeclared variable).
SET @SalesAndRentsSum = @SalesSum + @RentsSum

SELECT @SalesAndRentsSum

In R, take sum of multiple variables if combination of values in two other columns are unique

You can use dplyr::summarise and across after group_by.

library(dplyr)

# Sum every column whose name starts with "ani" within each
# Locations/seasons combination, ignoring NAs, and return an ungrouped table.
df %>%
  group_by(Locations, seasons) %>%
  summarise(across(starts_with("ani"), ~ sum(.x, na.rm = TRUE))) %>%
  ungroup()

Another option is to reshape the data to long format using functions from the tidyr package. This avoids the issue of having to select columns 3 onwards.

library(dplyr)
library(tidyr)

# Reshape to long form (columns `name` and `value`), sum per
# Locations/seasons/name, then spread the sums back into one column per name.
df %>%
  pivot_longer(cols = -c(Locations, seasons)) %>%
  group_by(Locations, seasons, name) %>%
  summarise(Sum = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  pivot_wider(names_from = "name", values_from = "Sum")

Result:

# A tibble: 9 x 4
Locations seasons ani1 ani2
<chr> <int> <int> <int>
1 A 2 2 0
2 A 3 1 1
3 A 4 1 1
4 B 2 0 1
5 B 3 1 1
6 C 1 1 0
7 C 2 1 1
8 D 2 0 0
9 D 4 1 2

How to sum a variable on other aggregated variables, whilst keeping remaining variables in R?

It works for me when literally specifying that you want the first value, i.e.:

library(tidyverse)
# Sum y per set1/set2 group and keep the first value of the other columns.
df %>%
  group_by(set1, set2) %>%
  summarize(
    y = sum(y),
    row = row[1],
    set3 = set3[1]
  )

# A tibble: 5 x 5
# Groups: set1 [3]
set1 set2 y row set3
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 3 1 1
2 1 2 6 4 2
3 2 1 6 7 4
4 2 2 3 9 5
5 3 1 4 10 5

Edit: To keep every other column without specifying, you can make use of across() and indicate that you want to apply this aggregation to every column except one.

# Keep the first value of every non-grouping column except y, then sum y.
# across() comes first so that it still sees the original, unsummarised y.
df %>%
  group_by(set1, set2) %>%
  summarize(
    across(!y, ~ .x[1]),
    y = sum(y)
  )

# A tibble: 5 x 5
# Groups: set1 [3]
set1 set2 row set3 y
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 1 3
2 1 2 4 2 6
3 2 1 7 4 6
4 2 2 9 5 3
5 3 1 10 5 4

Aggregate / summarize multiple variables per group (e.g. sum, mean)

Where is this year() function from?

You could also use the reshape2 package for this task:

# library() stops with an error if reshape2 is missing; require() would only
# return FALSE and let the calls below fail less clearly.
library(reshape2)

# Long format: date, year and month identify each row; the remaining columns
# are stacked into `variable`/`value`.
df_melt <- melt(df1, id.vars = c("date", "year", "month"))

# Back to wide form, summing each variable within every year/month combination.
dcast(df_melt, year + month ~ variable, fun.aggregate = sum)
# year month x1 x2
1 2000 1 -80.83405 -224.9540159
2 2000 2 -223.76331 -288.2418017
3 2000 3 -188.83930 -481.5601913
4 2000 4 -197.47797 -473.7137420
5 2000 5 -259.07928 -372.4563522

How to get the sum of combinations of variables of 2 columns in a tibble in r

Group those two variables and summarise. Easy to do with tidyverse, although I'd change the names of the columns to text first.

library(tidyverse)

# Count the rows for each combination of col2 and col3.
df %>%
  group_by(col2, col3) %>%
  summarise(count = n())



Related Topics



Leave a reply



Submit