R sum a variable by two groups
You can group_by ID and Year, then use sum within summarise:
library(dplyr)
txt <- "ID Year Amount
3 2000 45
3 2000 55
3 2002 10
3 2002 10
3 2004 30
4 2000 25
4 2002 40
4 2002 15
4 2004 45
4 2004 50"
# Parse the inline text table into a data frame; header = TRUE takes the
# column names (ID, Year, Amount) from the first line of txt.
df <- read.table(text = txt, header = TRUE)
# Total Amount for every ID/Year combination, ignoring missing values.
df %>%
group_by(ID, Year) %>%
summarise(Total = sum(Amount, na.rm = TRUE))
#> # A tibble: 6 x 3
#> # Groups: ID [?]
#> ID Year Total
#> <int> <int> <int>
#> 1 3 2000 100
#> 2 3 2002 20
#> 3 3 2004 30
#> 4 4 2000 25
#> 5 4 2002 55
#> 6 4 2004 95
If you have more than one Amount
column & want to apply more than one function, you can use either summarise_if
or summarise_all
# Apply several summary functions to every numeric, non-grouping column.
# A named list replaces funs(), which is deprecated since dplyr 0.8.0. With a
# single summarised column the output columns are still named after the
# functions (sum, mean).
df %>%
  group_by(ID, Year) %>%
  summarise_if(is.numeric, list(sum = sum, mean = mean))
#> # A tibble: 6 x 4
#> # Groups: ID [?]
#> ID Year sum mean
#> <int> <int> <int> <dbl>
#> 1 3 2000 100 50
#> 2 3 2002 20 10
#> 3 3 2004 30 30
#> 4 4 2000 25 25
#> 5 4 2002 55 27.5
#> 6 4 2004 95 47.5
# Apply sum, mean, max and min to every non-grouping column.
# A named list replaces funs(), which is deprecated since dplyr 0.8.0. With a
# single summarised column the output columns are still named after the
# functions (sum, mean, max, min).
df %>%
  group_by(ID, Year) %>%
  summarise_all(list(sum = sum, mean = mean, max = max, min = min))
#> # A tibble: 6 x 6
#> # Groups: ID [?]
#> ID Year sum mean max min
#> <int> <int> <int> <dbl> <dbl> <dbl>
#> 1 3 2000 100 50 55 45
#> 2 3 2002 20 10 10 10
#> 3 3 2004 30 30 30 30
#> 4 4 2000 25 25 25 25
#> 5 4 2002 55 27.5 40 15
#> 6 4 2004 95 47.5 50 45
Created on 2018-09-19 by the reprex package (v0.2.1.9000)
Sum multiple variables by group and create new column with their sum
You can use mutate
after summarize
:
# Sum every non-grouping column within each group, then add the two per-group
# sums together into a new column tt1.
data %>%
group_by(group) %>%
summarise_all(sum) %>%
mutate(tt1 = n1 + n2)
# A tibble: 3 x 4
# group n1 n2 tt1
# <fctr> <int> <int> <int>
#1 a 3 5 8
#2 b 3 4 7
#3 c 9 11 20
If you need to sum all numeric columns, you can use rowSums with select_if (to select the numeric columns) to add the columns up:
# Sum every non-grouping column within each group, then add up all numeric
# columns of the summarised table row by row. The `.` placeholder is the
# summarised table passed along by %>%.
data %>%
group_by(group) %>%
summarise_all(sum) %>%
mutate(tt1 = rowSums(select_if(., is.numeric)))
# A tibble: 3 x 4
# group n1 n2 tt1
# <fctr> <int> <int> <dbl>
#1 a 3 5 8
#2 b 3 4 7
#3 c 9 11 20
data.table calculate sums by two variables and add observations for empty groups
One way of going about this is to do a keyed cross-join with the CJ() function and then use by = .EACHI so that j is evaluated for every row in i.
library(data.table)
# Fixed seed so the sampled example data is reproducible.
set.seed(1)
a <- sample(1:5, 10, replace = TRUE)
b <- sample(1:3, 10, replace = TRUE)
y <- rnorm(10)
dt <- data.table(a = a, b = b, y = y)
# Key the table on (a, b) so the join below matches on those columns.
setkeyv(dt, c("a", "b"))
# CJ(a, b, unique = TRUE) builds every combination of the distinct a and b
# values; by = .EACHI evaluates the sum once per combination. Combinations
# with no rows in dt show up as NA in the output below.
dt[CJ(a, b, unique = TRUE), lapply(.SD, sum), by = .EACHI]
#> a b y
#> 1: 1 1 -0.7702614
#> 2: 1 2 -0.2992151
#> 3: 1 3 NA
#> 4: 2 1 NA
#> 5: 2 2 -0.4115108
#> 6: 2 3 0.4356833
#> 7: 3 1 -1.2375384
#> 8: 3 2 -0.8919211
#> 9: 3 3 -0.2242679
#> 10: 4 1 -0.2894616
#> 11: 4 2 NA
#> 12: 4 3 NA
#> 13: 5 1 NA
#> 14: 5 2 0.2522234
#> 15: 5 3 NA
Created on 2020-10-03 by the reprex package (v0.3.0)
If you want to skip the key-setting step you could alternatively set the on
argument:
dt <- data.table(a = a, b = b, y = y) # Set no key
# Same cross-join summary as the keyed version; on = names the join columns
# directly, so no setkeyv() call is needed.
dt[CJ(a, b, unique = TRUE), lapply(.SD, sum), by = .EACHI, on = c("a", "b")]
sum count across multiple variables
We can use mutate
after grouping by 'id', 'date'
library(dplyr)
# Add the total of `sales` within each id/date combination as a new column on
# every row. ungroup() drops the grouping afterwards so later verbs applied to
# eg_data are not silently computed per group.
eg_data <- eg_data %>%
  group_by(id, date) %>%
  mutate(TotalSum = sum(sales)) %>%
  ungroup()
Or with ave
# Base R alternative: ave() applies FUN within each id/date combination and
# returns a vector aligned with the original rows.
eg_data$TotalSum <- ave(eg_data$sales, eg_data$id, eg_data$date, FUN = sum)
SQL Server : summing two variables
Remove all the GO
words. You have three separate batches here and the variable must be declared within the scope of that batch.
-- Declare all variables once, in a single batch (no GO separators), so they
-- remain in scope for every statement below.
DECLARE @RentsSum MONEY, @SalesSum MONEY, @SalesAndRentsSum MONEY

-- Total price of all rents that have a start date.
SET @RentsSum = (SELECT SUM(Price)
                 FROM Rents
                 WHERE StartDate IS NOT NULL)

-- Total price of all purchases that have a purchase date.
SET @SalesSum = (SELECT SUM(Price)
                 FROM Purchases
                 WHERE DateBought IS NOT NULL)

-- Combine both totals. This previously referenced @PriceSum, which is never
-- declared; the rents total is stored in @RentsSum.
SET @SalesAndRentsSum = @SalesSum + @RentsSum

SELECT @SalesAndRentsSum
In R, take sum of multiple variables if combination of values in two other columns are unique
You can use dplyr::summarise
and across
after group_by
.
library(dplyr)
# For each Locations/seasons combination, sum every column whose name starts
# with "ani", ignoring missing values, then drop the grouping.
df %>%
group_by(Locations, seasons) %>%
summarise(across(starts_with("ani"), ~sum(.x, na.rm = TRUE))) %>%
ungroup()
Another option is to reshape the data to long format using functions from the tidyr
package. This avoids the issue of having to select columns 3 onwards.
library(dplyr)
library(tidyr)
# Reshape to long format (one row per Locations/seasons/column name), sum the
# values within each of those combinations, then spread the column names back
# out to one column per original variable.
df %>%
pivot_longer(cols = -c(Locations, seasons)) %>%
group_by(Locations, seasons, name) %>%
summarise(Sum = sum(value, na.rm = TRUE)) %>%
ungroup() %>%
pivot_wider(names_from = "name", values_from = "Sum")
Result:
# A tibble: 9 x 4
Locations seasons ani1 ani2
<chr> <int> <int> <int>
1 A 2 2 0
2 A 3 1 1
3 A 4 1 1
4 B 2 0 1
5 B 3 1 1
6 C 1 1 0
7 C 2 1 1
8 D 2 0 0
9 D 4 1 2
How to sum a variable on other aggregated variables, whilst keeping remaining variables in R?
It works for me when literally specifying that you want the first value, i.e.:
library(tidyverse)
# Sum y within each set1/set2 combination and keep the first value of the
# remaining columns (row, set3) for each group.
df %>%
group_by(set1, set2) %>%
summarize(y = sum(y),
row = row[1],
set3 = set3[1])
# A tibble: 5 x 5
# Groups: set1 [3]
set1 set2 y row set3
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 3 1 1
2 1 2 6 4 2
3 2 1 6 7 4
4 2 2 3 9 5
5 3 1 4 10 5
Edit: To keep every other column without specifying, you can make use of across()
and indicate that you want to apply this aggregation to every column except one.
# Same aggregation without naming the other columns: across(!y, ...) keeps the
# first value of every column except y, and y itself is summed. The across()
# call comes first so it still sees the original y.
df %>%
group_by(set1, set2) %>%
summarize(
across(!y, ~ .x[1]),
y = sum(y)
)
# A tibble: 5 x 5
# Groups: set1 [3]
set1 set2 row set3 y
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 1 3
2 1 2 4 2 6
3 2 1 7 4 6
4 2 2 9 5 3
5 3 1 10 5 4
Aggregate / summarize multiple variables per group (e.g. sum, mean)
Where is this year()
function from?
You could also use the reshape2
package for this task:
# library() stops with an error if reshape2 is missing, whereas require() only
# returns FALSE and lets the script fail later with a less helpful message.
library(reshape2)
# Melt to long format, keeping date/year/month as identifier columns
# (id.vars is spelled out rather than relying on partial matching of `id`).
df_melt <- melt(df1, id.vars = c("date", "year", "month"))
# Cast back to one row per year/month, summing each measured variable.
dcast(df_melt, year + month ~ variable, fun.aggregate = sum)
# year month x1 x2
1 2000 1 -80.83405 -224.9540159
2 2000 2 -223.76331 -288.2418017
3 2000 3 -188.83930 -481.5601913
4 2000 4 -197.47797 -473.7137420
5 2000 5 -259.07928 -372.4563522
How to get the sum of combinations of variables of 2 columns in a tibble in r
Group those two variables and summarise. Easy to do with tidyverse
, although I'd change the names of the columns to text first.
library(tidyverse)
# Count the rows for every col2/col3 combination. ungroup() removes the
# grouping that summarise() would otherwise leave on col2, so later verbs
# operate on the whole result.
df %>%
  group_by(col2, col3) %>%
  summarise(count = n()) %>%
  ungroup()
Related Topics
Is R Superstitious Regarding Posixct Data Type
Confidence Intervals for Predictions from Logistic Regression
To Find Whether a Column Exists in Data Frame or Not
R Sequence of Dates with Lubridate
R - Common Title and Legend for Combined Plots
Geom_Bar() + Pictograms, How To
Adding Simple Legend to Plot in R
R: How to Get the Last Element from Each Group
Remove All Variables Except Functions
How to Left Align Text in Annotate from Ggplot2
Adding Custom Image to Geom_Polygon Fill in Ggplot
Change the Color of Action Button in Shiny
How to Add a Scale Bar (For Linear Distances) to Ggmap