Aggregate and Weighted Mean in R

Aggregate and Weighted Mean in R

For starters, w=(dat$return, dat$assets)) is a syntax error.

And plyr makes this a little easier:

> set.seed(42)   # fix seed so that you get the same results
> dat <- data.frame(assetclass=sample(LETTERS[1:5], 20, replace=TRUE),
+ return=rnorm(20), assets=1e7+1e7*runif(20))
> library(plyr)
> ddply(dat, .(assetclass), # so by asset class invoke following function
+ function(x) data.frame(wret=weighted.mean(x$return, x$assets)))
assetclass wret
1 A -2.27292
2 B -0.19969
3 C 0.46448
4 D -0.71354
5 E 0.55354
>

Weighted means for groups in r - using aggregate and weighted.mean functions together

Here is a way to compute the weighted means with aggregate called by by().

# Weighted mean of Y1, Y2 and Y3 within each level of X1, weighted by wgt.
# by() hands the rows of one X1 level at a time to the function as DF;
# aggregate() then applies the weighted mean to each Y column of that subset.
res <- by(df, df$X1, function(DF) {
  aggregate(
    cbind(Y1, Y2, Y3) ~ X1,
    data = DF,
    FUN = function(y) weighted.mean(y, w = DF[["wgt"]], na.rm = TRUE),
    # The formula method defaults to na.omit, which would drop rows with an
    # NA in any Y column and leave y shorter than DF[["wgt"]]. Keep every row
    # and let weighted.mean(na.rm = TRUE) discard the NA values together
    # with their weights.
    na.action = na.pass
  )
})
# Stack the one-row data frames (one per X1 level) into a single data frame.
do.call(rbind, res)
# X1 Y1 Y2 Y3
#A A 2.152503 2.633935 18.93457
#B B 6.677851 3.589251 16.90102
#C C 10.194695 2.638378 16.70958

adding weighted average with aggregate

We may use dplyr

library(dplyr)
d %>%
group_by(m, group) %>%
summarise(vmean = floor(weighted.mean(value, size)))
# m group vmean
# <dbl> <fctr> <dbl>
#1 1 A 54
#2 2 B 5
#3 5 A 21
#4 5 B 40
#5 8 B 100
#6 10 A 30
#7 10 B 100

Or using base R

# Weighted mean of value (weights: size) for every group x m combination.
by(
  d[c("value", "size")],
  list(d$group, d$m),
  FUN = function(cell) weighted.mean(cell[["value"]], cell[["size"]])
)

Using aggregate to compute monthly weighted average

With aggregate() this is not possible, because the weight vector is not partitioned along with the data during aggregate(). You can use by(), or split() plus sapply(), or the additional package data.table, or the function ddply() from the package plyr, or functions from the package dplyr.

example with split() plus sapply():

# Split the rows by Month, then take the weighted mean of each piece.
sapply(
  split(df, df$Month),
  function(piece) weighted.mean(x = piece$Variable, w = piece$Weighting)
)

result:

1998-05-01 1998-06-01 
5.89733 10.33142

a variant with by()

by(df, df$Month, FUN=function(d) weighted.mean(d$Variable, w = d$Weighting)) # or
unclass(by(df, df$Month, FUN=function(d) weighted.mean(d$Variable, w = d$Weighting)))

with package plyr

library(plyr)
ddply(df, ~Month, summarize, weighted.mean(Variable, w=Weighting))

with data.table

library(data.table)
setDT(df)[, weighted.mean(Variable, w = Weighting), Month]

Weighted mean using aggregate across groups in r

Instead of mean, use weighted.mean. However, aggregate may not be an option here, because aggregate loops over only the 'Value' column and does not have access to the 'Weight' for each group.

library(dplyr)
DF %>%
group_by(Group_1, Group_2) %>%
summarise(wt_mean = weighted.mean(Value, Weight), .groups = 'drop')

-output

# A tibble: 21 x 3
# Group_1 Group_2 wt_mean
# <chr> <chr> <dbl>
# 1 a h 24.7
# 2 a i 15
# 3 a j 21.1
# 4 a k 23.6
# 5 a m 14.1
# 6 b i 40
# 7 b j 12.7
# 8 b k 6.88
# 9 b l 30.6
#10 b m 5
# … with 11 more rows

If we want to use base R, then by should work

# One weighted mean of Value (weights: Weight) per Group_1 x Group_2 cell.
by(
  DF,
  DF[c('Group_1', 'Group_2')],
  function(cell) weighted.mean(x = cell[["Value"]], w = cell[["Weight"]])
)

Display weighted mean by group in the data.frame

If we use mutate, then we can avoid the left_join

library(dplyr)
df %>%
group_by(education) %>%
mutate(weighted_income = weighted.mean(income, weight))
# obs income education weight weighted_income
# <int> <int> <fctr> <int> <dbl>
#1 1 1000 A 10 1166.667
#2 2 2000 B 1 1583.333
#3 3 1500 B 5 1583.333
#4 4 2000 A 2 1166.667

Aggregate an entire data frame with Weighted Mean

You can do it with data.table:

 library(data.table)

#set up your data

# set up your data: three value columns (v1..v3) observed three times on each
# of three dates, with nwords as the weight of every observation
dat <- data.frame(
  date = rep(c("2012-01-01", "2013-01-01", "2014-01-01"), each = 3),
  nwords = 1:9, v1 = rnorm(9), v2 = rnorm(9), v3 = rnorm(9)
)

# make it into a data.table keyed by date
dat <- data.table(dat, key = "date")

# grab the column names we want, generalized for v1:vwhatever
# (not stored in a variable called `c`, which would mask base::c)
value_cols <- setdiff(colnames(dat), c("date", "nwords"))

# get the weighted mean by date for each column, overwriting the columns in
# place. The parenthesised LHS tells := to assign to the columns *named in*
# value_cols; it replaces the deprecated `with = FALSE` + `:=` combination.
dat[, (value_cols) := lapply(.SD, weighted.mean, w = nwords),
    by = date, .SDcols = value_cols]

# keep only the unique dates and weighted means
# (nwords := NULL drops the weight column from dat by reference)
wms <- unique(dat[, nwords := NULL])

Aggregate and Weighted Mean for multiple columns in R

So you can do it for several columns of weights

DT <- data.table(assetclass=sample(LETTERS[1:5], 20, replace=TRUE), 
tax=rnorm(20), assets=1e7+1e7*runif(20), asets2=1e6+1e7*runif(20))
DT[, lapply(.SD, FUN=weighted.mean, x=tax), by=assetclass, .SDcols=3:4]
# assetclass assets asets2
# 1: D -0.14179882 -0.003717957
# 2: B 0.61146928 0.523913589
# 3: E -0.28037796 -0.147677384
# 4: C -0.09658125 -0.010338894
# 5: A 0.74954460 0.750190947

or you can exclude the non-weight columns from .SD:

DT[, lapply(.SD, FUN=weighted.mean, x=tax), by=assetclass, .SDcols=-(1:2)]

Here is a variant using matrix multiplication:

DT[, as.list(crossprod(as.matrix(.SD), tax)/colSums(.SD)), by=assetclass, .SDcols=-(1:2)]

The matrix multiplication can do it also for several columns tax1, tax2, ...

# Example with two value columns (tax1, tax2) and two weight columns
# (assets, asets2).
DT <- data.table(
  assetclass = sample(LETTERS[1:5], 20, replace = TRUE),
  tax1 = rnorm(20), tax2 = rnorm(20),
  assets = 1e7 + 1e7 * runif(20), asets2 = 1e6 + 1e7 * runif(20)
)

# .SD must contain the weight columns only. With two tax columns in the table,
# .SDcols = -(1:2) would drop just assetclass and tax1 and leave tax2 in .SD,
# where it would be treated as a weight. Select the weights by name instead.
weight_cols <- c("assets", "asets2")

# crossprod(W, x) / colSums(W) is sum(w * x) / sum(w) for every weight column.
DT[, as.list(crossprod(as.matrix(.SD), tax1) / colSums(.SD)),
   by = assetclass, .SDcols = weight_cols]
DT[, as.list(crossprod(as.matrix(.SD), tax2) / colSums(.SD)),
   by = assetclass, .SDcols = weight_cols]
# Both tax columns at once: the result is a (weights x taxes) matrix, which
# as.list() flattens column by column (all weights for tax1, then for tax2).
DT[, as.list(crossprod(as.matrix(.SD), cbind(tax1, tax2)) / colSums(.SD)),
   by = assetclass, .SDcols = weight_cols]


Related Topics



Leave a reply



Submit