Compute All Fixed Window Averages with Dplyr and Rcpproll

Compute all fixed window averages with dplyr and RcppRoll

Using Base R, I hope it help:

storms_wind <- storms %>%
select(name, year, month, day, hour, wind) %>%
group_by(name) %>%
arrange(name, year, month, day, hour)

multi_avg <- function(df, start, end) {
for(i in (strat:end)){
varname <- paste("avg", i , sep="_")
df[[varname]] <- with(df, roll_meanr(wind, n = i, fill = NA))
}
df
}

multi_avg(df=storms_wind, start=4,end=20)

Get the averages of amount by providers and state with few lines of code in R

A bit shorter with data.table and uniqueN:

library(data.table)

setDT(df.sample)

df.sample.state.prov<- df.sample[,{mem_cnt_pvdr=uniqueN(mbr_id );
mean_total_amt_pvdr=round(sum(as.numeric(amount))/mem_cnt_pvdr,2);
.(mem_cnt_pvdr,mean_total_amt_pvdr)},by=.(prov_state,prov_id)]

df.sample.state <- df.sample.state.prov[,.(pvdr_cnt_state=uniqueN(prov_id ),
total_amt_state=sum(as.numeric(mean_total_amt_pvdr)),
mem_cnt_state=sum(mem_cnt_pvdr)),by=.(prov_state)]

df.sample.state[df.sample.state.prov,.(prov_id,
prov_state,
mem_cnt_pvdr,
mean_total_amt_pvdr,
mem_cnt_state,
pvdr_cnt_state,
mean_total_amt_state=total_amt_state/pvdr_cnt_state) ,on=.(prov_state)]

prov_id prov_state mem_cnt_pvdr mean_total_amt_pvdr mem_cnt_state pvdr_cnt_state mean_total_amt_state
1: 599 CA 6 12.83 12 2 31.915
2: 699 CA 6 51.00 12 2 31.915

Rolling weighted average in R (multiple observations)

1) Use by.column = FALSE:

library(data.table)
library(zoo)

wmean <- function(x) weighted.mean(x[, 1], x[, 2])
sam[, rollapplyr(.SD, 3, wmean, by.column = FALSE, fill = NA, align = "left")]

2) Another approach is to encode the values and weights into a complex vector:

wmean_cmplx <- function(x) weighted.mean(Re(x), Im(x))
sam[, rollapply(complex(real = val_mean, imag = N), 3, wmean_cmplx,
fill = NA, align = "left")]

Rolling mean with different window length

Add a yearmon column and then summarize the sum and length of x by yearmon.

Finally divide a rolling sum over x by a rolling sum over the length N.

library(data.table)
library(zoo)

Means <- test[, yearmon := as.yearmon(time)][
, list(x = sum(x), N = .N), by = "yearmon"][
, list(yearmon, mean = rollsumr(x, 2, fill = NA) / rollsumr(N, 2, fill = NA))]

Alternately convert test to a zoo object, sum x and the length by yearmon, calculate the rolling sum of both x and n and divide giving a zoo object with the year/months and means. See ?fortify.zoo if you would like to convert that to a data frame.

z <- cbind(x = read.zoo(test, index = "time"), n = 1)
zym <- aggregate(z, as.yearmon, sum)
transform(rollsumr(zym, 2), mean = x / n)

Note

Input used is:

set.seed(24)
test <- data.table(x = rnorm(762), time=seq(as.Date("1988/03/15"),
as.Date("1990/04/15"), "day"))

R dplyr rolling sum

You can instead use RcppRoll::roll_sum which returns NA if the sample size(n) is less than the window size(k).

set.seed(1)
dg$count = rpois(dim(dg)[1], 5)
library(RcppRoll)
library(dplyr)
dg %>%
arrange(site,year,animal) %>%
group_by(site, animal) %>%
mutate(roll_sum = roll_sum(count, 2, align = "right", fill = NA))
# site year animal count roll_sum
#1 Boston 2000 dog 4 NA
#2 Boston 2001 dog 5 9
#3 Boston 2002 dog 3 8
#4 Boston 2003 dog 9 12
#5 Boston 2004 dog 6 15
#6 New York 2000 dog 4 NA
#7 New York 2001 dog 8 12
#8 New York 2002 dog 8 16
#9 New York 2003 dog 6 14
#10 New York 2004 cat 2 NA

R - Calculate rolling mean of previous k non-NA values

With runner it will be something like mean of 3-elements tail window of non-na values. You can achive the same result with slider

library(runner)
tmp.df <- data.frame(
x = c(NA, 1, 2, NA, 3, 4, 5, NA, NA, NA, 6, 7, NA)
)

# using runner
tmp.df$y_runner <- runner(
x = tmp.df$x,
f = function(x) {
mean(
tail(
x[!is.na(x)],
3
)
)
}
)

# using slider
tmp.df$y_slider <- slider::slide_dbl(
tmp.df$x,
function(x) {
mean(
tail(
x[!is.na(x)],
3
)
)
},
.before = Inf
)

tmp.df

# x y_runner y_slider
# 1 NA NaN NaN
# 2 1 1.0 1.0
# 3 2 1.5 1.5
# 4 NA 1.5 1.5
# 5 3 2.0 2.0
# 6 4 3.0 3.0
# 7 5 4.0 4.0
# 8 NA 4.0 4.0
# 9 NA 4.0 4.0
# 10 NA 4.0 4.0
# 11 6 5.0 5.0
# 12 7 6.0 6.0
# 13 NA 6.0 6.0


Related Topics



Leave a reply



Submit