Applying Rolling Mean by Group in R

Rolling mean (moving average) by group/id with dplyr

If you are not committed to to dplyr this should work:

get.mav <- function(bp,n=2){
  require(zoo)
  if(is.na(bp[1])) bp[1] <- mean(bp,na.rm=TRUE)
  bp <- na.locf(bp,na.rm=FALSE)
  if(length(bp)<n) return(bp)
  c(bp[1:(n-1)],rollapply(bp,width=n,mean,align="right"))  
}
test <- with(test,test[order(ID,YEAR_VISIT),])

test$BLOOD_PRESSURE_UPDATED <- 
  unlist(aggregate(BLOOD_PRESSURE~ID,test,get.mav,na.action=NULL,n=2)$BLOOD_PRESSURE)
test
#    ID AGE YEAR_VISIT BLOOD_PRESSURE TREATMENT BLOOD_PRESSURE_UPDATED
# 1   1  20       2000             NA         3               134.6667
# 2   1  21       2001            129         2               131.8333
# 3   1  22       2002            145         3               137.0000
# 4   1  22       2002            130         2               137.5000
# 5   2  23       2003             NA        NA               130.0000
# 6   2  30       2010            150         2               140.0000
# 7   2  31       2011            110         3               130.0000
# ...

This works for moving averages > 2 as well.

And here's a data.table solution, which is likely to be much faster if your dataset is large.

library(data.table)
setDT(test)     # converts test to a data.table in place
setkey(test,ID,YEAR_VISIT)
test[,BLOOD_PRESSURE_UPDATED:=as.numeric(get.mav(BLOOD_PRESSURE,2)),by=ID]
test
#    ID AGE YEAR_VISIT BLOOD_PRESSURE TREATMENT BLOOD_PRESSURE_UPDATED
#  1:  1  20       2000             NA         3               134.6667
#  2:  1  21       2001            129         2               131.8333
#  3:  1  22       2002            145         3               137.0000
#  4:  1  22       2002            130         2               137.5000
#  5:  2  23       2003             NA        NA               130.0000
#  6:  2  30       2010            150         2               140.0000
#  7:  2  31       2011            110         3               130.0000
# ...

Rolling Mean By Group Dplyr/data.table

you could use map from the purrr package and apply it on 1:n():

df = df %>% 
  na.omit() %>% 
  group_by(ticker) %>% 
  mutate(avg10 = map_dbl(1:n(), ~mean(lag_close[(max(.x-9, 1)):.x], na.rm =T))

Of course you have to decide what should happen with the first 9 rows where there are fewer than 10 observations. In my solution the rows 1 to 9 contain the mean of the last 1 to 9 observations.

Rolling average by group R data.table

Using cumsum:

dt <- as.data.table(df)
dt[, res := cumsum(Sales)/(1:.N), by = Group]
dt
    Group Sales Result res
 1:     a     2    2.0 2.0
 2:     a     4    3.0 3.0
 3:     a     3    3.0 3.0
 4:     a     3    3.0 3.0
 5:     a     5    3.4 3.4
 6:     b     9    9.0 9.0
 7:     b     7    8.0 8.0
 8:     b     8    8.0 8.0
 9:     b    10    8.5 8.5
10:     b    11    9.0 9.0

or with rollapplyr from the zoo package:

dt[, res := rollapplyr(Sales, 1:.N, mean), by = Group]

or with base R:

ave(df$Sales, df$Group, FUN = function(x) cumsum(x) / seq_along(x))

How to calculate rolling mean for multiple columns at once with a groupby and select in dplyr, while ignoring the groupby columns

Data, as defined in the question, has no numeric columns. It is all factors. We fix the definition below. Then we use mutate_at to just apply rollapplyr to the non-grouping columns. So that we can use Data, we roll the sum over the prior 3 values rather than the prior 21. An alternative to the mutate_at line would be mutate_if(is.numeric, ~ rollapplyr(...same...)) .

library(dplyr)
library(zoo)

Data <- data.frame(v1, v2, v3, v4) # v1, v2, v3, v4 are from question  

Data %>%
  group_by(v1, v2) %>%
  mutate_at(vars(-group_cols()), 
    ~ rollapplyr(.x, list(-seq(3)), sum, na.rm = FALSE, partial = TRUE, fill = NA)) %>%
  ungroup

giving:

# A tibble: 15 x 4
   v1    v2       v3    v4
   <fct> <fct> <dbl> <dbl>
 1 a     2010     NA    NA
 2 a     2010      1     6
 3 a     2010      3    19
 4 a     2010      6    24
 5 a     2010      9    24
 6 a     2010      8    24
 7 a     2010      9    23
 8 a     2010     10    82
 9 b     2020     NA    NA
10 b     2020     13     1
11 b     2020     18     3
12 b     2020     24     6
13 b     2020     24     9
14 b     2020     23     8
15 b     2020     82     9

Getting rolling average of multiple column by multiple condition, with dplyr and apply family

Do you want this? (mean_run from library(runner) used).

You can automate this process for as many variables you want. Just use their names in .cols argument of mutate(across...
To change rolling window size just change k in mean_run as per choice.

df %>% pivot_longer(!gmID, names_to = c("H_T", ".value"),
                    names_pattern = "(.+)\\.(.+)") %>%
  group_by(Team) %>%
  mutate(across(.cols = c(PTS, AST), 
                ~ runner::mean_run(x = ., k = 3, lag = 1), 
                .names = '{.col}_av')) %>%
  pivot_wider(id_cols = gmID, 
              names_from = H_T, 
              names_glue = "{H_T}_{.value}", 
              values_from = -c(gmID, H_T))

# A tibble: 20 x 11
    gmID H_Team A_Team H_PTS A_PTS H_AST A_AST H_PTS_av A_PTS_av H_AST_av A_AST_av
   <int> <chr>  <chr>  <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
 1     1 CLE    WAS       94    84    22    26     NA       NA       NA       NA  
 2     2 MIA    BOS      120   107    25    24     NA       NA       NA       NA  
 3     3 LAL    DAL       91    99    24    22     NA       NA       NA       NA  
 4     4 PHI    DEN       84    75    18    19     NA       NA       NA       NA  
 5     5 CLE    IND       88    90    18    22     94       NA       22       NA  
 6     6 DET    HOU       96   105    21    28     NA       NA       NA       NA  
 7     7 CHI    SAC       93    87    21    14     NA       NA       NA       NA  
 8     8 DAL    WAS       95    99    26    22     99       84       22       26  
 9     9 UTA    DAL      113    94    24    20     NA       97       NA       24  
10    10 PHO    CLE       85    87    16    19     NA       91       NA       20  
11    11 POR    LAL      116   106    19    21     NA       91       NA       24  
12    12 WAS    OKC       86    84    27    18     91.5     NA       24       NA  
13    13 ORL    DEN      102    89    24    22     NA       75       NA       19  
14    14 CHA    IND       90    89    18    19     NA       90       NA       22  
15    15 BOS    MIL       88    99    22    26    107       NA       24       NA  
16    16 CHI    CLE       86   115    23    34     93       89.7     21       19.7
17    17 ATL    HOU      102   109    23    22     NA      105       NA       28  
18    18 DAL    MIA      104    84    27    18     96      120       22.7     25  
19    19 CLE    UTA       88    86    23    19     96.7    113       23.7     24  
20    20 WAS    DEN      111    88    25    16     89.7     82       25       20.5

Rolling Mean from fixed starting point (and by Group)

You can try:

library(data.table)

setDT(df)[,cumsum(Pts[order(Date)])/seq(.N), Individual]

How to calculate rolling mean for each column

We can use mutate with across to loop over the columns A to C, specify a lambda function (function(.) or tidyverse shortform ~) to apply the function rollmean on the column

library(dplyr)
library(zoo)
df %>%
   group_by(CONM) %>%
   mutate(across(A:C, ~ rollmean(., 5, fill = NA, align = 'right'), 
             .names = '{col}_mean')) %>%
   ungroup

-output

# A tibble: 7 x 7
#  CONM      A     B     C A_mean B_mean C_mean
#  <chr> <int> <int> <int>  <dbl>  <dbl>  <dbl>
#1 a         1     2     3     NA     NA     NA
#2 a         2     3     4     NA     NA     NA
#3 a         3     4     5     NA     NA     NA
#4 a         4     5     6     NA     NA     NA
#5 a         5     6     7      3      4      5
#6 a         6     7     8      4      5      6
#7 b         1     2     3     NA     NA     NA

Or as @G. Grothendieck mentioned, the rollmeanr would do the right alignment

df %>%
    group_by(CONM) %>%
    mutate(across(A:C, ~ rollmeanr(., 5, fill = NA), .names = '{col}_mean'))

data

df <- structure(list(CONM = c("a", "a", "a", "a", "a", "a", "b"), A = c(1L, 
2L, 3L, 4L, 5L, 6L, 1L), B = c(2L, 3L, 4L, 5L, 6L, 7L, 2L), C = c(3L, 
4L, 5L, 6L, 7L, 8L, 3L)), class = "data.frame", row.names = c(NA, 
-7L))