Adding Multiple Lag Variables Using Dplyr and for Loops

R/dplyr: Using a loop to create lags and calculate cumulative sums based on column names

If I understand you correctly, the following should work:

Reproducible sample data (with 3 variables for summing):

set.seed(123)
df = data.frame(
  id = c("a", "a", "a", "b", "b"),
  date = seq(as.Date("2015-12-01"), as.Date("2015-12-05"), by="days"),
  v1 = sample(seq(1, 20), 5),
  v2 = sample(seq(1, 20), 5),
  v3 = sample(seq(1, 20), 5)
)

> df
  id       date v1 v2 v3
1  a 2015-12-01  6  1 20
2  a 2015-12-02 15 11  9
3  a 2015-12-03  8 17 13
4  b 2015-12-04 16 10 10
5  b 2015-12-05 17  8  2

Group by id, sort by date (in case they aren't in sequence), & mutate for all named variables between the two named ones (v1:v3 in this case):

df %>%
  group_by(id) %>%
  arrange(date) %>%
  mutate_at(vars(v1:v3), funs(Cum = cumsum(lag(., default = 0)))) %>%
  ungroup()

# A tibble: 5 x 8
# Groups: id [2]
  id     date          v1    v2    v3 v1_Cum v2_Cum v3_Cum
  <fctr> <date>     <int> <int> <int>  <int>  <int>  <int>
1 a      2015-12-01     6     1    20      0      0      0
2 a      2015-12-02    15    11     9      6      1     20
3 a      2015-12-03     8    17    13     21     12     29
4 b      2015-12-04    16    10    10      0      0      0
5 b      2015-12-05    17     8     2     16     10     10

Looping all Variables in Data.table to create n-lags

There is a much simpler way to create the additional lag columns. The n parameter to data-table's shift() function is defined as

Non-negative integer vector denoting the offset to lead or lag the
input by. To create multiple lead/lag vectors, provide multiple values
to n

So,

DT[, shift(baz, 0:3)]

returns

    V1 V2 V3 V4
 1:  3 NA NA NA
 2:  6  3 NA NA
 3:  9  6  3 NA
 4: 12  9  6  3
 5: 15 12  9  6
 6: 18 15 12  9
 7: 21 18 15 12
 8: 24 21 18 15
 9: 27 24 21 18
10: 30 27 24 21

Now, the OP has requested to shift each variable and to name the new columns according to the amount of shift. This can be accomplished by

DT[, unlist(lapply(.SD, shift, n = 0:3), recursive = FALSE)]

    foo1 foo2 foo3 foo4 bar1 bar2 bar3 bar4 baz1 baz2 baz3 baz4
 1:    1   NA   NA   NA    2   NA   NA   NA    3   NA   NA   NA
 2:    2    1   NA   NA    4    2   NA   NA    6    3   NA   NA
 3:    3    2    1   NA    6    4    2   NA    9    6    3   NA
 4:    4    3    2    1    8    6    4    2   12    9    6    3
 5:    5    4    3    2   10    8    6    4   15   12    9    6
 6:    6    5    4    3   12   10    8    6   18   15   12    9
 7:    7    6    5    4   14   12   10    8   21   18   15   12
 8:    8    7    6    5   16   14   12   10   24   21   18   15
 9:    9    8    7    6   18   16   14   12   27   24   21   18
10:   10    9    8    7   20   18   16   14   30   27   24   21

Data

For comparison, the sample data of Matt's answer is used

library(data.table)
DT <- data.table(foo = seq_len(10),
                 bar = seq_len(10)*2L,
                 baz = seq_len(10)*3L)

How to loop lapply to create LAG terms over multiple variables in R

data.table and Map to handle the looping:

vars <- c("b","c")
rpv  <- rep(1:2, each=length(vars))
df[, paste(vars, "lag", rpv, sep="_") := Map(shift, .SD, rpv), by=a, .SDcols=vars]

#   a         b        c   b_lag_1  c_lag_1  b_lag_2  c_lag_2
#1: x 10.863180 393.9568        NA       NA       NA       NA
#2: x  6.139258 537.9199 10.863180 393.9568       NA       NA
#3: x 11.896448 483.8036  6.139258 537.9199 10.86318 393.9568
#4: y 18.079188 509.6136        NA       NA       NA       NA
#5: y  5.463224 233.6991 18.079188 509.6136       NA       NA
#6: y  6.363724 869.8406  5.463224 233.6991 18.07919 509.6136

Leads and Lags in for loop

The as.name needs eval to return the value of the column (assuming 'crisisdata' is data.table)

library(data.table)
for (var in crisis_variables){
  
  # add lags
   crisisdata[, (paste0("l",1:4, "_", var)):= 
          shift(eval(as.name(var)),1:4), by = country]

  # add leads
  crisisdata[, (paste0("f",0:4, "_", var)):=  
      shift(eval(as.name(var)),0:-4), by = country]
}

How to use a loop with mutate dplyr

I think this is the perfect case to use purrr::accumulate2().

purrr::accumulate() is often used to calculate conditional cumulative sums. It takes a function as the second argument. This function should have 2 arguments: the cumulative output co, and the currently evaluated value x.

purrr::accumulate2() allows us to use a second variable to iterate on, and here we use lag(check) as lx. The tricky part is that this second variable should be one item shorter, as it does not matter for the initial value.

Here is the code, matching your expected output.

library(tidyverse)

df = structure(list(id = c(8, 8, 8, 8, 8, 8, 8, 8, 8), 
                    check = c(0, 1, 1, 0, 0, 1, 0, 0, 0), 
                    count_x = c(0, 1, 2, 2, 2, 3, 3, 3, 3)), 
               row.names = c(NA, -9L), class = "data.frame")

df %>% 
  mutate(
    count_y = accumulate2(check, lag(check)[-1], function(co, x, lx){
      case_when(
        x==0 ~ co,
        x==1 & lx==0 ~ 1,
        x==1 & lx==1 ~ co+1,
        TRUE ~ 999 #error value in case of unexpected input
      )
    })
  )
#>   id check count_x count_y
#> 1  8     0       0       0
#> 2  8     1       1       1
#> 3  8     1       2       2
#> 4  8     0       2       2
#> 5  8     0       2       2
#> 6  8     1       3       1
#> 7  8     0       3       1
#> 8  8     0       3       1
#> 9  8     0       3       1

^{Created on 2021-05-05 by the reprex package (v2.0.0)}

How to create a lag variable within each group?

You could do this within data.table

 library(data.table)
 data[, lag.value:=c(NA, value[-.N]), by=groups]
  data
 #   time groups       value   lag.value
 #1:    1      a  0.02779005          NA
 #2:    2      a  0.88029938  0.02779005
 #3:    3      a -1.69514201  0.88029938
 #4:    1      b -1.27560288          NA
 #5:    2      b -0.65976434 -1.27560288
 #6:    3      b -1.37804943 -0.65976434
 #7:    4      b  0.12041778 -1.37804943

For multiple columns:

nm1 <- grep("^value", colnames(data), value=TRUE)
nm2 <- paste("lag", nm1, sep=".")
data[, (nm2):=lapply(.SD, function(x) c(NA, x[-.N])), by=groups, .SDcols=nm1]
 data
#    time groups      value     value1      value2  lag.value lag.value1
#1:    1      b -0.6264538  0.7383247  1.12493092         NA         NA
#2:    2      b  0.1836433  0.5757814 -0.04493361 -0.6264538  0.7383247
#3:    3      b -0.8356286 -0.3053884 -0.01619026  0.1836433  0.5757814
#4:    1      a  1.5952808  1.5117812  0.94383621         NA         NA
#5:    2      a  0.3295078  0.3898432  0.82122120  1.5952808  1.5117812
#6:    3      a -0.8204684 -0.6212406  0.59390132  0.3295078  0.3898432
#7:    4      a  0.4874291 -2.2146999  0.91897737 -0.8204684 -0.6212406
#    lag.value2
#1:          NA
#2:  1.12493092
#3: -0.04493361
#4:          NA
#5:  0.94383621
#6:  0.82122120
#7:  0.59390132

Update

From data.table versions >= v1.9.5, we can use shift with type as lag or lead. By default, the type is lag.

data[, (nm2) :=  shift(.SD), by=groups, .SDcols=nm1]
#   time groups      value     value1      value2  lag.value lag.value1
#1:    1      b -0.6264538  0.7383247  1.12493092         NA         NA
#2:    2      b  0.1836433  0.5757814 -0.04493361 -0.6264538  0.7383247
#3:    3      b -0.8356286 -0.3053884 -0.01619026  0.1836433  0.5757814
#4:    1      a  1.5952808  1.5117812  0.94383621         NA         NA
#5:    2      a  0.3295078  0.3898432  0.82122120  1.5952808  1.5117812
#6:    3      a -0.8204684 -0.6212406  0.59390132  0.3295078  0.3898432
#7:    4      a  0.4874291 -2.2146999  0.91897737 -0.8204684 -0.6212406
#    lag.value2
#1:          NA
#2:  1.12493092
#3: -0.04493361
#4:          NA
#5:  0.94383621
#6:  0.82122120
#7:  0.59390132

If you need the reverse, use type=lead

nm3 <- paste("lead", nm1, sep=".")

Using the original dataset

  data[, (nm3) := shift(.SD, type='lead'), by = groups, .SDcols=nm1]
  #  time groups      value     value1      value2 lead.value lead.value1
  #1:    1      b -0.6264538  0.7383247  1.12493092  0.1836433   0.5757814
  #2:    2      b  0.1836433  0.5757814 -0.04493361 -0.8356286  -0.3053884
  #3:    3      b -0.8356286 -0.3053884 -0.01619026         NA          NA
  #4:    1      a  1.5952808  1.5117812  0.94383621  0.3295078   0.3898432
  #5:    2      a  0.3295078  0.3898432  0.82122120 -0.8204684  -0.6212406
  #6:    3      a -0.8204684 -0.6212406  0.59390132  0.4874291  -2.2146999
  #7:    4      a  0.4874291 -2.2146999  0.91897737         NA          NA
 #   lead.value2
 #1: -0.04493361
 #2: -0.01619026
 #3:          NA
 #4:  0.82122120
 #5:  0.59390132
 #6:  0.91897737
 #7:          NA

data

 set.seed(1)
 data <- data.table(time =c(1:3,1:4),groups = c(rep(c("b","a"),c(3,4))),
             value = rnorm(7), value1=rnorm(7), value2=rnorm(7))

R: Create variable using iteratively updated values of previous row (=lag) similar to cumsum (depreciation)

You could use Reduce with accumulate = T:

Reduce(function(prev,value)  delta * prev + ifelse(is.na(value),0,value), x=df$value[-1], init = ifelse(is.na(df$value[1]),0,df$value[1]), accumulate = T)

[1] 1.000000 2.940000 2.763600 5.597784 9.261917

With data.table:

library(data.table)
setDT(df)

df[,output:=Reduce(function(prev,value)  delta * prev + ifelse(is.na(value),0,value), x=value[-1], init = ifelse(is.na(value[1]),0,value[1]), accumulate = T)]