dplyr/R Cumulative Sum with Reset

R cumulative sum by condition with reset

Here's one way with ave:

ave(df$a, cumsum(c(F, diff(df$a) < 0)), FUN=seq_along) - 1
[1] 0 1 2 3 0 1 2 3 4 5 6 0 1 0

We get a running count grouped by diff(df$a) < 0, which flags the positions in the vector that are smaller than their predecessors. We prepend FALSE with c(F, ..) to account for the first position, which has no predecessor. The cumulative sum of that logical vector creates an index for grouping. The function ave applies a function within each level of that index; we use seq_along for a running count. Since seq_along starts at 1, we subtract one, ave(...) - 1, to start from zero.
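
The answer never shows df; here is a sample column a that reproduces the printed output (the exact values are an assumption), together with the intermediate steps:

df <- data.frame(a = c(1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 1, 2, 1))

diff(df$a) < 0                        # TRUE wherever a value drops below its predecessor
cumsum(c(F, diff(df$a) < 0))          # group index: 0 0 0 0 1 1 1 1 1 1 1 2 2 3
ave(df$a, cumsum(c(F, diff(df$a) < 0)), FUN = seq_along) - 1
# [1] 0 1 2 3 0 1 2 3 4 5 6 0 1 0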


A similar approach using dplyr:

library(dplyr)
df %>%
  group_by(grp = cumsum(c(FALSE, diff(a) < 0))) %>%
  mutate(counter = row_number() - 1)

dplyr / R cumulative sum with reset

I think you can use purrr's accumulate() here. I've also made a wrapper function so it can be reused with different thresholds:

library(dplyr)  # for if_else()
library(purrr)  # for accumulate()

sum_reset_at <- function(thresh) {
  function(x) {
    accumulate(x, ~ if_else(.x >= thresh, .y, .x + .y))
  }
}
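
tib is not shown in the answer; this definition, inferred from the printed output below, reproduces it:

tib <- tibble(t = 1:6, a = c(2, 3, 1, 2, 2, 3))  # tibble() is re-exported by dplyr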

tib %>% mutate(c = sum_reset_at(5)(a))
#       t     a     c
#   <dbl> <dbl> <dbl>
# 1     1     2     2
# 2     2     3     5
# 3     3     1     1
# 4     4     2     3
# 5     5     2     5
# 6     6     3     3
tib %>% mutate(c = sum_reset_at(4)(a))
#       t     a     c
#   <dbl> <dbl> <dbl>
# 1     1     2     2
# 2     2     3     5
# 3     3     1     1
# 4     4     2     3
# 5     5     2     5
# 6     6     3     3
tib %>% mutate(c = sum_reset_at(6)(a))
#       t     a     c
#   <dbl> <dbl> <dbl>
# 1     1     2     2
# 2     2     3     5
# 3     3     1     6
# 4     4     2     2
# 5     5     2     4
# 6     6     3     7

cumsum by participant and reset on 0 R

Does this work?

library(dplyr)
library(data.table)

df %>%
  mutate(grp = rleid(Correct)) %>%
  group_by(Participant, grp) %>%
  mutate(Count = cumsum(Correct)) %>%
  select(-grp)
# A tibble: 10 x 4
# Groups:   Participant, grp [6]
     grp Participant Correct Count
   <int> <chr>         <dbl> <dbl>
 1     1 A                 1     1
 2     1 A                 1     2
 3     1 A                 1     3
 4     2 A                 0     0
 5     3 A                 1     1
 6     3 B                 1     1
 7     3 B                 1     2
 8     4 B                 0     0
 9     5 B                 1     1
10     5 B                 1     2

(grp still appears in the output because it is a grouping variable, which select() keeps; add ungroup() before select(-grp) if you want it dropped.)

Toy data:

df <- data.frame(
  Participant = c(rep("A", 5), rep("B", 5)),
  Correct = c(1, 1, 1, 0, 1, 1, 1, 0, 1, 1)
)
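
For reference, the run-length index that drives the grouping looks like this on the toy data, matching the grp column above:

data.table::rleid(df$Correct)
# [1] 1 1 1 2 3 3 3 4 5 5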

R cumulative sum using dplyr with reset


library(dplyr)

data_right %>%
  group_by(state, p) %>%
  mutate(grp = cumsum(c(TRUE, diff(as.integer(Year)) > 1))) %>%
  group_by(state, p, grp) %>%
  mutate(cy = row_number()) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 12 x 5
#    state p     Year  Consecutive_Yrs    cy
#    <chr> <chr> <chr>           <dbl> <int>
#  1 NY    n     1973                1     1
#  2 NY    n     1974                2     2
#  3 NY    n     1977                1     1
#  4 NY    n     1978                2     2
#  5 NY    p     1988                1     1
#  6 NY    p     1989                2     2
#  7 PA    n     1991                1     1
#  8 PA    n     1992                2     2
#  9 PA    n     1993                3     3
# 10 PA    p     1920                1     1
# 11 PA    p     1929                1     1
# 12 PA    p     1931                1     1

Assumes the data is already ordered by Year.
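
To see how the grouping index is built, here are the intermediate steps for the NY/n rows:

yrs <- c(1973, 1974, 1977, 1978)
diff(yrs) > 1                   # FALSE TRUE FALSE: a gap before 1977
cumsum(c(TRUE, diff(yrs) > 1))  # 1 1 2 2: two runs of consecutive years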


Data:

library(data.table)

data_right <- data.table(
  state = c("NY", "NY", "NY", "NY", "NY", "NY", "PA", "PA", "PA", "PA", "PA", "PA"),
  p = c("n", "n", "n", "n", "p", "p", "n", "n", "n", "p", "p", "p"),
  Year = c("1973", "1974", "1977", "1978", "1988", "1989", "1991", "1992", "1993",
           "1920", "1929", "1931"),
  Consecutive_Yrs = c(1, 2, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1)
)

dplyr solution for cumsum with a factor reset

You can create a new group every time OilChanged == 'Yes' and take the cumsum of the Diff values within each group.
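
df is not shown in the answer; this reconstruction, read off the printed output below, reproduces it:

df <- data.frame(
  OilChanged = c("No", "No", "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "Yes", "No"),
  Odometer   = c(300, 350, 410, 420, 430, 450, 500, 600, 600, 600, 650, 660, 700),
  Diff       = c(0, 50, 60, 10, 10, 20, 50, 100, 0, 0, 50, 10, 40),
  CumSum     = c(0, 50, 110, 10, 20, 40, 90, 190, 190, 190, 240, 250, 40)
)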

library(dplyr)

df %>%
  group_by(grp = lag(cumsum(OilChanged == 'Yes'), default = 0)) %>%
  mutate(newcumsum = cumsum(Diff)) %>%
  ungroup %>%
  select(-grp)


#    OilChanged Odometer  Diff CumSum newcumsum
#    <chr>         <dbl> <dbl>  <dbl>     <dbl>
#  1 No              300     0      0         0
#  2 No              350    50     50        50
#  3 Yes             410    60    110       110
#  4 No              420    10     10        10
#  5 No              430    10     20        20
#  6 No              450    20     40        40
#  7 No              500    50     90        90
#  8 No              600   100    190       190
#  9 No              600     0    190       190
# 10 No              600     0    190       190
# 11 No              650    50    240       240
# 12 Yes             660    10    250       250
# 13 No              700    40     40        40

cumsum with a condition to restart in R

You may use cumsum to create groups as well.

library(dplyr)

df <- df %>%
  group_by(group = cumsum(dplyr::lag(port == 0, default = 0))) %>%
  mutate(cumsum_G = cumsum(G)) %>%
  ungroup

df

#   inv   ass    port     G group cumsum_G
#   <chr> <chr> <int> <int> <dbl>    <int>
# 1 i     x         2     1     0        1
# 2 i     x         2     0     0        1
# 3 i     x         0     1     0        2
# 4 i     x         3     0     1        0
# 5 i     x         3     1     1        1
You may remove the group column from output using %>% select(-group).
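
For reference, the grouping index is built like this; the lag shifts the reset flag down one row, so a new group starts on the row after port == 0:

port <- c(2, 2, 0, 3, 3)
dplyr::lag(port == 0, default = 0)          # FALSE FALSE FALSE  TRUE FALSE
cumsum(dplyr::lag(port == 0, default = 0))  # 0 0 0 1 1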

data

df <- structure(list(inv = c("i", "i", "i", "i", "i"), ass = c("x", 
"x", "x", "x", "x"), port = c(2L, 2L, 0L, 3L, 3L), G = c(1L,
0L, 1L, 0L, 1L)), class = "data.frame", row.names = c(NA, -5L))

R Cumulative Sum with a condition and a reset

This can be achieved by:

library(tidyverse)
library(data.table)

z %>%
  group_by(rleid(Signal)) %>%                # advance value every time Signal changes and group by that
  mutate(cum = Signal * cumsum(Volume)) %>%  # cumsum in each group
  ungroup() %>%                              # ungroup so you could remove the grouping column
  select(-4)                                 # remove grouping column

or without data.table by using rle:

z %>%
  mutate(rl = rep(1:length(rle(Signal)$lengths), times = rle(Signal)$lengths)) %>%
  group_by(rl) %>%
  mutate(cum = Signal * cumsum(Volume)) %>%
  ungroup() %>%
  select(-4)
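
Both variants build the same run index; on this data it looks like this (the leading NA forms its own run):

data.table::rleid(z$Signal)
# [1] 1 2 2 2 2 2 2 2 2 3 3 4 5 5 5 6 6 7 7 7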

# output
   date       Signal    Volume        cum

   <fct>       <int>     <int>      <int>
 1 2016-01-04     NA  37912403         NA
 2 2016-01-05     -1  23258238  -23258238
 3 2016-01-06     -1  25096183  -48354421
 4 2016-01-07     -1  45172906  -93527327
 5 2016-01-08     -1  35402298 -128929625
 6 2016-01-11     -1  29932385 -158862010
 7 2016-01-12     -1  28395390 -187257400
 8 2016-01-13     -1  33410553 -220667953
 9 2016-01-14     -1  48658623 -269326576
10 2016-01-15      1  46132781   46132781
11 2016-01-19      1  30998256   77131037
12 2016-01-20     -1  59051429  -59051429
13 2016-01-21      1  30518939   30518939
14 2016-01-22      1  30495387   61014326
15 2016-01-25      1  32482015   93496341
16 2016-01-26     -1  26877080  -26877080
17 2016-01-27     -1  58699359  -85576439
18 2016-01-28      1 107475327  107475327
19 2016-01-29      1  62739548  170214875
20 2016-02-01      1  46132726  216347601

data:

z <- read.table(text = "date Signal Volume
2016-01-04 NA 37912403
2016-01-05 -1 23258238
2016-01-06 -1 25096183
2016-01-07 -1 45172906
2016-01-08 -1 35402298
2016-01-11 -1 29932385
2016-01-12 -1 28395390
2016-01-13 -1 33410553
2016-01-14 -1 48658623
2016-01-15 1 46132781
2016-01-19 1 30998256
2016-01-20 -1 59051429
2016-01-21 1 30518939
2016-01-22 1 30495387
2016-01-25 1 32482015
2016-01-26 -1 26877080
2016-01-27 -1 58699359
2016-01-28 1 107475327
2016-01-29 1 62739548
2016-02-01 1 46132726", header = T)

Cumulative sum with reset option if multiple conditions are met

Contributing with a base-R solution:

df$amount_cumsum <- 0
df$count_cumsum <- 0
df$condition_met <- 0
reset <- FALSE
for (i in 1:nrow(df)) {
  if (i == 1 | reset) {
    # first row overall, or the row right after the condition was met
    df$amount_cumsum[i] <- df$amount[i]
    df$count_cumsum[i] <- df$count[i]
    reset <- FALSE
  } else if (df$id[i] != df$id[i-1]) {
    # new id: restart both running sums
    df$amount_cumsum[i] <- df$amount[i]
    df$count_cumsum[i] <- df$count[i]
    reset <- FALSE
  } else {
    df$amount_cumsum[i] <- df$amount_cumsum[i-1] + df$amount[i]
    df$count_cumsum[i] <- df$count_cumsum[i-1] + df$count[i]
  }

  if (df$amount_cumsum[i] >= 10 & df$count_cumsum[i] >= 3) {
    df$condition_met[i] <- 1
    reset <- TRUE
  }
}

I've expanded your dataset and benchmarked this code against your solution. The benchmark shows the base-R solution is about 21 times faster than the tidyverse one!

library(tidyverse)

dates <- seq(as.Date("2019-01-01"), as.Date("2020-03-04"), by = "days")

df <- data.frame(
  date = c(sample(dates, 300), sample(dates, 400), sample(dates, 350)),
  id = c(rep("A", 300), rep("B", 400), rep("C", 350)),
  amount = floor(runif(1050, 0, 15)),
  count = floor(runif(1050, 0, 5)),
  stringsAsFactors = FALSE
)

rbenchmark::benchmark(
  "Tidy Solution" = {
    df_tidy <- df %>%
      group_by(id) %>%
      nest(data = c(amount, count)) %>%
      mutate(
        data_accumulate = purrr::accumulate(
          .x = data,
          .f = function(.x, .y) if (max(.x[1]) < 10 | max(.x[2]) < 3) .x + .y else .y
        )
      ) %>%
      unnest(cols = c(data_accumulate)) %>%
      rename(amount_cumsum = amount, count_cumsum = count) %>%
      unnest(cols = c(data)) %>%
      mutate(condition_met = case_when(
        amount_cumsum >= 10 & count_cumsum >= 3 ~ 1,
        TRUE ~ 0
      ))
  },
  "Base-R Solution" = {
    df_base <- df
    df_base$amount_cumsum <- 0
    df_base$count_cumsum <- 0
    df_base$condition_met <- 0
    reset <- FALSE  # to reset the counters
    for (i in 1:nrow(df_base)) {
      if (i == 1 | reset) {
        df_base$amount_cumsum[i] <- df_base$amount[i]
        df_base$count_cumsum[i] <- df_base$count[i]
        reset <- FALSE
      } else if (df_base$id[i] != df_base$id[i-1]) {
        df_base$amount_cumsum[i] <- df_base$amount[i]
        df_base$count_cumsum[i] <- df_base$count[i]
        reset <- FALSE
      } else {
        df_base$amount_cumsum[i] <- df_base$amount_cumsum[i-1] + df_base$amount[i]
        df_base$count_cumsum[i] <- df_base$count_cumsum[i-1] + df_base$count[i]
      }
      if (df_base$amount_cumsum[i] >= 10 & df_base$count_cumsum[i] >= 3) {
        df_base$condition_met[i] <- 1
        reset <- TRUE
      }
    }
  },
  replications = 100
)

gc()
           test replications elapsed relative user.self sys.self user.child sys.child
Base-R Solution          100    3.89    1.000      3.69      0.0         NA        NA
  Tidy Solution          100   84.00   21.594     78.65      0.2         NA        NA

Conditional running count (cumulative sum) with reset in R (dplyr)

We can use case_when to assign the value we need based on our conditions. We then add an additional group_by condition using cumsum, starting a new group whenever the temp column is 0. In the final mutate step we temporarily replace NA values in temp with 0, take the cumsum over it, and then put the NA values back in their places to get the final output.
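
mydata is not shown in the answer; this reconstruction, read off the printed output below, reproduces it:

mydata <- data.frame(
  id        = rep(1, 11),
  age       = c(rep(1, 10), 2),
  block     = c(1, 1, rep(2, 9)),
  trial     = c(1, 2, 1, 2, 3, 4, 5, 6, 7, 8, 1),
  condition = c(1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1),
  accuracy  = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0)
)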

library(dplyr)

mydata %>%
  group_by(id, age) %>%
  mutate(temp = case_when(accuracy == 0 & block == 2 & condition == 1 ~ 1,
                          accuracy == 1 & block == 2 & condition == 1 ~ 0,
                          TRUE ~ NA_real_)) %>%
  ungroup() %>%
  group_by(id, age, group = cumsum(replace(temp == 0, is.na(temp), 0))) %>%
  mutate(cumulative = replace(cumsum(replace(temp, is.na(temp), 0)),
                              is.na(temp), NA)) %>%
  select(-temp, -group)


#    group    id   age block trial condition accuracy cumulative
#    <dbl> <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>      <dbl>
#  1     0     1     1     1     1         1        0         NA
#  2     0     1     1     1     2         1        0         NA
#  3     0     1     1     2     1         1        0          1
#  4     0     1     1     2     2         1        0          2
#  5     0     1     1     2     3         1        0          3
#  6     0     1     1     2     4         2        0         NA
#  7     0     1     1     2     5         1        0          4
#  8     1     1     1     2     6         1        1          0
#  9     1     1     1     2     7         1        0          1
# 10     1     1     1     2     8         1        0          2
# 11     1     1     2     2     1         1        0          1

