Calculate cumsum() While Ignoring NA Values

Calculate cumsum() while ignoring NA values

Do you want something like this?

# Running total over the observed entries only; NA positions are left as NA
# and do not contribute to the sum.
x2 <- replace(x, !is.na(x), cumsum(x[!is.na(x)]))

x2

[edit] Alternatively, as suggested by a comment above, you can change the NAs to 0s:

# Remember where the gaps are before overwriting them.
miss <- is.na(x)
# Count missing values as 0 so they do not interrupt the running total.
# Note that this overwrites x itself.
x <- replace(x, miss, 0)
# cs is the requested cumsum: the running total, with NA put back at the
# positions that were originally missing.
cs <- replace(cumsum(x), miss, NA)

Cumsum ignoring NA's with reset

With base R you can do:
Generate data:

# Example input: alternating runs of 0s and 1s with lengths 2, 5, 3, 6 and 2
# (18 values in total).
criteria1 <- rep(c(0, 1, 0, 1, 0), times = c(2, 5, 3, 6, 2))
# Blank out a few observations to simulate missing values.
is.na(criteria1) <- c(6, 9, 12, 13)

get result

# Number of observations.
l <- length(criteria1)
# Running total with NAs counted as 0, so a missing value never breaks the sum.
cum <- cumsum(ifelse(!is.na(criteria1), criteria1, 0))
# Positions of the explicit zeros, i.e. the points where the count restarts.
# which() drops the NA comparisons, so missing values are never reset points.
zero <- which(criteria1 == 0)

# For each position, subtract the running total recorded at the most recent
# zero. cumsum(seq_len(l) %in% zero) counts the zeros seen so far; adding 1
# indexes into c(0, cum[zero]), whose leading 0 is the baseline used before
# the first zero. Unlike building rep() lengths from zero[2:length(zero)],
# this stays correct when the data does not start with a zero, or contains
# exactly one zero, or none at all.
res <- cum - c(0, cum[zero])[cumsum(seq_len(l) %in% zero) + 1L]

optional dplyr solution:

# Repeat the running total at each zero for as many positions as lie between
# that zero and the next one; the last zero runs to the end of the data
# (l + 1L stands in for the missing "next zero"). Subtracting this from cum
# restarts the count at every zero.
res <- cum - rep(
  cum[zero],
  times = dplyr::lead(zero, default = l + 1L) - zero
)

Detect runs of more than 3 consecutive NAs and set the result to NA there:

# Run-length encode the missingness pattern of the input.
NAs <- rle(is.na(criteria1))
# Indices (into the runs, not the elements) of NA runs longer than 3.
NAloc <- which(NAs$lengths > 3 & NAs$values)
# Expand the per-run flag back to one flag per element and blank out the
# flagged positions in one step. This avoids computing run boundaries with
# 1:(i - 1), which counts backwards (c(1, 0)) when the very first run is a
# long NA run and would then blank the wrong positions.
res[rep(NAs$lengths > 3 & NAs$values, times = NAs$lengths)] <- NA

Count visits (cumsum) per ID while ignoring NA's and 0's

library(dplyr)
# Within each ID, number the rows whose Attendance is not NA in order of
# appearance; rows where Attendance is NA get a Visit of 0.
# The result is only printed here and is still grouped by ID.
df %>%
  group_by(ID) %>%
  mutate(
    Visit = replace(
      cumsum(if_else(is.na(Attendance), 0, 1)),
      is.na(Attendance),
      0
    )
  )

Calculate cummean() and cumsd() while ignoring NA values and filling NAs

You might want to use the definition of variance (the mean of the squares minus the square of the mean) to calculate this:

library(data.table)
# Example table: three identical columns, each with one missing value.
dt <- data.table(
  V1 = c(1, 2, NA, 3),
  V2 = c(1, 2, NA, 3),
  V3 = c(1, 2, NA, 3)
)

# Snapshot of the original column names, taken as a copy so that it does not
# change when new columns are added to dt by reference later on.
cols <- copy(colnames(dt))

# Running mean of every original column, added as mean_<column>.
# NAs are skipped: at an NA row the previous mean is carried forward.
dt[, paste0("mean_", cols) := lapply(.SD, function(x) {
  # How many non-NA observations have been seen up to each row.
  n_seen <- cumsum(!is.na(x))

  # Count NAs as 0 in the running sum, then divide by the observed count.
  cumsum(replace(x, is.na(x), 0)) / n_seen
}), .SDcols = cols]

# Running sample standard deviation of every original column, added as
# sd_<column>. NAs are skipped: at an NA row the previous value is carried
# forward.
dt[, paste0("sd_", cols) := lapply(.SD, function(x) {
  # How many non-NA observations have been seen up to each row.
  n <- cumsum(!is.na(x))
  # NAs contribute nothing to the running sums.
  x[is.na(x)] <- 0

  # Population variance from its definition: mean of squares minus square of
  # the mean. Rounding error can push this marginally below zero for
  # (near-)constant data, which would make sqrt() return NaN with a warning,
  # so it is clamped at 0.
  pop_var <- pmax(cumsum(x^2) / n - (cumsum(x) / n)^2, 0)

  # n / (n - 1) rescales to the sample variance (n - 1 in the denominator).
  # With fewer than two observations the sd is undefined and the result is
  # not a usable number (is.na() is TRUE for it).
  sqrt(n / (n - 1) * pop_var)
}), .SDcols = cols]

Count the number of NA values in a row - reset when 0

You can use max instead of cumsum in your attempt:

library(data.table)
# Turn df into a data.table in place.
setDT(df)
# rleid(b == 0L) gives every consecutive run of zero / non-zero b its own id;
# within each run, whatiwant is the largest Accumulated value of that run.
df[, whatiwant := max(Accumulated), by = rleid(b == 0L)]
df

#    b Accumulated whatiwant
# 1: 1           1         3
# 2: 1           2         3
# 3: 1           3         3
# 4: 0           0         0
# 5: 1           1         4
# 6: 1           2         4
# 7: 1           3         4
# 8: 1           4         4
# 9: 0           0         0
#10: 0           0         0
#11: 0           0         0
#12: 1           1         2
#13: 1           2         2

Skip NAs when using Reduce() in data.table

Consider this example :

library(data.table)
# Example table: column a is complete, columns b and c contain NAs.
dt <- data.table(
  a = 1:5,
  b = c(3, NA, 1, 2, 4),
  c = c(NA, 1, NA, 3, 4)
)
dt

#   a  b  c
#1: 1  3 NA
#2: 2 NA  1
#3: 3  1 NA
#4: 4  2  3
#5: 5  4  4

If you want to treat NA values as 0, so that the previous running total is carried across them, you can use:

# Replace every column by its cumulative sum, counting NAs as 0 so that the
# running total simply carries over the missing rows.
dt[, names(dt) := lapply(.SD, function(x) {
  x[is.na(x)] <- 0
  cumsum(x)
}), .SDcols = names(dt)]
dt

#    a  b c
#1:  1  3 0
#2:  3  3 1
#3:  6  4 1
#4: 10  6 4
#5: 15 10 8

If you want to keep NA as NA :

# Replace every column by its cumulative sum, skipping NAs in the total but
# leaving NA in the rows that were missing in the input.
dt[, names(dt) := lapply(.SD, function(x) {
  gaps <- is.na(x)
  # Sum with the gaps counted as 0, then restore NA at the gap positions.
  replace(cumsum(replace(x, gaps, 0)), gaps, NA)
}), .SDcols = names(dt)]

dt

#    a  b  c
#1:  1  3 NA
#2:  3 NA  1
#3:  6  4 NA
#4: 10  6  4
#5: 15 10  8