Calculate cumsum() While Ignoring NA Values

Calculate cumsum() while ignoring NA values

Do you want something like this?

# Running total over the non-missing entries of `x` only; positions where
# `x` is NA stay NA in the result.
x2 <- replace(x, !is.na(x), cumsum(x[!is.na(x)]))

# Show the result.
x2

[edit] Alternatively, as suggested by a comment above, you can change the NAs to 0s, take the cumulative sum, and then put the NAs back:

# Remember where the gaps are, then zero them so they add nothing to the sum.
# Note that this overwrites `x` itself.
miss <- is.na(x)
x <- replace(x, miss, 0)
# Cumulative sum over the zero-filled vector, with the gaps blanked out again.
cs <- replace(cumsum(x), miss, NA)
# cs is the requested cumsum

Cumsum ignoring NA's with reset

With base R you can do the following.
Generate the data:

# Example input: runs of 0, 1, 0, 1, 0 with lengths 2, 5, 3, 6, 2 ...
criteria1 <- rep(c(0, 1, 0, 1, 0), times = c(2, 5, 3, 6, 2))
# ... with four positions marked as missing.
is.na(criteria1) <- c(6, 9, 12, 13)

Get the result:

# Cumulative sum of `criteria1` that skips NAs and restarts at every 0.
l <- length(criteria1)
# Running total with NAs counted as 0, so they add nothing to the sum.
cum <- cumsum(ifelse(!is.na(criteria1), criteria1, 0))
# Positions of the explicit zeros, i.e. the reset points (which() drops NAs).
zero <- which(criteria1 == 0)

# For each position, count the reset points seen so far (0 = none yet) and
# subtract the running total recorded at the latest one. Looking the baseline
# up by that count stays aligned with `cum` when the vector has no zero, has
# a single zero, or does not start with a zero -- cases where repeating each
# baseline by the gap to the next zero yields the wrong length or an error.
resets_seen <- cumsum(seq_len(l) %in% zero)
res <- cum - c(0, cum[zero])[resets_seen + 1L]

optional dplyr solution:

# Same reset-aware result via dplyr: lead() gives the position of the next
# zero, and its `default` supplies l + 1 after the last one, so the
# difference is the number of elements each baseline applies to.
res <- cum - rep(
  cum[zero],
  times = dplyr::lead(zero, default = l + 1L) - zero
)

Detect runs of more than 3 consecutive NAs and set the result to NA at those positions:

# Blank out `res` wherever `criteria1` has a run of more than 3 consecutive NAs.
NAs <- rle(is.na(criteria1))
# Indices (into the run-length encoding) of the NA runs longer than 3.
NAloc <- which(NAs$lengths > 3 & NAs$values)
# Expand the per-run flag back to one flag per element. Building the mask
# this way avoids computing run boundaries with 1:(i - 1), which counts
# down to c(1, 0) and targets the wrong elements when the first run is the
# long NA run.
long_na <- rep(seq_along(NAs$lengths) %in% NAloc, times = NAs$lengths)
res[long_na] <- NA

Count visits (cumsum) per ID while ignoring NA's and 0's


library(dplyr)
# Within each ID, number the rows whose Attendance is not missing 1, 2, 3, ...
# in row order; rows with missing Attendance get 0.
df %>%
  group_by(ID) %>%
  mutate(
    Visit = if_else(
      is.na(Attendance),
      0,
      cumsum(as.numeric(!is.na(Attendance)))
    )
  )

Calculate cummean() and cumsd() while ignoring NA values and filling NAs

You might want to use the definition of variance to calculate this:

library(data.table)
# Toy table: three identical numeric columns, each with one missing value.
dt <- data.table(
  V1 = c(1, 2, NA, 3),
  V2 = c(1, 2, NA, 3),
  V3 = c(1, 2, NA, 3)
)

# Snapshot of the original column names, taken with copy() so that `cols`
# is its own object and still lists only V1..V3 after new columns are added.
cols <- copy(names(dt))

# Cumulative means: add one "mean_<col>" column per original column.
dt[, paste0("mean_", cols) := lapply(.SD, function(v) {
  # Running count of the non-NA observations seen so far.
  n_seen <- cumsum(!is.na(v))

  # Zero-fill the NAs so they leave the running sum unchanged, then divide
  # by the number of real observations.
  cumsum(replace(v, is.na(v), 0)) / n_seen
}), .SDcols = cols]

# Cumulative standard deviations: add one "sd_<col>" column per original
# column, computed over the non-NA observations seen so far.
dt[, paste0("sd_", cols) := lapply(.SD, function(x) {
  # Running count of the non-NA observations seen so far.
  lens <- cumsum(!is.na(x))
  # Zero-fill the NAs so they add nothing to either running sum.
  x[is.na(x)] <- 0

  # Population variance as mean of squares minus square of the mean.
  # Floating-point cancellation can leave a tiny negative value for
  # (near-)constant data, which would make sqrt() return NaN with a warning,
  # so clamp at 0.
  pop_var <- pmax(cumsum(x^2) / lens - (cumsum(x) / lens)^2, 0)

  # Rescale by n / (n - 1) to get the sample (n - 1 denominator) estimate.
  sqrt(lens / (lens - 1) * pop_var)
}), .SDcols = cols]

Count the number of NA values in a row - reset when 0

You can use max() instead of cumsum() in your attempt:

library(data.table)
# Convert `df` to a data.table by reference.
setDT(df)
# Give every row the largest `Accumulated` value of its run, where a run is
# a stretch of consecutive rows sharing the same value of `b == 0L`.
df[, whatiwant := max(Accumulated), by = rleid(b == 0L)]
df

# b Accumulated whatiwant
# 1: 1 1 3
# 2: 1 2 3
# 3: 1 3 3
# 4: 0 0 0
# 5: 1 1 4
# 6: 1 2 4
# 7: 1 3 4
# 8: 1 4 4
# 9: 0 0 0
#10: 0 0 0
#11: 0 0 0
#12: 1 1 2
#13: 1 2 2

Skip NAs when using Reduce() in data.table

Consider this example:

library(data.table)
# Example table: one complete integer column and two numeric columns that
# contain missing values.
dt <- data.table(
  a = 1:5,
  b = c(3, NA, 1, 2, 4),
  c = c(NA, 1, NA, 3, 4)
)
dt

# a b c
#1: 1 3 NA
#2: 2 NA 1
#3: 3 1 NA
#4: 4 2 3
#5: 5 4 4

If you want NA positions to show the running total so far (NAs are treated as 0), you can use:

# Overwrite every column with its cumulative sum, counting NAs as 0 so the
# running total simply carries through the gaps.
dt[, names(dt) := lapply(.SD, function(col) {
  zero_filled <- replace(col, is.na(col), 0)
  cumsum(zero_filled)
}), .SDcols = names(dt)]
dt

# a b c
#1: 1 3 0
#2: 3 3 1
#3: 6 4 1
#4: 10 6 4
#5: 15 10 8

If you want to keep NA as NA:

# Overwrite every column with its cumulative sum, skipping NAs in the total
# but leaving NA at the positions where the input was NA.
dt[, names(dt) := lapply(.SD, function(col) {
  gaps <- is.na(col)
  running <- cumsum(replace(col, gaps, 0))
  replace(running, gaps, NA)
}), .SDcols = names(dt)]

dt

# a b c
#1: 1 3 NA
#2: 3 NA 1
#3: 6 4 NA
#4: 10 6 4
#5: 15 10 8


Related Topics



Leave a reply



Submit