Fastest Way to Add Rows For Missing Time Steps

Fill in missing time steps (yyyy-mm-dd HH:MM:SS) by adding rows with missing times in R

This should do what you want:

library(xts)
# Parse the inline CSV into a zoo series indexed by POSIXct timestamps and
# convert it to xts. drop=FALSE keeps the single data column as a named
# column, so it can be addressed as Volume in the steps that follow.
x <- as.xts(read.zoo(text="Time,Volume
1996-02-05 00:34:00,0.01
1996-02-05 00:51:00,0.01
1996-02-05 00:52:00,0.01
1996-02-05 01:04:00,0.01
1996-02-05 01:19:00,0.01
1996-02-05 05:00:00,0.01
1996-02-05 05:07:00,0.01
1996-02-05 05:08:00,0.01
1996-02-05 05:14:00,0.01",
sep=",", FUN=as.POSIXct, header=TRUE, drop=FALSE))

# Goal: count the observations that fall into each 30-minute interval,
# including intervals that contain no observation at all (count 0).
# 1) Create a POSIXct sequence in 30-minute steps, starting at midnight of
# the first day and stopping at (or just before) the last observation, end(x)
midnightDay1 <- as.POSIXct(format(start(x),"%Y-%m-%d"))
timesteps <- seq(midnightDay1, end(x), by="30 min")
# 2) Make a copy of your object and set all values for Volume to 1,
# so that summing in step 5 counts observations
y <- x
y$Volume <- 1
# 3) Merge the copy with a zero-column xts object that has an index
# with all the values you want. Fill missing values with 0.
m <- merge(y, xts(,timesteps), fill=0)
# 4) Align all index values to 30-minute intervals (60*30 seconds);
# align.time moves each timestamp forward to the next interval boundary
a <- align.time(m, 60*30)
# 5) Sum the values for Volume in each period
half_hour <- period.apply(a, endpoints(a, "minutes", 30), sum)

Insert rows for missing time measurements of a negative event

This is a tidyverse solution.

library(tidyverse)
library(lubridate)

# Build one "nf" row for every gap between the recorded events of a Subject,
# then combine those rows with the original data.
df %>%
# split "start - end" into two text columns; remove = FALSE keeps Timestamp
separate(Timestamp, c("start", "end"), sep = " - ", remove = FALSE) %>%
group_by(Subject) %>%
# a gap starts where the previous event of the same Subject ended (or at
# 00:00:00.000 before the first event) and ends where the current one starts
mutate(Starttime_ms = lag(end, default = "00:00:00.000"),
Endtime_ms = start,
Event = "nf",
Timestamp = paste(Starttime_ms, Endtime_ms, sep = " - "),
# parse the time strings with lubridate and scale them by 1000
across(ends_with("_ms"), ~ hms(.x) * 1000),
Duration = Endtime_ms - Starttime_ms,
# convert the period columns to plain numbers; .keep = "unused" drops the
# columns that were only consumed by the expressions above (start, end)
across(where(is.period), as.numeric), .keep = "unused") %>%
# append the original rows to the generated gap rows
bind_rows(df) %>%
arrange(Starttime_ms, .by_group = TRUE) %>%
# discard zero-length gaps, e.g. when a Subject's first event starts at 0
filter(Duration > 0) %>%
ungroup()
Output
# A tibble: 11 × 6
Subject Timestamp Starttime_ms Endtime_ms Duration Event
<chr> <chr> <dbl> <dbl> <dbl> <chr>
1 A 00:00:00.000 - 00:00:00.146 0 146 146 nf
2 A 00:00:00.146 - 00:00:00.889 146 889 743 f
3 A 00:00:00.889 - 00:00:01.568 889 1568 679 nf
4 A 00:00:01.568 - 00:00:02.183 1568 2183 615 f
5 A 00:00:02.183 - 00:00:03.642 2183 3642 1459 nf
6 A 00:00:03.642 - 00:00:04.522 3642 4522 880 f
7 B 00:00:00.000 - 00:00:00.660 0 660 660 f
8 B 00:00:00.660 - 00:00:01.247 660 1247 587 nf
9 B 00:00:01.247 - 00:00:02.229 1247 2229 982 f
10 B 00:00:02.229 - 00:00:03.697 2229 3697 1468 nf
11 B 00:00:03.697 - 00:00:04.926 3697 4926 1229 f

R - How to add rows for missing values for unique group sequences?

# Expand each project to the full run of periods between its first and last
# observed period, then join the original rows onto that grid. Periods with
# no data come back with NA in the remaining columns.
# NOTE(review): the join without `on=` presumably relies on DT being keyed
# by (project, period) -- confirm against the caller.
period_grid <- DT[, .(min(period):max(period)), by = project]
setkey(period_grid, project, V1)
DT[period_grid]
# project period v3 v4
# 1: 6 1 a red
# 2: 6 2 b yellow
# 3: 6 3 NA NA
# 4: 6 4 NA NA
# 5: 6 5 c red
# 6: 6 6 d yellow
# 7: 6 7 e red
# 8: 9 2 f yellow
# 9: 9 3 NA NA
#10: 9 4 g red
#11: 9 5 h yellow

Insert rows for missing dates/times

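I think the easiest approach is to convert the timestamp column to a date-time first (as already described), turn the data into a zoo object, and then merge it with an empty zoo series that contains every time step: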

# Parse the timestamp strings into POSIXct date-times
df$timestamp <- as.POSIXct(df$timestamp, format = "%m/%d/%y %H:%M")

# Build a zoo series: the first column becomes the index, the rest is data
df1.zoo <- zoo(df[, -1], order.by = df[, 1])

# Zero-width zoo series whose index holds every minute from the first to the
# last observation
every_minute <- zoo(, seq(start(df1.zoo), end(df1.zoo), by = "min"))

# Outer merge: minutes that are absent from the data become rows of NA
df2 <- merge(df1.zoo, every_minute, all = TRUE)

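The start and end of the sequence are taken from df1.zoo (your original data), and by sets the step size, e.g. "min" for one-minute steps as in your example. all=TRUE keeps every time step of the sequence, so the dates that were missing from the original data appear as rows filled with NA.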

add rows to data frame for non-observations

You can use complete from tidyr, i.e.

library(tidyverse)

# Within each ID/site pair, insert a row for every hour between the pair's
# smallest and largest hr; the other columns are NA on the inserted rows
df %>%
  group_by(ID, site) %>%
  complete(hr = seq(from = min(hr), to = max(hr)))

which gives,

# A tibble: 9 x 5
# Groups: ID, site [2]
ID site hr day nObs
<int> <fct> <int> <int> <int>
1 8 B 8 188 6
2 8 B 9 188 6
3 8 B 10 NA NA
4 8 B 11 188 7
5 19 A 11 202 60
6 19 A 12 NA NA
7 19 A 13 202 18
8 19 A 14 NA NA
9 19 A 15 202 27

Add new rows in R dataframe to fill time gaps

Try this

# Split every id's timeline [0, time_end] into consecutive intervals, using
# the interval boundaries recorded in df as break points, then mark the
# intervals that are missing from df as non-events (event = 0).
time_end <- 10 # common end point of every timeline

# One data frame of consecutive (time_0, time_1) intervals per id. Building
# whole columns at once keeps id in its own type (no coercion through c())
# and avoids growing a data frame row by row.
intervals <- lapply(unique(df$id), function(i) {
  bounds <- unlist(subset(df, id == i)[, c("time_0", "time_1")])
  breaks <- sort(unique(c(0, bounds, time_end)))
  data.frame(
    id = i,
    time_0 = head(breaks, -1),
    time_1 = tail(breaks, -1)
  )
})
m <- do.call(rbind, intervals)

# Left join: intervals that do not exist in df are the gaps (event is NA)
ans <- merge(m, df, by = c("id", "time_0", "time_1"), all.x = TRUE)

ans$event[is.na(ans$event)] <- 0
ans$time_end <- time_end
ans

  • output
  id time_0 time_1 event time_end
1 1 0 2 0 10
2 1 2 4 1 10
3 1 4 10 0 10
4 2 0 3 0 10
5 2 3 5 1 10
6 2 5 6 0 10
7 2 6 8 1 10
8 2 8 10 0 10

Add missing rows per group in R

Using data.table, we can do a join after expanding the original dataset

# Convert `original` to a data.table by reference, then build the complete
# per-type minute grid with a placeholder value of 0
setDT(original)
new <- original[,
  .(day = seq(first(day), last(day), by = "1 min"), value = 0),
  by = type
]
# Update join: copy the observed values onto the matching (type, day) rows;
# the trailing [] prints the updated table
new[original, value := i.value, on = .(type, day)][]
# type day value
# 1: 1 2000-01-01 00:00:00 4
# 2: 1 2000-01-01 00:01:00 3
# 3: 1 2000-01-01 00:02:00 1
# 4: 1 2000-01-01 00:03:00 0
# 5: 1 2000-01-01 00:04:00 1
# 6: 2 2000-01-01 12:00:00 3
# 7: 2 2000-01-01 12:01:00 5
# 8: 2 2000-01-01 12:02:00 6
# 9: 2 2000-01-01 12:03:00 0
#10: 2 2000-01-01 12:04:00 3

Or using tidyverse

library(tidyverse)
# For each type, add a row for every minute between its first and last day
# value; inserted rows get value = 0
original %>%
  group_by(type) %>%
  complete(
    day = seq(from = first(day), to = last(day), by = "1 min"),
    fill = list(value = 0)
  )

R: inserting rows for missing observations in sales data

We can use tidyr::complete

# Add a row for every Product x Year combination over the observed range of
# years; combinations without data get Sales = 0
tidyr::complete(
  df,
  Product,
  Year = seq(from = min(Year), to = max(Year)),
  fill = list(Sales = 0)
)

# Product Year Sales
# <fct> <dbl> <dbl>
# 1 A 2013 0
# 2 A 2014 4
# 3 A 2015 0
# 4 A 2016 0
# 5 A 2017 2
# 6 A 2018 3
# 7 B 2013 0
# 8 B 2014 0
# 9 B 2015 0
#....

If the range has to be fixed (2013:2019) irrespective of the years in the data, we can specify it explicitly.

# Same completion, but over a fixed range of years that does not depend on
# the years present in the data
tidyr::complete(
  df,
  Product,
  Year = 2013:2019,
  fill = list(Sales = 0)
)


Related Topics



Leave a reply



Submit