Fastest Way to Add Rows For Missing Time Steps

Fill in missing time steps (yyyy-mm-dd HH:MM:SS) by adding rows with missing times in R

This should do what you want:

library(xts)
# Inline sample data: irregularly spaced observations, one "Time,Volume" record
# per line.
sample_csv <- "Time,Volume    
1996-02-05 00:34:00,0.01
1996-02-05 00:51:00,0.01
1996-02-05 00:52:00,0.01
1996-02-05 01:04:00,0.01
1996-02-05 01:19:00,0.01
1996-02-05 05:00:00,0.01
1996-02-05 05:07:00,0.01
1996-02-05 05:08:00,0.01
1996-02-05 05:14:00,0.01"

# Read the text as a zoo series whose index is parsed with as.POSIXct, then
# convert it to xts. drop = FALSE is passed so the single data column is not
# collapsed to a plain vector.
x <- as.xts(
  read.zoo(
    text = sample_csv,
    sep = ",",
    FUN = as.POSIXct,
    header = TRUE,
    drop = FALSE
  )
)

# Width of one bin in minutes. Defined once so the grid (step 1), the index
# alignment (step 4) and the aggregation (step 5) always use the same interval.
bin_minutes <- 30

# 1) Create a regular POSIXct sequence from midnight of the first day up to
#    the last observed timestamp, end(x), with one point per bin.
midnightDay1 <- as.POSIXct(format(start(x), "%Y-%m-%d"))
timesteps <- seq(midnightDay1, end(x), by = paste(bin_minutes, "min"))
# 2) Make a copy of the object and set all values for Volume to 1, so that the
#    sum in step 5 counts observations instead of adding up volumes.
y <- x
y$Volume <- 1
# 3) Merge the copy with a zero-column xts object whose index holds every grid
#    point. Rows that exist only in the grid get Volume = 0, so they add
#    nothing to the per-bin sums.
m <- merge(y, xts(, timesteps), fill = 0)
# 4) Align all index values to the bin width (align.time takes seconds).
a <- align.time(m, 60 * bin_minutes)
# 5) Sum the values for Volume in each bin.
half_hour <- period.apply(a, endpoints(a, "minutes", bin_minutes), sum)

Insert rows for missing time measurements of a negative event

This is a tidyverse solution.

library(tidyverse)
library(lubridate)

# Build an "nf" row for every gap between consecutive events of a subject,
# append the original rows, and return everything ordered by start time.
df %>%
  # Split "start - end" into two text columns; remove = FALSE keeps Timestamp.
  separate(Timestamp, c("start", "end"), sep = " - ", remove = FALSE) %>%
  group_by(Subject) %>%
  # A gap starts where the previous row of the same subject ended (or at
  # 00:00:00.000 for the subject's first row) and ends where this row starts.
  mutate(Starttime_ms = lag(end, default = "00:00:00.000"),
         Endtime_ms = start,
         Event = "nf",
         Timestamp = paste(Starttime_ms, Endtime_ms, sep = " - "),
         # Parse both bounds with hms() and scale by 1000 (the printed output
         # below shows 00:00:00.146 becoming 146).
         across(ends_with("_ms"), ~ hms(.x) * 1000),
         Duration = Endtime_ms - Starttime_ms,
         # Turn the period columns into plain numbers; .keep = "unused" is
         # presumably what drops the helper columns start/end -- verify.
         across(where(is.period), as.numeric), .keep = "unused") %>%
  # Append the original rows to the generated gap rows.
  bind_rows(df) %>%
  arrange(Starttime_ms, .by_group = TRUE) %>%
  # Discard gap rows with no positive length.
  filter(Duration > 0) %>%
  ungroup()

Output

# A tibble: 11 × 6
   Subject Timestamp                   Starttime_ms Endtime_ms Duration Event
   <chr>   <chr>                              <dbl>      <dbl>    <dbl> <chr>
 1 A       00:00:00.000 - 00:00:00.146            0        146      146 nf   
 2 A       00:00:00.146 - 00:00:00.889          146        889      743 f    
 3 A       00:00:00.889 - 00:00:01.568          889       1568      679 nf   
 4 A       00:00:01.568 - 00:00:02.183         1568       2183      615 f    
 5 A       00:00:02.183 - 00:00:03.642         2183       3642     1459 nf   
 6 A       00:00:03.642 - 00:00:04.522         3642       4522      880 f    
 7 B       00:00:00.000 - 00:00:00.660            0        660      660 f    
 8 B       00:00:00.660 - 00:00:01.247          660       1247      587 nf   
 9 B       00:00:01.247 - 00:00:02.229         1247       2229      982 f    
10 B       00:00:02.229 - 00:00:03.697         2229       3697     1468 nf   
11 B       00:00:03.697 - 00:00:04.926         3697       4926     1229 f

R - How to add rows for missing values for unique group sequences?

# Enumerate every period between each project's smallest and largest period
# (the unnamed result column is V1), key that grid by (project, V1), and join
# DT onto it so periods absent from DT show up as rows of NA.
period_grid <- DT[, .(min(period):max(period)), by = project]
setkey(period_grid, project, V1)
DT[period_grid]
#    project period v3     v4
# 1:       6      1  a    red
# 2:       6      2  b yellow
# 3:       6      3 NA     NA
# 4:       6      4 NA     NA
# 5:       6      5  c    red
# 6:       6      6  d yellow
# 7:       6      7  e    red
# 8:       9      2  f yellow
# 9:       9      3 NA     NA
#10:       9      4  g    red
#11:       9      5  h yellow

Insert rows for missing dates/times

I think the easiest approach is to convert the timestamp column to a date-time first (as already described), turn the data into a zoo object, and then merge it with a regular time sequence:

# Parse the timestamp strings ("month/day/2-digit-year hour:minute") to POSIXct.
df$timestamp <- as.POSIXct(df$timestamp, format = "%m/%d/%y %H:%M")

# Use the first column as the zoo index and all remaining columns as the data.
# NOTE(review): this assumes timestamp is the first column of df -- confirm.
df1.zoo <- zoo(df[, -1], order.by = df[, 1])

# Data-less zoo series holding one index value per minute across the observed
# range.
minute_grid <- zoo(, seq(start(df1.zoo), end(df1.zoo), by = "min"))

# all = TRUE keeps every index value from both series.
df2 <- merge(df1.zoo, minute_grid, all = TRUE)

The start and end of the sequence are taken from df1.zoo (your original data), and `by` sets the step size, here "min" as your example requires. all=TRUE keeps every time step of the sequence, so time steps that are missing from the original data appear as rows filled with NA.

add rows to data frame for non-observations

You can use complete from tidyr, i.e.

library(tidyverse)

# Within each (ID, site) group, expand hr to every value from the group's
# minimum to its maximum in steps of 1. Rows created this way have NA in the
# remaining columns, and the result stays grouped by ID and site.
by_id_site <- group_by(df, ID, site)
complete(by_id_site, hr = seq(from = min(hr), to = max(hr)))

which gives,

# A tibble: 9 x 5
# Groups:   ID, site [2]
     ID site     hr   day  nObs
  <int> <fct> <int> <int> <int>
1     8 B         8   188     6
2     8 B         9   188     6
3     8 B        10    NA    NA
4     8 B        11   188     7
5    19 A        11   202    60
6    19 A        12    NA    NA
7    19 A        13   202    18
8    19 A        14    NA    NA
9    19 A        15   202    27

Add new rows in R dataframe to fill time gaps

Try this

# Upper bound of the observation window; every id is covered from 0 up to it.
time_end <- 10

# For each id, collect all break points (0, every observed time_0 / time_1 and
# time_end), sort them, and turn each pair of neighbours into one interval.
# Building one data frame per id and binding once avoids growing `m` row by
# row and keeps the original type of `id`.
intervals <- lapply(unique(df$id), function(i) {
  rows <- df[which(df$id == i), , drop = FALSE]
  breaks <- sort(unique(c(0, rows$time_0, rows$time_1, time_end)))
  data.frame(
    id = i,
    time_0 = head(breaks, -1),
    time_1 = tail(breaks, -1)
  )
})
m <- do.call(rbind, intervals)

# Left join: keep every generated interval and attach the columns of df where
# an interval with the same id / time_0 / time_1 was actually observed.
ans <- merge(m, df, by = c("id", "time_0", "time_1"), all.x = TRUE)

# Intervals without a matching observation carry no event.
ans$event[is.na(ans$event)] <- 0
ans$time_end <- time_end
ans

output

  id time_0 time_1 event time_end
1  1      0      2     0       10
2  1      2      4     1       10
3  1      4     10     0       10
4  2      0      3     0       10
5  2      3      5     1       10
6  2      5      6     0       10
7  2      6      8     1       10
8  2      8     10     0       10

Add missing rows per group in R

Using data.table, we can do a join after expanding the original dataset

# Convert `original` to a data.table by reference.
setDT(original)

# One row per minute between the first and last `day` of every type, with
# value defaulting to 0.
# NOTE(review): first()/last() use row order, so this assumes `original` is
# sorted by day within type -- confirm.
new <- original[
  ,
  .(day = seq(first(day), last(day), by = "1 min"), value = 0),
  by = type
]

# Update join: where (type, day) exists in `original`, overwrite the default
# with the observed value, then print the result.
new[original, on = .(type, day), value := i.value]
new[]
#    type                 day value
# 1:    1 2000-01-01 00:00:00     4
# 2:    1 2000-01-01 00:01:00     3
# 3:    1 2000-01-01 00:02:00     1
# 4:    1 2000-01-01 00:03:00     0
# 5:    1 2000-01-01 00:04:00     1
# 6:    2 2000-01-01 12:00:00     3
# 7:    2 2000-01-01 12:01:00     5
# 8:    2 2000-01-01 12:02:00     6
# 9:    2 2000-01-01 12:03:00     0
#10:    2 2000-01-01 12:04:00     3

Or using tidyverse

library(tidyverse)
# Per type, expand `day` to one row per minute between the group's first and
# last row; rows added this way get value = 0. The result stays grouped by
# type.
by_type <- group_by(original, type)
complete(
  by_type,
  day = seq(from = first(day), to = last(day), by = "1 min"),
  fill = list(value = 0)
)

R: inserting rows for missing observations in sales data

We can use tidyr::complete

# Every Product is paired with every year between the smallest and largest
# Year found in df; combinations that were missing get Sales = 0.
tidyr::complete(
  df,
  Product,
  Year = seq(from = min(Year), to = max(Year)),
  fill = list(Sales = 0)
)

#  Product  Year Sales
#  <fct>   <dbl> <dbl>
# 1 A        2013     0
# 2 A        2014     4
# 3 A        2015     0
# 4 A        2016     0
# 5 A        2017     2
# 6 A        2018     3
# 7 B        2013     0
# 8 B        2014     0
# 9 B        2015     0
#....

If the range has to be fixed (2013:2019) irrespective of the years in the data, we can specify it explicitly.

# Same expansion, but over the fixed range 2013-2019 regardless of which years
# occur in df; combinations that were missing get Sales = 0.
tidyr::complete(
  df,
  Product,
  Year = 2013:2019,
  fill = list(Sales = 0)
)