R - Insert Row for Missing Monthly Data and Interpolate

r - insert row for missing monthly data and interpolate

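Using the data.table and zoo packages, you can easily expand your data set and interpolate, as long as the missing values are not at the edges of the data (leading or trailing NAs have no neighbour on one side and are left as NA, as in the last three rows of the output below).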

Expand the data set

library(data.table)
library(zoo)

# Convert `df` to a data.table by reference, then rebuild every Year group so
# that it holds one row per calendar month. match() returns NA for a month
# that is absent from the group, and indexing .SD with NA produces an all-NA
# row that serves as the placeholder for the missing month.
setDT(df)
res <- df[, .SD[match(seq_len(12L), Month)], by = Year]

Interpolate on whatever column you want

# Columns whose gaps are filled by linear interpolation.
cols <- c("Month", "DecimDate", "TWS")

# No `by` is given, so interpolation runs over the whole table.
# na.rm = FALSE keeps leading/trailing NAs in place, so every column keeps
# its original length and can be assigned back by reference.
res[, (cols) := lapply(.SD, function(column) na.approx(column, na.rm = FALSE)),
    .SDcols = cols]

res
#     Year GridNo GridIndex  Lon  Lat DecimDate Month        TWS
#  1: 2003   GR72        72 35.5 -4.5  2003.000     1 14.2566781
#  2: 2003   GR72        72 35.5 -4.5  2003.083     2  5.0413706
#  3: 2003   GR72        72 35.5 -4.5  2003.167     3  3.8192721
#  4: 2003   GR72        72 35.5 -4.5  2003.250     4  5.8706026
#  5: 2003   GR72        72 35.5 -4.5  2003.333     5  7.8461188
#  6: 2003     NA        NA   NA   NA  2003.417     6  5.1141516
#  7: 2003   GR72        72 35.5 -4.5  2003.500     7  2.3821844
#  8: 2003   GR72        72 35.5 -4.5  2003.583     8  0.1995629
#  9: 2003   GR72        72 35.5 -4.5  2003.667     9 -1.8353604
# 10: 2003   GR72        72 35.5 -4.5  2003.750    10 -2.0410653
# 11: 2003   GR72        72 35.5 -4.5  2003.833    11 -1.4029813
# 12: 2003   GR72        72 35.5 -4.5  2003.917    12 -0.2206872
# 13: 2004   GR72        72 35.5 -4.5  2004.000     1 -0.5090872
# 14: 2004   GR72        72 35.5 -4.5  2004.083     2 -0.4887118
# 15: 2004   GR72        72 35.5 -4.5  2004.167     3 -0.7725966
# 16: 2004   GR72        72 35.5 -4.5  2004.250     4  4.1831581
# 17: 2004   GR72        72 35.5 -4.5  2004.333     5  2.5651040
# 18: 2004   GR72        72 35.5 -4.5  2004.417     6 -2.2511409
# 19: 2004   GR72        72 35.5 -4.5  2004.500     7 -1.6484375
# 20: 2004   GR72        72 35.5 -4.5  2004.583     8 -4.6508982
# 21: 2004   GR72        72 35.5 -4.5  2004.667     9 -5.0053745
# 22: 2004     NA        NA   NA   NA        NA    NA         NA
# 23: 2004     NA        NA   NA   NA        NA    NA         NA
# 24: 2004     NA        NA   NA   NA        NA    NA         NA

Insert rows for missing data and interpolate

This could help using a spline:

library(zoo)

# Data: irregularly spaced observations, each dated on the first of a month
df <- structure(list(Date = structure(c(17075, 17106, 17318, 17471, 
18017, 18109, 18201, 18414), class = "Date"), Accumulated = c(6902000L, 
9033000L, 15033000L, 24033000L, 24533000L, 25033000L, 27533000L, 
29033000L)), row.names = c("1", "2", "3", "4", "5", "6", "7", 
"8"), class = "data.frame")

# Build the complete monthly sequence spanning the observed date range
df$Date <- as.Date(df$Date)
dfm <- data.frame(Date = seq(min(df$Date), max(df$Date), by = "1 month"))

# Left join onto the full sequence: every month is kept, and months without
# an observation get NA in `Accumulated`
dfmerged <- merge(dfm, df, by = "Date", all.x = TRUE)

# Fill the gaps by spline interpolation. The values are passed as a plain
# vector, so the rows are treated as equally spaced.
# NOTE(review): a spline can overshoot between observations; if the series
# must stay monotone, zoo::na.approx() (linear) may be a better fit.
dfmerged$Interpolation <- na.spline(dfmerged$Accumulated)

It will produce:

         Date Accumulated Interpolation
1  2016-10-01     6902000       6902000
2  2016-11-01     9033000       9033000
3  2016-12-01          NA      10525685
4  2017-01-01          NA      11534406
5  2017-02-01          NA      12222432
6  2017-03-01          NA      12753035
7  2017-04-01          NA      13289484
8  2017-05-01          NA      13995049
9  2017-06-01    15033000      15033000
10 2017-07-01          NA      16511487
11 2017-08-01          NA      18318181
12 2017-09-01          NA      20285631
13 2017-10-01          NA      22246387
14 2017-11-01    24033000      24033000
15 2017-12-01          NA      25510428
16 2018-01-01          NA      26673271
17 2018-02-01          NA      27548534
18 2018-03-01          NA      28163225
19 2018-04-01          NA      28544352
20 2018-05-01          NA      28718923
21 2018-06-01          NA      28713943
22 2018-07-01          NA      28556422
23 2018-08-01          NA      28273365
24 2018-09-01          NA      27891781
25 2018-10-01          NA      27438677
26 2018-11-01          NA      26941060
27 2018-12-01          NA      26425938
28 2019-01-01          NA      25920317
29 2019-02-01          NA      25451205
30 2019-03-01          NA      25045611
31 2019-04-01          NA      24730540
32 2019-05-01    24533000      24533000
33 2019-06-01          NA      24484346
34 2019-07-01          NA      24633317
35 2019-08-01    25033000      25033000
36 2019-09-01          NA      25709290
37 2019-10-01          NA      26579313
38 2019-11-01    27533000      27533000
39 2019-12-01          NA      28465321
40 2020-01-01          NA      29291385
41 2020-02-01          NA      29931341
42 2020-03-01          NA      30305333
43 2020-04-01          NA      30333510
44 2020-05-01          NA      29936017
45 2020-06-01    29033000      29033000

Interpolate and insert missing rows into dataframe R

One approach; adapt it to your case as appropriate:

library(dplyr)
library(lubridate) ## facilitates date-time manipulations

## example data: six distinct days drawn from a ten-day window, one value each.
## The number of values must equal the number of dates; otherwise data.frame()
## silently recycles the shorter column and every date would appear twice.
patchy_data <- data.frame(date = as.Date("2021-11-01") + sample(1:10, 6),
                          value = rnorm(6)) %>%
    arrange(date)

## dates inside the observed range that have no row in patchy_data;
## the result is converted back to Date with an explicit origin in case
## setdiff() dropped the Date class:
missing_dates <- as.Date(
    setdiff(
        seq.Date(min(patchy_data$date), max(patchy_data$date), by = "1 day"),
        patchy_data$date
    ),
    origin = "1970-01-01"
)

## append one row with value = NA per missing date, then restore
## chronological order:
full_data <- arrange(
    bind_rows(patchy_data, data.frame(date = missing_dates, value = NA)),
    date
)

## group by month and impute missing data from monthwise statistic:
full_data %>%
    mutate(month = lubridate::month(date)) %>%
    group_by(month) %>%
    ## coalesce conveniently replaces ifelse-constructs to replace NAs.
    ## Use the bare column name so that the mean is computed per group:
    ## `.$value` would refer to the entire piped-in column, giving a global
    ## mean and a length mismatch as soon as there is more than one month.
    mutate(imputed = coalesce(value, mean(value, na.rm = TRUE))) %>%
    ## drop the grouping so that later steps do not run per month by accident
    ungroup()

edit
One way to expand the generated rows (the missing dates) by an additional parameter (e.g. measuring depth) is to use expand.grid, as follows. This assumes the object names from the previous code:

## depths of the daily measurements (example values):
observation_depths <- c(0.5, 1.1, 1.5)

## one row for every combination of missing date and depth:
missing_dates_and_depths <- expand.grid(missing_dates, observation_depths)
names(missing_dates_and_depths) <- c('date', 'depthR')

## append these rows to the observed data and sort chronologically:
full_data <- arrange(bind_rows(patchy_data, missing_dates_and_depths), date)

Insert rows for missing dates/times

I think the easiest thing is to convert the timestamp column to a date-time first, as already described, then convert the data to a zoo object, and finally merge it with a regular sequence of times:

# Parse the timestamp strings (month/day/two-digit year, hour:minute)
df$timestamp <- as.POSIXct(df$timestamp, format = "%m/%d/%y %H:%M")

# The first column becomes the zoo index; the remaining columns are the data
df1.zoo <- zoo(df[, -1], order.by = df[, 1])

# Merge with a data-less zoo series on a regular one-minute grid; all = TRUE
# keeps every grid time, so times absent from the data appear as NA rows
df2 <- merge(
  df1.zoo,
  zoo(, seq(from = start(df1.zoo), to = end(df1.zoo), by = "min")),
  all = TRUE
)

The start and end of the sequence are taken from df1.zoo (your original data), and `by` (here "min") sets the step size you need for your example. all=TRUE keeps every time in the sequence, so the values at the missing dates are set to NA.

'Interpolation' of a missing date/value in R?

We can use complete

library(dplyr)
library(tidyr)

# complete() adds a row for every month between the first and last observed
# month; fill() then carries the last known CumulativeSum forward into the
# newly added rows.
df1 %>%
  complete(Month = seq(from = min(Month), to = max(Month), by = "1 month")) %>%
  fill(CumulativeSum, .direction = "down")

-output

# A tibble: 7 x 2
#  Month      CumulativeSum
#  <date>             <int>
#1 2019-02-01            40
#2 2019-03-01            70
#3 2019-04-01            80
#4 2019-05-01            80
#5 2019-06-01            80
#6 2019-07-01           100
#7 2019-08-01           120

data

# Example input: monthly cumulative sums with May and June 2019 missing.
df1 <- data.frame(
  Month = as.Date(c(
    "2019-02-01", "2019-03-01", "2019-04-01", "2019-07-01", "2019-08-01"
  )),
  CumulativeSum = c(40L, 70L, 80L, 100L, 120L)
)

Insert new rows of imputed data into data table by group

You can use tidyr::complete to expand the years and zoo::na.approx for interpolation of the values.

library(dplyr)

dt %>%
  group_by(ID, attrib1, attrib2) %>%
  # add a row (value = NA) for every year missing inside each group's range
  tidyr::complete(Year = min(Year):max(Year)) %>%
  # na.rm = FALSE keeps leading/trailing NAs instead of dropping them, so the
  # result always has the group's length; with the default, a group whose
  # first or last value is NA would make mutate() fail on a size mismatch
  mutate(value = zoo::na.approx(value, na.rm = FALSE)) %>%
  # drop the grouping so that later steps do not run per group by accident
  ungroup()

#     ID attrib1 attrib2  Year value
#   <int> <chr>   <chr>   <dbl> <dbl>
# 1     1 sdf     444      1990  12  
# 2     1 sdf     444      1991  10.8
# 3     1 sdf     444      1992   9.6
# 4     1 sdf     444      1993   8.4
# 5     1 sdf     444      1994   7.2
# 6     1 sdf     444      1995   6  
# 7     1 sdf     444      1996   7  
# 8     2 gghgf   222      1990   6  
# 9     2 gghgf   222      1991   5.4
#10     2 gghgf   222      1992   4.8
# … with 11 more rows

Interpolate zoo object with missing Dates

Merge with an "empty" object that has all the dates you want, then use na.approx (or na.spline, etc.) to fill in the missing values.

# Merge `serie` with a data-less zoo series covering every day between its
# first and last index (all = TRUE keeps the added days as NA), then fill
# those NAs by linear interpolation.
x <- na.approx(
  merge(
    serie,
    zoo(, seq(from = start(serie), to = end(serie), by = "day")),
    all = TRUE
  )
)

linear interpolate missing values in time series

Here is one way. I created a data frame with a sequence of dates running from the first to the last date. Using full_join() from the dplyr package, I merged that data frame with mydf. I then used na.approx() from the zoo package to handle the interpolation in the mutate() step.

# Example input: irregular daily observations, one of them already NA
mydf <- data.frame(date = as.Date(c("2015-10-05", "2015-10-08", "2015-10-09",
                                    "2015-10-12", "2015-10-14")),
                   value = c(8, 3, 9, NA, 5))

library(dplyr)
library(zoo)

# Build the full daily sequence from the earliest to the latest date
# (min/max rather than first/last row, so mydf need not be sorted), join the
# observations onto it, and interpolate linearly.
# na.rm = FALSE keeps leading/trailing NAs so the result always has one value
# per row; with the default they are dropped and mutate() would fail.
data.frame(date = seq(min(mydf$date), max(mydf$date), by = "day")) %>%
  full_join(mydf, by = "date") %>%
  mutate(approx = na.approx(value, na.rm = FALSE))

#         date value   approx
#1  2015-10-05     8 8.000000
#2  2015-10-06    NA 6.333333
#3  2015-10-07    NA 4.666667
#4  2015-10-08     3 3.000000
#5  2015-10-09     9 9.000000
#6  2015-10-10    NA 8.200000
#7  2015-10-11    NA 7.400000
#8  2015-10-12    NA 6.600000
#9  2015-10-13    NA 5.800000
#10 2015-10-14     5 5.000000

R - Insert Row for Missing Monthly Data and Interpolate