Insert rows for missing dates/times
I think the easiest approach is to parse the timestamp column first (as already described), convert the data to a zoo object, and then merge it with an empty zoo series that covers the full time range:
# Parse the timestamp strings ("month/day/2-digit-year hour:minute") into POSIXct.
df$timestamp <- as.POSIXct(df$timestamp, format = "%m/%d/%y %H:%M")

# Build a zoo series indexed by the first column of df; the remaining columns
# become the data. NOTE(review): assumes timestamp is the first column - confirm.
df1.zoo <- zoo(x = df[, -1], order.by = df[, 1])

# Merge with a data-less zoo series whose index runs minute by minute from the
# first to the last observed timestamp; timestamps absent from df1.zoo get NA.
df2 <- merge(
  df1.zoo,
  zoo(order.by = seq(from = start(df1.zoo), to = end(df1.zoo), by = "min")),
  all = TRUE
)
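The start and end of the sequence are taken from df1.zoo (your original data), and by sets the step you need - "min" (one minute) in this example. all=TRUE keeps every index value from both series, so timestamps that are missing from the original data appear as rows filled with NA.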
Adding missing dates in time series data
Use tidyr::complete
:
library(dplyr)
# Parse the "Month dd, yyyy" strings to Date, add one row per day in the fixed
# 2008-01-01 .. 2020-03-31 range (new rows get Val = 1), then format the dates
# back to the original text layout.
df %>%
  mutate(Date = as.Date(Date, format = "%B %d, %Y")) %>%
  tidyr::complete(
    Date = seq(
      from = as.Date("2008-01-01"),
      to = as.Date("2020-03-31"),
      by = "day"
    ),
    fill = list(Val = 1)
  ) %>%
  mutate(Date = format(Date, "%B %d, %Y"))
# A tibble: 4,475 x 2
# Date Val
# <chr> <dbl>
# 1 January 01, 2008 1
# 2 January 02, 2008 1
# 3 January 03, 2008 1
# 4 January 04, 2008 1
# 5 January 05, 2008 26
# 6 January 06, 2008 1
# 7 January 07, 2008 1
# 8 January 08, 2008 1
# 9 January 09, 2008 1
#10 January 10, 2008 1
# … with 4,465 more rows
data
# Example input: six observations with dates stored as "Month dd, yyyy" text.
df <- data.frame(
  Date = c(
    "September 16, 2012", "September 19, 2014", "January 05, 2008",
    "June 07, 2017", "December 15, 2019", "May 28, 2020"
  ),
  Val = c(32L, 33L, 26L, 2L, 3L, 18L),
  stringsAsFactors = FALSE
)
How to add only missing Dates in Dataframe
Here's a correction of your approach, in base R.
Replace max(t1$Date)
by Sys.Date()
in your real application:
# Outer-join t1 against a one-row-per-day calendar spanning its first and last
# date, so days without an observation appear as rows of NA.
t2 <- merge(
  data.frame(Date = seq(from = min(t1$Date), to = max(t1$Date), by = "day")),
  t1,
  by = "Date",
  all = TRUE
)
# Replace the NAs introduced for the added days with 0.
t2[is.na(t2)] <- 0
# Date Val1 Val2
# 1 2018-04-01 125 0.05
# 2 2018-04-02 0 0.00
# 3 2018-04-03 458 2.99
# 4 2018-04-04 0 0.00
# 5 2018-04-05 354 1.25
data
# Example input: three observations, with no rows for 2018-04-02 and 2018-04-04.
t1 <- read.table(
  text = "Date Val1 Val2
'2018-04-01' 125 0.05
'2018-04-03' 458 2.99
'2018-04-05' 354 1.25",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Convert t1's own Date column from text to Date. The original read df$Date,
# which refers to a different object and either errors or yields wrong dates.
t1$Date <- as.Date(t1$Date)
Filling missing date months in a time series by Item
You can create a sequence of yearmon
objects for each ITEM
and use it in complete
.
library(dplyr)
library(zoo)
library(tidyr)
# Convert Date to zoo's yearmon, then, within each ITEM, add a row for every
# month between that item's first and last month (step of 1/12 of a year).
df1 %>%
  mutate(Date = as.yearmon(Date, format = "%b-%Y")) %>%
  group_by(ITEM) %>%
  complete(Date = seq(min(Date), max(Date), 1/12)) %>%
  ungroup()
# ITEM Date
# <chr> <yearmon>
# 1 A Jan 2020
# 2 A Feb 2020
# 3 A Mar 2020
# 4 A Apr 2020
# 5 A May 2020
# 6 A Jun 2020
# 7 A Jul 2020
# 8 B Jan 2020
# 9 B Feb 2020
#10 B Mar 2020
#11 B Apr 2020
#12 B May 2020
#13 B Jun 2020
#14 B Jul 2020
#15 B Aug 2020
If you want a sequence of date objects you can use :
# Same idea with Date objects: parse via yearmon, convert to Date, and fill in
# one row per month between each ITEM's first and last date.
df1 %>%
  mutate(Date = as.Date(as.yearmon(Date, format = "%b-%Y"))) %>%
  group_by(ITEM) %>%
  complete(Date = seq(from = min(Date), to = max(Date), by = "month")) %>%
  ungroup()
Filling missing dates in a grouped time series - a tidyverse-way?
tidyr
has some great tools for these sorts of problems. Take a look at complete
.
library(dplyr)
library(tidyr)
library(lubridate)
# Drop any existing grouping, then expand every (d1, d2) pair that occurs in the
# data to one row per day between the overall first and last date.
want <- df.missing %>%
  ungroup() %>%
  complete(
    nesting(d1, d2),
    date = seq(from = min(date), to = max(date), by = "day")
  )

# Inspect a single group.
want %>%
  filter(d1 == "A", d2 == 5)
#> # A tibble: 10 x 5
#> d1 d2 date v1 v2
#> <fctr> <dbl> <date> <dbl> <dbl>
#> 1 A 5 2017-01-01 NA NA
#> 2 A 5 2017-01-02 0.21879954 0.1335497
#> 3 A 5 2017-01-03 0.32977018 0.9802127
#> 4 A 5 2017-01-04 0.23902573 0.1206089
#> 5 A 5 2017-01-05 0.19617465 0.7378315
#> 6 A 5 2017-01-06 0.13373890 0.9493668
#> 7 A 5 2017-01-07 0.48613541 0.3392834
#> 8 A 5 2017-01-08 0.35698708 0.3696965
#> 9 A 5 2017-01-09 0.08498474 0.8354756
#> 10 A 5 2017-01-10 NA NA
Fill missing values in time series using previous day data - R
Edit
Thanks to @G. Grothendieck for pointing out that na.locf0
has maxgap
argument which can handle the 5-day condition directly.
# Carry the last observation forward in every column except the first, leaving
# runs of more than 5 consecutive NAs untouched (maxgap = 5).
data[-1] <- lapply(
  data[-1],
  function(series) zoo::na.locf0(series, maxgap = 5)
)
data
Earlier Answer
You can write a function with rle
and zoo::na.locf0
to replace NA
only if the length of consecutive NA
is less than or equal to 5. Apply this function to multiple columns with lapply
.
#' Fill short runs of NA with the last observed value
#'
#' Replaces NA values by the preceding non-NA value (via `zoo::na.locf0`), but
#' only inside runs of consecutive NAs whose length is at most `max_gap`.
#' Longer runs, and leading NAs with no preceding value, are left as NA.
#'
#' @param x A vector, possibly containing NA.
#' @param max_gap Longest run of consecutive NAs that is filled. Defaults to 5,
#'   the limit that was previously hard-coded.
#' @return `x` with the qualifying NAs replaced.
conditionally_replace_na <- function(x, max_gap = 5) {
  missing <- is.na(x)
  runs <- rle(missing)
  # Length of the run each element belongs to, expanded back to length(x).
  run_length <- rep(runs$lengths, runs$lengths)
  fill_here <- missing & run_length <= max_gap
  # Sub-assign into x instead of using ifelse(), which rebuilds the vector from
  # the logical test and therefore drops x's attributes.
  x[fill_here] <- zoo::na.locf0(x)[fill_here]
  x
}
# Apply the helper to every column except the first, then show the result.
data[-1] <- lapply(X = data[-1], FUN = conditionally_replace_na)
data
# Date time_series_1 time_series_2 time_series_3
#1 01-01-2019 NA 10 8
#2 02-01-2019 5 10 10
#3 03-01-2019 10 10 20
#4 04-01-2019 20 6 40
#5 05-01-2019 30 6 40
#6 06-01-2019 30 8 40
#7 07-01-2019 7 NA 40
#8 08-01-2019 5 NA 40
#9 09-01-2019 NA NA 5
#10 10-01-2019 NA NA 5
#11 11-01-2019 NA NA 7
#12 12-01-2019 NA NA 10
#13 13-01-2019 NA NA 11
#14 14-01-2019 NA NA 12
#15 15-01-2019 NA NA 12
#16 16-01-2019 NA NA 9
#17 17-01-2019 NA NA 10
#18 18-01-2019 NA NA 10
#19 19-01-2019 5 NA 11
#20 20-01-2019 5 NA 11
#21 21-01-2019 5 NA 11
#22 22-01-2019 6 NA 11
Function can also be applied with dplyr::across
library(dplyr)
# Apply the helper to every column whose name starts with "time_series".
data %>%
  mutate(
    across(starts_with("time_series"), conditionally_replace_na)
  )
Fill missing dates by group
tidyr::complete()
fills in the missing combinations of values:
pass id
and date
as the columns (...
) to expand over
library(tidyverse)
# Expand dat to every combination of id and date; combinations that were
# missing get NA in the remaining columns.
dat %>%
  complete(id, date)
# A tibble: 16 x 3
id date value
<dbl> <date> <dbl>
1 1.00 2017-01-01 30.0
2 1.00 2017-02-01 30.0
3 1.00 2017-03-01 NA
4 1.00 2017-04-01 25.0
5 2.00 2017-01-01 NA
6 2.00 2017-02-01 25.0
7 2.00 2017-03-01 NA
8 2.00 2017-04-01 NA
9 3.00 2017-01-01 25.0
10 3.00 2017-02-01 25.0
11 3.00 2017-03-01 25.0
12 3.00 2017-04-01 NA
13 4.00 2017-01-01 20.0
14 4.00 2017-02-01 20.0
15 4.00 2017-03-01 NA
16 4.00 2017-04-01 20.0
Related Topics
How to Get Rstudio to Automatically Compile R Markdown Vignettes
How to Identify the Distribution of the Given Data Using R
Install a Local R Package with Dependencies from Cran Mirror
How to Make Object Created Within Function Usable Outside
Predicting Lda Topics for New Data
Kruskal-Wallis Test with Details on Pairwise Comparisons
How to Get the Most Frequent Level of a Categorical Variable in R
How to Install Multiple Packages
How to Remove "Rows" with a Na Value
Clustering List for Hclust Function
Multiply Permutations of Two Vectors in R
Which Library Could Be Used to Make a Chord Diagram in R