R: Expand and Fill Data Frame by Date in Series

R: expand and fill data frame by date in series

First, make sure date is stored as a date object:

# Parse the "YYYY-MM-DD" strings so that `date` is stored as a Date vector.
demo_df[["date"]] <- as.Date(demo_df[["date"]], format = "%Y-%m-%d")

Then using tidyverse, we first complete the sequence, then fill the group down:

library(tidyverse)

# Step 1: add a row for every day between the earliest and latest `date`.
full_calendar <- complete(
  demo_df,
  date = seq.Date(min(date), max(date), by = "day")
)

# Step 2: carry the last seen `igroup` value down into the newly added rows.
fill(full_calendar, igroup, .direction = "down")

Expand dataframe with sequential dates based on a column of dates in R

I think this should work fine:

# One row per calendar day between the earliest and latest `Date` in `df`.
calendar_df <- data.frame(
  Date = seq.Date(min(df$Date), max(df$Date), by = "day")
)

# Left join: keep every calendar day; days absent from `df` get NA in the
# remaining columns.
merge(calendar_df, df, by = "Date", all.x = TRUE)
#          Date Group  Draw
# 1  2006-05-11    bb  TRUE
# 2  2006-05-11    bb FALSE
# 3  2006-05-12  <NA>    NA
# 4  2006-05-13  <NA>    NA
# 5  2006-05-14    aa  TRUE
# 6  2006-05-15  <NA>    NA
# 7  2006-05-16    aa  TRUE
# 8  2006-05-17  <NA>    NA
# 9  2006-05-18  <NA>    NA
# 10 2006-05-19  <NA>    NA
# 11 2006-05-20    cc FALSE
# 12 2006-05-20    bb FALSE
# 13 2006-05-21    aa  TRUE

All this is doing is creating a date sequence spanning the range of your actual data, and then performing a left join.

And the same idea, using data.table:

# One-column table holding every day between the earliest and latest `Date`.
calendar_dt <- dt[, .(Date = seq.Date(min(Date), max(Date), by = "day"))]

# Join `dt` onto the calendar: every calendar day is kept, matched on `Date`.
dt[calendar_dt, on = "Date"]
#           Date Group  Draw
#  1: 2006-05-11    bb  TRUE
#  2: 2006-05-11    bb FALSE
#  3: 2006-05-12    NA    NA
#  4: 2006-05-13    NA    NA
#  5: 2006-05-14    aa  TRUE
#  6: 2006-05-15    NA    NA
#  7: 2006-05-16    aa  TRUE
#  8: 2006-05-17    NA    NA
#  9: 2006-05-18    NA    NA
# 10: 2006-05-19    NA    NA
# 11: 2006-05-20    cc FALSE
# 12: 2006-05-20    bb FALSE
# 13: 2006-05-21    aa  TRUE

# Example data as whitespace-delimited text. The header has one field fewer
# than the data lines, so read.table() uses the first field as row names.
zz <- "Date Group Draw
1  2006-05-11    bb     T
2  2006-05-11    bb     F
3  2006-05-14    aa     T
4  2006-05-16    aa     T
5  2006-05-20    cc     F
6  2006-05-20    bb     F
7  2006-05-21    aa     T"

# Parse the text into a data frame, then convert `Date` to the Date class.
df <- read.table(header = TRUE, text = zz)
df[["Date"]] <- as.Date(df[["Date"]])

library(data.table)

# Same example data as a data.table; `Date` is converted in place with `:=`.
dt <- data.table(read.table(text = zz, header = TRUE))
dt[, Date := as.Date(Date)]

R - Expanding a value between a sequence of dates and add as columns to data.table

We reshape to wide format with pivot_wider, then group by ISSUE_ID and use summarise to append a 'Mean' row by concatenating each column's observations with their mean. With dplyr version >= 1.0, summarise can return more than one row per group (from dplyr 1.1.0 onward this use is deprecated in favour of reframe).

library(dplyr)
library(tidyr)

# Drop the columns that are not needed, spread RATING into one column per
# RATING_DATE, then for each ISSUE_ID append a "Mean" row holding the mean of
# every numeric column (NAs ignored).
ratings.dt %>%
  select(-c(VALID_THRU_DATE, RATING_DATE_SEQ)) %>%
  pivot_wider(names_from = RATING_DATE, values_from = RATING) %>%
  group_by(ISSUE_ID) %>%
  summarise(
    RATING_TYPE = c(RATING_TYPE, "Mean"),
    across(
      where(is.numeric),
      function(col) c(col, mean(col, na.rm = TRUE))
    ),
    .groups = "drop"
  )

-output

# A tibble: 4 x 11
#  ISSUE_ID RATING_TYPE `2000-01-31` `2000-05-31` `2001-03-31` `2001-05-31` `2001-04-30` `2004-01-31` `1999-04-31`
#     <int> <chr>              <dbl>        <dbl>        <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
#1      123 FR                  3.33            4         3.66         2            2.33            3        NA   
#2      123 MR                  2.66           NA         3            3.33        NA              NA         2.33
#3      123 SP                 NA              NA        NA           NA           NA              NA         3.33
#4      123 Mean                3.00            4         3.33         2.66         2.33            3         2.83
# … with 2 more variables: `2003-02-28` <dbl>, `2002-04-31` <dbl>

Or using data.table

library(data.table)

# Wide table: one row per ISSUE_ID / RATING_TYPE, one column per RATING_DATE.
# setDT() converts ratings.dt to a data.table by reference.
dt1 <- dcast(
  setDT(ratings.dt),
  ISSUE_ID + RATING_TYPE ~ RATING_DATE,
  value.var = "RATING"
)

# Per ISSUE_ID, the mean of every date column (all columns except the first
# two), labelled with RATING_TYPE "Mean".
mean_rows <- dt1[,
  c(list(RATING_TYPE = "Mean"), lapply(.SD, mean, na.rm = TRUE)),
  by = .(ISSUE_ID),
  .SDcols = -(1:2)
]

# Append the mean rows below the original rows.
rbind(dt1, mean_rows)
#   ISSUE_ID RATING_TYPE 1999-04-31 2000-01-31 2000-05-31 2001-03-31 2001-04-30 2001-05-31 2002-04-31 2003-02-28
#1:      123          FR         NA      3.330          4       3.66       2.33      2.000         NA         NA
#2:      123          MR       2.33      2.660         NA       3.00         NA      3.330         NA          3
#3:      123          SP       3.33         NA         NA         NA         NA         NA          3         NA
#4:      123        Mean       2.83      2.995          4       3.33       2.33      2.665          3          3
#   2004-01-31
#1:          3
#2:         NA
#3:         NA
#4:          3

data

# Example ratings data: 13 rows for a single ISSUE_ID across three rating
# types. The date columns are kept as character strings exactly as given.
ratings.dt <- data.frame(
  ISSUE_ID = rep(123L, 13L),
  RATING_TYPE = rep(c("FR", "MR", "SP"), times = c(6L, 5L, 2L)),
  RATING = c(
    3.33, 4, 3.66, 2, 2.33, 3,
    2.33, 2.66, 3, 3.33, 3,
    3.33, 3
  ),
  RATING_DATE = c(
    "2000-01-31", "2000-05-31", "2001-03-31", "2001-05-31", "2001-04-30",
    "2004-01-31",
    "1999-04-31", "2000-01-31", "2001-03-31", "2001-05-31", "2003-02-28",
    "1999-04-31", "2002-04-31"
  ),
  VALID_THRU_DATE = c(
    "2000-04-31", "2000-02-28", "2001-04-31", "2001-04-30", "2003-12-31",
    "2004-06-30",
    "1999-12-31", "2000-04-31", "2001-04-30", "2003-01-31", "2003-07-31",
    "2002-03-31", "2003-05-31"
  ),
  RATING_DATE_SEQ = c(1:6, 1:5, 1:2),
  stringsAsFactors = FALSE
)

Expand rows by date range using start and end date

Using data.table:

# library() stops immediately if data.table is not installed; require() would
# only return FALSE and let the calls below fail with a less obvious error.
library(data.table) ## 1.9.2+

# One group per row: build the monthly sequence from `start` to `end` for that
# row and repeat the row's `idnum` alongside it. seq_len() replaces
# 1:nrow(df), which would produce c(1, 0) for a zero-row table.
# NOTE(review): the grouping column is named `nrow` explicitly because the
# unnamed `by = 1:nrow(df)` form is expected to auto-name it `nrow`; confirm
# against existing output if downstream code relies on that column name.
setDT(df)[,
  list(idnum = idnum, month = seq(start, end, by = "month")),
  by = .(nrow = seq_len(nrow(df)))
]

# you may use dot notation as a shorthand alias of list in j:
setDT(df)[,
  .(idnum = idnum, month = seq(start, end, by = "month")),
  by = .(nrow = seq_len(nrow(df)))
]

setDT converts df to a data.table by reference. Grouping by row number (the by argument) then evaluates j once per row, creating idnum and month as required.

R Expand time series data based on start and end point

Using a tidyverse solution could look like:

library(dplyr)
library(stringr)
library(purrr)
library(tidyr)

# Example input: one row per place with a start date, an end date and a value.
data <- tibble(place = c(1, 2),
               start_date = c('2007-09-01',
                              '2013-09-27'),
               end_date = c('2010-10-12',
                            '2015-10-11'),
               value = c(0.5, 0.7))

# For each row, take the four-digit year from both dates, build every year in
# between as a character vector (a list column), then unnest() that list
# column into one row per year.
# unnest() replaces the earlier separate_rows() + filter(!year %in% c('c', ''))
# combination, which had to strip leftover 'c' and '' pieces after splitting
# the list column as if it were delimited text.
data %>%
  mutate(
    year = map2(
      start_date,
      end_date,
      ~ as.character(seq(
        as.integer(str_extract(.x, "\\d{4}")),
        as.integer(str_extract(.y, "\\d{4}"))
      ))
    )
  ) %>%
  unnest(year) %>%
  select(place, year, value)

#   place year  value
#   <dbl> <chr> <dbl>
# 1     1 2007    0.5
# 2     1 2008    0.5
# 3     1 2009    0.5
# 4     1 2010    0.5
# 5     2 2013    0.7
# 6     2 2014    0.7
# 7     2 2015    0.7

I'm having problems understanding the third paragraph of your question ("There are ..."). It seems to me to be a separate question. If that is the case, please consider moving the question to a separate post here on SO. If it is not a separate question, please reformulate the paragraph.

Fill dates by groups in a data frame R

Assuming that your dates are actually in date format rather than character format (we can't tell from the table in the question), and assuming that the 7th row has the wrong year in it (2021 as opposed to 2022), you can do:

library(tidyverse)

# Pad one person's rows so there is a row for every day between their first
# and last `date`. In the added rows, `name` and `code` are filled with the
# person's first value; the other columns are left as NA.
fill_daily_gaps <- function(person_rows) {
  complete(
    person_rows,
    expand(
      person_rows,
      date = seq(min(person_rows$date), max(person_rows$date), by = "day")
    ),
    fill = list(name = person_rows$name[1], code = person_rows$code[1])
  )
}

# Handle each name separately, then stack the padded pieces back together.
df %>%
  split(.$name) %>%
  lapply(fill_daily_gaps) %>%
  bind_rows()
#> # A tibble: 16 x 5
#>    date       name              code usage result
#>    <date>     <chr>            <int> <int>  <int>
#>  1 2021-11-04 Jennifer Aniston 23211   345      1
#>  2 2021-11-05 Jennifer Aniston 23211   260      1
#>  3 2021-11-06 Jennifer Aniston 23211   230      0
#>  4 2021-11-07 Jennifer Aniston 23211     0      0
#>  5 2022-01-01 Lisa Kudrow      55120   132      0
#>  6 2022-01-02 Lisa Kudrow      55120   125      0
#>  7 2022-01-03 Lisa Kudrow      55120    NA     NA
#>  8 2022-01-04 Lisa Kudrow      55120   345      1
#>  9 2022-01-05 Lisa Kudrow      55120    NA     NA
#> 10 2022-01-06 Lisa Kudrow      55120   321      1
#> 11 2022-01-07 Lisa Kudrow      55120   431      1
#> 12 2022-10-01 Matthew Perry    44215   312      1
#> 13 2022-10-02 Matthew Perry    44215    NA     NA
#> 14 2022-10-03 Matthew Perry    44215    NA     NA
#> 15 2022-10-04 Matthew Perry    44215   230      0
#> 16 2022-10-05 Matthew Perry    44215   232      0

Created on 2022-06-02 by the reprex package (v2.0.1)

Data taken from question in reproducible format

# Example data: daily usage records for three people. `date` is stored as a
# Date (days since 1970-01-01).
df <- data.frame(
  name = rep(
    c("Jennifer Aniston", "Matthew Perry", "Lisa Kudrow"),
    times = c(4L, 3L, 5L)
  ),
  code = rep(c(23211L, 44215L, 55120L), times = c(4L, 3L, 5L)),
  date = structure(
    c(
      18935, 18936, 18937, 18938,
      19266, 19269, 19270,
      18993, 18994, 18996, 18998, 18999
    ),
    class = "Date"
  ),
  usage = c(
    345L, 260L, 230L, 0L,
    312L, 230L, 232L,
    132L, 125L, 345L, 321L, 431L
  ),
  result = c(
    1L, 1L, 0L, 0L,
    1L, 0L, 0L,
    0L, 0L, 1L, 1L, 1L
  ),
  stringsAsFactors = FALSE
)

expand a series, then fill remaining columns

library(dplyr)
library(tidyr)

# Within each `group`, add a row for every value from that group's smallest
# `val` to its largest in steps of 1. The result stays grouped by `group`.
by_group <- group_by(df, group)
complete(by_group, val = seq(min(val), max(val)))

# # A tibble: 10 x 2
# # Groups:   group [2]
#    group   val
#    <fct> <dbl>
# 1  A         1
# 2  A         2
# 3  A         3
# 4  A         4
# 5  A         5
# 6  B         1
# 7  B         2
# 8  B         3
# 9  B         4
# 10 B         5

Extend a dataframe based on the last known value

Here is a base R idea,

# Value of `y` in the last row of `df`; it is repeated in every added row.
last_y <- df$y[nrow(df)]

# New rows for x = 6..10, renamed to match the columns of `df` so that
# rbind() can stack them underneath.
extension <- data.frame(x = 6:10, y = last_y)
names(extension) <- names(df)

rbind(df, extension)

#    x   y
#1   1 0.4
#2   2 0.6
#3   3 0.2
#4   4 0.2
#5   5 0.1
#6   6 0.1
#7   7 0.1
#8   8 0.1
#9   9 0.1
#10 10 0.1

How to fill dates between two dates

Here, we may need to group by a sequence of row numbers (passed as by) so that each row gets its own date sequence:

library(data.table)

# Convert `df` to a data.table by reference.
setDT(df)

# One group per row (`rn` is the row number; `name` and `value` are carried
# along as grouping columns): build the daily sequence from `start_date` to
# `end_date` for each row.
daily <- df[,
  .(date = seq(as.Date(start_date), as.Date(end_date), by = "day")),
  by = .(rn = seq_len(nrow(df)), name, value)
]

# The helper row number is no longer needed; drop it and print the result.
daily[, rn := NULL]
daily[]

Alternatively, use Map to loop over the corresponding elements of 'start_date' and 'end_date', creating a sequence of dates for each pair and storing the results in a list column, and then unnest that list column:

library(tidyr)
library(magrittr)

# Build a list column `date`: for each row, Map() calls seq() on the matching
# start and end dates with a one-day step.
date_ranges <- setDT(df)[, .(
  name,
  date = Map(
    seq,
    as.Date(start_date),
    as.Date(end_date),
    MoreArgs = list(by = "1 day")
  ),
  value
)]

# Expand the list column into one row per date.
date_ranges %>%
  unnest(date)
# A tibble: 731 x 3
#   name  date       value
#   <chr> <date>     <dbl>
# 1 A     2020-01-23   8.1
# 2 A     2020-01-24   8.1
# 3 A     2020-01-25   8.1
# 4 A     2020-01-26   8.1
# 5 A     2020-01-27   8.1
# 6 A     2020-01-28   8.1
# 7 A     2020-01-29   8.1
# 8 A     2020-01-30   8.1
# 9 A     2020-01-31   8.1
#10 A     2020-02-01   8.1
# … with 721 more rows

R: Expand and Fill Data Frame by Date in Series