Expand Rows by Date Range Using Start and End Date

Expand rows by date range using start and end date

Using data.table:

library(data.table) ## 1.9.2+ (library() fails loudly; require() only warns and returns FALSE)

# Expand each row into one output row per month between start and end.
# Grouping by 1:nrow(df) treats every input row as its own group.
# NOTE(review): seq_len(nrow(df)) would be safer for a zero-row df -- 1:0 yields c(1, 0).
setDT(df)[ , list(idnum = idnum, month = seq(start, end, by = "month")), by = 1:nrow(df)]

# you may use dot notation as a shorthand alias of list in j:
setDT(df)[ , .(idnum = idnum, month = seq(start, end, by = "month")), by = 1:nrow(df)]

setDT converts df to a data.table. Then for each row, by = 1:nrow(df), we create idnum and month as required.

Expand rows by date range having start and end in Pandas

Assuming you are on pandas v0.25, use explode:

# For each row, build an hourly DatetimeIndex from the row's index value
# (row.name, presumably the start timestamp -- TODO confirm against caller)
# up to date_end, inclusive.
df['hour'] = df.apply(lambda row: pd.date_range(row.name, row['date_end'], freq='H'), axis=1)
# One output row per hour: explode the list-like column, drop the original
# range columns, and make the hourly timestamp the new index (renamed back
# to date_start so the frame keeps its original index name).
df = df.explode('hour').reset_index() \
.drop(columns=['date_start', 'date_end']) \
.rename(columns={'hour': 'date_start'}) \
.set_index('date_start')

For the rows with NaN, you may reindex your DataFrame.

# Report from Jan 4 - 5, 2018, from 7AM - 7PM
# Cartesian product of report days and hour-of-day offsets (7..19 inclusive).
days = pd.date_range('2018-01-04', '2018-01-05')
hours = pd.to_timedelta(range(7, 20), unit='h')
tmp = pd.MultiIndex.from_product([days, hours], names=['Date', 'Hour']).to_frame()

# Combine each day with each hour offset into full timestamps, then reindex
# so timestamps missing from df show up as NaN rows.
s = tmp['Date'] + tmp['Hour']
df.reindex(s)

Given start date and end date, reshape/expand data for each day between (each day on a row)

Using data.table

library(data.table)
# Expand every row into one row per day from START_DAY through END_DAY
# (inclusive), keeping PER_COST as the grouping key.
setDT(x)[, .(DATE = seq(START_DAY, END_DAY, by = 'day')), by = PER_COST]
# PER_COST DATE
# 1: 3451380 2013-02-26
# 2: 3451380 2013-02-27
# 3: 3451380 2013-02-28
# 4: 3451380 2013-03-01
# 5: 3451380 2013-03-02
#---
#116: 3575311 2013-06-21
#117: 3575311 2013-06-22
#118: 3575311 2013-06-23
#119: 3575311 2013-06-24
#120: 3575311 2013-06-25

If there are duplicate PER_COST, then it may be better to use 1:nrow(x) as the grouping variable

# When PER_COST is not unique, group by row number (1:nrow(x)) so each input
# row expands independently; PER_COST is repeated once per generated day
# (END_DAY - START_DAY is a difftime that rep() coerces to a count).
setDT(x)[, list(DATE=seq(START_DAY, END_DAY, by = 'day'), 
PER_COST=rep(PER_COST, END_DAY-START_DAY+1)), 1:nrow(x)]

Update

Using dplyr

library(dplyr)
x %>%
  rowwise() %>%
  do({
    # One row per day of this row's range; PER_COST repeated to match.
    dates <- seq(.$START_DAY, .$END_DAY, by = "day")
    data.frame(DATE = dates, PER_COST = rep(.$PER_COST, length(dates)))
  })

Expand row year by year using end and start date

Here is one approach using tidyverse and lubridate.

First, remove rows where there are missing dates (they will be added back at the end). Then, create intervals from start to end, and divide rows into years. Then you can intersect these intervals with calendar years.

After that, you can right_join with original data to replace the missing rows, and use coalesce to replace any missing dates if available.

library(tidyverse)
library(lubridate)

df %>%
# Drop rows with missing dates; they are re-attached by right_join() below.
filter(!is.na(DateStartRole) & !is.na(DateEndRole)) %>%
# date_int: the full start..end interval; year: one list element per calendar
# year the role spans (unnest() then gives one row per year).
mutate(date_int = interval(DateStartRole, DateEndRole),
year = map2(year(DateStartRole), year(DateEndRole), seq)) %>%
unnest(year) %>%
# Clip the role interval to each calendar year (lubridate's intersect() on
# Interval objects), then take the clipped endpoints as Dates.
mutate(year_int = interval(as.Date(paste0(year, '-01-01')), as.Date(paste0(year, '-12-31'))),
year_sect = intersect(date_int, year_int),
start_new = as.Date(int_start(year_sect)),
end_new = as.Date(int_end(year_sect))) %>%
select(DirectorID, CompanyID, start_new, end_new) %>%
# Joins on the shared ID columns, restoring the rows dropped by the filter.
right_join(df) %>%
# For restored rows, fall back to the original dates where available.
mutate(start_new = coalesce(start_new, DateStartRole),
end_new = coalesce(end_new, DateEndRole))

Output

    DirectorID CompanyID start_new  end_new    DateStartRole DateEndRole
<int> <int> <date> <date> <date> <date>
1 2363928 3262324 2009-01-01 2009-12-31 2009-01-01 2020-11-01
2 2363928 3262324 2010-01-01 2010-12-31 2009-01-01 2020-11-01
3 2363928 3262324 2011-01-01 2011-12-31 2009-01-01 2020-11-01
4 2363928 3262324 2012-01-01 2012-12-31 2009-01-01 2020-11-01
5 2363928 3262324 2013-01-01 2013-12-31 2009-01-01 2020-11-01
6 2363928 3262324 2014-01-01 2014-12-31 2009-01-01 2020-11-01
7 2363928 3262324 2015-01-01 2015-12-31 2009-01-01 2020-11-01
8 2363928 3262324 2016-01-01 2016-12-31 2009-01-01 2020-11-01
9 2363928 3262324 2017-01-01 2017-12-31 2009-01-01 2020-11-01
10 2363928 3262324 2018-01-01 2018-12-31 2009-01-01 2020-11-01
11 2363928 3262324 2019-01-01 2019-12-31 2009-01-01 2020-11-01
12 2363928 3262324 2020-01-01 2020-11-01 2009-01-01 2020-11-01
13 2374506 2301528 2008-01-01 2008-12-31 2008-01-01 2009-01-01
14 2374506 2301528 2009-01-01 2009-01-01 2008-01-01 2009-01-01
15 599185 54210 1988-01-01 1988-12-31 1988-01-01 2000-01-01
16 599185 54210 1989-01-01 1989-12-31 1988-01-01 2000-01-01
17 599185 54210 1990-01-01 1990-12-31 1988-01-01 2000-01-01
18 599185 54210 1991-01-01 1991-12-31 1988-01-01 2000-01-01
19 599185 54210 1992-01-01 1992-12-31 1988-01-01 2000-01-01
20 599185 54210 1993-01-01 1993-12-31 1988-01-01 2000-01-01
21 599185 54210 1994-01-01 1994-12-31 1988-01-01 2000-01-01
22 599185 54210 1995-01-01 1995-12-31 1988-01-01 2000-01-01
23 599185 54210 1996-01-01 1996-12-31 1988-01-01 2000-01-01
24 599185 54210 1997-01-01 1997-12-31 1988-01-01 2000-01-01
25 599185 54210 1998-01-01 1998-12-31 1988-01-01 2000-01-01
26 599185 54210 1999-01-01 1999-12-31 1988-01-01 2000-01-01
27 599185 54210 2000-01-01 2000-01-01 1988-01-01 2000-01-01
28 1973398 4142 2018-04-01 2018-12-31 2018-04-01 2020-11-01
29 1973398 4142 2019-01-01 2019-12-31 2018-04-01 2020-11-01
30 1973398 4142 2020-01-01 2020-11-01 2018-04-01 2020-11-01
31 1262068 922842 2011-01-01 2011-12-31 2011-01-01 2018-08-01
32 1262068 922842 2012-01-01 2012-12-31 2011-01-01 2018-08-01
33 1262068 922842 2013-01-01 2013-12-31 2011-01-01 2018-08-01
34 1262068 922842 2014-01-01 2014-12-31 2011-01-01 2018-08-01
35 1262068 922842 2015-01-01 2015-12-31 2011-01-01 2018-08-01
36 1262068 922842 2016-01-01 2016-12-31 2011-01-01 2018-08-01
37 1262068 922842 2017-01-01 2017-12-31 2011-01-01 2018-08-01
38 1262068 922842 2018-01-01 2018-08-01 2011-01-01 2018-08-01
39 32554 15457 1997-01-01 1997-12-31 1997-01-01 2001-01-01
40 32554 15457 1998-01-01 1998-12-31 1997-01-01 2001-01-01
41 32554 15457 1999-01-01 1999-12-31 1997-01-01 2001-01-01
42 32554 15457 2000-01-01 2000-12-31 1997-01-01 2001-01-01
43 32554 15457 2001-01-01 2001-01-01 1997-01-01 2001-01-01
44 2305779 1378145 2007-01-01 2007-12-31 2007-01-01 2012-01-01
45 2305779 1378145 2008-01-01 2008-12-31 2007-01-01 2012-01-01
46 2305779 1378145 2009-01-01 2009-12-31 2007-01-01 2012-01-01
47 2305779 1378145 2010-01-01 2010-12-31 2007-01-01 2012-01-01
48 2305779 1378145 2011-01-01 2011-12-31 2007-01-01 2012-01-01
49 2305779 1378145 2012-01-01 2012-01-01 2007-01-01 2012-01-01
50 2305779 829105 2020-05-07 2020-11-01 2020-05-07 2020-11-01
51 1935158 573779 2011-07-01 2011-12-31 2011-07-01 2017-07-03
52 1935158 573779 2012-01-01 2012-12-31 2011-07-01 2017-07-03
53 1935158 573779 2013-01-01 2013-12-31 2011-07-01 2017-07-03
54 1935158 573779 2014-01-01 2014-12-31 2011-07-01 2017-07-03
55 1935158 573779 2015-01-01 2015-12-31 2011-07-01 2017-07-03
56 1935158 573779 2016-01-01 2016-12-31 2011-07-01 2017-07-03
57 1935158 573779 2017-01-01 2017-07-03 2011-07-01 2017-07-03
58 836789 829244 2006-01-01 2006-12-31 2006-01-01 2007-01-01
59 836789 829244 2007-01-01 2007-01-01 2006-01-01 2007-01-01
60 490624 3443 2004-01-01 2004-12-31 2004-01-01 2007-01-01
61 490624 3443 2005-01-01 2005-12-31 2004-01-01 2007-01-01
62 490624 3443 2006-01-01 2006-12-31 2004-01-01 2007-01-01
63 490624 3443 2007-01-01 2007-01-01 2004-01-01 2007-01-01
64 554430 30279 1975-05-15 1975-12-31 1975-05-15 2011-10-28
65 554430 30279 1976-01-01 1976-12-31 1975-05-15 2011-10-28
66 554430 30279 1977-01-01 1977-12-31 1975-05-15 2011-10-28
67 554430 30279 1978-01-01 1978-12-31 1975-05-15 2011-10-28
68 554430 30279 1979-01-01 1979-12-31 1975-05-15 2011-10-28
69 554430 30279 1980-01-01 1980-12-31 1975-05-15 2011-10-28
70 554430 30279 1981-01-01 1981-12-31 1975-05-15 2011-10-28
71 554430 30279 1982-01-01 1982-12-31 1975-05-15 2011-10-28
72 554430 30279 1983-01-01 1983-12-31 1975-05-15 2011-10-28
73 554430 30279 1984-01-01 1984-12-31 1975-05-15 2011-10-28
74 554430 30279 1985-01-01 1985-12-31 1975-05-15 2011-10-28
75 554430 30279 1986-01-01 1986-12-31 1975-05-15 2011-10-28
76 554430 30279 1987-01-01 1987-12-31 1975-05-15 2011-10-28
77 554430 30279 1988-01-01 1988-12-31 1975-05-15 2011-10-28
78 554430 30279 1989-01-01 1989-12-31 1975-05-15 2011-10-28
79 554430 30279 1990-01-01 1990-12-31 1975-05-15 2011-10-28
80 554430 30279 1991-01-01 1991-12-31 1975-05-15 2011-10-28
81 554430 30279 1992-01-01 1992-12-31 1975-05-15 2011-10-28
82 554430 30279 1993-01-01 1993-12-31 1975-05-15 2011-10-28
83 554430 30279 1994-01-01 1994-12-31 1975-05-15 2011-10-28
84 554430 30279 1995-01-01 1995-12-31 1975-05-15 2011-10-28
85 554430 30279 1996-01-01 1996-12-31 1975-05-15 2011-10-28
86 554430 30279 1997-01-01 1997-12-31 1975-05-15 2011-10-28
87 554430 30279 1998-01-01 1998-12-31 1975-05-15 2011-10-28
88 554430 30279 1999-01-01 1999-12-31 1975-05-15 2011-10-28
89 554430 30279 2000-01-01 2000-12-31 1975-05-15 2011-10-28
90 554430 30279 2001-01-01 2001-12-31 1975-05-15 2011-10-28
91 554430 30279 2002-01-01 2002-12-31 1975-05-15 2011-10-28
92 554430 30279 2003-01-01 2003-12-31 1975-05-15 2011-10-28
93 554430 30279 2004-01-01 2004-12-31 1975-05-15 2011-10-28
94 554430 30279 2005-01-01 2005-12-31 1975-05-15 2011-10-28
95 554430 30279 2006-01-01 2006-12-31 1975-05-15 2011-10-28
96 554430 30279 2007-01-01 2007-12-31 1975-05-15 2011-10-28
97 554430 30279 2008-01-01 2008-12-31 1975-05-15 2011-10-28
98 554430 30279 2009-01-01 2009-12-31 1975-05-15 2011-10-28
99 554430 30279 2010-01-01 2010-12-31 1975-05-15 2011-10-28
100 554430 30279 2011-01-01 2011-10-28 1975-05-15 2011-10-28
101 777766 44584 NA NA NA NA
102 892379 32342 NA 2018-01-01 NA 2018-01-01
103 328171 583820 NA NA NA NA
104 1973398 550686 NA 2010-01-01 NA 2010-01-01
105 1041558 829450 NA NA NA NA
106 542061 10961 NA 2009-09-28 NA 2009-09-28
107 330769 21696 NA 2000-11-01 NA 2000-11-01
108 836789 591529 NA NA NA NA

Expanding rows to include interim years in a date range

sample data

library(data.table)
# Sample data: one integer start/end year range per id.
DT <- fread("id start end
1 2000 2004
2 2005 2005
3 2005 2007
4 2001 2002")

code

year is numeric (and not a date), so you can create a vector from start to end, by id.

# start/end are plain integers, so seq() (equivalently start:end) expands
# each id into one row per year; the trailing [] prints the result.
DT[, .(year = seq(start, end)), by = id][]

output

#     id year
# 1: 1 2000
# 2: 1 2001
# 3: 1 2002
# 4: 1 2003
# 5: 1 2004
# 6: 2 2005
# 7: 3 2005
# 8: 3 2006
# 9: 3 2007
# 10: 4 2001
# 11: 4 2002

R: Expand rows according to start and end date and calculate hours between days

Though it is a bit crude, it will work.

library(tidyverse)
library(lubridate)

df %>%
# One row per event (Admission/Discharge) with its timestamp in DATE.
pivot_longer(cols = -c(ID, Stay_in_days), names_to = "Event", values_to = "DATE") %>%
group_by(ID) %>%
# dummy: day offset within the stay (0 = admission row; discharge row gets
# floor(Stay_in_days), but at least 1 so complete() always has a range).
mutate(dummy = case_when(Event == "Admission" ~ 0,
Event == "Discharge" ~ max(floor(Stay_in_days),1))) %>%
# Insert one intermediate row per whole day between admission and discharge.
complete(dummy = seq(min(dummy), max(dummy), 1)) %>%
# Fill the generated rows: DATE = admission + dummy days (offset in seconds),
# and Stay_in_days = elapsed days from admission to the next midnight, plus
# the day offset; discharge rows keep the pre-pivot stay value.
mutate(Event = ifelse(is.na(Event), "Dummy", Event),
DATE = if_else(is.na(DATE), first(DATE)+dummy*24*60*60, DATE),
Stay_in_days = case_when(Event == "Admission" ~ as.numeric(difftime(ceiling_date(DATE, "day"), DATE, units = "days")),
Event == "Discharge" ~ Stay_in_days,
TRUE ~ dummy + as.numeric(difftime(ceiling_date(first(DATE), "day"), first(DATE), units = "days")))) %>%
select(ID, DATE, Stay_in_days)

# A tibble: 199 x 3
# Groups: ID [20]
ID DATE Stay_in_days
<dbl> <dttm> <dbl>
1 1 2020-08-20 15:25:03 0.358
2 1 2020-08-21 21:09:49 1.24
3 2 2020-08-29 16:16:08 0.322
4 2 2020-08-30 16:16:08 1.32
5 2 2020-08-31 18:08:57 2.08
6 3 2020-09-02 15:28:41 0.355
7 3 2020-09-03 14:51:10 0.974
8 4 2020-09-10 17:14:47 0.281
9 4 2020-09-11 17:25:18 1.01
10 5 2020-09-11 09:11:36 0.617
# ... with 189 more rows

Explanation of logic For the first date in every ID, the stay_in_days gives the duration from admission date-time to following 24 Hrs. For intermediate dates, it just adds 1 to previous value. For discharge_date it retains the stay value calculated prior to pivoting. Hope this is what you were after.

Explanation of code After pivoting longer, I used a dummy column to create intermediate date-time objects. After that I just mutate the columns for generating output as described above.

Expand pandas dataframe date ranges to individual rows

Use pandas.date_range in a list comprehension ,then use DataFrame.explode (you need to be using at least pandas v 0.25.0 for the explode method):

# Minimal example setup
df = pd.DataFrame({
    'TRIPNAME': ['HIGHSEASON', 'LOWSEASON', 'MEDSEASON'],
    'TRIPCAT': ['H', 'L', 'M'],
    'STARTDATE': ['01JAN20', '16SEP20', '29JAN20'],
    'ENDDATE': ['15JAN20', '30NOV20', '19JUL20'],
    'FARE': [345, 280, 250],
})

# Build one daily DatetimeIndex per row, spanning STARTDATE..ENDDATE inclusive.
starts = pd.to_datetime(df['STARTDATE'])
ends = pd.to_datetime(df['ENDDATE'])
df['DATE'] = [pd.date_range(begin, finish, freq='d')
              for begin, finish in zip(starts, ends)]

# Expand each range to one row per day, then drop the raw text date columns.
df = df.explode('DATE').drop(['STARTDATE', 'ENDDATE'], axis=1)

print(df)

[out]

      TRIPNAME TRIPCAT  FARE       DATE
0 HIGHSEASON H 345 2020-01-01
0 HIGHSEASON H 345 2020-01-02
0 HIGHSEASON H 345 2020-01-03
0 HIGHSEASON H 345 2020-01-04
0 HIGHSEASON H 345 2020-01-05
.. ... ... ... ...
2 MEDSEASON M 250 2020-07-15
2 MEDSEASON M 250 2020-07-16
2 MEDSEASON M 250 2020-07-17
2 MEDSEASON M 250 2020-07-18
2 MEDSEASON M 250 2020-07-19

[264 rows x 4 columns]

Pandas df, adding rows between start and end dates

Suppose your df can have multiple rows and we need to create new records for each day between both dates in each row, we can create a date range record for each row by pd.date_range and then expand the date range records to multiple rows (each day in one row) by .explode(), as follows:

# Build one DatetimeIndex per row covering START_DATE..END_DATE (inclusive).
date_spans = [pd.date_range(start=begin, end=finish)
              for begin, finish in zip(df['START_DATE'], df['END_DATE'])]
df['DATE_VALUE'] = date_spans

# Expand each range so every day gets its own row, with a fresh 0..n-1 index.
df = df.explode('DATE_VALUE').reset_index(drop=True)

Result:

print(df)

START_DATE END_DATE DAYS_BETWEEN NAME TOTAL_AMT DATE_VALUE
0 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-16
1 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-17
2 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-18
3 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-19
4 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-20
5 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-21
6 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-22
7 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-23
8 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-24
9 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-25
10 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-26
11 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-27
12 8/16/2021 8/28/2021 13 LOCATION1 1000 2021-08-28

Expand rows between begin date and exit date and count the number of days in R

One approach is with the uncount function:

library(dplyr)
library(tidyr)
library(lubridate)
data %>%
# Assemble real Date columns from the day/month/year parts, then count the
# days in each stay (inclusive of both endpoints).
mutate(start_date = mdy(paste(begin_month,begin_day,begin_year,sep="-")),
end_date = mdy(paste(exit_month,exit_day,exit_year,sep="-")),
number_days = as.integer(end_date-start_date + 1)) %>%
# Duplicate each row once per day of the stay.
uncount(as.integer(number_days)) %>%
group_by(id_min) %>%
# Overwrite the begin_* parts with the day/month/year of each successive
# date in start_date[1]..end_date[1] within the id's group.
mutate(begin_day = day(seq(start_date[1], end_date[1], by = "days")),
begin_month = month(seq(start_date[1], end_date[1], by = "days")),
begin_year = year(seq(start_date[1], end_date[1], by = "days"))) %>%
dplyr::select(-start_date, -end_date)
# A tibble: 13 x 8
# Groups: id_min [2]
id_min begin_day begin_month begin_year exit_day exit_month exit_year number_days
<chr> <int> <int> <int> <chr> <chr> <chr> <int>
1 1030015 29 12 2019 3 1 2020 6
2 1030015 30 12 2019 3 1 2020 6
3 1030015 31 12 2019 3 1 2020 6
4 1030015 1 1 2020 3 1 2020 6
5 1030015 2 1 2020 3 1 2020 6
6 1030015 3 1 2020 3 1 2020 6
7 1030028 4 1 2020 10 1 2020 7
8 1030028 5 1 2020 10 1 2020 7
9 1030028 6 1 2020 10 1 2020 7
10 1030028 7 1 2020 10 1 2020 7
11 1030028 8 1 2020 10 1 2020 7
12 1030028 9 1 2020 10 1 2020 7
13 1030028 10 1 2020 10 1 2020 7

Given a date range how to expand to the number of days per month in that range?

A tidyverse solution:

# example data: two ids whose [startDate, endDate] ranges span several months
df <- read.table(text = "
id startDate endDate someOtherCol
1 2017-09-23 2018-02-01 val1
2 2018-01-01 2018-03-31 val2
", header = TRUE, stringsAsFactors = FALSE)

library(tidyverse)
library(lubridate)


df %>%
  # Parse the date columns (if needed); across() replaces superseded mutate_at().
  mutate(across(c(startDate, endDate), ymd)) %>%
  group_by(id) %>%                                 # for each id
  # Create the sequence of covered dates as a list-column.
  mutate(d = list(seq(startDate, endDate, by = "1 day"))) %>%
  # One row per date; naming the column avoids the deprecated bare unnest().
  unnest(d) %>%
  # Group by year/month of each date in the sequence (someOtherCol is carried along).
  group_by(id, year = year(d), month = month(d), someOtherCol) %>%
  summarise(numberOfDaysInMonth = n()) %>%         # count days per month
  ungroup()                                        # forget the grouping

# # A tibble: 9 x 5
# id year month someOtherCol numberOfDaysInMonth
# <int> <dbl> <dbl> <chr> <int>
# 1 1 2017 9 val1 8
# 2 1 2017 10 val1 31
# 3 1 2017 11 val1 30
# 4 1 2017 12 val1 31
# 5 1 2018 1 val1 31
# 6 1 2018 2 val1 1
# 7 2 2018 1 val2 31
# 8 2 2018 2 val2 28
# 9 2 2018 3 val2 31


Related Topics



Leave a reply



Submit