R Find Overlap Among Time Periods

R Find overlap among time periods

I think this is the code that you are looking for? Let me know.

data<- structure(list(ID= c(34L, 34L, 80L, 80L, 81L, 81L, 81L, 94L, 
                            94L), Start = structure(c(1072911600, 1262300400, 1157061600, 
                                                      1277935200, 1157061600, 1277935200, 1157061600, 1075590000, 1285891200
                            ), class = c("POSIXct", "POSIXt"), tzone = ""), End = structure(c(1262214000, 
                                                                                              1409436000, 1251669600, 1404079200, 1251669600, 1404079200, 1251669600, 
                                                                                              1264892400, 1475193600), class = c("POSIXct", "POSIXt"), tzone = ""), 
                      Overlap = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, 
                                  FALSE, FALSE)), .Names = c("ID", "Start", "End", "Overlap"
                                  ), row.names = c(NA, -9L), class = "data.frame")

library("dplyr")
library("lubridate")

overlaps<- function(intervals){
        for(i in 1:(length(intervals)-1)){
                for(j in (i+1):length(intervals)){
                        if(int_overlaps(intervals[i],intervals[j])){
                                return(TRUE)
                        }
                }
        }
        return(FALSE)
}

data %>%
        mutate(Interval=interval(Start,End))%>%
        group_by(ID) %>% 
       do({
               df<-.
               ovl<- overlaps(df$Interval)
               return(data.frame(ID=df$ID[1], ovl))
       })

Also, I hope that someone comes up with a more elegant solution to my overlaps function..

Efficiently find the overlap between two time intervals in R

Maybe try data.table foverlaps function:

library(data.table)
setDT(dat)
setkey(dat, start, end)
foverlaps(dat, dat)[id != i.id]

Extract overlapping and non-overlapping time periods using R (data.table)

This method does a small explosion of looking at all dates in the range, so it may not scale very well if your data gets large.

library(data.table)
  alldates <- data.table(date = seq(min(data$start_dt), max(data$end_dt), by = "day"))
  data[alldates, on = .(start_dt <= date, end_dt >= date)] %>%
    .[, .N, by = .(start_dt, type) ] %>%
    .[ !is.na(type), ] %>%
    dcast(start_dt ~ type, value.var = "N") %>%
    .[, r := do.call(rleid, .SD), .SDcols = setdiff(colnames(.), "start_dt") ] %>%
    .[, .(type = fcase(is.na(`1`[1]), "2", is.na(`2`[1]), "1", TRUE, "3"),
          start_dt = min(start_dt), end_dt = max(start_dt)), by = r ]
#         r   type   start_dt     end_dt
#     <int> <char>     <Date>     <Date>
#  1:     1      1 2015-01-09 2015-04-13
#  2:     2      3 2015-04-14 2015-05-04
#  3:     3      1 2015-05-05 2015-06-18
#  4:     4      3 2015-06-19 2015-08-27
#  5:     5      1 2015-08-28 2015-10-29
#  6:     6      3 2015-10-30 2015-11-19
#  7:     7      1 2015-11-20 2016-02-29
#  8:     8      3 2016-03-01 2016-03-21
#  9:     9      1 2016-03-22 2016-05-23
# 10:    10      3 2016-05-24 2016-06-09
# 11:    11      1 2016-06-10 2016-08-02
# 12:    12      3 2016-08-03 2017-07-18
# 13:    13      1 2017-07-19 2017-07-24
# 14:    14      3 2017-08-18 2018-01-23
# 15:    15      2 2018-01-24 2018-01-31
# 16:    16      3 2018-02-01 2018-04-25
# 17:    17      2 2018-04-26 2018-05-06
# 18:    18      3 2018-05-07 2018-07-29
# 19:    19      2 2018-07-30 2018-08-08
# 20:    20      3 2018-08-09 2019-01-15
# 21:    21      2 2019-01-16 2019-01-30
# 22:    22      3 2019-01-31 2019-02-21
# 23:    23      1 2019-02-22 2019-03-21
# 24:    24      3 2019-03-22 2019-04-24
# 25:    25      2 2019-04-25 2019-05-15
# 26:    26      3 2019-05-16 2019-09-13
# 27:    27      1 2019-09-14 2019-10-13
# 28:    28      3 2019-11-04 2020-01-26
# 29:    29      2 2020-01-27 2020-02-05
# 30:    30      3 2020-02-06 2020-04-29
# 31:    31      2 2020-04-30 2020-05-27
# 32:    32      3 2020-05-28 2020-08-19
# 33:    33      2 2020-08-20 2020-08-24
# 34:    34      3 2020-08-25 2020-11-16
# 35:    35      2 2020-11-17 2020-12-13
# 36:    36      3 2020-12-14 2020-12-23
# 37:    37      1 2020-12-24 2021-03-07
#         r   type   start_dt     end_dt

It drops the id field, I don't know how to map it well back to your original data.

Find overlapping intervals in groups and retain largest non-overlapping periods

Having searched for related problems on stackoverflow, I found that the following approaches (here: Collapse and merge overlapping time intervals) and (here: How to flatten / merge overlapping time periods) could be adapted to my issue.

# Solution adapted from:
# here https://stackoverflow.com/questions/53213418/collapse-and-merge-overlapping-time-intervals
# and here: https://stackoverflow.com/questions/28938147/how-to-flatten-merge-overlapping-time-periods/28938694#28938694 

# Note: df and df1 created in the initial reprex (above)

df2 <- df %>%
  group_by(group) %>%
  arrange(group, start) %>%
  mutate(indx = c(0, cumsum(as.numeric(lead(start))  >            # find overlaps
                              cummax(as.numeric(end)))[-n()])) %>%
  ungroup() %>%
  group_by(group, indx) %>%
  arrange(desc(intval_length)) %>%                                # retain largest interval
  filter(row_number() == 1) %>%
  ungroup() %>%
  select(-indx) %>%
  arrange(group, start)

# Desired output?
identical(df1, df2)
#> [1] TRUE

R Identify cases of overlap in time intervals within the same ID

Another option from the thread you mentioned separately counts the overlapping values and adds them in as a separate column in a separate dataframe. Try this. It worked for me. I get the same output you provided.

library(data.frame)
dt <- data.table(df, key=c("Join.Time", "Leave.Time"))[, `:=`(Overlap=NULL, row=1:nrow(df))]
overlapping <- unique(foverlaps(dt, dt)[Email==i.Email & row!=i.row, Email])
dt[, `:=`(Overlap=FALSE, row=NULL)][Email %in% overlapping, Overlap:=TRUE][order(Email, Join.Time)]

How to find overlapping between date and time?

Here is an approach which does not use any additional packages:

Firstly, we merge all start and end dates and then we arrange them in an increasing order. Like this, we get a table of all time slots.

df$Start_Date = strptime(as.character(df$Start_Date),format = "%d/%m/%Y %H:%M")
df$End_Date = strptime(as.character(df$End_Date),format = "%d/%m/%Y %H:%M")

dates=sort(unique(c(df[,2],df[,3])))

df2=data.frame(start=dates[1:length(dates)-1],end=dates[2:length(dates)])

For each time slot we check which product was active during the given time period. If there is no such product, then "NA" is returned.

active<-function(start,end){
  tmp<-as.vector(df[df$Start_Date<=start & df$End_Date>= end,1])
  if(length(tmp)>0){
    paste(tmp, collapse="/")
  }
  else{
    return(NA)
  }
}

activeproducts <- mapply(active,df2[,1],df2[,2])
df2<-transform(df2,products=activeproducts)

To get rid of the time slots where no product was active, we use the complete.cases function.

df2<- df2[complete.cases(df2),]

>df2

                start                 end products
1 2015-01-01 08:00:00 2015-01-02 09:00:00        x
3 2015-01-03 10:00:00 2015-01-04 09:00:00        y
4 2015-01-04 09:00:00 2015-01-04 12:34:00      y/z
5 2015-01-04 12:34:00 2015-01-05 12:00:00    y/z/x
6 2015-01-05 12:00:00 2015-01-05 13:00:00      z/x
7 2015-01-05 13:00:00 2015-01-07 11:23:00        x

Is there a function for checking if a time interval overlaps in a single column and sort by group in R

The sample data doesn't have any overlapping periods. The following change was made:

start <-as.POSIXct(c("2017-06-27 09:30:00","2017-06-27 15:30:00",
                     "2017-06-27 14:30:00","2017-06-28 09:30:00","2017-06-28 15:00:00"), tz= "UTC")

Using lead will return NA if it is the last record in a group

library(dplyr)

new_df %>%
  group_by(id) %>%
  arrange(int_start(inter1), .by_group = TRUE) %>%
  mutate(overlap2 = lead(int_start(inter1)) < int_end(inter1))

# A tibble: 5 x 4
# Groups:   id [2]
     id inter1                                           overlap overlap2
  <dbl> <Interval>                                       <lgl>   <lgl>   
1     1 2017-06-27 09:30:00 UTC--2017-06-27 10:30:00 UTC FALSE   FALSE   
2     1 2017-06-27 14:30:00 UTC--2017-06-28 18:30:00 UTC TRUE    TRUE    
3     1 2017-06-27 15:30:00 UTC--2017-06-27 17:30:00 UTC TRUE    NA      
4     2 2017-06-28 09:30:00 UTC--2017-06-28 10:30:00 UTC FALSE   FALSE   
5     2 2017-06-28 15:00:00 UTC--2017-06-28 16:00:00 UTC FALSE   NA

If needing to compare each row to all rows within the group

library(tidyverse)

new_df %>%
  group_by(id) %>%
  arrange(int_start(inter1), .by_group = TRUE) %>%
  mutate(overlap2 = map_int(inter1, ~ sum(int_overlaps(.x, inter1))) > 1)

# A tibble: 5 x 4
# Groups:   id [2]
     id inter1                                           overlap overlap2
  <dbl> <Interval>                                       <lgl>   <lgl>   
1     1 2017-06-27 09:30:00 UTC--2017-06-27 10:30:00 UTC FALSE   FALSE   
2     1 2017-06-27 14:30:00 UTC--2017-06-28 18:30:00 UTC TRUE    TRUE    
3     1 2017-06-27 15:30:00 UTC--2017-06-27 17:30:00 UTC TRUE    TRUE    
4     2 2017-06-28 09:30:00 UTC--2017-06-28 10:30:00 UTC FALSE   FALSE   
5     2 2017-06-28 15:00:00 UTC--2017-06-28 16:00:00 UTC FALSE   FALSE

R find if overlaps in Multiple time periods

library(data.table)

ds <- as.Date(c('2014-9-1', '2015-5-11', '2016-11-1','2015-1-1','2015-10-1'))  # start dd
de <- as.Date(c('2015-9-30', '2016-10-31', '2030-1-1','2015-5-30','2015-12-31')) # end dd
id <- c(1,2,3,1,2)
prodid <- c('20','30','20','20','20')
custid <- c(123,123,123,4444,4444)

df <- data.frame(custid, ds,de,id,prodid)
df <- data.table(df)
setkey(df, ds, de)

ovl <- foverlaps(df, df, type = "within")
ovl[custid == i.custid & id != i.id]

What I've done is set a key, this is required for foverlaps to work.
Then I filter the output, you are interested only in overlaps where the custid == i.custid AND when its not itself, so id != i.id.

> ovl[custid == i.custid & id != i.id]
   custid         ds         de id prodid i.custid       i.ds       i.de i.id i.prodid
1:    123 2015-05-11 2016-10-31  2     30      123 2014-09-01 2015-09-30    1       20
2:    123 2014-09-01 2015-09-30  1     20      123 2015-05-11 2016-10-31    2       30

this shows the overlap of interest in both combinations.

How to flatten / merge overlapping time periods

Here's a possible solution. The basic idea here is to compare lagged start date with the maximum end date "until now" using the cummax function and create an index that will separate the data into groups

data %>%
  arrange(ID, start) %>% # as suggested by @Jonno in case the data is unsorted
  group_by(ID) %>%
  mutate(indx = c(0, cumsum(as.numeric(lead(start)) >
                     cummax(as.numeric(end)))[-n()])) %>%
  group_by(ID, indx) %>%
  summarise(start = first(start), end = last(end))

# Source: local data frame [3 x 4]
# Groups: ID
# 
#   ID indx      start        end
# 1  A    0 2013-01-01 2013-01-06
# 2  A    1 2013-01-07 2013-01-11
# 3  A    2 2013-01-12 2013-01-15

R Find Overlap Among Time Periods