Sql: Merge Date Ranges

Combine consecutive date ranges

The strange bit you see with my use of the date '32121231' is just a very large date to handle your "no-end-date" scenario. I have assumed you won't really have many date ranges per employee, so I've used a simple Recursive Common Table Expression to combine the ranges.

To make it run faster, the starting anchor query keeps only those dates that will not link up to a prior range (per employee). The rest is just tree-walking the date ranges and growing the range. The final GROUP BY keeps only the largest date range built up per starting ANCHOR (employmentid, startdate) combination.

SQL Fiddle

MS SQL Server 2008 Schema Setup:

-- Sample employment periods; a NULL enddate stands for "no end date yet".
CREATE TABLE Tbl (
    employmentid INT,
    startdate    DATETIME,
    enddate      DATETIME
);

INSERT INTO Tbl (employmentid, startdate, enddate)
VALUES
    (5,  '2007-12-03', '2011-08-26'),
    (5,  '2013-05-02', NULL),
    (30, '2006-10-02', '2011-01-16'),
    (30, '2011-01-17', '2012-08-12'),
    (30, '2012-08-13', NULL),
    (66, '2007-09-24', NULL);

/*
-- expected outcome
EmploymentId StartDate   EndDate
5            2007-12-03  2011-08-26
5            2013-05-02  NULL
30           2006-10-02  NULL
66           2007-09-24  NULL
*/

Query 1:

-- Merge each employee's back-to-back ranges, i.e. ranges where the next
-- startdate is exactly one day after the previous enddate.
-- NOTE: SQL Server stops a recursive CTE after 100 levels by default; chains
-- longer than that need OPTION (MAXRECURSION n) on the final SELECT.
;WITH cte AS (
    -- Anchor: ranges that no other range of the same employee runs directly
    -- into, i.e. the first link of every chain.
    SELECT a.employmentid, a.startdate, a.enddate
      FROM Tbl a
      LEFT JOIN Tbl b
        ON a.employmentid = b.employmentid
       AND b.enddate = DATEADD(day, -1, a.startdate)
     WHERE b.employmentid IS NULL
     UNION ALL
    -- Recursive step: keep the chain's original startdate and extend the
    -- enddate with the range that begins the day after the current enddate.
    -- A NULL enddate never matches, so open-ended ranges end the chain.
    SELECT a.employmentid, a.startdate, b.enddate
      FROM cte a
      JOIN Tbl b
        ON a.employmentid = b.employmentid
       AND DATEADD(day, -1, b.startdate) = a.enddate
)
SELECT employmentid,
       startdate,
       -- MAX() ignores NULLs, so an open-ended (NULL) enddate is swapped for a
       -- far-future sentinel before aggregating and turned back into NULL after.
       NULLIF(MAX(ISNULL(enddate, '32121231')), '32121231') AS enddate
  FROM cte
 GROUP BY employmentid, startdate
 -- startdate is included so rows of the same employee come back in a
 -- defined (chronological) order.
 ORDER BY employmentid, startdate

Results:

| EMPLOYMENTID |                        STARTDATE |                       ENDDATE |
-----------------------------------------------------------------------------------
|            5 |  December, 03 2007 00:00:00+0000 | August, 26 2011 00:00:00+0000 |
|            5 |       May, 02 2013 00:00:00+0000 |                        (null) |
|           30 |   October, 02 2006 00:00:00+0000 |                        (null) |
|           66 | September, 24 2007 00:00:00+0000 |                        (null) |

Merging date intervals in SQL Server

It takes longer for me to set up the sample data than to write the query - it would be better if you posted questions that include CREATE TABLE and INSERT/SELECT statements. I don't know what your table is called, I've called mine Periods:

-- Sample periods; some start the day after another one ends, some leave a gap.
CREATE TABLE Periods (
    StartDate DATE NOT NULL,
    EndDate   DATE NOT NULL
)
GO
INSERT INTO Periods (StartDate, EndDate)
VALUES
    ('19820302', '19820930'),
    ('19821001', '19850117'),
    ('19850626', '19850726'),
    ('19850730', '19911231'),
    ('19920101', '19951231'),
    ('19960101', '20040531'),
    ('20040605', '20060131'),
    ('20060201', '20110520')
GO
-- Merge periods that follow each other without a gap (next StartDate is the
-- day after the previous EndDate).
;WITH MergedPeriods AS (
    -- Anchor: periods that do not start the day after some other period ends,
    -- i.e. the first period of every chain.
    SELECT cur.StartDate, cur.EndDate
    FROM Periods AS cur
    LEFT JOIN Periods AS prev
        ON cur.StartDate = DATEADD(day, 1, prev.EndDate)
    WHERE prev.StartDate IS NULL
    UNION ALL
    -- Recursive step: keep the chain's StartDate and take the EndDate of the
    -- period that starts the day after the chain currently ends.
    SELECT merged.StartDate, nxt.EndDate
    FROM MergedPeriods AS merged
    INNER JOIN Periods AS nxt
        ON merged.EndDate = DATEADD(day, -1, nxt.StartDate)
)
-- Every chain produces one row per link; the largest EndDate is the full chain.
SELECT StartDate, MAX(EndDate) AS EndDate
FROM MergedPeriods
GROUP BY StartDate
-- Without ORDER BY the row order is not guaranteed.
ORDER BY StartDate

Result:

StartDate   EndDate
1982-03-02  1985-01-17
1985-06-26  1985-07-26
1985-07-30  2004-05-31
2004-06-05  2011-05-20

Merge Datetime Ranges Oracle SQL or PL/SQL

This is adapted from this answer which contains an explanation of the code. All that has changed is to add PARTITION BY order_id to calculate the date ranges for each order_id and then to return the ranges (rather than total the values, as per the linked answer):

-- Merge overlapping ranges per order_id by counting open ranges.
-- Outer query: keep only the 'end' rows; each one carries the matching start
-- in start_date_time.
SELECT order_id,
       start_date_time,
       end_date_time
FROM   (
  -- Only 'start' and 'end' rows reach this level (see the WHERE below), so the
  -- row preceding an 'end' row is the 'start' of the same merged range.
  SELECT order_id,
         LAG( dt ) OVER ( PARTITION BY order_id ORDER BY dt ) AS start_date_time,
         dt AS end_date_time,
         start_end
  FROM   (
    -- UNPIVOT turns every source row into two rows: its start with value 1 and
    -- its end with value -1. The running SUM(value) is the number of ranges
    -- open at dt; "value DESC" puts starts before ends at the same dt, so a
    -- range starting exactly where another ends keeps the count above zero.
    -- ROWNUM is a final tie-breaker in the ordering.
    -- sum * value = 1: a start row that raised the count to 1 ('start').
    -- sum * value = 0: an end row that dropped the count to 0 ('end').
    -- Every other row gets NULL and is filtered out.
    SELECT order_id,
           dt,
           CASE SUM( value ) OVER ( PARTITION BY order_id ORDER BY dt ASC, value DESC, ROWNUM ) * value
             WHEN 1 THEN 'start'
             WHEN 0 THEN 'end'
           END AS start_end
    FROM   table_name
    UNPIVOT ( dt FOR value IN ( start_date_time AS 1, end_date_time AS -1 ) )
  )
  WHERE start_end IS NOT NULL
)
WHERE  start_end = 'end';

From Oracle 12, you can use MATCH_RECOGNIZE to do row-by-row processing:

-- Merge overlapping ranges per order_id with row pattern matching.
SELECT *
FROM   table_name
MATCH_RECOGNIZE(
  PARTITION BY order_id
  ORDER     BY start_date_time
  MEASURES
    -- Each match reports the start of its first row and the latest end among
    -- its rows.
    FIRST(start_date_time) AS start_date_time,
    MAX(end_date_time)     AS end_date_time
  ONE ROW PER MATCH
  -- A match is any number of overlapping_rows followed by one closing row.
  -- last_row has no DEFINE entry, so it accepts any row.
  PATTERN (overlapping_rows* last_row)
  DEFINE
    -- A row continues the run while the following row starts on or before the
    -- latest end seen so far in the match.
    overlapping_rows AS NEXT(start_date_time) <= MAX(end_date_time)
)

Which, for your test data:

-- Test fixture: overlapping, nested and duplicate ranges for two order_ids.
CREATE TABLE table_name (
  order_id NUMBER,
  start_date_time DATE,
  end_date_time DATE
);

-- The TIMESTAMP literals below are inserted into DATE columns.
INSERT INTO table_name ( order_id, start_date_time, end_date_time )
SELECT 3933, TIMESTAMP '2020-02-04 08:00:00', TIMESTAMP '2020-02-04 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-04 13:30:00', TIMESTAMP '2020-02-04 17:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-04 14:00:00', TIMESTAMP '2020-02-04 19:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 13:40:12', TIMESTAMP '2020-02-05 14:34:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 14:00:00', TIMESTAMP '2020-02-05 18:55:12' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 14:49:48', TIMESTAMP '2020-02-05 15:04:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 08:00:00', TIMESTAMP '2020-02-06 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 13:30:00', TIMESTAMP '2020-02-06 17:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 14:10:12', TIMESTAMP '2020-02-06 18:49:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 08:00:00', TIMESTAMP '2020-02-07 10:30:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 08:00:00', TIMESTAMP '2020-02-07 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 13:30:00', TIMESTAMP '2020-02-07 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 09:00:00', TIMESTAMP '2020-05-14 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 09:00:00', TIMESTAMP '2020-05-14 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 15:00:00', TIMESTAMP '2020-05-14 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 08:40:12', TIMESTAMP '2020-05-15 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 09:40:12', TIMESTAMP '2020-05-15 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 10:15:00', TIMESTAMP '2020-05-15 12:15:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 13:19:48', TIMESTAMP '2020-05-15 16:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 08:49:48', TIMESTAMP '2020-05-18 09:45:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 10:00:00', TIMESTAMP '2020-05-18 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 10:00:00', TIMESTAMP '2020-05-18 16:58:12' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 15:34:48', TIMESTAMP '2020-05-18 16:10:12' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 16:30:00', TIMESTAMP '2020-05-18 16:45:00' FROM DUAL;

Which both output:


ORDER_ID | START_DATE_TIME     | END_DATE_TIME      
-------: | :------------------ | :------------------
    3933 | 2020-02-04 08:00:00 | 2020-02-04 12:00:00
    3933 | 2020-02-04 13:30:00 | 2020-02-04 19:00:00
    3933 | 2020-02-05 13:40:12 | 2020-02-05 18:55:12
    3933 | 2020-02-06 08:00:00 | 2020-02-06 12:00:00
    3933 | 2020-02-06 13:30:00 | 2020-02-06 18:49:48
    3933 | 2020-02-07 08:00:00 | 2020-02-07 12:00:00
    3933 | 2020-02-07 13:30:00 | 2020-02-07 17:00:00
   11919 | 2020-05-14 09:00:00 | 2020-05-14 17:00:00
   11919 | 2020-05-15 08:40:12 | 2020-05-15 16:30:00
   11919 | 2020-05-18 08:49:48 | 2020-05-18 09:45:00
   11919 | 2020-05-18 10:00:00 | 2020-05-18 17:00:00

db<>fiddle here

merge consecutive dates ranges using SQL

This is a Gaps & Islands problem. You can use the typical solution using LAG(). For example:

-- Gaps & Islands: merge a client's rows whose start_date is the day after the
-- previous row's end_date.
with flagged as (
  -- i = 1 when the previous row of the same client did not end the day before
  -- this row starts; the first row of a client has no previous row, so the
  -- comparison is NULL and i falls through to 0.
  select *,
    case when lag(end_date) over(partition by client_id order by start_date)
              <> dateadd(day, -1, start_date)
         then 1
         else 0
    end as i
  from t
),
numbered as (
  -- The running total of the flags gives all rows of one island the same g.
  select *, sum(i) over(partition by client_id order by start_date) as g
  from flagged
)
select
  max(client_id) as client_id,
  max(status) as status,
  min(start_date) as start_date,
  max(end_date) as end_date
from numbered
group by client_id, g
order by client_id, g

Result:

 client_id  status  start_date  end_date   
 ---------- ------- ----------- ---------- 
 1          1       2022-01-01  2022-01-04 
 1          1       2022-01-12  2022-01-15 
 2          1       2022-01-03  2022-01-03 
 2          1       2022-01-05  2022-01-06

See running example at db<>fiddle.

Detect and merge date range successive overlaps in SQL

Gaps and Islands has multiple steps.

First, mark the gaps

with mark as (
  -- island is true when a row starts after every earlier row of the same
  -- konto_nummer has ended. Comparing against the running maximum of earlier
  -- end dates (not just the previous row's end) keeps a range that lies
  -- completely inside an earlier, longer range in that range's island.
  -- The first row of each konto_nummer has no earlier rows, so island is NULL.
  select *, 
         start_datum > max(end_datum) over (w rows between unbounded preceding
                                                     and 1 preceding) as island
    from konto
  window w as (partition by konto_nummer
                   order by start_datum, end_datum)
),

Then, number the islands

 grps as (
  -- Running count of island flags per konto_nummer: every row gets the number
  -- of the island it belongs to. A NULL island flag is counted as true.
  select *, 
         sum(coalesce(island, true)::int) over w as grpnum 
    from mark
  window w as (partition by konto_nummer
                   order by start_datum, end_datum)
)

Then aggregate by group

-- One output row per island: its earliest start and latest end.
select konto_nummer,
       min(start_datum) as start_datum,
       max(end_datum)   as end_datum
  from grps
 group by konto_nummer, grpnum
 -- the names refer to the output columns, i.e. the aggregated values
 order by konto_nummer, start_datum, end_datum;

Working fiddle here.

How to combine date ranges in SQL with small gaps

You can identify where the new periods begin. For a general problem, I would go with not exists. Then you can assign a group using cumulative sums:

-- Number the merged periods per id.
-- is_start = 1 when no row of the same id that begins earlier ends within
-- 30 days before this row's date1 (or later), i.e. nothing earlier reaches
-- into the 30-day tolerance window. The strict "<" on date1 stops a row from
-- matching itself.
-- The running sum uses the default window frame, so rows sharing the same
-- date1 are peers and receive the same grp.
select s.id, s.date1, s.date2,
       sum(s.is_start) over (partition by s.id order by s.date1) as grp
from (select t.*,
             (case when not exists (select 1
                                    from t t2
                                    where t2.id = t.id and
                                          t2.date1 < t.date1 and
                                          t2.date2 >= dateadd(day, -30, t.date1)
                                   )
                   then 1 else 0
              end) as is_start
      from t
     ) s;

The final step is aggregation:

-- Merge the periods of each id, treating gaps of up to 30 days as continuous.
with g as (
      -- is_start = 1 when no row of the same id that begins earlier ends
      -- within 30 days before this row's date1 (or later). The strict "<" on
      -- date1 stops a row from matching itself.
      -- The running sum uses the default window frame, so rows sharing the
      -- same date1 are peers and receive the same grp.
      -- date1 and date2 are carried along because the outer query aggregates
      -- them.
      select s.id, s.date1, s.date2,
             sum(s.is_start) over (partition by s.id order by s.date1) as grp
      from (select t.*,
                   (case when not exists (select 1
                                          from t t2
                                          where t2.id = t.id and
                                                t2.date1 < t.date1 and
                                                t2.date2 >= dateadd(day, -30, t.date1)
                                         )
                         then 1 else 0
                    end) as is_start
            from t
           ) s
      )
select id, min(date1) as date1, max(date2) as date2
from g
group by id, grp;

Merge overlapping dates in SQL Server

SQL DEMO

-- Sample data: name A has two separate ranges, B a nested pair, C an overlapping pair.
declare @t table (
    Name           varchar(100),
    Datetime_Start datetime,
    Datetime_End   datetime
);

insert into @t (Name, Datetime_Start, Datetime_End)
values
    ('A', '2017-01-02 00:00', '2017-03-28 00:10'),
    ('A', '2017-05-14 23:50', '2017-05-29 23:50'),
    ('B', '2017-05-18 00:00', '2017-05-18 04:00'),
    ('B', '2017-05-18 02:00', '2017-05-18 03:00'),
    ('C', '2017-01-02 00:00', '2017-01-17 15:50'),
    ('C', '2017-01-14 03:50', '2017-01-28 15:50');

-- Merge overlapping ranges per name: find the start points and end points of
-- the merged ranges separately, then pair each start with the nearest end.
with Datetime_Starts as
(
  -- Starts that are not covered by a range of the same name that began earlier.
  select distinct cur.name, cur.Datetime_Start
  from @t as cur
  where not exists
    (select 1
     from @t as other
     where other.name = cur.name
       and other.Datetime_Start < cur.Datetime_Start
       and other.Datetime_End >= cur.Datetime_Start)
),
Datetime_Ends as
(
  -- Ends that are not covered by a range of the same name that finishes later.
  select distinct cur.name, cur.Datetime_End
  from @t as cur
  where not exists
    (select 1
     from @t as other
     where other.name = cur.name
       and other.Datetime_End > cur.Datetime_End
       and other.Datetime_Start <= cur.Datetime_End)
)

select s.name,
       s.Datetime_Start,
       -- the earliest merged end at or after this start closes the range
       (select min(e.Datetime_End)
          from Datetime_Ends as e
         where e.name = s.name
           and e.Datetime_End >= s.Datetime_Start) as Datetime_End
  from Datetime_Starts as s;

Merge Contiguous date ranges that are from same id and have same amount

Is this what you're after?

-- Merge rows of the same ID and Amount whose date ranges are contiguous
-- (StartDate is the day after the previous row's EndDate).
WITH Gaps AS(
    -- IsStart = 1 when the row does not continue the previous row of the same
    -- ID and Amount. The first row of each partition has no previous row, so
    -- the comparison is NULL and the CASE falls through to 1.
    SELECT DT.ID,
           DT.StartDate,
           DT.EndDate,
           DT.Amount,
           CASE WHEN DT.StartDate = DATEADD(DAY, 1, LAG(DT.EndDate) OVER (PARTITION BY DT.ID, DT.Amount ORDER BY DT.StartDate ASC))
                THEN 0
                ELSE 1
           END AS IsStart
    FROM @DataTable DT),
Grps AS(
    -- The running count of starts numbers the islands within each ID/Amount.
    -- Unlike adding a raw day gap to a row-number difference, two unrelated
    -- islands can never share a number, and a row following a gap stays with
    -- the rows that continue it.
    SELECT G.ID,
           G.StartDate,
           G.EndDate,
           G.Amount,
           SUM(G.IsStart) OVER (PARTITION BY G.ID, G.Amount ORDER BY G.StartDate ASC ROWS UNBOUNDED PRECEDING) AS Grp
    FROM Gaps G)
SELECT G.ID,
       MIN(G.StartDate) AS StartDate,
       MAX(G.EndDate) AS EndDate,
       G.Amount
FROM Grps G
GROUP BY G.ID,
         G.Amount,
         G.Grp
ORDER BY ID,
         StartDate;

Added a Unique ID, to get around a "feature":

-- Sample data; UniqueID is generated by IDENTITY in insertion order.
DECLARE @DataTable TABLE (
    UniqueID  int IDENTITY(1,1),
    ID        int NULL,
    StartDate date NULL,
    EndDate   date NULL,
    Amount    decimal(12,2) NULL
);

INSERT INTO @DataTable (ID, StartDate, EndDate, Amount)
VALUES
    (1, '20180101', '20180513', 10.00),
    (1, '20180630', '20190301', 15.00),
    (1, '20190302', '20190615', 15.00),
    (1, '20190616', '20991231', 5.00),
    (2, '20190101', '20190331', 35.00),
    (2, '20190401', '20191031', 30.00),
    (3, '20180505', '20180930', 19.00),
    (3, '20181001', '20190228', 1.00),
    (3, '20190501', '20190815', 1.00),
    (3, '20190819', '20190827', 5.00),
    (3, '20190828', '20991231', 1.00),
    (4, '20171001', '20171231', 688.96),
    (4, '20180101', '20180430', 707.96),
    (4, '20180501', '20180531', 783.96),
    (4, '20180601', '20181231', 707.96),
    (4, '20190101', '20190331', 707.96),
    (4, '20190401', '20190430', 571.46),
    (4, '20190501', '20190630', 707.96),
    (4, '20190701', '20991231', 707.96);

--SELECT *
--FROM @DataTable;

-- Merge rows of the same ID and Amount whose date ranges are contiguous
-- (StartDate is the day after the previous row's EndDate).
WITH Gaps AS(
    -- IsStart = 1 when the row does not continue the previous row of the same
    -- ID and Amount. The first row of each partition has no previous row, so
    -- the comparison is NULL and the CASE falls through to 1.
    -- UniqueID only breaks ties between rows with the same StartDate.
    SELECT DT.UniqueID,
           DT.ID,
           DT.StartDate,
           DT.EndDate,
           DT.Amount,
           CASE WHEN DT.StartDate = DATEADD(DAY, 1, LAG(DT.EndDate) OVER (PARTITION BY DT.ID, DT.Amount ORDER BY DT.StartDate ASC, DT.UniqueID ASC))
                THEN 0
                ELSE 1
           END AS IsStart
    FROM @DataTable DT),
Grps AS(
    -- The running count of starts numbers the islands within each ID/Amount.
    -- A row that follows a gap opens a new island and the rows that continue
    -- it stay in that island (e.g. ID 4: 2018-06-01..2018-12-31 followed by
    -- 2019-01-01..2019-03-31 at the same Amount).
    SELECT G.UniqueID,
           G.ID,
           G.StartDate,
           G.EndDate,
           G.Amount,
           G.IsStart,
           SUM(G.IsStart) OVER (PARTITION BY G.ID, G.Amount ORDER BY G.StartDate ASC, G.UniqueID ASC ROWS UNBOUNDED PRECEDING) AS Grp
    FROM Gaps G)
SELECT G.ID,
       MIN(G.StartDate) AS StartDate,
       MAX(G.EndDate) AS EndDate,
       G.Amount
FROM Grps G
GROUP BY G.ID,
         G.Amount,
         G.Grp
ORDER BY ID,
         StartDate;