Sql: Merge Date Ranges

Combine consecutive date ranges

The strange bit you see with my use of the date '31211231' is just a very large date to handle your "no-end-date" scenario. I have assumed you won't really have many date ranges per employee, so I've used a simple Recursive Common Table Expression to combine the ranges.

To make it run faster, the starting anchor query keeps only those dates that will not link up to a prior range (per employee). The rest is just tree-walking the date ranges and growing the range. The final GROUP BY keeps only the largest date range built up per starting ANCHOR (employmentid, startdate) combination.


SQL Fiddle

MS SQL Server 2008 Schema Setup:

create table Tbl (
employmentid int,
startdate datetime,
enddate datetime);

insert Tbl values
(5, '2007-12-03', '2011-08-26'),
(5, '2013-05-02', null),
(30, '2006-10-02', '2011-01-16'),
(30, '2011-01-17', '2012-08-12'),
(30, '2012-08-13', null),
(66, '2007-09-24', null);

/*
-- expected outcome
EmploymentId StartDate EndDate
5 2007-12-03 2011-08-26
5 2013-05-02 NULL
30 2006-10-02 NULL
66 2007-09-24 NULL
*/

Query 1:

;with cte as (
select a.employmentid, a.startdate, a.enddate
from Tbl a
left join Tbl b on a.employmentid=b.employmentid and a.startdate-1=b.enddate
where b.employmentid is null
union all
select a.employmentid, a.startdate, b.enddate
from cte a
join Tbl b on a.employmentid=b.employmentid and b.startdate-1=a.enddate
)
select employmentid,
startdate,
nullif(max(isnull(enddate,'32121231')),'32121231') enddate
from cte
group by employmentid, startdate
order by employmentid

Results:

| EMPLOYMENTID |                        STARTDATE |                       ENDDATE |
-----------------------------------------------------------------------------------
| 5 | December, 03 2007 00:00:00+0000 | August, 26 2011 00:00:00+0000 |
| 5 | May, 02 2013 00:00:00+0000 | (null) |
| 30 | October, 02 2006 00:00:00+0000 | (null) |
| 66 | September, 24 2007 00:00:00+0000 | (null) |

Merging date intervals in SQL Server

It takes longer for me to set up the sample data than to write the query - it would be better if you posted questions that include CREATE TABLE and INSERT/SELECT statements. I don't know what your table is called, I've called mine Periods:

create table Periods (
StartDate date not null,
EndDate date not null
)
go
insert into Periods(StartDate,EndDate)
select '19820302','19820930' union all
select '19821001','19850117' union all
select '19850626','19850726' union all
select '19850730','19911231' union all
select '19920101','19951231' union all
select '19960101','20040531' union all
select '20040605','20060131' union all
select '20060201','20110520'
go
; with MergedPeriods as (
Select p1.StartDate, p1.EndDate
from
Periods p1
left join
Periods p2
on
p1.StartDate = DATEADD(day,1,p2.EndDate)
where
p2.StartDate is null
union all
select p1.StartDate,p2.EndDate
from
MergedPeriods p1
inner join
Periods p2
on
p1.EndDate = DATEADD(day,-1,p2.StartDate)
)
select StartDate,MAX(EndDate) as EndDate
from MergedPeriods group by StartDate

Result:

StartDate   EndDate
1982-03-02 1985-01-17
1985-06-26 1985-07-26
1985-07-30 2004-05-31
2004-06-05 2011-05-20

Merge Datetime Ranges Oracle SQL or PL/SQL

This is adapted from this answer which contains an explanation of the code. All that has changed is to add PARTITION BY order_id to calculate the date ranges for each order_id and then to return the ranges (rather than total the values, as per the linked answer):

SELECT order_id,
start_date_time,
end_date_time
FROM (
SELECT order_id,
LAG( dt ) OVER ( PARTITION BY order_id ORDER BY dt ) AS start_date_time,
dt AS end_date_time,
start_end
FROM (
SELECT order_id,
dt,
CASE SUM( value ) OVER ( PARTITION BY order_id ORDER BY dt ASC, value DESC, ROWNUM ) * value
WHEN 1 THEN 'start'
WHEN 0 THEN 'end'
END AS start_end
FROM table_name
UNPIVOT ( dt FOR value IN ( start_date_time AS 1, end_date_time AS -1 ) )
)
WHERE start_end IS NOT NULL
)
WHERE start_end = 'end';

From Oracle 12, you can use MATCH_RECONIZE to do row-by-row processing:

SELECT *
FROM table_name
MATCH_RECOGNIZE(
PARTITION BY order_id
ORDER BY start_date_time
MEASURES
FIRST(start_date_time) AS start_date_time,
MAX(end_date_time) AS end_date_time
ONE ROW PER MATCH
PATTERN (overlapping_rows* last_row)
DEFINE
overlapping_rows AS NEXT(start_date_time) <= MAX(end_date_time)
)

Which, for your test data:

CREATE TABLE table_name (
order_id NUMBER,
start_date_time DATE,
end_date_time DATE
);

INSERT INTO table_name ( order_id, start_date_time, end_date_time )
SELECT 3933, TIMESTAMP '2020-02-04 08:00:00', TIMESTAMP '2020-02-04 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-04 13:30:00', TIMESTAMP '2020-02-04 17:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-04 14:00:00', TIMESTAMP '2020-02-04 19:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 13:40:12', TIMESTAMP '2020-02-05 14:34:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 14:00:00', TIMESTAMP '2020-02-05 18:55:12' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 14:49:48', TIMESTAMP '2020-02-05 15:04:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 08:00:00', TIMESTAMP '2020-02-06 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 13:30:00', TIMESTAMP '2020-02-06 17:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 14:10:12', TIMESTAMP '2020-02-06 18:49:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 08:00:00', TIMESTAMP '2020-02-07 10:30:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 08:00:00', TIMESTAMP '2020-02-07 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 13:30:00', TIMESTAMP '2020-02-07 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 09:00:00', TIMESTAMP '2020-05-14 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 09:00:00', TIMESTAMP '2020-05-14 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 15:00:00', TIMESTAMP '2020-05-14 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 08:40:12', TIMESTAMP '2020-05-15 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 09:40:12', TIMESTAMP '2020-05-15 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 10:15:00', TIMESTAMP '2020-05-15 12:15:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 13:19:48', TIMESTAMP '2020-05-15 16:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 08:49:48', TIMESTAMP '2020-05-18 09:45:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 10:00:00', TIMESTAMP '2020-05-18 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 10:00:00', TIMESTAMP '2020-05-18 16:58:12' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 15:34:48', TIMESTAMP '2020-05-18 16:10:12' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 16:30:00', TIMESTAMP '2020-05-18 16:45:00' FROM DUAL;

Which both output:


ORDER_ID | START_DATE_TIME | END_DATE_TIME
-------: | :------------------ | :------------------
3933 | 2020-02-04 08:00:00 | 2020-02-04 12:00:00
3933 | 2020-02-04 13:30:00 | 2020-02-04 19:00:00
3933 | 2020-02-05 13:40:12 | 2020-02-05 18:55:12
3933 | 2020-02-06 08:00:00 | 2020-02-06 12:00:00
3933 | 2020-02-06 13:30:00 | 2020-02-06 18:49:48
3933 | 2020-02-07 08:00:00 | 2020-02-07 12:00:00
3933 | 2020-02-07 13:30:00 | 2020-02-07 17:00:00
11919 | 2020-05-14 09:00:00 | 2020-05-14 17:00:00
11919 | 2020-05-15 08:40:12 | 2020-05-15 16:30:00
11919 | 2020-05-18 08:49:48 | 2020-05-18 09:45:00
11919 | 2020-05-18 10:00:00 | 2020-05-18 17:00:00

db<>fiddle here

merge consecutive dates ranges using SQL

This is a Gaps & Islands problem. You can use the typical solution using LAG(). For example:

select
max(client_id) as client_id,
max(status) as status,
min(start_date) as start_date,
max(end_date) as end_date
from (
select *, sum(i) over(partition by client_id order by start_date) as g
from (
select *,
case when dateadd(day, -1, start_date) <>
lag(end_date) over(partition by client_id order by start_date)
then 1 else 0 end as i
from t
) x
) y
group by client_id, g
order by client_id, g

Result:

 client_id  status  start_date  end_date   
---------- ------- ----------- ----------
1 1 2022-01-01 2022-01-04
1 1 2022-01-12 2022-01-15
2 1 2022-01-03 2022-01-03
2 1 2022-01-05 2022-01-06

See running example at db<>fiddle.

Detect and merge date range successive overlaps in SQL

Gaps and Islands has multiple steps.

First, mark the gaps

with mark as (
select *,
lag(end_datum) over w
not between start_datum and end_datum as island
from konto
window w as (partition by konto_nummer
order by start_datum, end_datum)
),

Then, number the islands

 grps as (
select *,
sum(coalesce(island, true)::int) over w as grpnum
from mark
window w as (partition by konto_nummer
order by start_datum, end_datum)
)

Then aggregate by group

select konto_nummer, 
min(start_datum) as start_datum,
max(end_datum) as end_datum
from grps
group by konto_nummer, grpnum
order by 1, 2, 3;

Working fiddle here.

How to combine date ranges in SQL with small gaps

You can identify where the new periods begin. For a general problem, I would go with not exists. Then you can assign a group using cumulative sums:

select id, sum(is_start) over (partition by id order by datestart) as grp
from (select t.*,
(case when not exists (select 1
from t t2
where t2.id = t.id and
t2.date1 >= dateadd(day, -30, t1.date1) and
t2.date2 < dateadd(day, 30, t1.date2)
)
then 1 else 0
end) as is_start
from t
) t;

The final step is aggregation:

with g as (
select id, sum(is_start) over (partition by id order by datestart) as grp
from (select t.*,
(case when not exists (select 1
from t t2
where t2.id = t.id and
t2.date1 >= dateadd(day, -30, t1.date1) and
t2.date2 < dateadd(day, 30, t1.date2)
)
then 1 else 0
end) as is_start
from t
) t
)
select id, min(date1), max(date2)
from g
group by id, grp;

Merge overlapping dates in SQL Server

SQL DEMO

declare @t table (Name varchar(100),  Datetime_Start  datetime,  Datetime_End datetime);
insert into @t values
('A' , '2017-01-02 00:00' , '2017-03-28 00:10'),
('A' , '2017-05-14 23:50' , '2017-05-29 23:50'),
('B' , '2017-05-18 00:00' , '2017-05-18 04:00'),
('B' , '2017-05-18 02:00' , '2017-05-18 03:00'),
('C' , '2017-01-02 00:00' , '2017-01-17 15:50'),
('C' , '2017-01-14 03:50' , '2017-01-28 15:50');

with Datetime_Starts as
(
select distinct name, Datetime_Start
from @t as t1
where not exists
(select * from @t as t2
where t2.name = t1.name
and t2.Datetime_Start < t1.Datetime_Start
and t2.Datetime_End >= t1.Datetime_Start)
),
Datetime_Ends as
(
select distinct name, Datetime_End
from @t as t1
where not exists
(select * from @t as t2
where t2.name = t1.name
and t2.Datetime_End > t1.Datetime_End
and t2.Datetime_Start <= t1.Datetime_End)
)

select name, Datetime_Start,
(select min(Datetime_End)
from Datetime_Ends as e
where e.name = s.name
and Datetime_End >= Datetime_Start) as Datetime_End
from Datetime_Starts as s;

Merge Contiguous date ranges that are from same id and have same amount

Is this what you're after?

WITH Gaps AS(
SELECT DT.ID,
DT.StartDate,
DT.EndDate,
DT.Amount,
DATEDIFF(DAY,LAG(DATEADD(DAY,1,DT.EndDate),1,DT.StartDate) OVER (PARTITION BY DT.ID, DT.Amount ORDER BY DT.StartDate ASC), DT.StartDate) AS Gap
FROM @DataTable DT),
Grps AS(
SELECT G.ID,
G.StartDate,
G.EndDate,
G.Amount,
ROW_NUMBER() OVER (PARTITION BY G.ID ORDER BY G.StartDate) -
ROW_NUMBER() OVER (PARTITION BY G.ID,Amount ORDER BY G.StartDate) + Gap AS Grp
FROM Gaps G)
SELECT G.ID,
MIN(G.StartDate) AS StartDate,
MAX(EndDate) AS EndDate,
G.Amount
FROM Grps G
GROUP BY G.ID,
G.Amount,
G.Grp
ORDER BY ID,
StartDate;

Added a Unique ID, to get around a "feature":

DECLARE @DataTable TABLE (
UniqueID int IDENTITY(1,1),
ID [int] NULL,
StartDate [date] NULL,
EndDate [date] NULL,
Amount [decimal](12,2) NULL
)

INSERT INTO @DataTable
SELECT 1, '20180101','20180513', 10.00 UNION ALL
SELECT 1, '20180630','20190301', 15.00 UNION ALL
SELECT 1, '20190302','20190615', 15.00 UNION ALL
SELECT 1, '20190616','20991231', 5.00 UNION ALL
SELECT 2, '20190101','20190331', 35.00 UNION ALL
SELECT 2, '20190401','20191031', 30.00 UNION ALL
SELECT 3, '20180505','20180930', 19.00 UNION ALL
SELECT 3, '20181001','20190228', 1.00 UNION ALL
SELECT 3, '20190501','20190815', 1.00 UNION ALL
SELECT 3, '20190819','20190827', 5.00 UNION ALL
SELECT 3, '20190828','20991231', 1.00 UNION ALL
SELECT 4, '20171001', '20171231', 688.96 UNION ALL
SELECT 4, '20180101', '20180430', 707.96 UNION ALL
SELECT 4, '20180501', '20180531', 783.96 UNION ALL
SELECT 4, '20180601', '20181231', 707.96 UNION ALL
SELECT 4, '20190101', '20190331', 707.96 UNION ALL
SELECT 4, '20190401', '20190430', 571.46 UNION ALL
SELECT 4, '20190501', '20190630', 707.96 UNION ALL
SELECT 4, '20190701', '20991231', 707.96;

--SELECT *
--FROM @DataTable;

WITH Gaps AS(
SELECT DT.UniqueID,
DT.ID,
DT.StartDate,
DT.EndDate,
DT.Amount,
DATEDIFF(DAY,LAG(DATEADD(DAY,1,DT.EndDate),1,DT.StartDate) OVER (PARTITION BY DT.ID, DT.Amount ORDER BY DT.UniqueID ASC), DT.StartDate) AS Gap
FROM @DataTable DT),
Grps AS(
SELECT G.UniqueID,
G.ID,
G.StartDate,
G.EndDate,
G.Amount,
G.Gap,
ROW_NUMBER() OVER (PARTITION BY G.ID ORDER BY G.UniqueID) -
ROW_NUMBER() OVER (PARTITION BY G.ID,Amount ORDER BY G.UniqueID) + (Gap * UniqueID) AS Grp
FROM Gaps G)
SELECT G.ID,
MIN(G.StartDate) AS StartDate,
MAX(EndDate) AS EndDate,
G.Amount
FROM Grps G
GROUP BY G.ID,
G.Amount,
G.Grp
ORDER BY ID,
StartDate;


Related Topics



Leave a reply



Submit