Combine consecutive date ranges
The strange bit you see with my use of the date '32121231' is just a very large sentinel date to handle your "no-end-date" scenario (a NULL enddate). I have assumed you won't really have many date ranges per employee, so I've used a simple Recursive Common Table Expression to combine the ranges.
To make it run faster, the starting anchor query keeps only those dates that will not link up to a prior range (per employee). The rest is just tree-walking the date ranges and growing the range. The final GROUP BY keeps only the largest date range built up per starting ANCHOR (employmentid, startdate) combination.
SQL Fiddle
MS SQL Server 2008 Schema Setup:
-- Sample schema: one row per employment period.
-- A NULL enddate is the "no-end-date" case (the period is still open).
CREATE TABLE Tbl
(
    employmentid int,
    startdate    datetime,
    enddate      datetime
);

INSERT INTO Tbl (employmentid, startdate, enddate)
VALUES
    (5,  '2007-12-03', '2011-08-26'),
    (5,  '2013-05-02', NULL),
    (30, '2006-10-02', '2011-01-16'),
    (30, '2011-01-17', '2012-08-12'),
    (30, '2012-08-13', NULL),
    (66, '2007-09-24', NULL);

-- Expected outcome once back-to-back periods are merged:
--   EmploymentId  StartDate   EndDate
--   5             2007-12-03  2011-08-26
--   5             2013-05-02  NULL
--   30            2006-10-02  NULL
--   66            2007-09-24  NULL
Query 1:
-- Merge back-to-back employment periods per employee (SQL Server).
-- Two periods are merged when one starts exactly one day after the other ends.
-- NOTE: SQL Server stops a recursive CTE after 100 levels by default; this
-- query assumes each employee has only a handful of chained periods.
;with cte as (
    -- Anchor: periods that have no predecessor ending the day before they
    -- start, i.e. the first period of every chain.
    select a.employmentid, a.startdate, a.enddate
    from Tbl a
    left join Tbl b
        on a.employmentid = b.employmentid
       and b.enddate = dateadd(day, -1, a.startdate)
    where b.employmentid is null
    union all
    -- Recursive step: keep the chain's original startdate and extend its
    -- enddate with the period that starts the day after the current end.
    -- A NULL enddate never matches, so an open-ended period ends the chain.
    select a.employmentid, a.startdate, b.enddate
    from cte a
    join Tbl b
        on a.employmentid = b.employmentid
       and b.startdate = dateadd(day, 1, a.enddate)
)
select employmentid,
       startdate,
       -- MAX ignores NULLs, so an open end is swapped for the sentinel
       -- '32121231' to make it win, then turned back into NULL afterwards.
       nullif(max(isnull(enddate, '32121231')), '32121231') as enddate
from cte
group by employmentid, startdate
-- startdate added so that several chains of one employee come back in a
-- defined order instead of whatever order the plan happens to produce.
order by employmentid, startdate
Results:
| EMPLOYMENTID | STARTDATE | ENDDATE |
-----------------------------------------------------------------------------------
| 5 | December, 03 2007 00:00:00+0000 | August, 26 2011 00:00:00+0000 |
| 5 | May, 02 2013 00:00:00+0000 | (null) |
| 30 | October, 02 2006 00:00:00+0000 | (null) |
| 66 | September, 24 2007 00:00:00+0000 | (null) |
Merging date intervals in SQL Server
It takes longer for me to set up the sample data than to write the query - it would be better if you posted questions that include CREATE TABLE and INSERT/SELECT statements. I don't know what your table is called, so I've called mine Periods:
-- Sample table: closed date ranges, both ends mandatory.
create table Periods
(
    StartDate date not null,
    EndDate   date not null
)
go
-- Eight sample ranges; several start the day after another one ends.
insert into Periods (StartDate, EndDate)
values
    ('19820302', '19820930'),
    ('19821001', '19850117'),
    ('19850626', '19850726'),
    ('19850730', '19911231'),
    ('19920101', '19951231'),
    ('19960101', '20040531'),
    ('20040605', '20060131'),
    ('20060201', '20110520')
go
-- Merge periods that follow each other without a gap (next start = previous end + 1 day).
; with MergedPeriods as (
    -- Seed rows: periods that no other period leads directly into.
    select head.StartDate, head.EndDate
    from Periods as head
    where not exists (
        select 1
        from Periods as prev
        where DATEADD(day, 1, prev.EndDate) = head.StartDate
    )
    union all
    -- Growth step: keep the seed's StartDate and take the EndDate of the
    -- period that begins the day after the chain currently ends.
    select chain.StartDate, nxt.EndDate
    from MergedPeriods as chain
    inner join Periods as nxt
        on DATEADD(day, -1, nxt.StartDate) = chain.EndDate
)
-- Every seed appears once per growth step; the latest EndDate is the merged range.
select StartDate, MAX(EndDate) as EndDate
from MergedPeriods
group by StartDate
Result:
StartDate EndDate
1982-03-02 1985-01-17
1985-06-26 1985-07-26
1985-07-30 2004-05-31
2004-06-05 2011-05-20
Merge Datetime Ranges Oracle SQL or PL/SQL
This is adapted from this answer, which contains an explanation of the code. All that has changed is to add PARTITION BY order_id to calculate the date ranges for each order_id and then to return the ranges (rather than total the values, as per the linked answer):
-- Merge overlapping or touching date-time ranges per order_id (Oracle).
-- Reading from the innermost query outwards:
--   1. UNPIVOT emits two rows per source row: (dt = start_date_time, value = 1)
--      and (dt = end_date_time, value = -1).
--   2. The running SUM(value) per order_id counts the ranges open at each dt.
--      Multiplied by value, it equals 1 on a start row that opens the first
--      range and 0 on an end row that closes the last one (assuming every
--      range has start <= end); all other rows get NULL.
--   3. The middle query drops the NULL rows and uses LAG(dt) to fetch the
--      boundary just before each remaining row.
--   4. The outer query keeps the 'end' rows, whose LAG value is the matching start.
SELECT order_id,
start_date_time,
end_date_time
FROM (
SELECT order_id,
LAG( dt ) OVER ( PARTITION BY order_id ORDER BY dt ) AS start_date_time,
dt AS end_date_time,
start_end
FROM (
SELECT order_id,
dt,
-- value DESC sorts a start before an end at the same dt, so ranges that
-- merely touch are merged; ROWNUM only breaks any remaining ties.
CASE SUM( value ) OVER ( PARTITION BY order_id ORDER BY dt ASC, value DESC, ROWNUM ) * value
WHEN 1 THEN 'start'
WHEN 0 THEN 'end'
END AS start_end
FROM table_name
UNPIVOT ( dt FOR value IN ( start_date_time AS 1, end_date_time AS -1 ) )
)
WHERE start_end IS NOT NULL
)
WHERE start_end = 'end';
From Oracle 12, you can use MATCH_RECOGNIZE to do row-by-row processing:
-- Merge overlapping date-time ranges per order_id with MATCH_RECOGNIZE (Oracle).
-- Rows are scanned per order_id in start_date_time order; each match becomes
-- one output row.
SELECT *
FROM table_name
MATCH_RECOGNIZE(
PARTITION BY order_id
ORDER BY start_date_time
MEASURES
-- Reported per match: the first row's start and the greatest end among its rows.
FIRST(start_date_time) AS start_date_time,
MAX(end_date_time) AS end_date_time
ONE ROW PER MATCH
-- A match is zero or more rows that overlap their successor, then one closing row.
-- last_row has no DEFINE entry, so it places no condition on the row it matches.
PATTERN (overlapping_rows* last_row)
DEFINE
-- A row counts as overlapping when the next row starts no later than the
-- greatest end_date_time among the rows matched so far.
overlapping_rows AS NEXT(start_date_time) <= MAX(end_date_time)
)
Which, for your test data:
-- Test fixture for the two Oracle queries: 12 ranges for order 3933 and 12 for
-- order 11919. It includes exact duplicate rows, rows sharing a start time and
-- ranges nested inside other ranges.
-- The columns are DATE while the literals are TIMESTAMP, so the values are
-- converted on insert.
CREATE TABLE table_name (
order_id NUMBER,
start_date_time DATE,
end_date_time DATE
);
INSERT INTO table_name ( order_id, start_date_time, end_date_time )
SELECT 3933, TIMESTAMP '2020-02-04 08:00:00', TIMESTAMP '2020-02-04 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-04 13:30:00', TIMESTAMP '2020-02-04 17:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-04 14:00:00', TIMESTAMP '2020-02-04 19:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 13:40:12', TIMESTAMP '2020-02-05 14:34:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 14:00:00', TIMESTAMP '2020-02-05 18:55:12' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-05 14:49:48', TIMESTAMP '2020-02-05 15:04:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 08:00:00', TIMESTAMP '2020-02-06 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 13:30:00', TIMESTAMP '2020-02-06 17:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-06 14:10:12', TIMESTAMP '2020-02-06 18:49:48' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 08:00:00', TIMESTAMP '2020-02-07 10:30:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 08:00:00', TIMESTAMP '2020-02-07 12:00:00' FROM DUAL UNION ALL
SELECT 3933, TIMESTAMP '2020-02-07 13:30:00', TIMESTAMP '2020-02-07 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 09:00:00', TIMESTAMP '2020-05-14 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 09:00:00', TIMESTAMP '2020-05-14 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-14 15:00:00', TIMESTAMP '2020-05-14 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 08:40:12', TIMESTAMP '2020-05-15 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 09:40:12', TIMESTAMP '2020-05-15 16:30:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 10:15:00', TIMESTAMP '2020-05-15 12:15:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-15 13:19:48', TIMESTAMP '2020-05-15 16:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 08:49:48', TIMESTAMP '2020-05-18 09:45:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 10:00:00', TIMESTAMP '2020-05-18 17:00:00' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 10:00:00', TIMESTAMP '2020-05-18 16:58:12' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 15:34:48', TIMESTAMP '2020-05-18 16:10:12' FROM DUAL UNION ALL
SELECT 11919, TIMESTAMP '2020-05-18 16:30:00', TIMESTAMP '2020-05-18 16:45:00' FROM DUAL;
Which both output:
ORDER_ID | START_DATE_TIME | END_DATE_TIME
-------: | :------------------ | :------------------
3933 | 2020-02-04 08:00:00 | 2020-02-04 12:00:00
3933 | 2020-02-04 13:30:00 | 2020-02-04 19:00:00
3933 | 2020-02-05 13:40:12 | 2020-02-05 18:55:12
3933 | 2020-02-06 08:00:00 | 2020-02-06 12:00:00
3933 | 2020-02-06 13:30:00 | 2020-02-06 18:49:48
3933 | 2020-02-07 08:00:00 | 2020-02-07 12:00:00
3933 | 2020-02-07 13:30:00 | 2020-02-07 17:00:00
11919 | 2020-05-14 09:00:00 | 2020-05-14 17:00:00
11919 | 2020-05-15 08:40:12 | 2020-05-15 16:30:00
11919 | 2020-05-18 08:49:48 | 2020-05-18 09:45:00
11919 | 2020-05-18 10:00:00 | 2020-05-18 17:00:00
db<>fiddle here
merge consecutive dates ranges using SQL
This is a Gaps & Islands problem. You can use the typical solution using LAG(). For example:
-- Gaps & Islands: merge ranges of one client where each range starts the day
-- after the previous one ends.
with flagged as (
    -- i = 1 marks a row whose predecessor (same client, by start_date) does
    -- not end the day before it starts. The first row of a client has no
    -- predecessor, the comparison is unknown, and i falls through to 0.
    select
        client_id,
        status,
        start_date,
        end_date,
        case
            when dateadd(day, -1, start_date)
                 <> lag(end_date) over (partition by client_id order by start_date)
            then 1
            else 0
        end as i
    from t
),
islands as (
    -- The running total of the flags gives every row of one island the same g.
    select
        client_id,
        status,
        start_date,
        end_date,
        sum(i) over (partition by client_id order by start_date) as g
    from flagged
)
select
    max(client_id)  as client_id,
    max(status)     as status,
    min(start_date) as start_date,
    max(end_date)   as end_date
from islands
group by client_id, g
order by client_id, g
Result:
client_id status start_date end_date
---------- ------- ----------- ----------
1 1 2022-01-01 2022-01-04
1 1 2022-01-12 2022-01-15
2 1 2022-01-03 2022-01-03
2 1 2022-01-05 2022-01-06
See running example at db<>fiddle.
Detect and merge date range successive overlaps in SQL
Gaps and Islands has multiple steps.
First, mark the gaps
-- Step 1 of a single WITH statement (PostgreSQL): flag where a new island begins.
-- island is true when the previous row's end_datum (same konto_nummer, rows
-- ordered by start_datum, end_datum) does not fall between this row's
-- start_datum and end_datum. For the first row of each konto_nummer, lag()
-- returns NULL, so island is NULL.
with mark as (
select *,
lag(end_datum) over w
not between start_datum and end_datum as island
from konto
window w as (partition by konto_nummer
order by start_datum, end_datum)
),
Then, number the islands
-- Step 2, continuing the same WITH list: number the islands.
-- A NULL island flag is treated as true, the flag is cast to 0/1, and the
-- running sum per konto_nummer gives every row of one island the same grpnum.
grps as (
select *,
sum(coalesce(island, true)::int) over w as grpnum
from mark
window w as (partition by konto_nummer
order by start_datum, end_datum)
)
Then aggregate by group
-- Step 3: collapse each island to one row spanning its earliest start and latest end.
select
    konto_nummer,
    min(start_datum) as start_datum,
    max(end_datum)   as end_datum
from grps
group by
    konto_nummer,
    grpnum
order by
    konto_nummer,
    start_datum,
    end_datum;
Working fiddle here.
How to combine date ranges in SQL with small gaps
You can identify where the new periods begin. For a general problem, I would go with not exists. Then you can assign a group using cumulative sums:
-- Number the groups of ranges per id, where ranges separated by a gap of at
-- most 30 days belong to the same group.
-- Fixes over the earlier version of this query:
--   * the correlated subquery referenced an undefined alias (t1); it now
--     refers to the outer row t;
--   * the subquery matched the outer row itself, so is_start was never 1; it
--     now only looks at ranges that start earlier;
--   * the window is ordered by date1, the start column used everywhere else.
select t.id,
       t.date1,
       t.date2,
       sum(t.is_start) over (partition by t.id order by t.date1) as grp
from (select t.*,
             -- is_start = 1 when no earlier-starting range of the same id
             -- ends within the 30 days before this range begins (or later).
             (case when not exists (select 1
                                    from t t2
                                    where t2.id = t.id and
                                          t2.date1 < t.date1 and
                                          t2.date2 >= dateadd(day, -30, t.date1)
                                   )
                   then 1 else 0
              end) as is_start
      from t
     ) t;
The final step is aggregation:
-- Merge ranges of the same id that are separated by a gap of at most 30 days.
-- Fixes over the earlier version of this query:
--   * the correlated subquery referenced an undefined alias (t1);
--   * the subquery matched the outer row itself, so is_start was never 1;
--   * the window is ordered by date1, the start column used everywhere else;
--   * g now exposes date1 and date2, which the final aggregation reads.
with g as (
      select t.id,
             t.date1,
             t.date2,
             -- Running count of group starts = group number within the id.
             sum(t.is_start) over (partition by t.id order by t.date1) as grp
      from (select t.*,
                   -- is_start = 1 when no earlier-starting range of the same
                   -- id ends within the 30 days before this range begins (or later).
                   (case when not exists (select 1
                                          from t t2
                                          where t2.id = t.id and
                                                t2.date1 < t.date1 and
                                                t2.date2 >= dateadd(day, -30, t.date1)
                                         )
                         then 1 else 0
                    end) as is_start
            from t
           ) t
     )
-- One row per group: earliest start and latest end.
select id, min(date1) as date1, max(date2) as date2
from g
group by id, grp;
Merge overlapping dates in SQL Server
SQL DEMO
-- Sample data: datetime ranges per Name; B and C contain overlapping ranges.
declare @t table
(
    Name           varchar(100),
    Datetime_Start datetime,
    Datetime_End   datetime
);

insert into @t (Name, Datetime_Start, Datetime_End)
values
    ('A', '2017-01-02 00:00', '2017-03-28 00:10'),
    ('A', '2017-05-14 23:50', '2017-05-29 23:50'),
    ('B', '2017-05-18 00:00', '2017-05-18 04:00'),
    ('B', '2017-05-18 02:00', '2017-05-18 03:00'),
    ('C', '2017-01-02 00:00', '2017-01-17 15:50'),
    ('C', '2017-01-14 03:50', '2017-01-28 15:50');

with Datetime_Starts as
(
    -- Starts of merged ranges: no range of the same name that began earlier
    -- is still running at this start.
    select distinct this_row.name, this_row.Datetime_Start
    from @t as this_row
    where not exists
    (
        select 1
        from @t as other_row
        where other_row.name = this_row.name
          and other_row.Datetime_Start < this_row.Datetime_Start
          and other_row.Datetime_End >= this_row.Datetime_Start
    )
),
Datetime_Ends as
(
    -- Ends of merged ranges: no range of the same name that ends later has
    -- already begun at this end.
    select distinct this_row.name, this_row.Datetime_End
    from @t as this_row
    where not exists
    (
        select 1
        from @t as other_row
        where other_row.name = this_row.name
          and other_row.Datetime_End > this_row.Datetime_End
          and other_row.Datetime_Start <= this_row.Datetime_End
    )
)
-- Pair every merged start with the earliest merged end at or after it.
select
    s.name,
    s.Datetime_Start,
    min(e.Datetime_End) as Datetime_End
from Datetime_Starts as s
left join Datetime_Ends as e
    on e.name = s.name
   and e.Datetime_End >= s.Datetime_Start
group by
    s.name,
    s.Datetime_Start;
Merge Contiguous date ranges that are from same id and have same amount
Is this what you're after?
-- Merge contiguous date ranges that share the same ID and Amount (SQL Server).
-- Reads from a table variable @DataTable (ID, StartDate, EndDate, Amount) that
-- is not declared in this snippet and must exist in the same batch.
WITH Gaps AS(
-- Gap = days from (previous EndDate + 1 day) to this row's StartDate, where
-- "previous" is the preceding row with the same ID and Amount by StartDate.
-- LAG defaults to the row's own StartDate, so the first row of each
-- (ID, Amount) partition gets 0, as does a row starting the day after its
-- predecessor ends.
SELECT DT.ID,
DT.StartDate,
DT.EndDate,
DT.Amount,
DATEDIFF(DAY,LAG(DATEADD(DAY,1,DT.EndDate),1,DT.StartDate) OVER (PARTITION BY DT.ID, DT.Amount ORDER BY DT.StartDate ASC), DT.StartDate) AS Gap
FROM @DataTable DT),
Grps AS(
-- Grp = (row number within ID) - (row number within ID and Amount) + Gap.
-- Together with ID and Amount it is the key the final GROUP BY merges on.
SELECT G.ID,
G.StartDate,
G.EndDate,
G.Amount,
ROW_NUMBER() OVER (PARTITION BY G.ID ORDER BY G.StartDate) -
ROW_NUMBER() OVER (PARTITION BY G.ID,Amount ORDER BY G.StartDate) + Gap AS Grp
FROM Gaps G)
-- One output row per (ID, Amount, Grp): earliest start and latest end.
SELECT G.ID,
MIN(G.StartDate) AS StartDate,
MAX(EndDate) AS EndDate,
G.Amount
FROM Grps G
GROUP BY G.ID,
G.Amount,
G.Grp
ORDER BY ID,
StartDate;
Added a Unique ID, to get around a "feature":
-- Sample data for the Gaps/Grps query that follows.
-- UniqueID is an identity column; the INSERT supplies only the other four
-- values per row, so UniqueID is generated on insert. The query that follows
-- orders by UniqueID.
-- NOTE(review): EndDate '20991231' looks like an open-ended marker - confirm.
DECLARE @DataTable TABLE (
UniqueID int IDENTITY(1,1),
ID [int] NULL,
StartDate [date] NULL,
EndDate [date] NULL,
Amount [decimal](12,2) NULL
)
INSERT INTO @DataTable
SELECT 1, '20180101','20180513', 10.00 UNION ALL
SELECT 1, '20180630','20190301', 15.00 UNION ALL
SELECT 1, '20190302','20190615', 15.00 UNION ALL
SELECT 1, '20190616','20991231', 5.00 UNION ALL
SELECT 2, '20190101','20190331', 35.00 UNION ALL
SELECT 2, '20190401','20191031', 30.00 UNION ALL
SELECT 3, '20180505','20180930', 19.00 UNION ALL
SELECT 3, '20181001','20190228', 1.00 UNION ALL
SELECT 3, '20190501','20190815', 1.00 UNION ALL
SELECT 3, '20190819','20190827', 5.00 UNION ALL
SELECT 3, '20190828','20991231', 1.00 UNION ALL
SELECT 4, '20171001', '20171231', 688.96 UNION ALL
SELECT 4, '20180101', '20180430', 707.96 UNION ALL
SELECT 4, '20180501', '20180531', 783.96 UNION ALL
SELECT 4, '20180601', '20181231', 707.96 UNION ALL
SELECT 4, '20190101', '20190331', 707.96 UNION ALL
SELECT 4, '20190401', '20190430', 571.46 UNION ALL
SELECT 4, '20190501', '20190630', 707.96 UNION ALL
SELECT 4, '20190701', '20991231', 707.96;
-- Uncomment to inspect the raw rows, including the generated UniqueID values.
--SELECT *
--FROM @DataTable;
-- Variant of the Gaps/Grps merge that orders by UniqueID instead of StartDate
-- and scales the gap by UniqueID when building the group key (SQL Server).
WITH Gaps AS(
-- Gap = days from (previous EndDate + 1 day) to this row's StartDate, where
-- "previous" is the preceding row with the same ID and Amount by UniqueID.
-- LAG defaults to the row's own StartDate, so the first row of each
-- (ID, Amount) partition gets 0.
SELECT DT.UniqueID,
DT.ID,
DT.StartDate,
DT.EndDate,
DT.Amount,
DATEDIFF(DAY,LAG(DATEADD(DAY,1,DT.EndDate),1,DT.StartDate) OVER (PARTITION BY DT.ID, DT.Amount ORDER BY DT.UniqueID ASC), DT.StartDate) AS Gap
FROM @DataTable DT),
Grps AS(
-- Grp = (row number within ID) - (row number within ID and Amount)
--       + Gap * UniqueID.
-- The last term is 0 when Gap is 0 and a row-specific offset otherwise.
-- NOTE(review): presumably this keeps runs separated by a gap from sharing a
-- Grp value; different rows could still add up to the same number - verify.
SELECT G.UniqueID,
G.ID,
G.StartDate,
G.EndDate,
G.Amount,
G.Gap,
ROW_NUMBER() OVER (PARTITION BY G.ID ORDER BY G.UniqueID) -
ROW_NUMBER() OVER (PARTITION BY G.ID,Amount ORDER BY G.UniqueID) + (Gap * UniqueID) AS Grp
FROM Gaps G)
-- One output row per (ID, Amount, Grp): earliest start and latest end.
SELECT G.ID,
MIN(G.StartDate) AS StartDate,
MAX(EndDate) AS EndDate,
G.Amount
FROM Grps G
GROUP BY G.ID,
G.Amount,
G.Grp
ORDER BY ID,
StartDate;
Related Topics
Crosstab with a Large or Undefined Number of Categories
Unexpected Results from SQL Query with Between Timestamps
How to Perform Update Query with Subquery in Access
Sp_Msforeachdb: Only Include Results from Databases with Results
Combine Two Tables in Select (SQL Server 2008)
Split Function by Comma in SQL Server 2008
Sample Query to Show Cardinality Estimation Error in Postgresql
Rails Way to Reset Seed on Id Field
How to Get the Current Year Using SQL on Oracle
SQL Server Using Wildcard Within In
How to Do Select Unique with Linq
Ruby on Rails - Search in Database Based on a Query
How to Concatenate All Columns in a Select with SQL Server
What Is the Internal Representation of Datetime in SQL Server
How to Sort in Order as Entered in SQL Server