SQL Query for Grouping the Results Based on Sequence

SQL Query for Grouping the results based on sequence

WITH numbered AS (
SELECT
ID, Seq, Amt,
SeqGroup = ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Seq) - Seq
FROM atable
)
SELECT
ID,
Start = MIN(Seq),
[End] = MAX(Seq),
TotalAmt = SUM(Amt)
FROM numbered
GROUP BY ID, SeqGroup
ORDER BY ID, Start
;

How to group the same values which is in sequence order

This is a gaps and islands problem. Here is one way to solve it using lag() and a cumulative sum():

select
min(num) num,
count(*) count_num
from (
select
t.*,
sum(case when num = lag_num then 0 else 1 end) over(order by id) grp
from (
select
t.*,
lag(num) over(order by id) lag_num
from #temp t
) t
) t
group by grp

Demo on DB Fiddlde:


num | count_num
--: | --------:
1 | 3
2 | 1
1 | 1
2 | 2
3 | 3

Group by rows which are in sequence

One way to deal with such a gaps-and-islands problem is to calculate a ranking for the gaps.

Then group also on that ranking.

SELECT PASSENGER, CITY
, COUNT(*) AS "Count"
-- , MIN("DATE") AS StartDate
-- , MAX("DATE") AS EndDate
FROM (
SELECT q1.*
, SUM(gap) OVER (PARTITION BY PASSENGER ORDER BY "DATE") as Rnk
FROM (
SELECT PASSENGER, CITY, "DATE"
, CASE
WHEN 1 = TRUNC("DATE")
- TRUNC(LAG("DATE")
OVER (PARTITION BY PASSENGER, CITY ORDER BY "DATE"))
THEN 0 ELSE 1 END as gap
FROM table_name t
) q1
) q2
GROUP BY PASSENGER, CITY, Rnk
ORDER BY MIN("DATE"), PASSENGER




































PASSENGERCITYCount
43NEW YORK3
43LONDON1
44CHICAGO1
44ROME1
44CHICAGO2

Grouping sequence number in SQL

It looks like the RequiredResult column is simple a running sequence that resets after each broken sequence in the Order column when you process the records in the order they were inserted.

This is a typical Data Island analysis task, except in this case the islands are the rows that are sequential sets, the boundary is when the numbering resets back to 1.

  1. Record the input sequence by adding an IDENTITY column to the table variable.
  2. Calculate an island identifier
    • Due to the rule about the rows being in sequence based on the Order column, we can calculate a unique number for the Island by subtracting the Order from the IDENTITY column, in this case Id
  3. We can then use DENSE_RANK() ordering by the Island Number

Putting all that together:

DECLARE @Table TABLE (
[Id] int IDENTITY(1,1),
[Text] varchar(100),
[Order] int,
[RequiredResult] int
);

INSERT INTO @Table
VALUES
('A',1,1),
('B',2,1),
('C',3,1),
('D',1,2),
('A',2,2),
('B',3,2),
('G',4,2),
('H',1,3),
('B',2,3);

SELECT [Text],[Order]
, [Id]-[Order] as Island
, RequiredResult
, DENSE_RANK() OVER (ORDER BY [ID]-[ORDER]) AS CalculatedResult
FROM @Table
ORDER BY [ID]












































































TextOrderIslandRequiredResultCalculatedResult
A1011
B2011
C3011
D1322
A2322
B3322
G4322
H1733
B2733

SQL Count Grouping by a sequence of numbers

If your DBMS supports window functions (e.g. SQL Server 2005+)

SELECT id,
count(*) AS res
FROM (SELECT *,
[pNum] - ROW_NUMBER() OVER (PARTITION BY [id] ORDER BY [pNum]) AS Grp
FROM YourTable) T
GROUP BY id,
Grp

SQL Fiddle

Group by records in sequence

If your strings look like a series of characters then a number (e.g. "XXX001" or "XXXXXXXX001" or "ABC1"), then one way you can do this is by first extracting the numbers with PATINDEX, then using LEAD() and LAG() to find the start and end of the patterns. Then use a ROW_NUMBER() to join the corresponding start and end and do a count of the rows in between the start and end.

For example:

DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));
INSERT @T VALUES
(1, 'sam001', 'sam005'),
(2, 'sam006', 'sam008'),
(3, 'sam009', 'sam014'),
(4, 'sam018', 'sam024'),
(5, 'sam030', 'sam035'),
(6, 'sam036', 'sam040'),
(7, 'wazza01', 'wazza2'),
(8, 'wazza03', 'wazza000005'),
(9, 'wazza09', 'wazza12'),
(10, 'sam041', 'sam42');

WITH CTE AS (
SELECT *,
StartOfSequence = CASE WHEN LAG(EndNum) OVER (PARTITION BY Prefix ORDER BY ID) IS NULL OR StartNum - LAG(EndNum) OVER (PARTITION BY Prefix ORDER BY ID) > 1 THEN 1 ELSE 0 END,
EndOfSequence = CASE WHEN LEAD(StartNum) OVER (PARTITION BY Prefix ORDER BY ID) IS NULL OR LEAD(StartNum) OVER (PARTITION BY Prefix ORDER BY ID) - EndNum > 1 THEN 1 ELSE 0 END
FROM (
SELECT ID,
Start,
[End],
StartNum = CAST(SUBSTRING(Start, PATINDEX('%[0-9]%', Start), LEN(Start)) AS INT),
EndNum = CAST(SUBSTRING([End], PATINDEX('%[0-9]%', [End]), LEN([End])) AS INT),
Prefix = SUBSTRING(Start, 1, PATINDEX('%[0-9]%', Start) - 1)
FROM @T
) AS T
)
SELECT C1.Start, C2.[End], Total = (SELECT COUNT(*) FROM CTE WHERE ID >= C1.ID AND ID <= C2.ID AND Prefix = C1.Prefix)
FROM (
SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
FROM CTE
WHERE StartOfSequence = 1
) AS C1
JOIN (
SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
FROM CTE
WHERE EndOfSequence = 1
) AS C2 ON C1.RN = C2.RN
AND C1.Prefix = C2.Prefix;

Or using a modification of @Pரதீப்'s suggestion below to include the prefix,

DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));
INSERT @T VALUES
(1, 'sam001', 'sam005'),
(2, 'sam006', 'sam008'),
(3, 'sam009', 'sam014'),
(4, 'sam018', 'sam024'),
(5, 'sam030', 'sam035'),
(6, 'sam036', 'sam040'),
(7, 'wazza01', 'wazza2'),
(8, 'wazza03', 'wazza000005'),
(9, 'wazza09', 'wazza12'),
(10, 'sam041', 'sam42');

WITH CTE AS (
SELECT *,
prevEndNum = LAG(EndNum) OVER (PARTITION BY Prefix ORDER BY ID)
FROM (
SELECT ID,
Start,
[End],
StartNum = CAST(SUBSTRING(Start, PATINDEX('%[0-9]%', Start), LEN(Start)) AS INT),
EndNum = CAST(SUBSTRING([End], PATINDEX('%[0-9]%', [End]), LEN([End])) AS INT),
Prefix = SUBSTRING(Start, 1, PATINDEX('%[0-9]%', Start) - 1)
FROM @T
) AS T
)
SELECT DISTINCT Prefix,
First_value(start) OVER(PARTITION BY Prefix, grp ORDER BY id),
First_value([end]) OVER(PARTITION BY Prefix, grp ORDER BY id DESC),
Count(*) OVER( partition BY grp)
FROM (SELECT grp = Sum(CASE WHEN startnum = prevendnum + 1 THEN 0 ELSE 1 END)
OVER(PARTITION BY Prefix ORDER BY id),*
FROM CTE)a

Note: If the IDs were all out of order for some reason, you could swap all the ORDER BY ID parts with ORDER BY StartNum or something like that, if necessary.

EDIT FOR SQL Server 2005:

DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));
INSERT @T VALUES
(1, 'sam001', 'sam005'),
(2, 'sam006', 'sam008'),
(3, 'sam009', 'sam014'),
(4, 'sam018', 'sam024'),
(5, 'sam030', 'sam035'),
(6, 'sam036', 'sam040'),
(7, 'wazza01', 'wazza2'),
(8, 'wazza03', 'wazza000005'),
(9, 'wazza09', 'wazza12'),
(10, 'sam041', 'sam42');

WITH CTE1 AS (
SELECT ID,
Start,
[End],
StartNum = CAST(SUBSTRING(Start, PATINDEX('%[0-9]%', Start), LEN(Start)) AS INT),
EndNum = CAST(SUBSTRING([End], PATINDEX('%[0-9]%', [End]), LEN([End])) AS INT),
Prefix = SUBSTRING(Start, 1, PATINDEX('%[0-9]%', Start) - 1)
FROM @T
),
CTE2 AS (
SELECT C.*,
StartOfSequence = CASE WHEN PrevRow.EndNum IS NULL OR C.StartNum - PrevRow.EndNum > 1 THEN 1 ELSE 0 END,
EndOfSequence = CASE WHEN NextRow.StartNum IS NULL OR Nextrow.StartNum - C.EndNum > 1 THEN 1 ELSE 0 END
FROM CTE1 AS C
OUTER APPLY (SELECT TOP 1 StartNum FROM CTE1 WHERE Prefix = C.Prefix AND ID > C.ID ORDER BY ID) NextRow
OUTER APPLY (SELECT TOP 1 EndNum FROM CTE1 WHERE Prefix = C.Prefix AND ID < C.ID ORDER BY ID DESC) PrevRow
)
SELECT C1.Start, C2.[End], Total = (SELECT COUNT(*) FROM CTE2 WHERE ID >= C1.ID AND ID <= C2.ID AND Prefix = C1.Prefix)
FROM (
SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
FROM CTE2
WHERE StartOfSequence = 1
) AS C1
JOIN (
SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
FROM CTE2
WHERE EndOfSequence = 1
) AS C2 ON C1.RN = C2.RN
AND C1.Prefix = C2.Prefix;

Grouping rows and adding sequence numbers in SQL SELECT statement without modifying table

This is what row_number() does:

select t.*,
row_number() over (partition by user order by date) as seqnum
from t;


Related Topics



Leave a reply



Submit