SQL Query for Grouping the results based on sequence
WITH numbered AS (
SELECT
ID, Seq, Amt,
SeqGroup = ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Seq) - Seq
FROM atable
)
SELECT
ID,
Start = MIN(Seq),
[End] = MAX(Seq),
TotalAmt = SUM(Amt)
FROM numbered
GROUP BY ID, SeqGroup
ORDER BY ID, Start
;
How to group the same values which is in sequence order
This is a gaps and islands problem. Here is one way to solve it using lag()
and a cumulative sum()
:
select
min(num) num,
count(*) count_num
from (
select
t.*,
sum(case when num = lag_num then 0 else 1 end) over(order by id) grp
from (
select
t.*,
lag(num) over(order by id) lag_num
from #temp t
) t
) t
group by grp
Demo on DB Fiddlde:
num | count_num
--: | --------:
1 | 3
2 | 1
1 | 1
2 | 2
3 | 3
Group by rows which are in sequence
One way to deal with such a gaps-and-islands problem is to calculate a ranking for the gaps.
Then group also on that ranking.
SELECT PASSENGER, CITY
, COUNT(*) AS "Count"
-- , MIN("DATE") AS StartDate
-- , MAX("DATE") AS EndDate
FROM (
SELECT q1.*
, SUM(gap) OVER (PARTITION BY PASSENGER ORDER BY "DATE") as Rnk
FROM (
SELECT PASSENGER, CITY, "DATE"
, CASE
WHEN 1 = TRUNC("DATE")
- TRUNC(LAG("DATE")
OVER (PARTITION BY PASSENGER, CITY ORDER BY "DATE"))
THEN 0 ELSE 1 END as gap
FROM table_name t
) q1
) q2
GROUP BY PASSENGER, CITY, Rnk
ORDER BY MIN("DATE"), PASSENGER
PASSENGER | CITY | Count |
---|---|---|
43 | NEW YORK | 3 |
43 | LONDON | 1 |
44 | CHICAGO | 1 |
44 | ROME | 1 |
44 | CHICAGO | 2 |
Grouping sequence number in SQL
It looks like the RequiredResult
column is simple a running sequence that resets after each broken sequence in the Order
column when you process the records in the order they were inserted.
This is a typical Data Island analysis task, except in this case the islands are the rows that are sequential sets, the boundary is when the numbering resets back to 1.
- Record the input sequence by adding an
IDENTITY
column to the table variable. - Calculate an island identifier
- Due to the rule about the rows being in sequence based on the
Order
column, we can calculate a unique number for the Island by subtracting theOrder
from the IDENTITY column, in this caseId
- Due to the rule about the rows being in sequence based on the
- We can then use
DENSE_RANK()
ordering by the Island Number
Putting all that together:
DECLARE @Table TABLE (
[Id] int IDENTITY(1,1),
[Text] varchar(100),
[Order] int,
[RequiredResult] int
);
INSERT INTO @Table
VALUES
('A',1,1),
('B',2,1),
('C',3,1),
('D',1,2),
('A',2,2),
('B',3,2),
('G',4,2),
('H',1,3),
('B',2,3);
SELECT [Text],[Order]
, [Id]-[Order] as Island
, RequiredResult
, DENSE_RANK() OVER (ORDER BY [ID]-[ORDER]) AS CalculatedResult
FROM @Table
ORDER BY [ID]
Text | Order | Island | RequiredResult | CalculatedResult |
---|---|---|---|---|
A | 1 | 0 | 1 | 1 |
B | 2 | 0 | 1 | 1 |
C | 3 | 0 | 1 | 1 |
D | 1 | 3 | 2 | 2 |
A | 2 | 3 | 2 | 2 |
B | 3 | 3 | 2 | 2 |
G | 4 | 3 | 2 | 2 |
H | 1 | 7 | 3 | 3 |
B | 2 | 7 | 3 | 3 |
SQL Count Grouping by a sequence of numbers
If your DBMS supports window functions (e.g. SQL Server 2005+)
SELECT id,
count(*) AS res
FROM (SELECT *,
[pNum] - ROW_NUMBER() OVER (PARTITION BY [id] ORDER BY [pNum]) AS Grp
FROM YourTable) T
GROUP BY id,
Grp
SQL Fiddle
Group by records in sequence
If your strings look like a series of characters then a number (e.g. "XXX001" or "XXXXXXXX001" or "ABC1"), then one way you can do this is by first extracting the numbers with PATINDEX
, then using LEAD()
and LAG()
to find the start and end of the patterns. Then use a ROW_NUMBER()
to join the corresponding start and end and do a count of the rows in between the start and end.
For example:
DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));
INSERT @T VALUES
(1, 'sam001', 'sam005'),
(2, 'sam006', 'sam008'),
(3, 'sam009', 'sam014'),
(4, 'sam018', 'sam024'),
(5, 'sam030', 'sam035'),
(6, 'sam036', 'sam040'),
(7, 'wazza01', 'wazza2'),
(8, 'wazza03', 'wazza000005'),
(9, 'wazza09', 'wazza12'),
(10, 'sam041', 'sam42');
WITH CTE AS (
SELECT *,
StartOfSequence = CASE WHEN LAG(EndNum) OVER (PARTITION BY Prefix ORDER BY ID) IS NULL OR StartNum - LAG(EndNum) OVER (PARTITION BY Prefix ORDER BY ID) > 1 THEN 1 ELSE 0 END,
EndOfSequence = CASE WHEN LEAD(StartNum) OVER (PARTITION BY Prefix ORDER BY ID) IS NULL OR LEAD(StartNum) OVER (PARTITION BY Prefix ORDER BY ID) - EndNum > 1 THEN 1 ELSE 0 END
FROM (
SELECT ID,
Start,
[End],
StartNum = CAST(SUBSTRING(Start, PATINDEX('%[0-9]%', Start), LEN(Start)) AS INT),
EndNum = CAST(SUBSTRING([End], PATINDEX('%[0-9]%', [End]), LEN([End])) AS INT),
Prefix = SUBSTRING(Start, 1, PATINDEX('%[0-9]%', Start) - 1)
FROM @T
) AS T
)
SELECT C1.Start, C2.[End], Total = (SELECT COUNT(*) FROM CTE WHERE ID >= C1.ID AND ID <= C2.ID AND Prefix = C1.Prefix)
FROM (
SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
FROM CTE
WHERE StartOfSequence = 1
) AS C1
JOIN (
SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
FROM CTE
WHERE EndOfSequence = 1
) AS C2 ON C1.RN = C2.RN
AND C1.Prefix = C2.Prefix;
Or using a modification of @Pரதீப்'s suggestion below to include the prefix,
DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));
INSERT @T VALUES
(1, 'sam001', 'sam005'),
(2, 'sam006', 'sam008'),
(3, 'sam009', 'sam014'),
(4, 'sam018', 'sam024'),
(5, 'sam030', 'sam035'),
(6, 'sam036', 'sam040'),
(7, 'wazza01', 'wazza2'),
(8, 'wazza03', 'wazza000005'),
(9, 'wazza09', 'wazza12'),
(10, 'sam041', 'sam42');
WITH CTE AS (
SELECT *,
prevEndNum = LAG(EndNum) OVER (PARTITION BY Prefix ORDER BY ID)
FROM (
SELECT ID,
Start,
[End],
StartNum = CAST(SUBSTRING(Start, PATINDEX('%[0-9]%', Start), LEN(Start)) AS INT),
EndNum = CAST(SUBSTRING([End], PATINDEX('%[0-9]%', [End]), LEN([End])) AS INT),
Prefix = SUBSTRING(Start, 1, PATINDEX('%[0-9]%', Start) - 1)
FROM @T
) AS T
)
SELECT DISTINCT Prefix,
First_value(start) OVER(PARTITION BY Prefix, grp ORDER BY id),
First_value([end]) OVER(PARTITION BY Prefix, grp ORDER BY id DESC),
Count(*) OVER( partition BY grp)
FROM (SELECT grp = Sum(CASE WHEN startnum = prevendnum + 1 THEN 0 ELSE 1 END)
OVER(PARTITION BY Prefix ORDER BY id),*
FROM CTE)a
Note: If the IDs were all out of order for some reason, you could swap all the ORDER BY ID
parts with ORDER BY StartNum
or something like that, if necessary.
EDIT FOR SQL Server 2005:
DECLARE @T TABLE (ID INT, Start VARCHAR(20), [End] VARCHAR(20));
INSERT @T VALUES
(1, 'sam001', 'sam005'),
(2, 'sam006', 'sam008'),
(3, 'sam009', 'sam014'),
(4, 'sam018', 'sam024'),
(5, 'sam030', 'sam035'),
(6, 'sam036', 'sam040'),
(7, 'wazza01', 'wazza2'),
(8, 'wazza03', 'wazza000005'),
(9, 'wazza09', 'wazza12'),
(10, 'sam041', 'sam42');
WITH CTE1 AS (
SELECT ID,
Start,
[End],
StartNum = CAST(SUBSTRING(Start, PATINDEX('%[0-9]%', Start), LEN(Start)) AS INT),
EndNum = CAST(SUBSTRING([End], PATINDEX('%[0-9]%', [End]), LEN([End])) AS INT),
Prefix = SUBSTRING(Start, 1, PATINDEX('%[0-9]%', Start) - 1)
FROM @T
),
CTE2 AS (
SELECT C.*,
StartOfSequence = CASE WHEN PrevRow.EndNum IS NULL OR C.StartNum - PrevRow.EndNum > 1 THEN 1 ELSE 0 END,
EndOfSequence = CASE WHEN NextRow.StartNum IS NULL OR Nextrow.StartNum - C.EndNum > 1 THEN 1 ELSE 0 END
FROM CTE1 AS C
OUTER APPLY (SELECT TOP 1 StartNum FROM CTE1 WHERE Prefix = C.Prefix AND ID > C.ID ORDER BY ID) NextRow
OUTER APPLY (SELECT TOP 1 EndNum FROM CTE1 WHERE Prefix = C.Prefix AND ID < C.ID ORDER BY ID DESC) PrevRow
)
SELECT C1.Start, C2.[End], Total = (SELECT COUNT(*) FROM CTE2 WHERE ID >= C1.ID AND ID <= C2.ID AND Prefix = C1.Prefix)
FROM (
SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
FROM CTE2
WHERE StartOfSequence = 1
) AS C1
JOIN (
SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Prefix ORDER BY ID)
FROM CTE2
WHERE EndOfSequence = 1
) AS C2 ON C1.RN = C2.RN
AND C1.Prefix = C2.Prefix;
Grouping rows and adding sequence numbers in SQL SELECT statement without modifying table
This is what row_number()
does:
select t.*,
row_number() over (partition by user order by date) as seqnum
from t;
Related Topics
Get Every Hour for a Time Range
From Keyword Not Found Where Expected (Oracle SQL)
Transpose Rows into Columns in SQL Server 2008 R2
SQL Server Foreign Key to Multiple Tables
Pyodbc Execute Command Not Accepting? Parameters Correctly
Handling Unicode Characters That Aren't Displayed Correctly in SQL Query
Select Top Rows Until Value in Specific Column Has Appeared Twice
SQL Server 2008 - Conditional Query
Oracle 12C - Select String After Last Occurrence of a Character
SQL Server - Group Records by N Minutes Interval
SQL Server:Pivot with Custom Column Names
Passing SQL Stored Procedure Entirety of Where Clause