SQL Group by Only Rows Which Are in Sequence

sql group by only rows which are in sequence

This is known as the "islands" problem. Using Itzik Ben Gan's approach:

;WITH YourTable AS
(
SELECT 1 AS N, 'A' AS C UNION ALL
SELECT 2 AS N, 'A' AS C UNION ALL
SELECT 3 AS N, 'A' AS C UNION ALL
SELECT 4 AS N, 'B' AS C UNION ALL
SELECT 5 AS N, 'B' AS C UNION ALL
SELECT 6 AS N, 'B' AS C UNION ALL
SELECT 7 AS N, 'A' AS C UNION ALL
SELECT 8 AS N, 'A' AS C
),
T
AS (SELECT N,
C,
DENSE_RANK() OVER (ORDER BY N) -
DENSE_RANK() OVER (PARTITION BY C ORDER BY N) AS Grp
FROM YourTable)
SELECT COUNT(*),
C
FROM T
GROUP BY C,
Grp
ORDER BY MIN(N)

Group by rows which are in sequence

One way to deal with such a gaps-and-islands problem is to calculate a ranking for the gaps.

Then group also on that ranking.

SELECT PASSENGER, CITY
, COUNT(*) AS "Count"
-- , MIN("DATE") AS StartDate
-- , MAX("DATE") AS EndDate
FROM (
SELECT q1.*
, SUM(gap) OVER (PARTITION BY PASSENGER ORDER BY "DATE") as Rnk
FROM (
SELECT PASSENGER, CITY, "DATE"
, CASE
WHEN 1 = TRUNC("DATE")
- TRUNC(LAG("DATE")
OVER (PARTITION BY PASSENGER, CITY ORDER BY "DATE"))
THEN 0 ELSE 1 END as gap
FROM table_name t
) q1
) q2
GROUP BY PASSENGER, CITY, Rnk
ORDER BY MIN("DATE"), PASSENGER




































PASSENGERCITYCount
43NEW YORK3
43LONDON1
44CHICAGO1
44ROME1
44CHICAGO2

How to group the same values which is in sequence order

This is a gaps and islands problem. Here is one way to solve it using lag() and a cumulative sum():

select
min(num) num,
count(*) count_num
from (
select
t.*,
sum(case when num = lag_num then 0 else 1 end) over(order by id) grp
from (
select
t.*,
lag(num) over(order by id) lag_num
from #temp t
) t
) t
group by grp

Demo on DB Fiddlde:


num | count_num
--: | --------:
1 | 3
2 | 1
1 | 1
2 | 2
3 | 3

Get top 1 row of each group


;WITH cte AS
(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY DocumentID ORDER BY DateCreated DESC) AS rn
FROM DocumentStatusLogs
)
SELECT *
FROM cte
WHERE rn = 1

If you expect 2 entries per day, then this will arbitrarily pick one. To get both entries for a day, use DENSE_RANK instead

As for normalised or not, it depends if you want to:

  • maintain status in 2 places
  • preserve status history
  • ...

As it stands, you preserve status history. If you want latest status in the parent table too (which is denormalisation) you'd need a trigger to maintain "status" in the parent. or drop this status history table.

SQL Query for Grouping the results based on sequence


WITH numbered AS (
SELECT
ID, Seq, Amt,
SeqGroup = ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Seq) - Seq
FROM atable
)
SELECT
ID,
Start = MIN(Seq),
[End] = MAX(Seq),
TotalAmt = SUM(Amt)
FROM numbered
GROUP BY ID, SeqGroup
ORDER BY ID, Start
;

Group by numbers that are in sequence


create table #temp
(
IDUnique int Identity(1,1),
ID int,
grp int
)


Insert into #temp(ID) Values(1)
Insert into #temp(ID) Values(36)
Insert into #temp(ID) Values(37)
Insert into #temp(ID) Values(38)
Insert into #temp(ID) Values(50)
Insert into #temp(ID) Values(51)

declare @IDUnique int
declare @PreviousUnique int
declare @ID int
declare @grp int
declare @Previous int
declare @Row int

DECLARE @getAccountID CURSOR SET @getAccountID = CURSOR FOR SELECT Row_Number() Over(Order by IDUnique) Row, IDUnique, ID From #temp
OPEN @getAccountID
FETCH NEXT FROM @getAccountID INTO @Row, @IDUnique, @ID
WHILE @@FETCH_STATUS = 0
BEGIN
IF(@Row = 1)
Begin
update #temp set grp = 1 Where IDUnique = @IDUnique
set @Previous = @ID
set @grp = 1
End
Else If (@Previous + 1 = @ID)
Begin
update #temp set grp = @grp Where IDUnique = @IDUnique
set @Previous = @ID
End
Else
Begin
set @Previous = @ID
set @grp = @grp + 1
update #temp set grp = @grp Where IDUnique = @IDUnique
End
FETCH NEXT FROM @getAccountID INTO @Row, @IDUnique, @ID
END
CLOSE @getAccountID
DEALLOCATE @getAccountID

Select * from #temp
Drop Table #temp

sql query to return single record per group id in sequence

You can use row_number() in the order by:

select t.*
from t
order by row_number() over (partition by agency_id order by person),
agency_id;

The second key is so the rows are in the same order in each group.

SQL Count of rows in a group in a sequence

The key here is to create a sequence id you can use in a window function to get the count. You won't be able to do it in one query because window functions can't be combined, but you can pull it off with a subquery or CTE.

To determine the sequence number for a row, you need to count the number of times the group key has changed in the preceding rows. So to determine the changes, create an inner query that checks if the current group key is different from the previous by using the lag window function. Use a case statement that results in 1 or 0 depending on if the lagged value is different from the current. The outer query then just has to sum up the values for all rows preceding up to the current.

Once you have the sequence number, you can use a count window function to count all the rows with matching numbers.

WITH src AS ( -- cte to mimic table.
SELECT *
FROM (VALUES
(1, N'00001.000001', N'00001.000001', 2),
/* ... test data ... */
(10, N'00003.000001', N'00003.000001', 1)
) [src] ( [Id],[MinCode],[MaxCode],[ExpectedResult] )
)
SELECT src.Id, MinCode, MaxCode, ExpectedResult
, COUNT(1) OVER (PARTITION BY seq.SequenceId) [Result]
FROM src
INNER JOIN (
SELECT x.Id, SUM(x.IsNew) OVER (ORDER BY Id ROWS UNBOUNDED PRECEDING) [SequenceId]
FROM (
SELECT Id, CASE WHEN LAG(MinCode) OVER (ORDER BY Id) <> MinCode THEN 1 ELSE 0 END [IsNew]
FROM src
) x
) seq ON seq.Id = src.Id
ORDER BY Id

Group a sequence of rows by their first value in SQL

You can use the window function sum() to create the groups and then aggregate:

select min(id) id, name, sum(metric) metric
from (
select *, sum((key = 'a')::int) over (partition by name order by id) grp
from dataset
) t
group by name, grp
order by id

See the demo.

Results:

> id | name  | metric
> -: | :---- | -----:
> 1 | alice | 2
> 4 | alice | 1
> 8 | bob | 2
> 10 | bob | 0


Related Topics



Leave a reply



Submit