SQL Group by Only Rows Which Are in Sequence

sql group by only rows which are in sequence

This is known as the "islands" problem. Using Itzik Ben Gan's approach:

;WITH YourTable AS
(
SELECT 1 AS N, 'A' AS C UNION ALL
SELECT 2 AS N, 'A' AS C UNION ALL
SELECT 3 AS N, 'A' AS C UNION ALL
SELECT 4 AS N, 'B' AS C UNION ALL
SELECT 5 AS N, 'B' AS C UNION ALL
SELECT 6 AS N, 'B' AS C UNION ALL
SELECT 7 AS N, 'A' AS C UNION ALL
SELECT 8 AS N, 'A' AS C
),
     T
     AS (SELECT N,
                C,
                DENSE_RANK() OVER (ORDER BY N) - 
                DENSE_RANK() OVER (PARTITION BY C ORDER BY N) AS Grp
         FROM   YourTable)
SELECT COUNT(*),
       C
FROM   T
GROUP  BY C,
          Grp 
ORDER BY MIN(N)

Group by rows which are in sequence

One way to deal with such a gaps-and-islands problem is to calculate a ranking for the gaps.

Then group also on that ranking.

SELECT PASSENGER, CITY
, COUNT(*) AS "Count" 
-- , MIN("DATE") AS StartDate
-- , MAX("DATE") AS EndDate
FROM (
  SELECT q1.*
  , SUM(gap) OVER (PARTITION BY PASSENGER ORDER BY "DATE") as Rnk
  FROM (
    SELECT PASSENGER, CITY, "DATE"
    , CASE
      WHEN 1 = TRUNC("DATE")
             - TRUNC(LAG("DATE") 
                     OVER (PARTITION BY PASSENGER, CITY ORDER BY "DATE")) 
      THEN 0 ELSE 1 END as gap
    FROM table_name t
  ) q1
) q2
GROUP BY PASSENGER, CITY, Rnk
ORDER BY MIN("DATE"), PASSENGER

PASSENGER	CITY	Count
43	NEW YORK	3
43	LONDON	1
44	CHICAGO	1
44	ROME	1
44	CHICAGO	2

How to group the same values which is in sequence order

This is a gaps and islands problem. Here is one way to solve it using lag() and a cumulative sum():

select
    min(num) num,
    count(*) count_num
from (
    select
        t.*,
        sum(case when num = lag_num then 0 else 1 end) over(order by id) grp
    from (
        select 
            t.*,
            lag(num) over(order by id) lag_num
        from #temp t
    ) t
) t
group by grp

Demo on DB Fiddlde:


num | count_num
--: | --------:
  1 |         3
  2 |         1
  1 |         1
  2 |         2
  3 |         3

Get top 1 row of each group

;WITH cte AS
(
   SELECT *,
         ROW_NUMBER() OVER (PARTITION BY DocumentID ORDER BY DateCreated DESC) AS rn
   FROM DocumentStatusLogs
)
SELECT *
FROM cte
WHERE rn = 1

If you expect 2 entries per day, then this will arbitrarily pick one. To get both entries for a day, use DENSE_RANK instead

As for normalised or not, it depends if you want to:

maintain status in 2 places
preserve status history
...

As it stands, you preserve status history. If you want latest status in the parent table too (which is denormalisation) you'd need a trigger to maintain "status" in the parent. or drop this status history table.

SQL Query for Grouping the results based on sequence

WITH numbered AS (
  SELECT
    ID, Seq, Amt,
    SeqGroup = ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Seq) - Seq
  FROM atable
)
SELECT
  ID,
  Start = MIN(Seq),
  [End] = MAX(Seq),
  TotalAmt = SUM(Amt)
FROM numbered
GROUP BY ID, SeqGroup
ORDER BY ID, Start
;

Group by numbers that are in sequence

create table #temp
(
    IDUnique int Identity(1,1),
    ID int, 
    grp int
)


Insert into #temp(ID) Values(1)
Insert into #temp(ID) Values(36)
Insert into #temp(ID) Values(37)
Insert into #temp(ID) Values(38)
Insert into #temp(ID) Values(50)
Insert into #temp(ID) Values(51)

declare @IDUnique int
declare @PreviousUnique int
declare @ID int
declare @grp int
declare @Previous int
declare @Row int

DECLARE @getAccountID CURSOR SET @getAccountID = CURSOR FOR SELECT Row_Number() Over(Order by IDUnique) Row, IDUnique, ID  From #temp
OPEN @getAccountID
FETCH NEXT FROM @getAccountID INTO @Row, @IDUnique, @ID 
WHILE @@FETCH_STATUS = 0
BEGIN
    IF(@Row = 1)
    Begin
        update #temp set grp = 1 Where IDUnique = @IDUnique
        set @Previous = @ID
        set @grp = 1
    End
    Else If (@Previous + 1 = @ID)
    Begin
        update #temp set grp = @grp Where IDUnique = @IDUnique
        set @Previous = @ID
    End
    Else
    Begin
        set @Previous = @ID
        set @grp = @grp + 1
        update #temp set grp = @grp Where IDUnique = @IDUnique
    End
    FETCH NEXT FROM @getAccountID INTO @Row, @IDUnique, @ID
END
CLOSE @getAccountID
DEALLOCATE @getAccountID

Select * from #temp
Drop Table #temp

sql query to return single record per group id in sequence

You can use row_number() in the order by:

select t.*
from t
order by row_number() over (partition by agency_id order by person),
         agency_id;

The second key is so the rows are in the same order in each group.

SQL Count of rows in a group in a sequence

The key here is to create a sequence id you can use in a window function to get the count. You won't be able to do it in one query because window functions can't be combined, but you can pull it off with a subquery or CTE.

To determine the sequence number for a row, you need to count the number of times the group key has changed in the preceding rows. So to determine the changes, create an inner query that checks if the current group key is different from the previous by using the lag window function. Use a case statement that results in 1 or 0 depending on if the lagged value is different from the current. The outer query then just has to sum up the values for all rows preceding up to the current.

Once you have the sequence number, you can use a count window function to count all the rows with matching numbers.

WITH src AS ( -- cte to mimic table.
    SELECT * 
    FROM (VALUES 
        (1, N'00001.000001', N'00001.000001', 2),
        /* ... test data ... */
        (10, N'00003.000001', N'00003.000001', 1)
    ) [src] ( [Id],[MinCode],[MaxCode],[ExpectedResult] )
)
SELECT src.Id, MinCode, MaxCode, ExpectedResult
    , COUNT(1) OVER (PARTITION BY seq.SequenceId) [Result]
FROM src 
INNER JOIN (
    SELECT x.Id, SUM(x.IsNew) OVER (ORDER BY Id ROWS UNBOUNDED PRECEDING) [SequenceId]
    FROM (
        SELECT Id, CASE WHEN LAG(MinCode) OVER (ORDER BY Id) <> MinCode THEN 1 ELSE 0 END [IsNew]
        FROM src 
    ) x
) seq ON seq.Id = src.Id
ORDER BY Id

Group a sequence of rows by their first value in SQL

You can use the window function sum() to create the groups and then aggregate:

select min(id) id, name, sum(metric) metric
from (
  select *, sum((key = 'a')::int) over (partition by name order by id) grp 
  from dataset
) t
group by name, grp
order by id

See the demo.

Results:

> id | name  | metric
> -: | :---- | -----:
>  1 | alice |      2
>  4 | alice |      1
>  8 | bob   |      2
> 10 | bob   |      0

SQL Group by Only Rows Which Are in Sequence