How to Select Only the First Rows for Each Unique Value of a Column

How to select only the first rows for each unique value of a column?

A very simple answer if you say you don't care which address is used.

-- One row per CName. MIN() is used only to pick a single, deterministic
-- AddressLine (the smallest in the column's sort order) when any one will do.
SELECT
    CName,
    MIN(AddressLine)
FROM MyTable
GROUP BY CName

If you want the first row according to, say, an "inserted" column, then you need a different query:

-- Return each CName together with the AddressLine of its earliest-inserted row.
-- The derived table finds the minimum Inserted value per CName; joining it
-- back to MyTable recovers the remaining columns of that row.
-- Note: if several rows of one CName share the same minimum Inserted value,
-- all of them are returned.
SELECT
    M.CName,
    M.AddressLine
FROM
    (
        SELECT
            CName,
            -- Named FirstInserted rather than First: FIRST is an SQL keyword
            -- (FETCH FIRST ...) and is best avoided as an identifier.
            MIN(Inserted) AS FirstInserted
        FROM MyTable
        GROUP BY CName
    ) foo
INNER JOIN MyTable M
    ON foo.CName = M.CName
    AND foo.FirstInserted = M.Inserted

Get the first row of each group of unique values in another column

Use groupby + first:

# Collapse df to one row per distinct col_B value. as_index=False keeps col_B
# as an ordinary column instead of turning it into the index.
# Note: GroupBy.first() takes the first non-null value of each column
# independently, so with missing data a result row can combine values that
# come from different source rows.
firsts = df.groupby('col_B', as_index=False).first()

Output:

>>> firsts
col_B col_A
0 x 1
1 xx 2
2 y 4

If the order of the columns is important:

firsts = df.loc[df.groupby('col_B', as_index=False).first().index]

Output:

>>> firsts
col_A col_B
0 1 x
1 2 xx
2 3 xx

Extract all rows containing first value for each unique value of another column

We can do this with dplyr

library(dplyr)

# Within each ID, keep every row whose yr equals the yr of that ID's first row.
test %>%
  group_by(ID) %>%
  filter(yr == first(yr))
# ID yr
# <fctr> <dbl>
#1 54V 1
#2 54V 1
#3 54V 1
#4 56V 2
#5 56V 2
#6 59V 1

Or using data.table

library(data.table)

# setDT() converts `test` to a data.table by reference. For each ID, the
# group's rows (.SD) are subset to those whose yr matches the group's first yr.
setDT(test)[, .SD[yr == yr[1L]], by = ID]

Or using base R

# ave() applies the function within each ID group and returns a vector aligned
# with the rows of `test`, flagging rows whose yr equals the group's first yr.
# as.logical() turns that vector into the row mask used to subset `test`
# (presumably needed because ave() stores the comparison result in yr's own
# numeric type, i.e. as 1/0 -- confirm against the ave() documentation).
test[with(test, as.logical(ave(yr, ID, FUN = function(x) x==x[1L]))),]

SQL - How to use GROUP BY to select 1 row for each distinct value

As JustinStolle suggested, the solution is to use aggregate functions together with a GROUP BY:

-- Per-case report for cases whose discharged_date falls within the
-- ##START## .. ##END## window (template placeholders substituted before the
-- query runs). One row per case number / discharge date; the latest date of
-- each checklist step and note topic is pivoted into its own column with
-- MAX(CASE ...).
SELECT
    -- Scalar subquery: the same overall count is repeated on every row.
    (
        SELECT COUNT(DISTINCT cases.casenum)
        FROM cases
        INNER JOIN user_case_data
            ON cases.casenum = user_case_data.casenum
        WHERE user_case_data.discharged_date >= '##START##'
            AND user_case_data.discharged_date <= '##END##'
    ) AS "Total Lost Files",
    cases.casenum AS "Case Number",
    user_case_data.discharged_date AS "Discharged Date",

    MAX(CASE WHEN case_notes.topic LIKE 'LOS Case Status Update' THEN case_notes.note_date ELSE NULL END)
        AS GENDOC_31_Mailed,

    MAX(CASE WHEN case_checklist.code = '101' THEN case_checklist.due_date ELSE NULL END)
        AS ADVISED_ATTORNEY,

    MAX(CASE WHEN case_notes.topic LIKE 'LOS Updated Lein Ltr' THEN case_notes.note_date ELSE NULL END)
        AS "Sent Updated Lien Ltr",

    MAX(CASE WHEN case_checklist.code = '109' THEN case_checklist.due_date ELSE NULL END)
        AS "Time Allocation Completed",

    MAX(CASE WHEN case_checklist.code = '110' THEN case_checklist.due_date ELSE NULL END)
        AS "Attorney Signed Affidavit",

    MAX(CASE WHEN case_checklist.code = '111' THEN case_checklist.due_date ELSE NULL END)
        AS "Lien Letters Sent",

    MAX(CASE WHEN case_checklist.code = '112' THEN case_checklist.due_date ELSE NULL END)
        AS "Sent Lien to Counsel",

    MAX(CASE WHEN case_checklist.code = '113' THEN case_checklist.due_date ELSE NULL END)
        AS "Received Costs and Transferred"

FROM cases
LEFT JOIN case_checklist
    ON cases.casenum = case_checklist.case_id
LEFT JOIN user_case_data
    ON case_checklist.case_id = user_case_data.casenum
-- The join must admit BOTH note topics pivoted in the SELECT list. Filtering
-- to 'LOS Case Status Update' alone left "Sent Updated Lien Ltr" always NULL.
LEFT JOIN case_notes
    ON user_case_data.casenum = case_notes.case_num
    AND (
        case_notes.topic LIKE 'LOS Case Status Update'
        OR case_notes.topic LIKE 'LOS Updated Lein Ltr'
    )
-- NOTE(review): this filter rejects rows where user_case_data is NULL, so the
-- LEFT JOINs to case_checklist and user_case_data behave like inner joins and
-- cases without checklist rows are dropped -- confirm that is intended.
WHERE user_case_data.discharged_date >= '##START##'
    AND user_case_data.discharged_date <= '##END##'
GROUP BY
    cases.casenum,
    user_case_data.discharged_date
ORDER BY user_case_data.discharged_date ASC;

Select rows that contain the first N unique values in a certain column

You can use DENSE_RANK to get the required output, as shown below:

DEMO HERE

 -- Keep every row whose [X1] is among the three smallest distinct values:
 -- DENSE_RANK gives tied [X1] values the same rank and leaves no gaps.
 SELECT A.*
 FROM (
     SELECT
         *,
         DENSE_RANK() OVER (ORDER BY [X1]) AS RN
     FROM your_table
 ) A
 WHERE A.RN <= 3

First row of SELECT DISTINCT

Try this:

-- Number the rows of each data value by commit time, newest first, then keep
-- only the top-ranked row per data value.
WITH ranked AS (
    SELECT
        r.data,
        c.committed_at,
        c.hash,
        timestamp,
        ROW_NUMBER() OVER (
            PARTITION BY r.data
            ORDER BY c.committed_at DESC
        ) AS ranking
    FROM results r
    INNER JOIN commits c
        ON r.commit_id = c.id
    WHERE r.repository_id = 65
        AND data_type = 'data_model'
)
SELECT
    data,
    committed_at,
    hash
FROM ranked
WHERE ranking = 1
ORDER BY timestamp DESC


Related Topics



Leave a reply



Submit