How can I find duplicate entries and delete the oldest ones in SQL?
Find duplicates and delete oldest one
Here is the Code
create table #Product (
ID int identity(1, 1) primary key,
Name varchar(800),
DateAdded datetime default getdate()
)
insert #Product(Name) select 'Chocolate'
insert #Product(Name,DateAdded) select 'Candy', GETDATE() + 1
insert #Product(Name,DateAdded) select 'Chocolate', GETDATE() + 5
select * from #Product
;with Ranked as (
select ID,
dense_rank()
over (partition by Name order by DateAdded desc) as DupeCount
from #Product P
)
delete R
from Ranked R
where R.DupeCount > 1
select * from #Product
TSQL - Deleting a duplicate oldest value
I like to use an updatable CTE for this:
with cte as (
select row_number() over(partition by userid order by logintim desc) rn
from mytable
)
delete from cte where rn > 1
For each userid
, this retains the row with the most recent logintim
and deletes the others (if any).
Oracle SQL query: Delete the oldest duplicate records from the table
You could use this query:
delete
from users
where (id, time) in
(select id, time
from (select id, time,
row_number() over (partition by id order by time desc) as rn,
from users) sub
where rn > 1)
It will delete all "duplicates" for a certain person, except the most recent one. The idea is that when you number the occurrences of a certain id, from recent to old, only the records numbered with 1 should be kept.
Pseudo-column ROWID
As guigui42 mentioned in comments below, the Oracle specific pseudo-column rowid
may give a further performance improvement. This would be certainly the case if you do not have an index starting with the id, time
fields:
delete
from users
where rowid in
(select rowid
from (select rowid,
row_number() over (partition by id order by time desc) as rn,
from users) sub
where rn > 1)
Delete duplicate data in SQL preserving the oldest entry
Declare @YourTable table (col1 varchar(25),col2 varchar(25))
Insert Into @YourTable values
('0010','1111'),
('0011','1112'),
('0012','1111')
;with cteBase as (
Select *,RowNr=Row_Number() over (Partition By Col2 Order By Col1 Desc) from @YourTable
)
Select * From cteBase where RowNr>1
-- Delete From cteBase where RowNr>1
-- Remove Select if satisfied with results
Record(s) to be Deleted
col1 col2 RowNr
0010 1111 2
Remove oldest duplicates and keep latest duplicate by time stamp
Of course you can use timestamp
column with ROW_NUMBER()
& you don't need to use recursion hint as your CTE
has not any recursion level.
;WITH Duplicates AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY ChannelName, SerialNumber, ReadingDate ORDER BY timestamp DESC) AS Rownumber
FROM [Staging].[UriData]
)
DELETE d
FROM Duplicates d
WHERE Rownumber > 1;
Delete duplicate records with oldest GUID
If you want to delete duplicate records but without a timestamp and GUID as Primary Key you can try to use %%physloc%%
.
%%physloc%%
is a Virtual Column. This column is undocumented. Hence you can use it at you own risk.
WITH cte
AS(SELECT ID, ROW_NUMBER() OVER(PARTITION BY CodeOne, CodeTwo
ORDER BY %%physloc%%) RN
FROM [InvoiceDatabase].[dbo].[LookUpCode])
DELETE FROM cte
WHERE RN > 1;
sqlfiddle
Note
But I would suggest you create a timestamp for the order.
Delete duplicate rows with older/newer date
You can use a where
clause in the delete
:
delete from t
where t.datetime > (select min(t2.datetime)
from t t2
where t2.id = t.id
);
If you don't actually want to delete the rows, but just run a query and not see the red ones:
select id, min(datetime)
from t
group by id;
Related Topics
Performance Value of Comb Guids
Insert Empty String into Int Column for SQL Server
Only Show Effective SQL String P6Spy
Preserve Parent-Child Relationships When Copying Hierarchical Data
Does Except Execute Faster Than a Join When the Table Columns Are the Same
How Can a Do a "Greatest-N-Per-Group" Query in Django
4 Byte Unsigned Int in SQL Server
H2 SQL Database - Insert If the Record Does Not Exist
MySQL Join/Group_Concat Second Table
Sqlite Get Name of Attached Databases
Are There Downsides to Using Prepared Statements
Update Columns with Null Values
Bigquery: How to Group and Count Rows Within Rolling Timestamp Window