Home
Manage Your Code
Snippet: Removing duplicate rows from a table with identity column (SQL)
Title: Removing duplicate rows from a table with identity column Language: SQL
Description: Removes duplicates from a table, leaves the original rows being ones with highest PK value Views: 421
Author: Szymon Kowalsky Date Added: 9/3/2007
Copy Code  
--CREATE TABLE #TableWithDuplicates (PK int PRIMARY KEY IDENTITY, Col1 nvarchar(100), Col2 nvarchar(100))
--INSERT INTO #TableWithDuplicates  VALUES ('a', 'a')
--INSERT INTO #TableWithDuplicates  VALUES ('b', 'b')
--INSERT INTO #TableWithDuplicates  VALUES ('a', 'a')
--INSERT INTO #TableWithDuplicates  VALUES ('c', 'c')
--SELECT * FROM #TableWithDuplicates  

--Show all the duplicates (including those to be left in the table)
SELECT *
FROM #TableWithDuplicates 
WHERE EXISTS (
	SELECT 1
	FROM #TableWithDuplicates AS twd
	WHERE twd.Col1 = #TableWithDuplicates.Col1 AND twd.Col2 = #TableWithDuplicates.Col2
		AND twd.PK <> #TableWithDuplicates.PK)

--Delete all duplicates - leave only the ones with the hightes value of PK
DELETE FROM #TableWithDuplicates 
WHERE EXISTS (
	SELECT 1
	FROM #TableWithDuplicates AS twd
	WHERE twd.Col1 = #TableWithDuplicates.Col1 AND twd.Col2 = #TableWithDuplicates.Col2
		AND twd.PK > #TableWithDuplicates.PK)