The purpose of this code is to search the entire database for duplicate records and produce a script that a user could then run to delete all of the duplicates.
The stored procedure takes the following parameters:
@DiscludeTables varchar(max): a string of Table Names separated by semicolons. These tables are not included in the search for duplicate records and therefore will not be affected in any way by the resulting script when it is run.
@DiscludeColumns varchar(max): a string of Column Names separated by semicolons. These columns are not used to determine whether records are duplicates or not. Including any column here will effectively identify that records can have varying information within this column and still be identified as a duplicate.
@Output varchar(max): this is the string that the output will be passed to.
The output string defines the use database, a cte of duplicates and delete statement for each table in the database.
I've just started to get my hands dirty with SQL and could use some enlightenment on how to improve this script in any way.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [dbo].[sp_getDatabaseWideDuplicateDeleteScript] (@DiscludeTables varchar(max), @DiscludeColumns varchar(max), @Output varchar(max) output )
AS
--START SETUP DISCLUDE TABLES AND DISCLUDE COLUMNS
--***************************************************
--this section basically just splits the strings @discludetables and @discludecolumns by ';' occurance
-- and sticks the resulting strings in to tables @@discludecolumns and @@discludeTables
--Debug Variables
--DECLARE @DiscludeTables varchar(max)
--SET @DiscludeTables = '__MigrationHistory'
--DECLARE @DiscludeColumns varchar(max)
--SET @DiscludeColumns = 'Name'
--DECLARE @Output varchar(max)
DECLARE @pos INT
DECLARE @string varchar(max)
DECLARE @@DiscludeTables TABLE
(
tableName varchar(max)
)
DECLARE @@DiscludeColumns TABLE
(
columnName varchar(max)
)
DECLARE @stringToSplit varchar(max)
SET @stringToSplit = @DiscludeTables
WHILE CHARINDEX(';', @stringToSplit) > 0
BEGIN
SELECT @pos = CHARINDEX(';', @stringToSplit)
SELECT @string = SUBSTRING(@stringToSplit, 1, @pos-1)
INSERT INTO @@DiscludeTables
SELECT @string
SELECT @stringToSplit = SUBSTRING(@stringToSplit, @pos+1, LEN(@stringToSplit)-@pos)
END
IF @stringToSplit IS NOT NULL AND @stringToSplit != ''
BEGIN
INSERT INTO @@DiscludeTables
SELECT @stringToSplit
END
SET @stringToSplit = @DiscludeColumns
WHILE CHARINDEX(';', @stringToSplit) > 0
BEGIN
SELECT @pos = CHARINDEX(';', @stringToSplit)
SELECT @string = SUBSTRING(@stringToSplit, 1, @pos-1)
INSERT INTO @@DiscludeColumns
SELECT @string
SELECT @stringToSplit = SUBSTRING(@stringToSplit, @pos+1, LEN(@stringToSplit)-@pos)
END
IF @stringToSplit IS NOT NULL AND @stringToSplit != ''
BEGIN
INSERT INTO @@DiscludeColumns
SELECT @stringToSplit
END
SET @Output = ''
DECLARE @DBName varchar(max)
SET @DBName = DB_NAME()
SELECT * FROM @@DiscludeColumns
SELECT * FROM @@DiscludeTables
--**********************************************
--END SETUP DISCLUDE TABLES AND DISCLUDE COLUMNS
--LOOP THROUGH ALL TABLES IN DATABASE NOT IN DISCLUDE TABLES
--***********************************
DECLARE @TableName nvarchar(256), @ColumnName nvarchar(128)
SET @TableName = ''
WHILE @TableName IS NOT NULL
BEGIN
--GET THE CURRENT TABLE
SET @TableName =
(
SELECT MIN(QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME))
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
AND QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME) > @TableName
AND OBJECTPROPERTY(
OBJECT_ID(
QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME)
), 'IsMSShipped'
) = 0
AND TABLE_NAME NOT in (select tablename from @@DiscludeTables)
)
--GET THE PRIMARY KEY COLUMN FOR THIS TABLE IF ANY
--************************************************
DECLARE @PkColumnName NVARCHAR(128) =
(
SELECT top 1 ccu.COLUMN_NAME
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE ccu ON tc.CONSTRAINT_NAME = ccu.Constraint_name
WHERE tc.CONSTRAINT_TYPE = 'Primary Key' AND '[dbo].['+TC.TABLE_NAME+']' = @TableName
)
--LOOP THROUGH ALL COLUMNS NOT IN DISCLUDE COLUMNS
--************************************************
SET @ColumnName = ''
DECLARE @columns nvarchar(max) = ' '
WHILE @ColumnName IS NOT NULL
BEGIN
SET @ColumnName =
(
SELECT MIN(QUOTENAME(COLUMN_NAME))
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = PARSENAME(@TableName, 2)
AND TABLE_NAME = PARSENAME(@TableName, 1)
AND QUOTENAME(COLUMN_NAME) > @ColumnName
AND COLUMN_NAME NOT in (SELECT columnname FROM @@DiscludeColumns)
)
if @columnname != '['+@pkColumnName+']'
BEGIN
if @columns != ' '
begin
set @columns = @columns + ', '
END
set @columns = @columns + 't.'+@ColumnName
END
END
if @columns != ' '
BEGIN
DECLARE @SQLSTRING varchar(max) =
'
;WITH CTE_META AS (
SELECT [TableName] = ' + ''''+@tablename+''', t.*
, [Rank] = ROW_NUMBER() OVER(
PARTITION BY ' + @columns + '
ORDER BY ' + 't.['+@pkColumnName+']
) FROM ' + @tablename + ' t
)
DELETE FROM ' + @tablename + '
WHERE ' + @pkColumnName + ' IN (SELECT cte.'+@pkColumnName+' FROM CTE_META cte WHERE cte.Rank > 1)
--SELECT cte.*
--FROM CTE_META cte
--WHERE cte.RANK > 1
'
SET @Output = @Output + @SQLSTRING
END
END
--END OF LOOP THROUGH TABLES
--**************************
if @OUTPUT IS NOT NULL AND @OUTPUT != ''
BEGIN
SET @OUTPUT =
'use [' + @DBName + ']
' + @Output
END
PRINT @OUTPUT