象山九九信息网

sqlserver CTE删除重复数据的实现方法

发布:2026-03-25 17:58 浏览:0
特别声明:本页信息由用户及第三方发布,真实性、合法性由发布人负责。详情请阅读九九信息网免责条款
详细信息

方法1:使用 ROW_NUMBER() 识别重复行

WITH CTE_Duplicates AS (
    SELECT 
        *,
        ROW_NUMBER() OVER (
            PARTITION BY column1, column2, column3  -- 根据这些列判断重复
            ORDER BY (SELECT NULL)  -- 或无特定顺序
        ) AS RowNum
    FROM YourTable
)
DELETE FROM CTE_Duplicates
WHERE RowNum > 1;

方法2:保留特定记录(如最新记录)

WITH CTE_Duplicates AS (
    SELECT 
        *,
        ROW_NUMBER() OVER (
            PARTITION BY column1, column2
            ORDER BY CreateDate DESC  -- 按创建日期倒序,保留最新的
        ) AS RowNum
    FROM YourTable
)
DELETE FROM CTE_Duplicates
WHERE RowNum > 1;

方法3:使用 RANK() 或 DENSE_RANK()

WITH CTE_Duplicates AS (
    SELECT 
        *,
        DENSE_RANK() OVER (
            PARTITION BY column1, column2
            ORDER BY column3
        ) AS RankNum
    FROM YourTable
)
DELETE FROM CTE_Duplicates
WHERE RankNum > 1;

方法4:删除完全重复的行(所有列都相同)

WITH CTE_Duplicates AS (
    SELECT 
        *,
        ROW_NUMBER() OVER (
            PARTITION BY column1, column2, column3, ...  -- 列出所有列
            ORDER BY (SELECT NULL)
        ) AS RowNum
    FROM YourTable
)
DELETE FROM CTE_Duplicates
WHERE RowNum > 1;

完整示例

-- 1. 先查看重复数据
WITH CTE_Check AS (
    SELECT 
        ID, Name, Email,
        ROW_NUMBER() OVER (
            PARTITION BY Email
            ORDER BY CreateDate DESC
        ) AS RowNum,
        COUNT(*) OVER (PARTITION BY Email) AS DuplicateCount
    FROM Users
)
SELECT * FROM CTE_Check
WHERE DuplicateCount > 1
ORDER BY Email, RowNum;

-- 2. 删除重复数据(保留每个Email的最新记录)
WITH CTE_Delete AS (
    SELECT 
        *,
        ROW_NUMBER() OVER (
            PARTITION BY Email
            ORDER BY CreateDate DESC
        ) AS RowNum
    FROM Users
)
DELETE FROM CTE_Delete
WHERE RowNum > 1;

-- 3. 验证删除结果
SELECT Email, COUNT(*) 
FROM Users 
GROUP BY Email 
HAVING COUNT(*) > 1;

方法5:使用临时表辅助(适合大量数据)

-- 第一步:将需要保留的数据插入临时表
SELECT DISTINCT column1, column2, column3
INTO #TempTable
FROM YourTable;

-- 第二步:清空原表
TRUNCATE TABLE YourTable;

-- 第三步:将去重后的数据插回
INSERT INTO YourTable (column1, column2, column3)
SELECT column1, column2, column3
FROM #TempTable;

-- 第四步:清理临时表
DROP TABLE #TempTable;

注意事项

备份数据:在执行删除操作前,务必备份数据 事务处理:使用事务确保数据一致性
BEGIN TRANSACTION;
-- 删除操作
COMMIT TRANSACTION;
-- 或 ROLLBACK TRANSACTION; 如果出错
性能考虑:对于大表,考虑添加索引或分批次处理 唯一标识:如果有主键,可以基于主键删除:
WITH CTE AS (
    SELECT 
        ID,
        ROW_NUMBER() OVER (PARTITION BY Email ORDER BY CreateDate) AS RN
    FROM Users
)
DELETE FROM Users
WHERE ID IN (SELECT ID FROM CTE WHERE RN > 1);

推荐的最佳实践

-- 1. 先查询确认要删除的数据
SELECT COUNT(*) AS ToDeleteCount
FROM (
    SELECT 
        ROW_NUMBER() OVER (PARTITION BY KeyColumns ORDER BY SortColumn) AS RN
    FROM YourTable
) t
WHERE t.RN > 1;

-- 2. 在事务中执行删除
BEGIN TRY
    BEGIN TRANSACTION;

    WITH CTE AS (
        SELECT 
            *,
            ROW_NUMBER() OVER (
                PARTITION BY KeyColumns 
                ORDER BY SortColumn DESC
            ) AS RN
        FROM YourTable
    )
    DELETE FROM CTE WHERE RN > 1;

    COMMIT TRANSACTION;
    PRINT '删除完成';
END TRY
BEGIN CATCH
    ROLLBACK TRANSACTION;
    PRINT '删除失败: ' + ERROR_MESSAGE();
END CATCH

选择哪种方法取决于具体需求:

相关推荐