Sql-server – Exclude Duplicates With Certain Value

duplicationquerysql server

I'm been beating my head up against a wall with this all day.

I have a SQL Server table called Accounts

CREATE TABLE [dbo].[Accounts](
    [IDNum] [INT] NULL,
    [FirstName] [VARCHAR](50) NULL,
    [LastName] [VARCHAR](50) NULL)


    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (123, N'Joe', N'McDaniels')
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (124, N'Mary', N'Cain')
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (125, N'John', N'Johnson')
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (126, N'Sue', N'Bird')
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (127, N'Michael', N'Bennett')
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (128, N'Larry ', N'Jackson')
    GO
 INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (128, 0, 0)
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (129, N'0', N'0')
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (130, N'Tanya', N'Hall')
    GO
 INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (130, 0, 0)
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (131, N'0', N'0')
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (132, N'Joe', N'Johnson')
    GO
    INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (133, N'Stephen', N'Cole')
    GO

I'm trying to eliminate the duplicate rows with 0 as the FirstName and LastName.

The non duplicate rows with zeros I want to keep. I want to eliminate only the rows that contain zeros that have a duplicate IDNum.

I can't add an a primary key column.

My first approach was to find the duplicates, put in table variable, filter the zero rows then union that back with the table excluding those ID numbers. Then I thought it has to be a better way. Any help?

End Result

enter image description here

Best Answer

If I understand your question correctly, how about something like this:

DROP TABLE IF EXISTS dbo.Accounts
go
CREATE TABLE [dbo].[Accounts](
    [IDNum] [INT] NULL,
    [FirstName] [VARCHAR](50) NULL,
    [LastName] [VARCHAR](50) NULL)


INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (123, N'Joe', N'McDaniels')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (124, N'Mary', N'Cain')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (125, N'John', N'Johnson')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (126, N'Sue', N'Bird')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (127, N'Michael', N'Bennett')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (128, N'Larry ', N'Jackson')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (128, 0, 0)
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (129, N'0', N'0')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (130, N'Tanya', N'Hall')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (130, 0, 0)
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (131, N'0', N'0')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (132, N'Joe', N'Johnson')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (133, N'Stephen', N'Cole')
GO

;WITH _dups
AS (
    SELECT IDNum
        ,count(*) cnt
    FROM accounts
    GROUP BY IDNum
    HAVING count(*) > 1
    )
--now join _cte to the real table to delete the one's with zeros in First and Last name
DELETE a
FROM accounts a
JOIN _dups d
    ON d.idnum = a.idnum
        AND a.firstname = '0'
        AND a.lastname = '0'

SELECT *
FROM accounts

OR

DROP TABLE IF EXISTS dbo.Accounts
go
CREATE TABLE [dbo].[Accounts](
    [IDNum] [INT] NULL,
    [FirstName] [VARCHAR](50) NULL,
    [LastName] [VARCHAR](50) NULL)


INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (123, N'Joe', N'McDaniels')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (124, N'Mary', N'Cain')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (125, N'John', N'Johnson')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (126, N'Sue', N'Bird')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (127, N'Michael', N'Bennett')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (128, N'Larry ', N'Jackson')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (128, 0, 0)
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (129, N'0', N'0')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (130, N'Tanya', N'Hall')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (130, 0, 0)
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (131, N'0', N'0')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (132, N'Joe', N'Johnson')
GO
INSERT [dbo].[Accounts] ([IDNum], [FirstName], [LastName]) VALUES (133, N'Stephen', N'Cole')
GO

DELETE a
FROM accounts a
WHERE FirstName = '0'
    AND LastName = '0'
    AND EXISTS (
        SELECT *
        FROM accounts
        WHERE IDNum = a.IDNum
            AND FirstName <> '0'
            AND LastName <> '0'
        )

SELECT *
FROM accounts

Both of these solutions produce the results you're looking for.

| IDNum | FirstName | LastName  |
|-------|-----------|-----------|
| 123   | Joe       | McDaniels |
| 124   | Mary      | Cain      |
| 125   | John      | Johnson   |
| 126   | Sue       | Bird      |
| 127   | Michael   | Bennett   |
| 128   | Larry     | Jackson   |
| 129   | 0         | 0         |
| 130   | Tanya     | Hall      |
| 131   | 0         | 0         |
| 132   | Joe       | Johnson   |
| 133   | Stephen   | Cole      |

UPDATE 8/2/18 - Excluding records instead of deleting them.

Based on your comment

What about excluding those records from the resultset? So don't delete them just return the others., see if this gives you what you want.

;with ExcludeTheseRecords as 
(
select a.*
FROM accounts a
WHERE FirstName = '0'
    AND LastName = '0'
    AND EXISTS (
        SELECT *
        FROM accounts
        WHERE IDNum = a.IDNum
            AND FirstName <> '0'
            AND LastName <> '0'
        )
)

SELECT * from accounts 
except
select * from ExcludeTheseRecords