SQL Server – How to Find Duplicate Subscriptions

sql server

I have these table:

create table Customers
(
    Id bigint identity(1,1) primary key not null,
    Name varchar(100) not null
)

create table CustomerActions
(
    Id bigint identity(1,1) primary key not null,
    [Date] datetime not null,
    CustomerId bigint not null
    [ActionId] bigint not null
)

create table Actions
(
    Id bigint identity(1,1) primary key not null,
    Name varchar(100) not null
)

and I have this view:

create view CustomerActionsView
as
select 
    CustomerActions.*,
    Customers.Name as CustomerName,
    Action.Name as ActionName
from CustomerActions
inner join Actions
on CustomerActions.ActionId = Actions.Id
inner join Customers
on CustomerActions.CustomerId = Customers.Id

I have two actions: Subscription, and Cancellation.

So basically this is a simple design to store when a customer subscribes, and when he cancells.

A sample data might look like this:

[Date]-CustomerName-Action

"2018-04-24 17:44:20.000"-Rose-Subscribed
"2019-01-11 10:37:45.000"-Rose-Cancelled

Now I'm stuck at finding customers who have subscribed twice (or more than once). That is, they've subscribed when they were already subscribed. Or they've cancelled when they were already cancelled.

In other words, I'm trying to find consecutive actions, ordered by [Date] descending.

I've tried this:

select *
from 
(
    select [Date], CustomerName, ActionName, row_number() over (partition by CustomerName, ActionName order by [Date] desc) as RowNumber
    from CustomerActionsView
) as Temp
where RowNumber > 1

If I exclude ActionName then row_number() won't be useful. If I include ActionName then it preceeds [Date] column.

I'm stuck at this point. How can I get repetitive consecutive actions?

Update: SQL Server version using select @@VERSION returns this value:

Microsoft SQL Server 2014 – 12.0.2000.8 (X64) Feb 20 2014 20:04:26 Copyright (c) Microsoft Corporation Express Edition (64-bit) on Windows NT 6.3 (Build 9600: ) (Hypervisor)

Best Answer

You can use LEAD function that will 'give' access to the next record value

LEAD(ActionID,1,NULL) OVER(PARTITION BY CustomerID ORDER BY Date ASC) as next_ActionID - this is the key point here. For more details , please read on BOL https://docs.microsoft.com/en-us/sql/t-sql/functions/lead-transact-sql?view=sql-server-2017

Test values;

INSERT [dbo].[Actions] ([Name]) 
    VALUES ( N'Subscription')
            ,(N'Cancellation')

INSERT [dbo].[Customers] ([Name]) 
    VALUES ( N'test')

INSERT [dbo].[CustomerActions] ([Date], [CustomerId], [ActionId]) 
    VALUES ( CAST(N'2018-04-24T17:44:20.000' AS DateTime), 1, 1)
    ,(CAST(N'2019-01-11T10:37:45.000' AS DateTime), 1, 2)
    ,(CAST(N'2019-01-12T10:37:45.000' AS DateTime), 1, 1)
    ,(CAST(N'2019-01-13T10:37:45.000' AS DateTime), 1, 1)

Query:

SELECT DISTINCT c.Name as CustomerName --,ca.id
FROM
    (
        SELECT
            id,customerid,actionID
            ,ROW_NUMBER() OVER(PARTITION BY CustomerID ORDER BY Date ASC) as RN
            ,LEAD(ActionID,1,NULL) OVER(PARTITION BY CustomerID ORDER BY Date ASC) as next_ActionID
        FROM dbo.CustomerActions
    ) as CA /* customerActions  with next Action field */
    INNER JOIN dbo.Customers as c
    ON CA.CustomerID = c.ID
WHERE
    actionID = next_ActionID

or you can do it with ROW_NUMBER

;WITH CA AS
(
        SELECT
            id,customerid,actionID
            ,ROW_NUMBER() OVER(PARTITION BY CustomerID ORDER BY Date ASC) as RN
            --,LEAD(ActionID,1,ActionID) OVER(PARTITION BY CustomerID ORDER BY Date ASC) as next_ActionID
        FROM dbo.CustomerActions
)
SELECT Distinct m.Name  --,c.CustomerID, c.ID
FROM CA as C -- current set
    INNER JOIN CA as N -- next set
        ON c.CustomerID = N.CustomerID
        AND c.rn + 1 = n.rn
    INNER JOIN Customers as m
        ON m.ID = c.CustomerID
WHERE
    c.ActionID = n.ActionID 

output:

Name
test