Condition 1:
WITH ord as (
SELECT ID, CustomerID, CheckInDate, CheckOutDate
, n = ROW_NUMBER() over(partition by [CustomerID] order by [CheckInDate], [CheckOutDate])
FROM @data d1
), first as (
SELECT o1.ID, o1.CustomerID, o1.CheckInDate, o1.CheckOutDate, o1.n
, m = ROW_NUMBER() over(partition by o1.[CustomerID] order by o1.[CheckInDate], o1.[CheckOutDate])
FROM ord o1
INNER JOIN ord o2 ON o1.CustomerID = o2.CustomerID AND o2.n+1 = o1.n AND o1.CheckInDate > o2.CheckOutDate
), groups as (
SELECT o.ID, o.CustomerID, nx = MIN(coalesce(f.n, 1)), n = MAX(o.n)
, p = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID)
FROM ord o
LEFT JOIN first f ON o.CustomerID = f.CustomerID AND o.n < f.n
GROUP BY o.ID, o.CustomerID
), dates as (
SELECT g.CustomerID, g.nx, CheckInDate = MIN(o.CheckInDate)
, CheckOutDate = CASE WHEN SUM(CASE WHEN o.CheckOutDate IS NULL THEN 1 END) IS NULL THEN MAX(o.CheckOutDate) END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
GROUP BY g.nx, g.CustomerID
HAVING COUNT(g.nx) > 1
)
SELECT o.ID, o.CustomerID
, CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN dates d on g.CustomerID = d.CustomerID AND g.nx = d.nx
ORDER BY ID
This query output the rows that must be updated:
- ord = I first partition by CustomerID and order by CheckInDate
- first = I join consecutive rows from previous CTE where the next CheckInDate does not overlap previous CheckInDate and CheckDate and I partition and order them
- groups = I group by the previous partitioned number in order to know to which group a row belongs
- dates = I join with the original data in order to get the first and last date for each groups. Groups with only 1 row are removed
- main select output the dates for p=1 or NULL otherwise
Output:
ID CustomerID CheckInDate CheckOutDate
1 1 2015-03-04 NULL
3 1 NULL NULL
4 1 NULL NULL
Condition 2:
WITH ord as (
SELECT ID, CustomerID, CheckInDate, CheckOutDate
, n = ROW_NUMBER() over(partition by [CustomerID] order by [CheckInDate], [CheckOutDate])
FROM @data d1
), first as (
SELECT o1.ID, o1.CustomerID, o1.CheckInDate, o1.CheckOutDate, o1.n
, m = ROW_NUMBER() over(partition by o1.[CustomerID] order by o1.[CheckInDate], o1.[CheckOutDate])
FROM ord o1
INNER JOIN ord o2 ON o1.CustomerID = o2.CustomerID AND o2.n+1 = o1.n AND o1.CheckInDate > o2.CheckOutDate
), groups as (
SELECT o.ID, o.CustomerID, nx = MIN(coalesce(f.n, 1)), n = MAX(o.n)
, p = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID)
, last = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID DESC)
FROM ord o
LEFT JOIN first f ON o.CustomerID = f.CustomerID AND o.n < f.n
GROUP BY o.ID, o.CustomerID
), dates as (
SELECT g.CustomerID, g.nx, CheckInDate = MIN(o.CheckInDate)
, CheckOutDate = MAX(o2.CheckOutDate)
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN (SELECT ID, CustomerID, nx FROM groups WHERE last = 1) l ON g.CustomerID = l.CustomerID AND g.nx = l.nx
INNER JOIN ord o2 ON l.ID = o2.ID
GROUP BY g.nx, g.CustomerID
HAVING COUNT(g.nx) > 1
)
SELECT o.ID, o.CustomerID
, CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN dates d on g.CustomerID = d.CustomerID AND g.nx = d.nx
ORDER BY ID
Output:
ID CustomerID CheckInDate CheckOutDate
1 1 2015-03-04 2015-05-03
3 1 NULL NULL
4 1 NULL NULL
For Updates, replace SELECT by UPDATE:
UPDATE g SET
CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM ...
Your data:
declare @data table([ID] int, [CustomerID] int, [CheckInDate] date, [CheckOutDate] date);
Insert into @data([ID], [CustomerID], [CheckInDate], [CheckOutDate])
VALUES
(1, 1, '2015-04-02', '2015-04-05'),
(2, 2, '2015-03-04', '2015-05-02'),
(3, 1, '2015-04-01', NULL),
(4, 1, '2015-03-04', '2015-05-03'),
(5, 1, '2015-01-03', '2015-02-03')
;
It works as well with this sample:
(1, 1, '2015-04-02', '2015-04-05'),
(2, 2, '2015-03-04', '2015-05-02'),
(3, 1, '2015-04-01', NULL),
(4, 1, '2015-03-04', '2015-05-03'),
(5, 1, '2015-01-03', '2015-02-03'),
(6, 1, '2015-01-02', '2015-02-03'),
(7, 1, '2015-03-04', '2015-03-06'),
(8, 1, '2015-03-04', '2015-05-06'),
(9, 1, '2014-04-02', '2014-04-05'),
(10, 1, '2014-03-04', '2014-05-02')
If it does not work with some of your data, update Input and Output table with more relevant values.
Best Answer
On SQL Server 2012, you can use
LAG
andSUM
with anORDER BY
clause to get what you're after without any self-joins.LAG
is used to figure out if the[End Date]
value from the previous row for a partition is exactly one day away from the[Start Date]
value of the current row. The running total is used to tie the groups together and you get your desired results with a simple aggregate at the end.Results: