Condition 1:
WITH ord as (
SELECT ID, CustomerID, CheckInDate, CheckOutDate
, n = ROW_NUMBER() over(partition by [CustomerID] order by [CheckInDate], [CheckOutDate])
FROM @data d1
), first as (
SELECT o1.ID, o1.CustomerID, o1.CheckInDate, o1.CheckOutDate, o1.n
, m = ROW_NUMBER() over(partition by o1.[CustomerID] order by o1.[CheckInDate], o1.[CheckOutDate])
FROM ord o1
INNER JOIN ord o2 ON o1.CustomerID = o2.CustomerID AND o2.n+1 = o1.n AND o1.CheckInDate > o2.CheckOutDate
), groups as (
SELECT o.ID, o.CustomerID, nx = MIN(coalesce(f.n, 1)), n = MAX(o.n)
, p = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID)
FROM ord o
LEFT JOIN first f ON o.CustomerID = f.CustomerID AND o.n < f.n
GROUP BY o.ID, o.CustomerID
), dates as (
SELECT g.CustomerID, g.nx, CheckInDate = MIN(o.CheckInDate)
, CheckOutDate = CASE WHEN SUM(CASE WHEN o.CheckOutDate IS NULL THEN 1 END) IS NULL THEN MAX(o.CheckOutDate) END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
GROUP BY g.nx, g.CustomerID
HAVING COUNT(g.nx) > 1
)
SELECT o.ID, o.CustomerID
, CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN dates d on g.CustomerID = d.CustomerID AND g.nx = d.nx
ORDER BY ID
This query output the rows that must be updated:
- ord = I first partition by CustomerID and order by CheckInDate
- first = I join consecutive rows from previous CTE where the next CheckInDate does not overlap previous CheckInDate and CheckDate and I partition and order them
- groups = I group by the previous partitioned number in order to know to which group a row belongs
- dates = I join with the original data in order to get the first and last date for each groups. Groups with only 1 row are removed
- main select output the dates for p=1 or NULL otherwise
Output:
ID CustomerID CheckInDate CheckOutDate
1 1 2015-03-04 NULL
3 1 NULL NULL
4 1 NULL NULL
Condition 2:
WITH ord as (
SELECT ID, CustomerID, CheckInDate, CheckOutDate
, n = ROW_NUMBER() over(partition by [CustomerID] order by [CheckInDate], [CheckOutDate])
FROM @data d1
), first as (
SELECT o1.ID, o1.CustomerID, o1.CheckInDate, o1.CheckOutDate, o1.n
, m = ROW_NUMBER() over(partition by o1.[CustomerID] order by o1.[CheckInDate], o1.[CheckOutDate])
FROM ord o1
INNER JOIN ord o2 ON o1.CustomerID = o2.CustomerID AND o2.n+1 = o1.n AND o1.CheckInDate > o2.CheckOutDate
), groups as (
SELECT o.ID, o.CustomerID, nx = MIN(coalesce(f.n, 1)), n = MAX(o.n)
, p = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID)
, last = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID DESC)
FROM ord o
LEFT JOIN first f ON o.CustomerID = f.CustomerID AND o.n < f.n
GROUP BY o.ID, o.CustomerID
), dates as (
SELECT g.CustomerID, g.nx, CheckInDate = MIN(o.CheckInDate)
, CheckOutDate = MAX(o2.CheckOutDate)
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN (SELECT ID, CustomerID, nx FROM groups WHERE last = 1) l ON g.CustomerID = l.CustomerID AND g.nx = l.nx
INNER JOIN ord o2 ON l.ID = o2.ID
GROUP BY g.nx, g.CustomerID
HAVING COUNT(g.nx) > 1
)
SELECT o.ID, o.CustomerID
, CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN dates d on g.CustomerID = d.CustomerID AND g.nx = d.nx
ORDER BY ID
Output:
ID CustomerID CheckInDate CheckOutDate
1 1 2015-03-04 2015-05-03
3 1 NULL NULL
4 1 NULL NULL
For Updates, replace SELECT by UPDATE:
UPDATE g SET
CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM ...
Your data:
declare @data table([ID] int, [CustomerID] int, [CheckInDate] date, [CheckOutDate] date);
Insert into @data([ID], [CustomerID], [CheckInDate], [CheckOutDate])
VALUES
(1, 1, '2015-04-02', '2015-04-05'),
(2, 2, '2015-03-04', '2015-05-02'),
(3, 1, '2015-04-01', NULL),
(4, 1, '2015-03-04', '2015-05-03'),
(5, 1, '2015-01-03', '2015-02-03')
;
It works as well with this sample:
(1, 1, '2015-04-02', '2015-04-05'),
(2, 2, '2015-03-04', '2015-05-02'),
(3, 1, '2015-04-01', NULL),
(4, 1, '2015-03-04', '2015-05-03'),
(5, 1, '2015-01-03', '2015-02-03'),
(6, 1, '2015-01-02', '2015-02-03'),
(7, 1, '2015-03-04', '2015-03-06'),
(8, 1, '2015-03-04', '2015-05-06'),
(9, 1, '2014-04-02', '2014-04-05'),
(10, 1, '2014-03-04', '2014-05-02')
If it does not work with some of your data, update Input and Output table with more relevant values.
Best Answer
Your sample data does not match your description and confused me at first. As sp_BlitzErik points out this is an island-and-gap problem. The solution is pretty straightforward if you have access to window functions. First, we can enumerate the table per member alone, let's call this full_order (this happens to be the same as day, but I'll ad it for generality). Second, we can enumerate the table per member and whether they were active on that day, let's call this partial_order
If the difference between full_order and partial_order changes, it means that active has changed from null to a value, or vice versa. Therefore we can form a group with this difference. Within each such group we can pick the min(active) and max(active) to form an interval:
It's probably easiest to add another level of nesting to get the desired result: