Condition 1:
WITH ord as (
SELECT ID, CustomerID, CheckInDate, CheckOutDate
, n = ROW_NUMBER() over(partition by [CustomerID] order by [CheckInDate], [CheckOutDate])
FROM @data d1
), first as (
SELECT o1.ID, o1.CustomerID, o1.CheckInDate, o1.CheckOutDate, o1.n
, m = ROW_NUMBER() over(partition by o1.[CustomerID] order by o1.[CheckInDate], o1.[CheckOutDate])
FROM ord o1
INNER JOIN ord o2 ON o1.CustomerID = o2.CustomerID AND o2.n+1 = o1.n AND o1.CheckInDate > o2.CheckOutDate
), groups as (
SELECT o.ID, o.CustomerID, nx = MIN(coalesce(f.n, 1)), n = MAX(o.n)
, p = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID)
FROM ord o
LEFT JOIN first f ON o.CustomerID = f.CustomerID AND o.n < f.n
GROUP BY o.ID, o.CustomerID
), dates as (
SELECT g.CustomerID, g.nx, CheckInDate = MIN(o.CheckInDate)
, CheckOutDate = CASE WHEN SUM(CASE WHEN o.CheckOutDate IS NULL THEN 1 END) IS NULL THEN MAX(o.CheckOutDate) END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
GROUP BY g.nx, g.CustomerID
HAVING COUNT(g.nx) > 1
)
SELECT o.ID, o.CustomerID
, CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN dates d on g.CustomerID = d.CustomerID AND g.nx = d.nx
ORDER BY ID
This query output the rows that must be updated:
- ord = I first partition by CustomerID and order by CheckInDate
- first = I join consecutive rows from previous CTE where the next CheckInDate does not overlap previous CheckInDate and CheckDate and I partition and order them
- groups = I group by the previous partitioned number in order to know to which group a row belongs
- dates = I join with the original data in order to get the first and last date for each groups. Groups with only 1 row are removed
- main select output the dates for p=1 or NULL otherwise
Output:
ID CustomerID CheckInDate CheckOutDate
1 1 2015-03-04 NULL
3 1 NULL NULL
4 1 NULL NULL
Condition 2:
WITH ord as (
SELECT ID, CustomerID, CheckInDate, CheckOutDate
, n = ROW_NUMBER() over(partition by [CustomerID] order by [CheckInDate], [CheckOutDate])
FROM @data d1
), first as (
SELECT o1.ID, o1.CustomerID, o1.CheckInDate, o1.CheckOutDate, o1.n
, m = ROW_NUMBER() over(partition by o1.[CustomerID] order by o1.[CheckInDate], o1.[CheckOutDate])
FROM ord o1
INNER JOIN ord o2 ON o1.CustomerID = o2.CustomerID AND o2.n+1 = o1.n AND o1.CheckInDate > o2.CheckOutDate
), groups as (
SELECT o.ID, o.CustomerID, nx = MIN(coalesce(f.n, 1)), n = MAX(o.n)
, p = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID)
, last = ROW_NUMBER() over(partition by o.CustomerID, MIN(coalesce(f.n, 1)) ORDER BY o.ID DESC)
FROM ord o
LEFT JOIN first f ON o.CustomerID = f.CustomerID AND o.n < f.n
GROUP BY o.ID, o.CustomerID
), dates as (
SELECT g.CustomerID, g.nx, CheckInDate = MIN(o.CheckInDate)
, CheckOutDate = MAX(o2.CheckOutDate)
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN (SELECT ID, CustomerID, nx FROM groups WHERE last = 1) l ON g.CustomerID = l.CustomerID AND g.nx = l.nx
INNER JOIN ord o2 ON l.ID = o2.ID
GROUP BY g.nx, g.CustomerID
HAVING COUNT(g.nx) > 1
)
SELECT o.ID, o.CustomerID
, CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM groups g
INNER JOIN ord o ON g.ID = o.ID
INNER JOIN dates d on g.CustomerID = d.CustomerID AND g.nx = d.nx
ORDER BY ID
Output:
ID CustomerID CheckInDate CheckOutDate
1 1 2015-03-04 2015-05-03
3 1 NULL NULL
4 1 NULL NULL
For Updates, replace SELECT by UPDATE:
UPDATE g SET
CheckInDate = CASE WHEN g.p = 1 THEN d.CheckInDate END
, CheckOutDate = CASE WHEN g.p = 1 THEN d.CheckOutDate END
FROM ...
Your data:
declare @data table([ID] int, [CustomerID] int, [CheckInDate] date, [CheckOutDate] date);
Insert into @data([ID], [CustomerID], [CheckInDate], [CheckOutDate])
VALUES
(1, 1, '2015-04-02', '2015-04-05'),
(2, 2, '2015-03-04', '2015-05-02'),
(3, 1, '2015-04-01', NULL),
(4, 1, '2015-03-04', '2015-05-03'),
(5, 1, '2015-01-03', '2015-02-03')
;
It works as well with this sample:
(1, 1, '2015-04-02', '2015-04-05'),
(2, 2, '2015-03-04', '2015-05-02'),
(3, 1, '2015-04-01', NULL),
(4, 1, '2015-03-04', '2015-05-03'),
(5, 1, '2015-01-03', '2015-02-03'),
(6, 1, '2015-01-02', '2015-02-03'),
(7, 1, '2015-03-04', '2015-03-06'),
(8, 1, '2015-03-04', '2015-05-06'),
(9, 1, '2014-04-02', '2014-04-05'),
(10, 1, '2014-03-04', '2014-05-02')
If it does not work with some of your data, update Input and Output table with more relevant values.
Best Answer
I think what you want to do is rank the data based upon the smallest difference between the dates, per year - but you only need to consider any rows where
date1
is greater thatdate2
. For this, I would personally use a CTE to filter and rank the results before returning it like so:You can find the results on dbfiddle.
For more information see the official documentation of RANK and CTE.