Sql-server – Continuous Date Ranges – Gaps and Islands

gaps-and-islandssql serversql-server-2008-r2

CREATE TABLE T1
    (Asset_Id int, Trailer_Id int, AssignStart datetime, AssignEnd DATETIME)
;
INSERT INTO T1
    (Asset_Id, Trailer_Id, AssignStart, AssignEnd)
VALUES
    (37124, 32607, '2018-04-19 08:55:00', '2018-05-05 10:00:00.000'),
    (37124, 32607, '2018-05-05 11:23:00', NULL),
    (33000, 30000, '2018-04-01 15:00:00', '2018-04-15 10:30:00.000'),
    (34000, 31000, '2018-04-05 10:00:00', '2018-04-10 09:30:00.000'),
    (34000, 32500, '2018-04-10 09:31:00', NULL),
    (37000, 32600, '2018-04-19 08:55:00', '2018-04-25 08:30:00.000'),
    (37000, 32600, '2018-04-25 09:23:00', '2018-04-25 10:00:00.000'),
    (37000, 32600, '2018-04-25 11:23:00', '2018-04-30 15:00:00.000'),
    (37000, 32600, '2018-04-30 16:15:00', '2018-04-30 17:30:00.000'),
    (37000, 32600, '2018-05-01 18:23:00', NULL),
    (38000, 36000, '2018-05-01 10:00:00', '2018-05-10 06:30:00.000'),
    (38000, 36000, '2018-05-15 09:00:00', '2018-05-20 11:00:00.000'),
    (38000, 36000, '2018-05-20 12:00:00', NULL),
    (33000, 30000, '2018-05-01 10:00:00', NULL)
;

I have the following sample data –

Asset_Id    Trailer_Id  AssignStart             AssignEnd
37124       32607       2018-04-19 08:55:00.000 2018-05-05 10:00:00.000
37124       32607       2018-05-05 11:23:00.000 NULL
33000       30000       2018-04-01 15:00:00.000 2018-04-15 10:30:00.000
34000       31000       2018-04-05 10:00:00.000 2018-04-10 09:30:00.000
34000       32500       2018-04-10 09:31:00.000 NULL
37000       32600       2018-04-19 08:55:00.000 2018-04-25 08:30:00.000
37000       32600       2018-04-25 09:23:00.000 2018-04-25 10:00:00.000
37000       32600       2018-04-25 11:23:00.000 2018-04-30 15:00:00.000
37000       32600       2018-04-30 16:15:00     2018-04-30 17:30:00.000
37000       32600       2018-05-01 18:23:00     NULL
38000       36000       2018-05-01 10:00:00.000 2018-05-10 06:30:00.000
38000       36000       2018-05-15 09:00:00.000 2018-05-20 11:00:00.000
38000       36000       2018-05-20 12:00:00.000 NULL
33000       30000       2018-05-01 10:00:00.000 NULL

As you can see some of the assignments between Assets and Trailers are ended and started again on the same day – except for last row – Gaps and Islands

Example (1)

Asset_Id    Trailer_Id  AssignStart             AssignEnd
37000       32600       2018-04-19 08:55:00.000 2018-04-25 08:30:00.000
37000       32600       2018-04-25 09:23:00.000 2018-04-25 10:00:00.000
37000       32600       2018-04-25 11:23:00.000 2018-04-30 15:00:00.000
37000       32600       2018-04-30 16:15:00.000 2018-04-30 17:30:00.000
37000       32600       2018-05-01 18:23:00.000 NULL

Output for this sample I am expecting is

Asset_Id    Trailer_Id  AssignStart             AssignEnd
37000       32600       2018-04-19 08:55:00.000 2018-04-30 17:30:00.000
37000       32600       2018-05-01 18:23:00.000 NULL

Now this is another section with the Gaps and Islands.
Some of assignments between the same assets and trailers are ended and then started again on a future date

Example (2)

Asset_Id    Trailer_Id  AssignStart             AssignEnd
33000       30000       2018-04-01 15:00:00.000 2018-04-15 10:30:00.000
33000       30000       2018-05-01 10:00:00.000 NULL

Output for this sample I am expecting is

Asset_Id    Trailer_Id  AssignStart             AssignEnd
33000       30000       2018-04-01 15:00:00.000 2018-04-15 10:30:00.000
33000       30000       2018-05-01 10:00:00.000 NULL

Example (3)

Asset_Id    Trailer_Id  AssignStart             AssignEnd
38000       36000       2018-05-01 10:00:00.000 2018-05-10 06:30:00.000
38000       36000       2018-05-15 09:00:00.000 2018-05-20 11:00:00.000
38000       36000       2018-05-20 12:00:00.000 NULL

Output for this sample I am expecting is

Asset_Id    Trailer_Id  AssignStart             AssignEnd
38000       36000       2018-05-01 10:00:00.000 2018-05-10 06:30:00.000
38000       36000       2018-05-15 09:00:00.000 NULL

I struggling to write a query that will provide the following output

Asset_Id    Trailer_Id  AssignStart             AssignEnd
37124       32607       2018-04-19 08:55:00.000 NULL
33000       30000       2018-04-01 15:00:00.000 2018-04-15 10:30:00.000
34000       31000       2018-04-05 10:00:00.000 2018-04-10 09:30:00.000
34000       32500       2018-04-10 09:31:00.000 NULL
37000       32600       2018-04-19 08:55:00.000 2018-04-30 17:30:00.000
37000       32600       2018-05-01 18:23:00.000 NULL
38000       36000       2018-05-01 10:00:00.000 2018-05-10 06:30:00.000
38000       36000       2018-05-15 09:00:00.000 NULL
33000       30000       2018-05-01 10:00:00.000 NULL

Using the answer from this SO answer as a base this is my attempt, it's not quite right

Best Answer

You can use the recursive approach , with the help of a CTE. For this , I added a Row_Number so , to be sure that it's the next record verified.

Then creating the recursive CTE...named cte. Here it's important the field rn as rn_init - this is the base when it's creating the chain of records.

;
WITH fr AS -- first records
(
SELECT [Asset_Id], [Trailer_Id], [AssignStart], [AssignEnd]
  ,ROW_NUMBER()OVER( PARTITION BY [Asset_Id], [Trailer_Id] ORDER BY 
   [AssignStart], [AssignEnd]) AS rn
FROM  T1
)
,cte AS
(
 SELECT [Asset_Id], [Trailer_Id]
      , [AssignStart] as [AssignStart_init] , [AssignEnd] as [AssignEnd_Init] 
      , [AssignStart] as [AssignStart],  [AssignEnd] as [AssignEnd]
      , 0 as lvl
      , rn as rn
      , rn as rn_init
 FROM fr
 --WHERE [AssignEnd] is not null

 UNION ALL

 SELECT 
     T1.[Asset_Id], T1.[Trailer_Id]
     ,C.[AssignStart_init],C.[AssignEnd_Init]
     ,T1.[AssignStart], T1.[AssignEnd]
     ,C.lvl + 1 
     ,T1.rn
     ,C.rn_init
 FROM fr AS T1
     INNER JOIN cte as C
        ON T1.[Asset_Id] = C.[Asset_Id]
        AND T1.[Trailer_Id] = C.[Trailer_Id]
        AND CONVERT(DATE,C.[AssignEnd]) = CONVERT(DATE,T1.[AssignStart])
        AND C.[AssignEnd] <= T1.[AssignStart]
        AND C.rn + 1 = T1.rn
)

Here is dealing with NULL case for the AssignEnd and also important , to count the 'chain of records' count(rn_init) as c_rn_init. Base on this, it's later filtered.

--select * from cte
,myPrecious AS
(
select  --C.* 
   C.[Asset_Id],c.[Trailer_Id]
    ,MIN([AssignStart]) as AssignStart
    --,MAX([AssignEnd]) as AssignEnd
    ,CASE WHEN MAX(isnull([AssignEnd],[AssignStart])) = MAX([AssignStart]) THEN NULL ELSE MAX(isnull([AssignEnd],[AssignStart])) END as [AssignEnd]
    ,count(rn_init) as c_rn_init
 from cte as c
group by C.[Asset_Id],c.[Trailer_Id], C.rn_init

 )

This if the final query.Added Row_number , so to filter out records , ORDER BY c_rn_init DESC

 SELECT 
  Asset_Id,Trailer_Id,AssignStart,AssignEnd
  --,c_rn_init
  --,rn
  FROM(
          SELECT * 
               ,row_number() OVER(PARTITION BY [Asset_Id],[Trailer_Id] , AssignEnd ORDER BY c_rn_init DESC) as rn
          FROM myPrecious
      )A
 WHERE A.rn = 1
 order by [Asset_Id],[Trailer_Id]  ,[AssignStart],[AssignEnd]

The output:

Asset_Id    Trailer_Id  AssignStart         AssignEnd
33000       30000       01/04/2018 15:00:00 15/04/2018 10:30:00
33000       30000       01/05/2018 10:00:00 null
34000       31000       05/04/2018 10:00:00 10/04/2018 09:30:00
34000       32500       10/04/2018 09:31:00 null
37000       32600       19/04/2018 08:55:00 30/04/2018 17:30:00
37000       32600       01/05/2018 18:23:00 null
37124       32607       19/04/2018 08:55:00 null
38000       36000       01/05/2018 10:00:00 10/05/2018 06:30:00
38000       36000       15/05/2018 09:00:00 null

dbfiddle