Below a complete solution for sql-server.
I was trying analytical functions, but didn't succeed. I'm tempted to repost the question to SO requiring a solution using analytical functions. I think there must be one.
And here is my working solution. I'm proud not to have used cursors.
set language us_english
Set Nocount ON
create table #t( PROJ_NBR char(4), STATUS char(2), START_DT datetime);
insert into #t values ('AL20', 'AC', '1/14/2010 4:31');
insert into #t values ('AL20', 'AC', '1/14/2010 4:32');
insert into #t values ('AL20', 'AC', '1/14/2010 4:32');
insert into #t values ('AL20', 'DE', '1/14/2010 4:34');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'AC', '3/4/2010 4:31');
insert into #t values ('AL20', 'AC', '3/4/2010 4:39');
insert into #t values ('AL20', 'AC', '3/21/2010 13:24');
insert into #t values ('AL20', 'AC', '2/4/2011 13:54');
insert into #t values ('AL20', 'AC', '2/4/2011 14:14');
insert into #t values ('AL20', 'AC', '2/4/2011 14:27');
insert into #t values ('AL20', 'DE', '2/11/2011 16:24');
select
distinct PROJ_NBR, STATUS, [start], [end]
from(
select
*,
(select MIN(START_DT)
from #t t1
where t.STATUS = t1.STATUS
and t.START_DT >= t1.START_DT
and not exists ( select * from #t t2
where t.STATUS <> t2.STATUS
and t.START_DT >= t2.START_DT
and t1.START_DT <= t2.START_DT)
) [start],
(select Max(START_DT)
from #t t1
where t.STATUS = t1.STATUS
and t.START_DT <= t1.START_DT
and not exists ( select * from #t t2
where t.STATUS <> t2.STATUS
and t.START_DT <= t2.START_DT
and t1.START_DT >= t2.START_DT)
) [end]
from #t t
) t1
order by [start]
I think besides column and table names this can easily adapted to Oracle too.
This one uses a recursive CTE. Its result is identical to the example in the question. It was a nightmare to come up with... The code includes comments to ease through its convoluted logic.
SET DATEFIRST 1 -- Make Monday weekday=1
DECLARE @Ranked TABLE (RowID int NOT NULL IDENTITY PRIMARY KEY, -- Incremental uninterrupted sequence in the right order
ID int NOT NULL UNIQUE, ContractID int NOT NULL, dt date, -- Original relevant values (ID is not really necessary)
WeekNo int NOT NULL, dowBit int NOT NULL); -- Useful to find gaps in days or weeks
INSERT INTO @Ranked
SELECT ID, ContractID, dt,
DATEDIFF(WEEK, '1900-01-01', DATEADD(DAY, 1-DATEPART(dw, dt), dt)) AS WeekNo,
POWER(2, DATEPART(dw, dt)-1) AS dowBit
FROM @Src
ORDER BY ContractID, WeekNo, dowBit
/*
Each evaluated date makes part of the carried sequence if:
- this is not a new contract, and
- sequence started this week, or
- same day last week was part of the sequence, or
- sequence started last week and today is a lower day than the accumulated weekdays list
- and there are no sequence gaps since previous day
(otherwise it does not make part of the old sequence, so it starts a new one) */
DECLARE @RankedRanges TABLE (RowID int NOT NULL PRIMARY KEY, WeekDays int NOT NULL, StartRowID int NULL);
WITH WeeksCTE AS -- Needed for building the sequence gradually, and comparing the carried sequence (and previous day) with a current evaluated day
(
SELECT RowID, ContractID, dowBit, WeekNo, RowID AS StartRowID, WeekNo AS StartWN, dowBit AS WeekDays, dowBit AS StartWeekDays
FROM @Ranked
WHERE RowID = 1
UNION ALL
SELECT RowID, ContractID, dowBit, WeekNo, StartRowID,
CASE WHEN StartRowID IS NULL THEN StartWN ELSE WeekNo END AS WeekNo,
CASE WHEN StartRowID IS NULL THEN WeekDays | dowBit ELSE dowBit END AS WeekDays,
CASE WHEN StartRowID IS NOT NULL THEN dowBit WHEN WeekNo = StartWN THEN StartWeekDays | dowBit ELSE StartWeekDays END AS StartWeekDays
FROM (
SELECT w.*, pre.StartWN, pre.WeekDays, pre.StartWeekDays,
CASE WHEN w.ContractID <> pre.ContractID OR -- New contract always break the sequence
NOT (w.WeekNo = pre.StartWN OR -- Same week as a new sequence always keeps the sequence
w.dowBit & pre.WeekDays > 0 OR -- Days in the sequence keep the sequence (provided there are no gaps, checked later)
(w.WeekNo = pre.StartWN+1 AND (w.dowBit-1) & pre.StartWeekDays = 0)) OR -- Days in the second week when less than a week passed since the sequence started remain in sequence
(w.WeekNo > pre.StartWN AND -- look for gap after initial week
w.WeekNo > pre.WeekNo+1 OR -- look for full-week gaps
(w.WeekNo = pre.WeekNo AND -- when same week as previous day,
((w.dowBit-1) ^ (pre.dowBit*2-1)) & pre.WeekDays > 0 -- days between this and previous weekdays, compared to current series
) OR
(w.WeekNo > pre.WeekNo AND -- when following week of previous day,
((-1 ^ (pre.dowBit*2-1)) | (w.dowBit-1)) & pre.WeekDays > 0 -- days between this and previous weekdays, compared to current series
)) THEN w.RowID END AS StartRowID
FROM WeeksCTE pre
JOIN @Ranked w ON (w.RowID = pre.RowID + 1)
) w
)
INSERT INTO @RankedRanges -- days sequence and starting point of each sequence
SELECT RowID, WeekDays, StartRowID
--SELECT *
FROM WeeksCTE
OPTION (MAXRECURSION 0)
--SELECT * FROM @RankedRanges
DECLARE @Ranges TABLE (RowNo int NOT NULL IDENTITY PRIMARY KEY, RowID int NOT NULL);
INSERT INTO @Ranges -- @RankedRanges filtered only by start of each range, with numbered rows to easily find the end of each range
SELECT StartRowID
FROM @RankedRanges
WHERE StartRowID IS NOT NULL
ORDER BY 1
-- Final result putting everything together
SELECT rs.ContractID, rs.dt AS StartDT, re.dt AS EndDT, re.RowID-rs.RowID+1 AS DayCount,
CASE WHEN rr.WeekDays & 64 > 0 THEN 'Sun,' ELSE '' END +
CASE WHEN rr.WeekDays & 1 > 0 THEN 'Mon,' ELSE '' END +
CASE WHEN rr.WeekDays & 2 > 0 THEN 'Tue,' ELSE '' END +
CASE WHEN rr.WeekDays & 4 > 0 THEN 'Wed,' ELSE '' END +
CASE WHEN rr.WeekDays & 8 > 0 THEN 'Thu,' ELSE '' END +
CASE WHEN rr.WeekDays & 16 > 0 THEN 'Fri,' ELSE '' END +
CASE WHEN rr.WeekDays & 32 > 0 THEN 'Sat,' ELSE '' END AS WeekDays
FROM (
SELECT r.RowID AS StartRowID, COALESCE(pos.RowID-1, (SELECT MAX(RowID) FROM @Ranked)) AS EndRowID
FROM @Ranges r
LEFT JOIN @Ranges pos ON (pos.RowNo = r.RowNo + 1)
) g
JOIN @Ranked rs ON (rs.RowID = g.StartRowID)
JOIN @Ranked re ON (re.RowID = g.EndRowID)
JOIN @RankedRanges rr ON (rr.RowID = re.RowID)
Another strategy
This one should be significantly faster than the previous one because it doesn't rely on the slow limited recursive CTE in SQL Server 2008, although it implements more or less the same strategy.
There is a WHILE
loop (I couldn't devise a way to avoid it), but goes for a reduced number of iterations (the highest number of sequences (minus one) on any given contract).
It's a simple strategy, and could be used for sequences either shorter or longer than a week (replacing any occurrence of the constant 7 for any other number, and the dowBit
calculated from MODULUS x of DayNo
rather than DATEPART(wk)
) and up to 32.
SET DATEFIRST 1 -- Make Monday weekday=1
-- Get the minimum information needed to calculate sequences
DECLARE @Days TABLE (ContractID int NOT NULL, dt date, DayNo int NOT NULL, dowBit int NOT NULL, PRIMARY KEY (ContractID, DayNo));
INSERT INTO @Days
SELECT ContractID, dt, CAST(CAST(dt AS datetime) AS int) AS DayNo, POWER(2, DATEPART(dw, dt)-1) AS dowBit
FROM @Src
DECLARE @RangeStartFirstPass TABLE (ContractID int NOT NULL, DayNo int NOT NULL, PRIMARY KEY (ContractID, DayNo))
-- Calculate, from the above list, which days are not present in the previous 7
INSERT INTO @RangeStartFirstPass
SELECT r.ContractID, r.DayNo
FROM @Days r
LEFT JOIN @Days pr ON (pr.ContractID = r.ContractID AND pr.DayNo BETWEEN r.DayNo-7 AND r.DayNo-1) -- Last 7 days
GROUP BY r.ContractID, r.DayNo, r.dowBit
HAVING r.dowBit & COALESCE(SUM(pr.dowBit), 0) = 0
-- Update the previous list with all days that occur right after a missing day
INSERT INTO @RangeStartFirstPass
SELECT *
FROM (
SELECT DISTINCT ContractID, (SELECT MIN(DayNo) FROM @Days WHERE ContractID = d.ContractID AND DayNo > d.DayNo + 7) AS DayNo
FROM @Days d
WHERE NOT EXISTS (SELECT 1 FROM @Days WHERE ContractID = d.ContractID AND DayNo = d.DayNo + 7)
) d
WHERE DayNo IS NOT NULL AND
NOT EXISTS (SELECT 1 FROM @RangeStartFirstPass WHERE ContractID = d.ContractID AND DayNo = d.DayNo)
DECLARE @RangeStart TABLE (ContractID int NOT NULL, DayNo int NOT NULL, PRIMARY KEY (ContractID, DayNo));
-- Fetch the first sequence for each contract
INSERT INTO @RangeStart
SELECT ContractID, MIN(DayNo)
FROM @RangeStartFirstPass
GROUP BY ContractID
-- Add to the list above the next sequence for each contract, until all are added
-- (ensure no sequence is added with less than 7 days)
WHILE @@ROWCOUNT > 0
INSERT INTO @RangeStart
SELECT f.ContractID, MIN(f.DayNo)
FROM (SELECT ContractID, MAX(DayNo) AS DayNo FROM @RangeStart GROUP BY ContractID) s
JOIN @RangeStartFirstPass f ON (f.ContractID = s.ContractID AND f.DayNo > s.DayNo + 7)
GROUP BY f.ContractID
-- Summarise results
SELECT ContractID, StartDT, EndDT, DayCount,
CASE WHEN WeekDays & 64 > 0 THEN 'Sun,' ELSE '' END +
CASE WHEN WeekDays & 1 > 0 THEN 'Mon,' ELSE '' END +
CASE WHEN WeekDays & 2 > 0 THEN 'Tue,' ELSE '' END +
CASE WHEN WeekDays & 4 > 0 THEN 'Wed,' ELSE '' END +
CASE WHEN WeekDays & 8 > 0 THEN 'Thu,' ELSE '' END +
CASE WHEN WeekDays & 16 > 0 THEN 'Fri,' ELSE '' END +
CASE WHEN WeekDays & 32 > 0 THEN 'Sat,' ELSE '' END AS WeekDays
FROM (
SELECT r.ContractID,
MIN(d.dt) AS StartDT,
MAX(d.dt) AS EndDT,
COUNT(*) AS DayCount,
SUM(DISTINCT d.dowBit) AS WeekDays
FROM (SELECT *, COALESCE((SELECT MIN(DayNo) FROM @RangeStart WHERE ContractID = rs.ContractID AND DayNo > rs.DayNo), 999999) AS DayEnd FROM @RangeStart rs) r
JOIN @Days d ON (d.ContractID = r.ContractID AND d.DayNo BETWEEN r.DayNo AND r.DayEnd-1)
GROUP BY r.ContractID, r.DayNo
) d
ORDER BY ContractID, StartDT
Best Answer
Window Function Method
First we generate resets with a window function
Then we generate numeric groups
Then we use
chr(65+)
to get the alpha group names you want. Original query at the top..