SQL Server Window Functions – Using SUM() OVER (PARTITION BY … ORDER BY …)

sql serversumwindow functions

I have following dataset.

enter image description here

green: original data.

yellow: values that I want to calculate.

red: dummy row that is manually added to set up starting point?

PureAsk = MAX(Ask – PreviousBalance, 0)

Balance = SUM(D$2:Dn) – SUM(C$2:Cn)
WHERE n stands for the current row

My server environment is azure data warehouse, which doesn't allow recursive cte.

I am currently using WHILE loop, which is very time consuming and ineffective.

I believe there should be a more efficient way to do this calculation using Window SUM but I got confused how I can get the (previous) Balance for current PureAsk calculation.

The problem is that I have a circular (or cyclic) dependency: how to get row (n – 1)'s balance to calculate row n's PureAsk.

so, this is an architectural problem but I couldn't think of better way to prepare data for this.

IF OBJECT_ID('tempdb..#D') IS NOT NULL DROP TABLE #D;
CREATE TABLE #D (
    WeekOrder       INT
    , Ask           INT
    , Delivery      INT
    , PureAsk       INT
    , Balance       INT
);

INSERT INTO #D VALUES (1, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (2, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (3, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (4, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (5, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (6, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (7, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (8, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (9, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (10, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (11, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (12, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (13, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (14, 0, 0, NULL, NULL);
INSERT INTO #D VALUES (15, 28800, 0, NULL, NULL);
INSERT INTO #D VALUES (16, 20815, 11185, NULL, NULL);
INSERT INTO #D VALUES (17, 41600, 0, NULL, NULL);
INSERT INTO #D VALUES (18, 46000, 0, NULL, NULL);
INSERT INTO #D VALUES (19, 39350, 0, NULL, NULL);
INSERT INTO #D VALUES (20, 45750, 2000, NULL, NULL);
INSERT INTO #D VALUES (21, 47750, 3850, NULL, NULL);
INSERT INTO #D VALUES (22, 41800, 3000, NULL, NULL);
INSERT INTO #D VALUES (23, 48000, 3000, NULL, NULL);
INSERT INTO #D VALUES (24, 37175, 9000, NULL, NULL);
INSERT INTO #D VALUES (25, 46175, 9000, NULL, NULL);
INSERT INTO #D VALUES (26, 47825, 6400, NULL, NULL);
INSERT INTO #D VALUES (27, 42225, 13725, NULL, NULL);
INSERT INTO #D VALUES (28, 42800, 12000, NULL, NULL);
INSERT INTO #D VALUES (29, 49200, 0, NULL, NULL);

-- now I insert the dummy week order = 0 for starting point.
INSERT INTO #D VALUES (0, 0, 0, 0, 0);

SELECT C.WeekOrder
        , C.Ask
        , C.Delivery
        , PureAsk = SUM(CASE WHEN C.Ask - P.Balance < 0 THEN 0 ELSE C.Ask - P.Balance END) OVER (PARTITION BY NULL ORDER BY C.WeekOrder ASC)
        , Balance = SUM(C.PureAsk) OVER (PARTITION BY NULL ORDER BY C.WeekOrder ASC)
                    - SUM(C.Delivery) OVER (PARTITION BY NULL ORDER BY C.WeekOrder ASC)
    FROM #D AS C            -- C for current
    INNER JOIN #D AS P      -- P for previous
        ON C.WeekOrder = P.WeekOrder + 1
    WHERE C.WeekOrder >= 1
    ORDER BY 1
;

below is how I do this using WHILE loop, which takes pretty long time, and eventually, I need to implement this in CTE, so that I can't use WHILE loop in actual implementation.

-- pure ask calculation
DECLARE @WeekOrder AS INT       = 1;
WHILE @WeekOrder <= (SELECT MAX(WeekOrder) FROM #D) 
BEGIN
    -- #PAU, pure ask update
    IF OBJECT_ID('tempdb..#PAU') IS NOT NULL DROP TABLE #PAU;
    SELECT C.WeekOrder
            , PureAsk = CASE WHEN C.Ask - P.Balance > 0 THEN C.Ask - P.Balance ELSE 0 END
        INTO #PAU
        FROM #D AS C
        INNER JOIN #D AS P
            ON C.WeekOrder = P.WeekOrder + 1
        WHERE C.WeekOrder = @WeekOrder
    ;

    -- update pure ask
    UPDATE #D
        SET #D.PureAsk = U.PureAsk
        FROM #PAU AS U
        WHERE #D.WeekOrder = U.WeekOrder
    ;

    -- #BU, balance update
    IF OBJECT_ID('tempdb..#BU') IS NOT NULL DROP TABLE #BU;
    SELECT WeekOrder = @WeekOrder
            , Balance = CASE WHEN SUM(PureAsk) - SUM(Delivery) > 0 
THEN SUM(PureAsk) - SUM(Delivery) ELSE 0 END
        INTO #BU
        FROM #D
        WHERE WeekOrder <= @WeekOrder
    ;

    -- update balance
    UPDATE #D
        SET #D.Balance = U.Balance
        FROM #BU AS U
        WHERE #D.WeekOrder = U.WeekOrder
    ;

    SET @WeekOrder = @WeekOrder + 1
END

Best Answer

The script below works on your test data and I think I have managed to capture the logic in a non-recursive way, though you will absolutely want to test this on a larger dataset. I don't think performance will be the best, but it should be better than an looping through each row in the table:

with d as
(
select d.*
      ,Ask - Delivery as Diff

      ,case when Ask < lag(Ask,1,0) over (order by WeekOrder)
            then 0
            else (Ask - lag(Ask,1,0) over (order by WeekOrder))
            end
        - Delivery as AskRunDiff

      ,mw.mw
from @d as d
    outer apply(select max(WeekOrder) as mw from @d as d2 where d.WeekOrder > d2.WeekOrder and d.Ask < d2.Ask) as mw
)
,b as
(
select d.*
    ,case when d.AskRunDiff <= 0
        then d2.Diff - d.Delivery
        else
            case when d.Diff >= lag(d.Diff) over (order by d.WeekOrder)
                then case when d.Ask >= lag(d.Ask) over (order by d.WeekOrder)
                            and d.AskRunDiff > 0
                            and d.mw is not null
                        then d2.Diff - d.Delivery
                        else d.Diff
                        end
                else lag(d.Diff,1,0) over (order by d.WeekOrder) - d.Delivery
                end
        end as b1

    ,case when d.Ask >= lag(d.Ask) over (order by d.WeekOrder) and d.AskRunDiff > 0 and d.mw is not null
        then d2.Diff - d.Delivery
        else d.Diff
        end as b2
from d
    left join d as d2
        on d.mw = d2.WeekOrder
)
select WeekOrder
    ,Ask
    ,Delivery
    ,case when Ask - lag(case when b1 > b2 then b1 else b2 end,1,0) over (order by WeekOrder) > 0
        then Ask - lag(case when b1 > b2 then b1 else b2 end,1,0) over (order by WeekOrder)
        else 0
        end as PureAsk
    ,case when b1 > b2 then b1 else b2 end as Balance
from b;

Output:

+-----------+-------+----------+---------+---------+
| WeekOrder |  Ask  | Delivery | PureAsk | Balance |
+-----------+-------+----------+---------+---------+
|         1 |     0 |        0 |       0 |       0 |
|         2 |     0 |        0 |       0 |       0 |
|         3 |     0 |        0 |       0 |       0 |
|         4 |     0 |        0 |       0 |       0 |
|         5 |     0 |        0 |       0 |       0 |
|         6 |     0 |        0 |       0 |       0 |
|         7 |     0 |        0 |       0 |       0 |
|         8 |     0 |        0 |       0 |       0 |
|         9 |     0 |        0 |       0 |       0 |
|        10 |     0 |        0 |       0 |       0 |
|        11 |     0 |        0 |       0 |       0 |
|        12 |     0 |        0 |       0 |       0 |
|        13 |     0 |        0 |       0 |       0 |
|        14 |     0 |        0 |       0 |       0 |
|        15 | 28800 |        0 |   28800 |   28800 |
|        16 | 20815 |    11185 |       0 |   17615 |
|        17 | 41600 |        0 |   23985 |   41600 |
|        18 | 46000 |        0 |    4400 |   46000 |
|        19 | 39350 |        0 |       0 |   46000 |
|        20 | 45750 |     2000 |       0 |   44000 |
|        21 | 47750 |     3850 |    3750 |   43900 |
|        22 | 41800 |     3000 |       0 |   40900 |
|        23 | 48000 |     3000 |    7100 |   45000 |
|        24 | 37175 |     9000 |       0 |   36000 |
|        25 | 46175 |     9000 |   10175 |   37175 |
|        26 | 47825 |     6400 |   10650 |   41425 |
|        27 | 42225 |    13725 |     800 |   28500 |
|        28 | 42800 |    12000 |   14300 |   30800 |
|        29 | 49200 |        0 |   18400 |   49200 |
+-----------+-------+----------+---------+---------+