To testing I created small sample table:
INSERT INTO daily_data (
company_id
,trade_date
,daily_val
,wh_calc_id
)
SELECT
(random() * 10)::int,
'2010-01-01'::timestamp + (i::text || ' day')::interval,
random() * 100,
(random() * 20) + 230
FROM
generate_series(1, 200000) AS i
;
Original query:
EXPLAIN ANALYZE
with dd2 as (select * from daily_data where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, ct, ct_lt
, round(CASE WHEN x.ct = 0 THEN numeric '1'
ELSE x.ct_lt / x.ct END, 6) AS pctl_calc
FROM dd2 d, LATERAL (
SELECT count(daily_val) AS ct
, count(daily_val < d.daily_val OR NULL)::numeric As ct_lt
FROM dd2
WHERE company_id = d.company_id
-- and company_id < 8
AND trade_date < d.trade_date
and wh_calc_id = d.wh_calc_id
-- and wh_calc_id = 241
) x
ORDER BY company_id, trade_date;
Subquery version:
with dd2 as (
select company_id, trade_date
, (SELECT count(daily_val) FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct
, (SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
from daily_data AS d where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, round(CASE WHEN d.ct = 0 THEN numeric '1'
ELSE d.ct_lt / d.ct END, 6) AS pctl_calc
FROM dd2 d
ORDER BY company_id, trade_date;
Partial window + partial subquery version:#
EXPLAIN ANALYZE
with dd2 as (select *,
count(daily_val) OVER (PARTITION BY company_id, wh_calc_id ORDER BY trade_date) - 1 AS ct,
(SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
from daily_data AS d where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, round(CASE WHEN ct = 0 THEN numeric '1'
ELSE ct_lt / ct END, 6) AS pctl_calc
FROM dd2 d
ORDER BY company_id, trade_date;
Pure window version (but some tricky):
EXPLAIN ANALYZE
with dd2 as (select d.company_id, d.trade_date, d.daily_val,
count(daily_val) OVER w - 1 AS ct,
array_agg(daily_val) OVER w AS ct_lt2
from daily_data AS d
where wh_calc_id = 241 AND company_id < 8
WINDOW w AS (PARTITION BY company_id, wh_calc_id ORDER BY trade_date)
) , dd1 AS (
SELECT d.*
, (SELECT count(*) FILTER (WHERE u < d.daily_val) FROM unnest(ct_lt2) AS u) AS ct_lt
FROM dd2 d
) SELECT *
, round(CASE WHEN ct = 0 THEN numeric '1'
ELSE ct_lt / ct END, 6) AS pctl_calc
FROM dd1 AS d
ORDER BY company_id, trade_date;
Original query:
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=2653.28..2653.84 rows=226 width=132) (actual time=8399.345..8399.886 rows=7540 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 1253kB
CTE dd2
-> Bitmap Heap Scan on daily_data (cost=21.55..1224.03 rows=226 width=60) (actual time=1.026..4.794 rows=7540 loops=1)
Recheck Cond: (wh_calc_id = 241)
Filter: (company_id < 8)
Rows Removed by Filter: 2526
Heap Blocks: exact=1470
-> Bitmap Index Scan on daily_data_wh_calc_id_idx (cost=0.00..21.50 rows=677 width=0) (actual time=0.848..0.848 rows=10066 loops=1)
Index Cond: (wh_calc_id = 241)
-> Nested Loop (cost=6.22..1420.41 rows=226 width=132) (actual time=6.196..8391.961 rows=7540 loops=1)
-> CTE Scan on dd2 d (cost=0.00..4.52 rows=226 width=60) (actual time=1.029..1.933 rows=7540 loops=1)
-> Aggregate (cost=6.22..6.24 rows=1 width=40) (actual time=1.111..1.111 rows=1 loops=7540)
-> CTE Scan on dd2 (cost=0.00..6.22 rows=1 width=32) (actual time=0.003..0.995 rows=484 loops=7540)
Filter: ((trade_date < d.trade_date) AND (company_id = d.company_id) AND (wh_calc_id = d.wh_calc_id))
Rows Removed by Filter: 7056
Planning time: 0.345 ms
Execution time: 8400.233 ms
(19 rows)
Subquery version:
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=9949.69..9950.25 rows=226 width=80) (actual time=10558.505..10559.017 rows=7540 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 782kB
CTE dd2
-> Bitmap Heap Scan on daily_data d_1 (cost=21.55..9934.07 rows=226 width=48) (actual time=1.138..10538.523 rows=7540 loops=1)
Recheck Cond: (wh_calc_id = 241)
Filter: (company_id < 8)
Rows Removed by Filter: 2526
Heap Blocks: exact=1470
-> Bitmap Index Scan on daily_data_wh_calc_id_idx (cost=0.00..21.50 rows=677 width=0) (actual time=0.945..0.945 rows=10066 loops=1)
Index Cond: (wh_calc_id = 241)
SubPlan 1
-> Aggregate (cost=19.26..19.27 rows=1 width=8) (actual time=0.722..0.722 rows=1 loops=7540)
-> Index Scan using daily_data_company_id_trade_date_wh_calc_id_key on daily_data (cost=0.42..19.26 rows=1 width=32) (actual time=0.007..0.671 rows=484 loops=7540)
Index Cond: ((company_id = d_1.company_id) AND (trade_date < d_1.trade_date) AND (wh_calc_id = d_1.wh_calc_id))
SubPlan 2
-> Aggregate (cost=19.26..19.27 rows=1 width=32) (actual time=0.672..0.672 rows=1 loops=7540)
-> Index Scan using daily_data_company_id_trade_date_wh_calc_id_key on daily_data daily_data_1 (cost=0.42..19.26 rows=1 width=32) (actual time=0.006..0.551 rows=484 loops=7540)
Index Cond: ((company_id = d_1.company_id) AND (trade_date < d_1.trade_date) AND (wh_calc_id = d_1.wh_calc_id))
-> CTE Scan on dd2 d (cost=0.00..6.78 rows=226 width=80) (actual time=1.142..10552.746 rows=7540 loops=1)
Planning time: 0.248 ms
Execution time: 10559.387 ms
(22 rows)
Partial window + partial subquery version:
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=5609.72..5610.28 rows=226 width=132) (actual time=4931.963..4932.433 rows=7540 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 1253kB
CTE dd2
-> WindowAgg (cost=1232.87..5594.10 rows=226 width=100) (actual time=9.498..4914.892 rows=7540 loops=1)
-> Sort (cost=1232.87..1233.43 rows=226 width=60) (actual time=9.468..11.291 rows=7540 loops=1)
Sort Key: d_1.company_id, d_1.trade_date
Sort Method: quicksort Memory: 782kB
-> Bitmap Heap Scan on daily_data d_1 (cost=21.55..1224.03 rows=226 width=60) (actual time=1.002..5.251 rows=7540 loops=1)
Recheck Cond: (wh_calc_id = 241)
Filter: (company_id < 8)
Rows Removed by Filter: 2526
Heap Blocks: exact=1470
-> Bitmap Index Scan on daily_data_wh_calc_id_idx (cost=0.00..21.50 rows=677 width=0) (actual time=0.814..0.814 rows=10066 loops=1)
Index Cond: (wh_calc_id = 241)
SubPlan 1
-> Aggregate (cost=19.26..19.27 rows=1 width=32) (actual time=0.649..0.649 rows=1 loops=7540)
-> Index Scan using daily_data_company_id_trade_date_wh_calc_id_key on daily_data (cost=0.42..19.26 rows=1 width=32) (actual time=0.005..0.529 rows=484 loops=7540)
Index Cond: ((company_id = d_1.company_id) AND (trade_date < d_1.trade_date) AND (wh_calc_id = d_1.wh_calc_id))
-> CTE Scan on dd2 d (cost=0.00..6.78 rows=226 width=132) (actual time=9.504..4928.264 rows=7540 loops=1)
Planning time: 0.223 ms
Execution time: 4932.854 ms
(22 rows)
Pure window version (but some tricky):
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=1601.04..1601.61 rows=226 width=120) (actual time=2006.537..2018.331 rows=7540 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 63526kB
CTE dd2
-> WindowAgg (cost=1232.87..1239.08 rows=226 width=84) (actual time=8.298..86.164 rows=7540 loops=1)
-> Sort (cost=1232.87..1233.43 rows=226 width=44) (actual time=8.285..9.752 rows=7540 loops=1)
Sort Key: d_1.company_id, d_1.trade_date
Sort Method: quicksort Memory: 782kB
-> Bitmap Heap Scan on daily_data d_1 (cost=21.55..1224.03 rows=226 width=44) (actual time=0.935..5.078 rows=7540 loops=1)
Recheck Cond: (wh_calc_id = 241)
Filter: (company_id < 8)
Rows Removed by Filter: 2526
Heap Blocks: exact=1470
-> Bitmap Index Scan on daily_data_wh_calc_id_idx (cost=0.00..21.50 rows=677 width=0) (actual time=0.722..0.722 rows=10066 loops=1)
Index Cond: (wh_calc_id = 241)
CTE dd1
-> CTE Scan on dd2 d_2 (cost=0.00..346.34 rows=226 width=88) (actual time=8.315..1573.920 rows=7540 loops=1)
SubPlan 2
-> Aggregate (cost=1.50..1.51 rows=1 width=8) (actual time=0.155..0.156 rows=1 loops=7540)
-> Function Scan on unnest u (cost=0.00..1.00 rows=100 width=32) (actual time=0.034..0.058 rows=485 loops=7540)
-> CTE Scan on dd1 d (cost=0.00..6.78 rows=226 width=120) (actual time=8.321..1774.450 rows=7540 loops=1)
Planning time: 0.177 ms
Execution time: 2443.855 ms
(23 rows)
Conclusion
Windows version is the best. I found no easy way to convert ct_lt
calculations for it. As a workaround I used the aggregation of values in the array and count ct_lt
from it. Probably, it can create some penalty for large amounts of data. But even with that, this version will be better than others.
UPD:
Adapting for 9.4:
EXPLAIN ANALYZE
with dd2 as (select d.company_id, d.trade_date, d.daily_val,
count(daily_val) OVER w - 1 AS ct,
array_agg(daily_val) OVER w AS ct_lt2
from daily_data AS d
where wh_calc_id = 241 AND company_id < 8
WINDOW w AS (PARTITION BY company_id, wh_calc_id ORDER BY trade_date)
) , dd1 AS (
SELECT d.*
, (SELECT count(*) FROM unnest(ct_lt2) AS u WHERE u < d.daily_val) AS ct_lt
FROM dd2 d
) SELECT *
, round(CASE WHEN ct = 0 THEN numeric '1'
ELSE ct_lt::numeric / ct END, 6) AS pctl_calc
FROM dd1 AS d
ORDER BY company_id, trade_date;
Index:
CREATE INDEX daily_data_i1_idx
ON daily_data
(wh_calc_id, company_id, trade_date);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=1311.98..1312.82 rows=333 width=120) (actual time=1597.558..1613.481 rows=7407 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 61659kB
CTE dd2
-> WindowAgg (cost=824.34..833.50 rows=333 width=84) (actual time=10.416..87.668 rows=7407 loops=1)
-> Sort (cost=824.34..825.17 rows=333 width=44) (actual time=10.381..11.851 rows=7407 loops=1)
Sort Key: d_1.company_id, d_1.trade_date
Sort Method: quicksort Memory: 771kB
-> Bitmap Heap Scan on daily_data d_1 (cost=11.83..810.39 rows=333 width=44) (actual time=2.934..6.548 rows=7407 loops=1)
Recheck Cond: ((wh_calc_id = 241) AND (company_id < 8))
Heap Blocks: exact=1460
-> Bitmap Index Scan on daily_data_i1_idx (cost=0.00..11.75 rows=333 width=0) (actual time=2.661..2.661 rows=7407 loops=1)
Index Cond: ((wh_calc_id = 241) AND (company_id < 8))
CTE dd1
-> CTE Scan on dd2 d_2 (cost=0.00..454.55 rows=333 width=88) (actual time=10.442..1320.450 rows=7407 loops=1)
SubPlan 2
-> Aggregate (cost=1.33..1.34 rows=1 width=8) (actual time=0.143..0.143 rows=1 loops=7407)
-> Function Scan on unnest u (cost=0.00..1.25 rows=33 width=0) (actual time=0.036..0.124 rows=234 loops=7407)
Filter: (u < d_2.daily_val)
Rows Removed by Filter: 243
-> CTE Scan on dd1 d (cost=0.00..9.99 rows=333 width=120) (actual time=10.450..1468.533 rows=7407 loops=1)
Planning time: 0.471 ms
Execution time: 1960.835 ms
Best Answer
Use the
-e
flag when you start psql.For example:
Or, in case of using a script, try the
-a
flag.Details in the manual.