PostgreSQL – Aggregating Additional Values to Min and Max

Tags: aggregate, datatypes, greatest-n-per-group, postgresql, postgresql-10

This is a follow-up question to:
Make custom aggregate function easier to use (accept more input types without creating variants)

This SQL statement is given:

-- Price statistics for every (location, commodity) pair found in buy.
-- valued_min() / valued_max() are custom aggregates defined outside this
-- snippet; judging by the output aliases they presumably return the
-- gameversion name that accompanies the lowest / highest price
-- (TODO confirm against their definition).
SELECT
    com.name                              AS commodity_name,
    com.category                          AS commodity_category,
    loc.name                              AS location_name,
    CAST(min(rec.price) AS numeric(8, 3)) AS min_price,
    valued_min(rec.price, ver.name)       AS gameversion_of_min_price,
    CAST(max(rec.price) AS numeric(8, 3)) AS max_price,
    valued_max(rec.price, ver.name)       AS gameversion_of_max_price,
    CAST(avg(rec.price) AS numeric(8, 3)) AS avg_price,
    count(rec.price)                      AS price_measure_count
FROM buy AS rec
INNER JOIN location AS loc
    ON loc.id = rec.location_id
INNER JOIN commodity AS com
    ON com.id = rec.commodity_id
INNER JOIN gameversion AS ver
    ON ver.id = rec.gameversion_id
GROUP BY
    loc.name,
    com.name,
    com.category
ORDER BY
    com.name,
    loc.name;

The query uses the custom aggregate functions from the previous question (valued_min(), valued_max()) together with the built-in min(), max(), avg() and count().

Can that be written more efficiently? Maybe with window functions?

Desired result

Sample with first 18 rows

Table definition:

-- One row per commodity; name is unique, category is mandatory free text.
-- The id default draws from commodity_id_seq, which is not created in this
-- snippet and must already exist.
CREATE TABLE public.commodity (
    id       bigint  NOT NULL DEFAULT nextval('commodity_id_seq'::regclass)
                     CONSTRAINT commodity_pkey PRIMARY KEY,
    name     varchar COLLATE pg_catalog."default" NOT NULL
                     CONSTRAINT commodity_name_key UNIQUE,
    category varchar COLLATE pg_catalog."default" NOT NULL
);
-- One row per location; name is unique.
-- The id default draws from location_id_seq, which is not created in this
-- snippet and must already exist.
CREATE TABLE public.location (
    id                 bigint  NOT NULL DEFAULT nextval('location_id_seq'::regclass)
                               CONSTRAINT location_pkey PRIMARY KEY,
    name               varchar COLLATE pg_catalog."default" NOT NULL
                               CONSTRAINT location_name_key UNIQUE,
    -- Nullable self-reference to another row of this table.
    parent_location_id bigint
                               CONSTRAINT location_parent_location_id_fkey
                               REFERENCES public.location (id) MATCH SIMPLE
                               ON UPDATE NO ACTION
                               ON DELETE NO ACTION,
    type               varchar COLLATE pg_catalog."default",
    can_trade          boolean
);
-- One row per game version; name is limited to 20 characters and, unlike
-- commodity.name and location.name, carries no UNIQUE constraint here.
-- The id default draws from gameversion_id_seq, which is not created in this
-- snippet and must already exist.
CREATE TABLE public.gameversion (
    id   bigint      NOT NULL DEFAULT nextval('gameversion_id_seq'::regclass)
                     CONSTRAINT gameversion_pkey PRIMARY KEY,
    name varchar(20) COLLATE pg_catalog."default" NOT NULL
);
-- One row per recorded buy price of a commodity at a location.
-- The id default draws from buy_id_seq, which is not created in this snippet
-- and must already exist.
CREATE TABLE public.buy (
    id             bigint    NOT NULL DEFAULT nextval('buy_id_seq'::regclass)
                             CONSTRAINT buy_pkey PRIMARY KEY,
    location_id    bigint    NOT NULL
                             CONSTRAINT buy_location_id_fkey
                             REFERENCES public.location (id) MATCH SIMPLE
                             ON UPDATE NO ACTION
                             ON DELETE NO ACTION,
    commodity_id   bigint    NOT NULL
                             CONSTRAINT buy_commodity_id_fkey
                             REFERENCES public.commodity (id) MATCH SIMPLE
                             ON UPDATE NO ACTION
                             ON DELETE NO ACTION,
    price          numeric   NOT NULL,
    -- Defaults to the insert time when not supplied.
    scantime       timestamp NOT NULL DEFAULT now(),
    -- NOTE(review): no FOREIGN KEY to gameversion is declared here, unlike
    -- location_id and commodity_id -- confirm whether that is intentional.
    gameversion_id bigint    NOT NULL
);

Desired result (first 18 rows) as plain text:

"Agricium"  "Metal" "ArcCorp Mining Area 141"   "24.280"    "3.1.0-live.738964" "25.720"    "3.2.2-live.846694" "25.000"    "2"
"Agricium"  "Metal" "Grim HEX"  "25.000"    "3.1.0-live.738964" "36.490"    "3.0.0-live.695052" "30.983"    "6"
"Agricium"  "Metal" "Kudre Ore" "24.280"    "3.1.0-live.738964" "24.280"    "3.1.0-live.738964" "24.280"    "1"
"Agricium"  "Metal" "Levski"    "36.715"    "3.0.0-live.695052" "36.730"    "3.0.0-live.695052" "36.719"    "6"
"Agricium"  "Metal" "Port Olisar A" "0.751" "3.2.0-live.796019" "36.299"    "3.0.0-live.695052" "24.450"    "3"
"Agricium"  "Metal" "Port Olisar B" "36.229"    "3.0.0-live.695052" "36.300"    "3.0.0-live.695052" "36.276"    "3"
"Agricium"  "Metal" "Port Olisar C" "35.747"    "3.0.0-live.695052" "36.299"    "3.0.0-live.695052" "36.023"    "2"
"Agricium"  "Metal" "Port Olisar D" "36.299"    "3.0.0-live.695052" "36.300"    "3.0.0-live.695052" "36.300"    "2"
"Agricium"  "Metal" "Tram & Myers Mining"   "27.900"    "3.0.0-live.695052" "27.900"    "3.0.0-live.695052" "27.900"    "1"
"Agricultural Supply"   "Agricultural Supply"   "Hickes Research Outpost"   "0.728" "3.0.0-live.695052" "0.728" "3.0.0-live.695052" "0.728" "1"
"Agricultural Supply"   "Agricultural Supply"   "Levski"    "0.694" "3.1.0-live.738964" "0.722" "3.2.2-live.846694" "0.708" "2"
"Agricultural Supply"   "Agricultural Supply"   "Port Olisar A" "0.750" "3.2.2-live.846694" "2.025" "3.0.0-live.695052" "1.600" "3"
"Agricultural Supply"   "Agricultural Supply"   "Port Olisar B" "0.745" "3.1.0-live.738964" "2.025" "3.0.0-live.695052" "1.705" "4"
"Agricultural Supply"   "Agricultural Supply"   "Port Olisar C" "0.737" "3.1.0-live.738964" "2.025" "3.0.0-live.695052" "1.384" "4"
"Agricultural Supply"   "Agricultural Supply"   "Port Olisar D" "2.025" "3.0.0-live.695052" "2.025" "3.0.0-live.695052" "2.025" "2"
"Aluminum"  "Metal" "ArcCorp Mining Area 157"   "0.874" "3.0.0-live.695052" "0.875" "3.0.0-live.695052" "0.875" "2"
"Aluminum"  "Metal" "Grim HEX"  "1.149" "3.0.0-live.695052" "1.149" "3.0.0-live.695052" "1.149" "3"
"Aluminum"  "Metal" "Levski"    "1.143" "3.0.0-live.695052" "1.176" "3.2.2-live.846694" "1.153" "8"

Best Answer

This should do it, with window functions and without your custom aggregate function:

-- Same report built from window functions only; no custom aggregate needed.
-- The derived table reduces buy to one row per (location_id, commodity_id);
-- the outer query then resolves the ids to names.
SELECT
    com.name     AS commodity_name,
    com.category AS commodity_category,
    loc.name     AS location_name,
    agg.min_price,
    ver_lo.name  AS gameversion_of_min_price,
    agg.max_price,
    ver_hi.name  AS gameversion_of_max_price,
    agg.avg_price,
    agg.price_measure_count
FROM (
    -- The frame spans the whole partition, so every row of a partition
    -- carries identical window results and DISTINCT ON may keep any of them.
    SELECT DISTINCT ON (location_id, commodity_id)
        location_id,
        commodity_id,
        -- Partition is sorted by price ascending: first row = lowest price.
        CAST(first_value(price) OVER by_price AS numeric(8, 3)) AS min_price,
        first_value(gameversion_id) OVER by_price               AS min_price_gameversion_id,
        -- ... and last row = highest price.
        CAST(last_value(price) OVER by_price AS numeric(8, 3))  AS max_price,
        last_value(gameversion_id) OVER by_price                AS max_price_gameversion_id,
        CAST(avg(price) OVER by_price AS numeric(8, 3))         AS avg_price,
        count(price) OVER by_price                              AS price_measure_count
    FROM buy
    WINDOW by_price AS (
        PARTITION BY location_id, commodity_id
        ORDER BY price
        -- Explicit full frame: with the default frame last_value() would stop
        -- at the current row's peers instead of the end of the partition.
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    )
    ORDER BY location_id, commodity_id
) AS agg
INNER JOIN location AS loc
    ON loc.id = agg.location_id
INNER JOIN commodity AS com
    ON com.id = agg.commodity_id
INNER JOIN gameversion AS ver_lo
    ON ver_lo.id = agg.min_price_gameversion_id
INNER JOIN gameversion AS ver_hi
    ON ver_hi.id = agg.max_price_gameversion_id
ORDER BY
    com.name,
    loc.name;

Details might be optimized depending on exact table definitions (NOT NULL, PK, FK constraints etc.) and requirements.

Related answer with more explanation:

Basics for DISTINCT ON:

Potential performance optimization: