Postgresql – GROUP BY very slow when returning many rows

group byperformancepostgresqlquery-performance

I have a query to aggregate data per day. When I run the query for a single day it is very fast. However, when I run the query for multiple days the time cost increases much faster than O(number_of_days).

zabbix=> \copy (
select 
  h.hostid, 
  sum(value), 
  extract(day from to_timestamp(clock)), 
  extract(month from to_timestamp(clock)), 
  2015, 
  'DailyExemptUsage'
from 
  hosts h, 
  items i, 
  history_uint hu
where 
 h.hostid = i.hostid
 and i.itemid = hu.itemid
 and i.name like 'Air%bound%Tot%'
 and clock between 1430463600  and 1432969200 
 and extract(hour from to_timestamp(clock)) between 1 and 5
 and h.name like '172.xxx.%'
group by 
 h.hostid, 
 extract(day from to_timestamp(clock)), 
 extract(month from to_timestamp(clock))
) 
to '/home/me/my_file' with csv;

Because the time required grows rapidly with the number of days for which the query is run, I could get the same data much faster with a stored procedure that looped over a month and ran the query once for each day.

Q: Is there any way I could increase the performance without using a stored procedure?

EDIT: Wed May 27 14:54:13 PDT 2015

  • history_uint contains ~500 million rows
  • items contains ~50,000 rows
  • hosts contains < 10,000 rows

Table schema:

HOSTS:

                                 Table "public.hosts"
       Column       |          Type          |               Modifiers                
--------------------+------------------------+----------------------------------------
 hostid             | bigint                 | not null
 proxy_hostid       | bigint                 | 
 host               | character varying(128) | not null default ''::character varying
 status             | integer                | not null default 0
 disable_until      | integer                | not null default 0
 error              | character varying(128) | not null default ''::character varying
 available          | integer                | not null default 0
 errors_from        | integer                | not null default 0
 lastaccess         | integer                | not null default 0
 ipmi_authtype      | integer                | not null default 0
 ipmi_privilege     | integer                | not null default 2
 ipmi_username      | character varying(16)  | not null default ''::character varying
 ipmi_password      | character varying(20)  | not null default ''::character varying
 ipmi_disable_until | integer                | not null default 0
 ipmi_available     | integer                | not null default 0
 snmp_disable_until | integer                | not null default 0
 snmp_available     | integer                | not null default 0
 maintenanceid      | bigint                 | 
 maintenance_status | integer                | not null default 0
 maintenance_type   | integer                | not null default 0
 maintenance_from   | integer                | not null default 0
 ipmi_errors_from   | integer                | not null default 0
 snmp_errors_from   | integer                | not null default 0
 ipmi_error         | character varying(128) | not null default ''::character varying
 snmp_error         | character varying(128) | not null default ''::character varying
 jmx_disable_until  | integer                | not null default 0
 jmx_available      | integer                | not null default 0
 jmx_errors_from    | integer                | not null default 0
 jmx_error          | character varying(128) | not null default ''::character varying
 name               | character varying(128) | not null default ''::character varying
 flags              | integer                | not null default 0
 templateid         | bigint                 | 
 description        | text                   | not null default ''::text
Indexes:
    "hosts_pkey" PRIMARY KEY, btree (hostid)
    "hosts_1" btree (host)
    "hosts_2" btree (status)
    "hosts_3" btree (proxy_hostid)
    "hosts_4" btree (name)
    "hosts_5" btree (maintenanceid)
Foreign-key constraints:
    "c_hosts_1" FOREIGN KEY (proxy_hostid) REFERENCES hosts(hostid)
    "c_hosts_2" FOREIGN KEY (maintenanceid) REFERENCES maintenances(maintenanceid)
    "c_hosts_3" FOREIGN KEY (templateid) REFERENCES hosts(hostid) ON DELETE CASCADE
Referenced by:
    TABLE "applications" CONSTRAINT "c_applications_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "autoreg_host" CONSTRAINT "c_autoreg_host_1" FOREIGN KEY (proxy_hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "drules" CONSTRAINT "c_drules_1" FOREIGN KEY (proxy_hostid) REFERENCES hosts(hostid)
    TABLE "group_prototype" CONSTRAINT "c_group_prototype_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "host_discovery" CONSTRAINT "c_host_discovery_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "host_discovery" CONSTRAINT "c_host_discovery_2" FOREIGN KEY (parent_hostid) REFERENCES hosts(hostid)
    TABLE "host_inventory" CONSTRAINT "c_host_inventory_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "hostmacro" CONSTRAINT "c_hostmacro_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "hosts" CONSTRAINT "c_hosts_1" FOREIGN KEY (proxy_hostid) REFERENCES hosts(hostid)
    TABLE "hosts" CONSTRAINT "c_hosts_3" FOREIGN KEY (templateid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "hosts_groups" CONSTRAINT "c_hosts_groups_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "hosts_templates" CONSTRAINT "c_hosts_templates_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "hosts_templates" CONSTRAINT "c_hosts_templates_2" FOREIGN KEY (templateid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "httptest" CONSTRAINT "c_httptest_2" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "interface" CONSTRAINT "c_interface_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "items" CONSTRAINT "c_items_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "maintenances_hosts" CONSTRAINT "c_maintenances_hosts_2" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    TABLE "opcommand_hst" CONSTRAINT "c_opcommand_hst_2" FOREIGN KEY (hostid) REFERENCES hosts(hostid)
    TABLE "optemplate" CONSTRAINT "c_optemplate_2" FOREIGN KEY (templateid) REFERENCES hosts(hostid)
    TABLE "screens" CONSTRAINT "c_screens_1" FOREIGN KEY (templateid) REFERENCES hosts(hostid) ON DELETE CASCADE

ITEMS:

                                    Table "public.items"
        Column         |          Type           |                Modifiers                 
-----------------------+-------------------------+------------------------------------------
 itemid                | bigint                  | not null
 type                  | integer                 | not null default 0
 snmp_community        | character varying(64)   | not null default ''::character varying
 snmp_oid              | character varying(255)  | not null default ''::character varying
 hostid                | bigint                  | not null
 name                  | character varying(255)  | not null default ''::character varying
 key_                  | character varying(255)  | not null default ''::character varying
 delay                 | integer                 | not null default 0
 history               | integer                 | not null default 90
 trends                | integer                 | not null default 365
 status                | integer                 | not null default 0
 value_type            | integer                 | not null default 0
 trapper_hosts         | character varying(255)  | not null default ''::character varying
 units                 | character varying(255)  | not null default ''::character varying
 multiplier            | integer                 | not null default 0
 delta                 | integer                 | not null default 0
 snmpv3_securityname   | character varying(64)   | not null default ''::character varying
 snmpv3_securitylevel  | integer                 | not null default 0
 snmpv3_authpassphrase | character varying(64)   | not null default ''::character varying
 snmpv3_privpassphrase | character varying(64)   | not null default ''::character varying
 formula               | character varying(255)  | not null default ''::character varying
 error                 | character varying(2048) | not null default ''::character varying
 lastlogsize           | numeric(20,0)           | not null default (0)::numeric
 logtimefmt            | character varying(64)   | not null default ''::character varying
 templateid            | bigint                  | 
 valuemapid            | bigint                  | 
 delay_flex            | character varying(255)  | not null default ''::character varying
 params                | text                    | not null default ''::text
 ipmi_sensor           | character varying(128)  | not null default ''::character varying
 data_type             | integer                 | not null default 0
 authtype              | integer                 | not null default 0
 username              | character varying(64)   | not null default ''::character varying
 password              | character varying(64)   | not null default ''::character varying
 publickey             | character varying(64)   | not null default ''::character varying
 publickey             | character varying(64)   | not null default ''::character varying
 privatekey            | character varying(64)   | not null default ''::character varying
 mtime                 | integer                 | not null default 0
 flags                 | integer                 | not null default 0
 interfaceid           | bigint                  | 
 port                  | character varying(64)   | not null default ''::character varying
 description           | text                    | not null default ''::text
 inventory_link        | integer                 | not null default 0
 lifetime              | character varying(64)   | not null default '30'::character varying
 snmpv3_authprotocol   | integer                 | not null default 0
 snmpv3_privprotocol   | integer                 | not null default 0
 state                 | integer                 | not null default 0
 snmpv3_contextname    | character varying(255)  | not null default ''::character varying
 evaltype              | integer                 | not null default 0
Indexes:
    "items_pkey" PRIMARY KEY, btree (itemid)
    "items_1" UNIQUE, btree (hostid, key_)
    "items_3" btree (status)
    "items_4" btree (templateid)
    "items_5" btree (valuemapid)
    "items_6" btree (interfaceid)
Foreign-key constraints:
    "c_items_1" FOREIGN KEY (hostid) REFERENCES hosts(hostid) ON DELETE CASCADE
    "c_items_2" FOREIGN KEY (templateid) REFERENCES items(itemid) ON DELETE CASCADE
    "c_items_3" FOREIGN KEY (valuemapid) REFERENCES valuemaps(valuemapid)
    "c_items_4" FOREIGN KEY (interfaceid) REFERENCES interface(interfaceid)
Referenced by:
    TABLE "functions" CONSTRAINT "c_functions_1" FOREIGN KEY (itemid) REFERENCES items(itemid) ON DELETE CASCADE
    TABLE "graphs" CONSTRAINT "c_graphs_2" FOREIGN KEY (ymin_itemid) REFERENCES items(itemid)
    TABLE "graphs" CONSTRAINT "c_graphs_3" FOREIGN KEY (ymax_itemid) REFERENCES items(itemid)
    TABLE "graphs_items" CONSTRAINT "c_graphs_items_2" FOREIGN KEY (itemid) REFERENCES items(itemid) ON DELETE CASCADE
    TABLE "host_discovery" CONSTRAINT "c_host_discovery_3" FOREIGN KEY (parent_itemid) REFERENCES items(itemid)
    TABLE "httpstepitem" CONSTRAINT "c_httpstepitem_2" FOREIGN KEY (itemid) REFERENCES items(itemid) ON DELETE CASCADE
    TABLE "httptestitem" CONSTRAINT "c_httptestitem_2" FOREIGN KEY (itemid) REFERENCES items(itemid) ON DELETE CASCADE
    TABLE "item_condition" CONSTRAINT "c_item_condition_1" FOREIGN KEY (itemid) REFERENCES items(itemid) ON DELETE CASCADE
    TABLE "item_discovery" CONSTRAINT "c_item_discovery_1" FOREIGN KEY (itemid) REFERENCES items(itemid) ON DELETE CASCADE
    TABLE "item_discovery" CONSTRAINT "c_item_discovery_2" FOREIGN KEY (parent_itemid) REFERENCES items(itemid) ON DELETE CASCADE
    TABLE "items" CONSTRAINT "c_items_2" FOREIGN KEY (templateid) REFERENCES items(itemid) ON DELETE CASCADE
    TABLE "items_applications" CONSTRAINT "c_items_applications_2" FOREIGN KEY (itemid) REFERENCES items(itemid) ON DELETE CASCADE

HISTORY_UINT:

              Table "public.history_uint"
 Column |     Type      |           Modifiers           
--------+---------------+-------------------------------
 itemid | bigint        | not null
 clock  | integer       | not null default 0
 value  | numeric(20,0) | not null default (0)::numeric
 ns     | integer       | not null default 0
Indexes:
    "history_uint_1" btree (itemid, clock)
Triggers:
    partition_trg BEFORE INSERT ON history_uint FOR EACH ROW EXECUTE PROCEDURE trg_partition('day')
Number of child tables: 18 (Use \d+ to list them.)

Best Answer

Off the top of my head you can reduce the number of cycles by using a query wrapper and grouping on that. For example:

zabbix=> \copy (
select hostid,
    value,
    extractDay,
    extractMonth,
    intYear,
    DailyUsage
from
(
select 
    h.hostid, 
    sum(value) as value, 
    extract(day from to_timestamp(clock)) as extractDay, 
    extract(month from to_timestamp(clock)) as extractMonth, 
    2015 as intYear, 
   'DailyExemptUsage' as DailyUsage
from 
   hosts h, 
   items i, 
   history_uint hu
where 
   h.hostid = i.hostid
   and i.itemid = hu.itemid
   and i.name like 'Air%bound%Tot%'
   and clock between 1430463600  and 1432969200 
   and extract(hour from to_timestamp(clock)) between 1 and 5
   and h.name like '172.xxx.%'
) as source
group by 
   hostid, 
   extractDay,
   extractMonth
) 
to '/home/me/my_file' with csv;