PostgreSQL partition hierarchy design for centralized logging

partitioningpostgresql

I am looking into setting up logging from all of my servers into a centralized postgresql database. It makes sense to be able to drop logs by date or by host, so I'd like to set it up with partitioning, but a multi-layer partition:host-service inherits service, host-service-yyyymm inherits host-service.

I've worked out an example scheme for a specific service (php errors in this case) and am looking for some PostgreSQL experts to critique it for obvious performance bottlenecks.

// SET UP MASTER PHP LOG TABLE //
CREATE TABLE php (
 log_id     int not null,
 host       char(5),    
 logdate     date not null,
 message        text
);

// SET UP HOST-SPECIFIC 'PARTITIONS' //
CREATE TABLE host1_php (
 CHECK ( host = 'host1' )
) INHERITS (php);

CREATE TABLE host2_php (
 CHECK ( host = 'host2' )
) INHERITS (php);

// SET UP HOST-SPECIFIC TIME 'PARTITIONS' //
CREATE TABLE host1_php_2011m12 (
 CHECK ( logdate >= DATE '2011-12-01' AND logdate < DATE '2012-01-01' )
) INHERITS (host1_php);

CREATE TABLE host1_php_2012m01 (
 CHECK ( logdate >= DATE '2012-01-01' AND logdate < DATE '2012-02-01' )
) INHERITS (host1_php);

CREATE TABLE host2_php_2011m12 (
 CHECK ( logdate >= DATE '2011-12-01' AND logdate < DATE '2012-01-01' )
) INHERITS (host2_php);

CREATE TABLE host2_php_2012m01 (
 CHECK ( logdate >= DATE '2012-01-01' AND logdate < DATE '2012-02-01' )
) INHERITS (host2_php);

CREATE INDEX host1_php_2011m12_logdate ON host1_php_2011m12 (logdate);
CREATE INDEX host1_php_2012m01_logdate ON host1_php_2012m01 (logdate);
CREATE INDEX host2_php_2011m12_logdate ON host2_php_2011m12 (logdate);
CREATE INDEX host2_php_2012m01_logdate ON host2_php_2012m01 (logdate);

I'll also be adding services such as apache access/errors.

I think I have to trigger to insert into the host_service (to utilize the host check constraint), and then each host_service must trigger to insert into the host_service_yyyymm table.

What can I expect performance-wise of such a trigger/partition scheme?

Some extra info discussed in chat:

  • PostgreSQL version 9.1.2
  • Queries on data won't be very often, and mostly on the current month of data for multiple hosts.
  • PHP is only about 1 per minute insert, but total apache is probably going to be around 300-500 per second across all hosts.

Best Answer

What can I expect performance-wise of such a trigger/partition scheme?

Context switches mean using triggers is always going to use a lot more CPU than a simple insert. The script below can be used to quantify how much impact that will have - and also demonstrates auto-creating partitions using triggers and compares the performance either way.

Please note I haven't included any indexing, or any consideration of update statements.

begin; 
set role dba;
create role stack;
grant stack to dba;
create schema authorization stack;
set role stack;
--
--******** the above creates a nice clean schema as a test area
--
set client_min_messages to warning; --******** or you get a lot of "NOTICE:  merging column "xyz" with inherited definition" notices
--
create table phpheap(log_id serial not null, host text not null, logdate date not null, message text not null); --******** This table is used to compare 'insert' performance with that on the partitioned version
create table php(log_id serial not null, host text not null, logdate date not null, message text not null);
--
create function php_host_insert() returns trigger language plpgsql security definer as $$
begin 
  set search_path to 'stack';
  execute 'insert into php_'||new.host||'_'||to_char(new.logdate, 'YYYYmMM')||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
  return null;
exception when undefined_table then
  execute 'create table php_'||new.host||'_'||to_char(new.logdate, 'YYYYmMM')||'(log_id int not null, host text not null check(host='''||new.host||'''), logdate date not null check(to_char(logdate, ''YYYYmMM'')='''||to_char(new.logdate, 'YYYYmMM')||'''), message text not null) inherits (php_'||new.host||')';
  execute 'insert into php_'||new.host||'_'||to_char(new.logdate, 'YYYYmMM')||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
  return null;
end;$$;
--
create function php_insert() returns trigger language plpgsql security definer as $$
begin 
  set search_path to 'stack';
  execute 'insert into php_'||new.host||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
  return null; 
exception when undefined_table then
  execute 'create table php_'||new.host||'(log_id int not null, host text not null check(host='''||new.host||'''), logdate date not null, message text not null) inherits(php)';
  execute 'create trigger trig_insert_php_'||new.host||' before insert on php_'||new.host||' for each row execute procedure php_host_insert()';
  execute 'insert into php_'||new.host||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
  return null;
end;$$;
--
create trigger trig_insert_php before insert on php for each row execute procedure php_insert();
--
\timing on
insert into phpheap(host, logdate, message) select 'host1', current_date-(generate_series(-99999, 0, 1)/1000)::integer, repeat('hello',20);
--******** output
--INSERT 0 100000
--Time: 1102.140 ms
insert into php(host, logdate, message) select 'host1', current_date-(generate_series(-99999, 0, 1)/1000)::integer, repeat('hello',20);
--******** output
--INSERT 0 0
--Time: 35615.498 ms
insert into php(host, logdate, message) select 'host1', current_date-(generate_series(-99999, 0, 1)/1000)::integer, repeat('hello',20);
--******** output
--INSERT 0 0
--Time: 34074.579 ms
\timing off
--
--******** Now we replace the trigger functions with the 'normal' kind that don't auto-create partitions
--
create or replace function php_host_insert() returns trigger language plpgsql security definer as $$
begin 
  set search_path to 'stack';
  execute 'insert into php_'||new.host||'_'||to_char(new.logdate, 'YYYYmMM')||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
  return null;
end;$$;
--
create or replace function php_insert() returns trigger language plpgsql security definer as $$
begin 
  set search_path to 'stack';
  execute 'insert into php_'||new.host||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
  return null; 
end;$$;
--
\timing on
insert into php(host, logdate, message) select 'host1', current_date-(generate_series(-99999, 0, 1)/1000)::integer, repeat('hello',20);
--******** output
--INSERT 0 0
--Time: 28457.146 ms
\timing off
--
rollback;