Postgresql – Postgres query slow despite index being used

indexoptimizationpostgresql

I have the following tables:

The main lead table with close to 500M rows:

create table lead
(
    id                  integer,
    client_id           integer,
    insert_date         integer  (a transformed date that looks like 20201231)
)

create index lead_id_index
    on lead (id);

create index lead_insert_date_index
    on lead (insert_date) include (id, client_id);

create index lead_client_id_index
    on lead (client_id) include (id, insert_date);

And then the other tables

create table last_activity_with_client
(
    lead_id       integer,
    last_activity timestamp,
    last_modified timestamp,
    client_id     integer
);

create index last_activity_with_client_client_id_index
    on last_activity_with_client (client_id) include (lead_id, last_activity);

create index last_activity_with_client_last_activity_index
    on last_activity_with_client (last_activity desc);

create index last_activity_with_client_lead_id_client_id_index
    on last_activity_with_client (lead_id, client_id);


create table lead_last_response_time
(
    lead_id            integer,
    last_response_time timestamp,
    last_modified      timestamp
);

create index lead_last_response_time_last_response_time_index
    on lead_last_response_time (last_response_time desc);

create index lead_last_response_time_lead_id_index
    on lead_last_response_time (lead_id);



create table lead_last_response_time
(
    lead_id            integer,
    last_response_time timestamp,
    last_modified      timestamp
);

create index lead_last_response_time_last_response_time_index
    on lead_last_response_time (last_response_time desc);

create index lead_last_response_time_lead_id_index
    on lead_last_response_time (lead_id);



create table date_dimensions
(
    key                      integer,  (a transformed date that looks like 20201231)
    date                     date,
    description              varchar(256),
    day                      smallint,
    month                    smallint,
    quarter                  char(2),
    year                     smallint
    past_30                  boolean
);

create index date_dimensions_key_index
    on date_dimensions (key);

I try running the following query on different client_id and it is always slowed down by the bitmap index scan on client_id in the lead_table

EXPLAIN ANALYZE
with TempResult AS (
    select DISTINCT lead.id AS lead_id,
                    last_activity,
                    last_response_time
    from lead
             left join (select * from last_activity_with_client where client_id = 13189) last_activity_join on
        lead.id = last_activity_join.lead_id

             left join lead_last_response_time lead_last_response_time on
        lead.id = lead_last_response_time.lead_id

             join date_dimensions date_dimensions on
        lead.insert_date = date_dimensions.key

    where (date_dimensions.past_30 = true)
      and (lead.client_id in (13189))
),
     TempCount AS (
         select COUNT(*) as total_rows
         fromt TempResult
     )
select *
from TempResult, TempCount
order by last_response_time desc NULLS LAST
limit 25 offset 1;

A few results:
explain analyze result 1
explain analyze result 2

As you can see, it's using the index but it's quite slow. Always more than 50 seconds. What can I do to make this query run faster? I have some freedom to change the query and the tables too.

Best Answer

with TempResult AS (
    select DISTINCT lead.id AS lead_id,
                    last_activity,
                    last_response_time
      from lead
      left join last_activity_with_client last_activity_join
                 on  lead.client_id = last_activity_join.client_id
                 and lead.id = last_activity_join.lead_id
      left join lead_last_response_time lead_last_response_time
                 on lead.id = lead_last_response_time.lead_id
    where lead.insert_date >= to_char(current_date-30,'YYYYMMDD')::integer
      and lead.client_id = 13189),
     TempCount AS (
         select COUNT(*) as total_rows
         from TempResult
     )
select *
from TempResult, TempCount
order by last_response_time desc NULLS LAST
limit 25 offset 1;

Possibly "TempCount" can be dropped and replaced by a window function "count() over ()".

with TempResult AS (
    select DISTINCT lead.id AS lead_id,
                    last_activity,
                    last_response_time
      from lead
      left join last_activity_with_client last_activity_join
                 on  lead.client_id = last_activity_join.client_id
                 and lead.id = last_activity_join.lead_id
      left join lead_last_response_time lead_last_response_time
                 on lead.id = lead_last_response_time.lead_id
    where lead.insert_date >= to_char(current_date-30,'YYYYMMDD')::integer
      and lead.client_id = 13189)
select *, count(*) over () TempCount
from TempResult
order by last_response_time desc NULLS LAST
limit 25 offset 1;