Postgresql – How to handle this complicated join condition(s)

postgresqlpostgresql-9.4

drop table if exists company_tags;
drop table if exists project_tags;
drop table if exists companies;
drop table if exists projects;
drop table if exists tags;
create table companies (id serial primary key, name text unique not null, company_range int4range not null);
create table projects (id serial primary key, name text unique not null, target_range int4range not null);
create table tags (id serial primary key, name text unique not null);
create table company_tags(company_id integer references companies(id) not null, tag_id integer references tags(id) not null, primary key (company_id, tag_id));
create table project_tags(project_id integer references projects(id) not null, tag_id integer references tags(id) not null, primary key (project_id, tag_id));

-- company hasMany tag through company_tags
-- project hasMany tag through project_tags

insert into companies values 
  (1, 'company1', '[1,50)')
 ,(2, 'company2', '[50,100)')
 ,(3, 'company3', '[100,150)')
 ,(4, 'company4', '[150,200)')
 ,(5, 'company5', '[200,250)');
insert into projects values 
  (1, 'project1', '[50,200)')  -- range will match company 2, 3 and 4
 ,(2, 'project2', '[50,200)'); -- range will match company 2, 3 and 4
insert into tags values 
  (1, 'tag1')
 ,(2, 'tag2')
 ,(3, 'tag3');
insert into company_tags values
  (2, 1)  -- second company belongs to tag1
 ,(3, 2); -- third company belongs to tag2
          -- fourth company does not belong to any tag
insert into project_tags values
  (1, 1); -- first project belongs to tag1
          -- second project does not belong to any tag

Given a project id I have to find all companies that match project target range (companies.company_range && projects.target_range) AND ( ( project does not belong to any tag AND company belongs to at least one (any) tag) OR both project and company belong to at least one same tag ). So for the dataset above it should return:

  • for project 1 company 2 since both of them belong to tag 1.
  • for project 2 companies 2 and 3 since project does not belong to any tag and only companies 2 and 3 belong to any tag.

So once more, one of following rules must match (except range match):

  • project does not belong to ANY tag and company belongs to AT LEAST ONE ANY tag
  • project and company have AT LEAST ONE tag IN COMMON (they both belong to same tag)

Matching by tags in common is easy (two inner joins) but I can't figure out how to handle option when project does not belong to any tag and company must belong to at least one tag.

I need distinct company ids that match given project. In production environment there will be thousends of companies, thousends of projects, ~100 different tags and usually each company/project will belong to 0-5 tags.


Sorry my example was broken (associated tags with wrong companies) – I've updated code above and now is OK. Meantime I found solution that seems to work, but it takes 50ms on my machine to get valid results for this example:

select c.id as company_id, c.tag_id as company_tag_id, p.tag_id as project_tag_id
from (
  select p.id, p.target_range, pt.tag_id
  from projects p
  left join project_tags pt on pt.project_id = p.id -- left join to keep project even without tags
  where p.id = 2
) p
inner join ( -- inner join to ensure at least one company found for project
  select c.id, c.company_range, ct.tag_id
  from companies c
  inner join company_tags ct on ct.company_id = c.id -- inner join to ensure company belongs to at least one tag 
) c 
on c.company_range && p.target_range -- range filter
  and (p.tag_id is null or p.tag_id = c.tag_id) -- either project does not belong to tag or it belongs to same tag as company
;

Any ideas how to make it better?


found another solution that might be faster

with project as (
  select p.id, p.target_range
  from projects p
  where p.id = 1 -- given project
)
select c.id
from project p
inner join companies c
  on c.company_range && p.target_range -- filter by range
inner join company_tags ct -- ensure that company belong to at least one tag
  on ct.company_id = c.id
left join project_tags pt -- join project tags, null if project does not belong to tag
  on pt.project_id = p.id
where pt.tag_id is null or pt.tag_id = ct.tag_id; -- either project does not belong to tag or it belongs to same tag as company

Best Answer

Something like this will solve the issue:

WITH given_tags AS
      ( SELECT pt.tag_id
        FROM project_tags AS pt
        WHERE pt.project_id = @given_project
      ),
     given_range AS
      ( SELECT p.target_range
        FROM projects AS p
        WHERE p.id = @given_project
      )
SELECT c.*
FROM companies AS c
WHERE c.company_range && (TABLE given_range)
  AND c.id IN
      ( SELECT ct.company_id
        FROM company_tags AS ct
        WHERE ct.tag_id IN (TABLE given_tags)
           OR NOT EXISTS (TABLE given_tags) 
      ) ;

Test in SQLfiddle.