Postgresql – CTE works as expected, but not when wrapped into a function

cteplpgsqlpostgresqlstored-procedures

--testing table
CREATE TABLE public.test_patient_table (
                entity_id INTEGER NOT NULL,
                site_held_at INTEGER NOT NULL,
                CONSTRAINT entityid_pk PRIMARY KEY (entity_id)
);

CREATE TABLE public.test_messageq_table (
                entity_id VARCHAR NOT NULL,
                master_id INTEGER NOT NULL,
                message_body VARCHAR NOT NULL,
                CONSTRAINT mq_entity_id_pk PRIMARY KEY (entity_id)
);

CREATE INDEX test_patient_table_siteid_idx
 ON public.test_patient_table
 ( site_held_at );

ALTER TABLE public.test_messageq_table
ADD CONSTRAINT test_patient_table_test_messageq_table_fk
FOREIGN KEY (master_id)
REFERENCES public.test_patient_table (entity_id)
ON DELETE NO ACTION
ON UPDATE NO ACTION
NOT DEFERRABLE;

--test patient data
insert into test_patient_table values (1, 11111);
insert into test_patient_table values (2, 11111);
insert into test_patient_table values (3, 11111);
insert into test_patient_table values (4, 11111);

insert into test_patient_table values (5, 22222);
insert into test_patient_table values (6, 22222);
insert into test_patient_table values (7, 22222);
insert into test_patient_table values (8, 22222);

insert into test_patient_table values (9, 33333);
insert into test_patient_table values (10, 33333);

insert into test_patient_table values (11, 44444);

--testing message
insert into test_messageq_table values (1, 1, 'aaa');
insert into test_messageq_table values (2, 1, 'aaa');
insert into test_messageq_table values (3, 1, 'aaa');
insert into test_messageq_table values (4, 1, 'aaa');
insert into test_messageq_table values (5, 2, 'aaa');
insert into test_messageq_table values (6, 2, 'aaa');
insert into test_messageq_table values (7, 5, 'aaa');
insert into test_messageq_table values (8, 8, 'aaa');
insert into test_messageq_table values (9, 11, 'aaa');
insert into test_messageq_table values (10, 11, 'bbb');    

When I tried to find all messages from message table in site I am interested, I wrote a CTE and it works fine, let's say I am interested in site 11111 and 22222:

WITH patient_msg_in_branches AS (
    select distinct test_messageq_table.master_id AS patient_id,
    test_patient_table.site_held_at as site_id
    from test_messageq_table 
    inner join test_patient_table 
    ON test_messageq_table.master_id = test_patient_table.entity_id 
    and site_held_at in (11111,22222) order by patient_id
),
messages_for_patients AS(
    select * from test_messageq_table where master_id in 
        (select patient_msg_in_branches.patient_id 
            from patient_msg_in_branches)
)select * from messages_for_patients

The result is as expected:

"1";1;"aaa"
"2";1;"aaa"
"3";1;"aaa"
"4";1;"aaa"
"5";2;"aaa"
"6";2;"aaa"
"7";5;"aaa"
"8";8;"aaa"

But when I wrap the whole thing in a function, it's returning the wrong rows. Can you help me see why?

drop function getMessageFromSites(text);
CREATE OR REPLACE FUNCTION getMessageFromSites(IN ids TEXT) RETURNS 
setof test_messageq_table AS $$ 
DECLARE
       sites INT[];
       result test_messageq_table%rowtype;

BEGIN
       sites = string_to_array(ids,',');
        raise info 'entire array: %', sites;

WITH patient_msg_in_branches AS (
    select distinct test_messageq_table.master_id AS patient_id,
    test_patient_table.site_held_at as site_id
    from test_messageq_table 
    inner join test_patient_table 
    ON test_messageq_table.master_id = test_patient_table.entity_id 
    and site_held_at = ANY(sites) order by patient_id
),
messages_for_patients AS(
    select * from test_messageq_table where master_id in 
        (select patient_msg_in_branches.patient_id 
            from patient_msg_in_branches)
)select * into result from messages_for_patients;
return query select * from result;
END;     
$$ LANGUAGE plpgsql;

When using the function:

select * from getMessageFromSites('11111,22222');
select * from getMessageFromSites('1')
select * from getMessageFromSites('33333')

it always returns the same result below of multiple rows but obviously wrong rows, why ? can you help here ?

"1";1;"aaa"
"2";1;"aaa"
"3";1;"aaa"
"4";1;"aaa"
"5";2;"aaa"
"6";2;"aaa"
"9";11;"aaa"
"10";11;"bbb"

Solution

Thanks to @a_horse_with_no_name, now I have two working solutions, one with sql, one with pl/pgsql:

Solution 1 (pl/pgsql)

CREATE OR REPLACE FUNCTION getMessageFromSites(IN ids TEXT) RETURNS 
setof test_messageq_table AS $$ 
DECLARE
       sites INT[];
       result test_messageq_table%rowtype;

BEGIN
       sites = string_to_array(ids,',');
       raise info 'entire array: %', sites;
 return QUERY

    WITH patient_msg_in_branches AS (
        select distinct test_messageq_table.master_id AS patient_id,
        test_patient_table.site_held_at as site_id
        from test_messageq_table 
        inner join test_patient_table 
        ON test_messageq_table.master_id = test_patient_table.entity_id 
        and site_held_at = ANY(sites) order by patient_id
    ),
    messages_for_patients AS(
        select * from test_messageq_table where master_id in 
            (select patient_msg_in_branches.patient_id 
                from patient_msg_in_branches)
    )
    select * from messages_for_patients;

END;     
$$ LANGUAGE plpgsql;

Solution 2 (sql)

CREATE OR REPLACE FUNCTION getMessageFromSites2(ids TEXT) RETURNS 
   setof test_messageq_table 
AS 
$$ 
  WITH patient_msg_in_branches AS (
      select distinct test_messageq_table.master_id AS patient_id,
             test_patient_table.site_held_at as site_id
      from test_messageq_table 
      join test_patient_table ON test_messageq_table.master_id = test_patient_table.entity_id 
                                and site_held_at = ANY (string_to_array($1,',')::int[]) 
  ),
  messages_for_patients AS
  (
    select * 
    from test_messageq_table 
    where master_id in (select patient_msg_in_branches.patient_id 
                        from patient_msg_in_branches)
  )
  select * 
  from messages_for_patients;
$$ 
LANGUAGE sql;

Testing of the code

select * from getMessageFromSites('11111,44444');
select * from getMessageFromSites('22222');
select * from getMessageFromSites('1')
select * from getMessageFromSites('33333')

select * from getMessageFromSites2('11111');
select * from getMessageFromSites2('22222');
select * from getMessageFromSites2('33333');
select * from getMessageFromSites('44444,11111');
select * from getMessageFromSites('1');

Both PG stored procedure are working as expected!

Solution 3:
A better simplified solution see from Erwin's answer below.

Now case closed !

Best Answer

I think this is because you only ever return the first row from the query's result.

The select ... into ... will only retrieve one row and the query select * from result returns only that single record:

You also don't need a PL/pgSQL function, a plain SQL function will work just fine:

CREATE OR REPLACE FUNCTION getMessageFromSites(ids TEXT) RETURNS 
   setof test_messageq_table 
AS 
$$ 
  WITH patient_msg_in_branches AS (
      select distinct test_messageq_table.master_id AS patient_id,
             test_patient_table.site_held_at as site_id
      from test_messageq_table 
         join test_patient_table ON test_messageq_table.master_id = test_patient_table.entity_id 
                                and site_held_at = ANY (string_to_array(ids,',')::int[]) 
  ),
  messages_for_patients AS
  (
    select * 
    from test_messageq_table 
    where master_id in (select patient_msg_in_branches.patient_id 
                        from patient_msg_in_branches)
  )
  select * 
  from messages_for_patients;
$$ 
LANGUAGE sql;

Note that the order by inside the CTE is not really useful. You have to sort the final select, not the intermediate steps.

If you do need PL/pgSQL because you are doing more stuff in the function, you should simply change it to:

begin
  ....
  return query
    WITH patient_msg_in_branches AS (
        select distinct test_messageq_table.master_id AS patient_id,
        test_patient_table.site_held_at as site_id
        from test_messageq_table 
        inner join test_patient_table 
        ON test_messageq_table.master_id = test_patient_table.entity_id 
        and site_held_at = ANY(sites) order by patient_id
    ),
    messages_for_patients AS(
        select * from test_messageq_table where master_id in 
            (select patient_msg_in_branches.patient_id 
                from patient_msg_in_branches)
    )
    select * from messages_for_patients;
end;