Mysql – Converting SELECT subquery to LEFT JOIN doesnt return same value

MySQL

as title says i have an issue converting SELECT subquery to LEFT JOIN doesnt return same value 🙁

I need to change this as subquery really slows down query.

Here are my original query (gross_end_out returns 6000):

SELECT  

    ( 
        SELECT SUM(ile.gross)                   
        FROM item_ledger_entry AS ile 
        WHERE clr.docNr=ile.docNr 
        AND clr.serialNo=ile.serialNo 
        AND clr.productNr=ile.productNr 
        AND (ile.cargoLine=clr.id OR ile.orgLine=clr.id) 
        AND ile.id NOT IN (
                            SELECT MIN(zz.id) 
                            FROM item_ledger_entry AS zz 
                            WHERE zz.docNr=ile.docNr 
                            AND zz.serialNo=ile.serialNo 
                            AND zz.productNr=ile.productNr
                          ) 
        AND Date_format(ile.activityDate, '%Y-%m-%d') <= '2019-08-05' 
        AND ile.status='40' 
    ) AS gross_end_out

FROM cargo_line_receive AS clr 

LEFT JOIN cargo_header_receive AS chr 
ON clr.docNr=chr.docNr  

LEFT JOIN n_items AS ni 
ON clr.productNr=ni.code 

LEFT JOIN item_ledger_entry AS i 
ON clr.docNr=i.docNr AND i.status=40 
AND (i.id=clr.id OR i.orgLine=clr.id)   


WHERE ((clr.status='20' OR clr.status='30') && clr.status!='40')  

AND Date_format(clr.activityDate, '%Y-%m-%d') <= '2019-08-05' 

GROUP BY clr.id, clr.docNr, clr.batchNo, clr.serialNo, clr.productNr
ORDER BY clr.activityDate DESC, clr.id DESC

Here are new query (gross_end_out returns 18000):

SELECT  

    SUM(a.gross) AS gross_end_out

FROM cargo_line_receive AS clr 

LEFT JOIN cargo_header_receive AS chr 
ON clr.docNr=chr.docNr  

LEFT JOIN n_items AS ni 
ON clr.productNr=ni.code 

LEFT JOIN item_ledger_entry AS i 
ON clr.docNr=i.docNr AND i.status=40 
AND (i.id=clr.id OR i.orgLine=clr.id)   



LEFT JOIN (

    SELECT ile.gross, 
    ile.docNr, ile.serialNo, ile.productNr, ile.cargoLine, ile.orgLine
    FROM item_ledger_entry AS ile 
    WHERE ile.id NOT IN (
                            SELECT MIN(zz.id) 
                            FROM item_ledger_entry AS zz 
                            WHERE zz.docNr=ile.docNr 
                            AND zz.serialNo=ile.serialNo 
                            AND zz.productNr=ile.productNr
                        ) 
    AND Date_format(ile.activityDate, '%Y-%m-%d') <= '2019-08-05' 
    AND ile.status='40'

) AS a 
ON clr.docNr=a.docNr 
AND clr.serialNo=a.serialNo 
AND clr.productNr=a.productNr 
AND (a.cargoLine=clr.id OR a.orgLine=clr.id)

WHERE ((clr.status='20' OR clr.status='30') AND clr.status!='40')  

AND Date_format(clr.activityDate, '%Y-%m-%d') <= '2019-08-05' 

GROUP BY clr.id, clr.docNr, clr.batchNo, clr.serialNo, clr.productNr
ORDER BY clr.activityDate DESC, clr.id DESC

Also i need to see only result where gross_end_out > 0

EDIT: As requested i add table structure:

cargo_header_receive:

CREATE TABLE `cargo_header_receive` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `docNr` VARCHAR(50) NULL DEFAULT '0',

    PRIMARY KEY (`id`),
    INDEX `docNr` (`docNr`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
AUTO_INCREMENT=1
;

cargo_line_receive:

CREATE TABLE `cargo_line_receive` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `docNr` VARCHAR(250) NOT NULL DEFAULT '',
    `batchNo` VARCHAR(250) NULL DEFAULT '',
    `serialNo` VARCHAR(250) NULL DEFAULT '',
    `productNr` VARCHAR(250) NOT NULL DEFAULT '',   
    `activityDate` TIMESTAMP NOT NULL DEFAULT '0000-00-00 00:00:00',
    `status` INT(11) NOT NULL DEFAULT '0',
    `orgLine` INT(11) NOT NULL DEFAULT '0',

    PRIMARY KEY (`id`),
    INDEX `docNr` (`docNr`),
    INDEX `serialNo` (`serialNo`),
    INDEX `productNr` (`productNr`),
    INDEX `status` (`status`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
AUTO_INCREMENT=1
;

item_ledger_entry:

CREATE TABLE `item_ledger_entry` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `docNr` VARCHAR(250) NOT NULL DEFAULT '0',
    `cargoLine` INT(11) NULL DEFAULT NULL,
    `activityDate` TIMESTAMP NOT NULL DEFAULT '0000-00-00 00:00:00',
    `productNr` VARCHAR(250) NOT NULL,
    `orgLine` INT(11) NULL DEFAULT '0',
    `status` INT(11) NOT NULL DEFAULT '0',  
    `serialNo` VARCHAR(250) NULL DEFAULT '0',   
    `gross` DECIMAL(20,10) NOT NULL DEFAULT '0.0000000000',

    PRIMARY KEY (`id`),
    INDEX `cargoLine` (`cargoLine`),
    INDEX `orgLine` (`orgLine`),
    INDEX `docNr` (`docNr`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
AUTO_INCREMENT=1
;

n_items:

CREATE TABLE `n_items` (
    `code` VARCHAR(250) NOT NULL,

    PRIMARY KEY (`code`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
;

What em i doing wrong?
Also i cant use gross_end_out in WHERE

Thanks in advance!

EDIT FOR Nomis:

SELECT  

    ( 
        SELECT SUM(ile.gross)                   
        FROM item_ledger_entry AS ile 
        WHERE clr.docNr=ile.docNr 
        AND clr.serialNo=ile.serialNo 
        AND clr.productNr=ile.productNr 
        AND (ile.cargoLine=clr.id OR ile.orgLine=clr.id) 
        AND ile.id NOT IN (
                            SELECT MIN(zz.id) 
                            FROM item_ledger_entry AS zz 
                            WHERE zz.docNr=ile.docNr 
                            AND zz.serialNo=ile.serialNo 
                            AND zz.productNr=ile.productNr
                          ) 
        AND Date_format(ile.activityDate, '%Y-%m-%d') <= '2019-08-05' 
        AND ile.status='40' 
    ) AS gross_end_out,

    ( 
        SELECT SUM(ile.gross)
        FROM cargo_line_receive AS ile 
        WHERE clr.docNr=ile.docNr 
        AND clr.serialNo=ile.serialNo 
        AND clr.productNr=ile.productNr 
        AND (ile.id=clr.id OR ile.orgLine=clr.id) 
        AND ile.id NOT IN (
                            SELECT MIN(zz.id) 
                            FROM item_ledger_entry AS zz 
                            WHERE zz.docNr=ile.docNr 
                            AND zz.serialNo=ile.serialNo 
                            AND zz.productNr=ile.productNr
                          ) 
        AND Date_format(ile.activityDate, '%Y-%m-%d') < '2019-08-01' 
    ) AS gross_start_in,    

    ( 
        SELECT SUM(ile.gross)
        FROM item_ledger_entry AS ile 
        WHERE clr.docNr=ile.docNr 
        AND clr.serialNo=ile.serialNo 
        AND clr.productNr=ile.productNr 
        AND (ile.cargoLine=clr.id OR ile.orgLine=clr.id) 
        AND ile.id NOT IN (
                            SELECT MIN(zz.id) 
                            FROM item_ledger_entry AS zz 
                            WHERE zz.docNr=ile.docNr 
                            AND zz.serialNo=ile.serialNo 
                            AND zz.productNr=ile.productNr
                          ) 
        AND Date_format(ile.activityDate, '%Y-%m-%d') <  '2019-08-01' 
        AND ile.status='40' 
    ) AS gross_start_out

FROM cargo_line_receive AS clr 

LEFT JOIN cargo_header_receive AS chr 
ON clr.docNr=chr.docNr  

LEFT JOIN n_items AS ni 
ON clr.productNr=ni.code 

LEFT JOIN item_ledger_entry AS i 
ON clr.docNr=i.docNr AND i.status=40 
AND (i.id=clr.id OR i.orgLine=clr.id)   


WHERE ((clr.status='20' OR clr.status='30') && clr.status!='40')  

AND Date_format(clr.activityDate, '%Y-%m-%d') <= '2019-08-05' 

GROUP BY clr.id, clr.docNr, clr.batchNo, clr.serialNo, clr.productNr
ORDER BY clr.activityDate DESC, clr.id DESC

Best Answer

1/What are you doing wrong : This is the easy part, you moved problematic subqueries to a left join still doing the subqueries. Then you removed the function grouping the results as Akina pointed in the comments, resulting in multiple lines joining with multiple lines

2/What to do to solve the immediate issue at hand (bad idea) : just put back the sum function SELECT SUM(ile.gross). With a little luck it should gave you back the 6000. But it should perform worst than the first query since it basically does the same with more join.

3/The real solution : start back from the initial query and make it lighter.

I think should look like something like that :

SELECT SUM(ile.gross) AS gross_end_out
FROM item_ledger_entry AS ile
JOIN cargo_line_receive AS clr 
ON clr.docNr=ile.docNr 
AND clr.serialNo=ile.serialNo 
AND clr.productNr=ile.productNr 
AND (ile.cargoLine=clr.id OR ile.orgLine=clr.id)
AND ((clr.status='20' OR clr.status='30') && clr.status!='40')
AND Date_format(clr.activityDate, '%Y-%m-%d') <= '2019-08-05'
WHERE ile.id NOT IN (
                    SELECT MIN(zz.id) 
                    FROM item_ledger_entry AS zz 
                    WHERE zz.docNr=ile.docNr 
                    AND zz.serialNo=ile.serialNo 
                    AND zz.productNr=ile.productNr
                  ) 
AND Date_format(ile.activityDate, '%Y-%m-%d') <= '2019-08-05' 
AND ile.status='40'
GROUP BY clr.id, clr.docNr, clr.batchNo, clr.serialNo, clr.productNr -- not sure how this part play out in the initial query, may need to be removed
HAVING SUM(ile.gross) > 0

Basically, all others tables than item_ledger_entry and cargo_line_receiver serve no purpose : no conditions, no data from them. So remove them and directly join cargo_line_receiver to the actually useful part of the query.

I didn't test my example so it may need a little tinkering to make it work as you want, but it should be pretty close. And if you still have performance issue by then, your query will be a LOT easier for people to help you with ^^

Edit for the edit :

SELECT
SUM(IF( ile.status='40' ,ile.gross,0)) AS gross_end_out,
SUM(IF( (Date_format(ile.activityDate, '%Y-%m-%d') < '2019-08-01') ,ile.gross,0)) AS gross_start_in,
SUM(IF( (Date_format(ile.activityDate, '%Y-%m-%d') <  '2019-08-01'  AND ile.status='40') ,ile.gross,0)) AS gross_start_out
FROM item_ledger_entry AS ile
JOIN cargo_line_receive AS clr 
ON clr.docNr=ile.docNr 
AND clr.serialNo=ile.serialNo 
AND clr.productNr=ile.productNr 
AND (ile.cargoLine=clr.id OR ile.orgLine=clr.id)
AND ((clr.status='20' OR clr.status='30') && clr.status!='40')
AND Date_format(clr.activityDate, '%Y-%m-%d') <= '2019-08-05'
WHERE ile.id NOT IN (
                    SELECT MIN(zz.id) 
                    FROM item_ledger_entry AS zz 
                    WHERE zz.docNr=ile.docNr 
                    AND zz.serialNo=ile.serialNo 
                    AND zz.productNr=ile.productNr
                  ) 
AND Date_format(ile.activityDate, '%Y-%m-%d') <= '2019-08-05' --don't forget to remove the status condition
GROUP BY clr.id, clr.docNr, clr.batchNo, clr.serialNo, clr.productNr
HAVING SUM(ile.gross) > 0