MySQL – GROUP BY with JOIN query

join;MySQLmysql-8.0query

I am using MySQL 8.0.23

I have 4 tables: houses, intervals, discounts, and availability.

Houses:

create table houses
(
    id int auto_increment
        primary key,
    name varchar(255) null,
    occupancy tinyint null
);

insert into houses (id, name, occupancy)
values  (1, 'Celopatra', 2);

+----+-----------+-----------+
| id | name      | occupancy |
+----+-----------+-----------+
|  1 | Cleopatra |         4 |
+----+-----------+-----------+ 

Intervals:

create table intervals
(
    id int auto_increment
        primary key,
    house_id int null,
    start_date date null,
    end_date date null,
    adult_price int null,
    child_price int null,
    min_stay int null,
    max_stay int null
);

insert into intervals (id, house_id, start_date, end_date, adult_price, child_price, min_stay, max_stay)
values  (1, 1, '2021-02-01', '2021-02-05', 40, 30, 2, 4),
        (2, 1, '2021-02-06', '2021-02-10', 50, 40, 2, 4),
        (3, 1, '2021-02-11', '2021-02-15', 60, 50, 2, 4);


+----+----------+------------+------------+-------------+-------------+----------+----------+
| id | house_id | start_date | end_date   | adult_price | child_price | min_stay | max_stay |
+----+----------+------------+------------+-------------+-------------+----------+----------+
|  1 |        1 | 2021-02-01 | 2021-02-05 |          40 |          30 |        2 |        4 |
|  2 |        1 | 2021-02-06 | 2021-02-10 |          50 |          40 |        2 |        4 |
|  3 |        1 | 2021-02-11 | 2021-02-15 |          60 |          50 |        2 |        4 |
+----+----------+------------+------------+-------------+-------------+----------+----------+

Availability:
This table is an expanded version of the intervals table. I am using the PHP backend to generate days for each interval period to query for availability. However, the price column in this table is not related to adult_price, child_price in the intervals table. It is a minimum deposit price for each day.

 create table availability
(
    id int null,
    interval_id int null,
    date date null,
    price int null,
    is_available int null
);



insert into availability (id, interval_id, date, price, is_available)
values  (1, 1, '2021-02-01', 100, 1),
        (2, 1, '2021-02-02', 100, 1),
        (3, 1, '2021-02-03', 100, 1),
        (4, 1, '2021-02-04', 100, 1),
        (5, 1, '2021-02-05', 100, 1),
        (6, 2, '2021-02-06', 120, 1),
        (7, 2, '2021-02-07', 120, 1),
        (8, 2, '2021-02-08', 120, 1),
        (9, 2, '2021-02-09', 120, 1),
        (10, 2, '2021-02-10', 120, 1),
        (11, 3, '2021-02-11', 130, 1),
        (12, 3, '2021-02-12', 130, 1),
        (13, 3, '2021-02-13', 130, 1),
        (14, 3, '2021-02-14', 130, 1),
        (15, 3, '2021-02-15', 130, 1);

+------+-------------+------------+-------+--------------+
| id   | interval_id | date       | price | is_available |
+------+-------------+------------+-------+--------------+
|    1 |           1 | 2021-02-01 |   100 |            1 |
|    2 |           1 | 2021-02-02 |   100 |            1 |
|    3 |           1 | 2021-02-03 |   100 |            1 |
|    4 |           1 | 2021-02-04 |   100 |            1 |
|    5 |           1 | 2021-02-05 |   100 |            1 |
|    6 |           2 | 2021-02-06 |   120 |            1 |
|    7 |           2 | 2021-02-07 |   120 |            1 |
|    8 |           2 | 2021-02-08 |   120 |            1 |
|    9 |           2 | 2021-02-09 |   120 |            1 |
|   10 |           2 | 2021-02-10 |   120 |            1 |
|   11 |           3 | 2021-02-11 |   130 |            1 |
|   12 |           3 | 2021-02-12 |   130 |            1 |
|   13 |           3 | 2021-02-13 |   130 |            1 |
|   14 |           3 | 2021-02-14 |   130 |            1 |
|   15 |           3 | 2021-02-15 |   130 |            1 |
+------+-------------+------------+-------+--------------+

Discounts:
Type 1 represents percent discounts, 2 represents fixed amount discounts.

create table discounts
(
    id int auto_increment
        primary key,
    interval_id int null,
    discount_type int null,
    discount_amount int null,
    status int null
);

insert into discounts (id, interval_id, discount_type, discount_amount, status)
values  (1, 1, 1, 20, 1),
        (2, 1, 2, 40, 1);

+----+-------------+---------------+-----------------+--------+
| id | interval_id | discount_type | discount_amount | status |
+----+-------------+---------------+-----------------+--------+
|  1 |           1 |             1 |              20 |      1 |
|  2 |           1 |             2 |              40 |      1 |
+----+-------------+---------------+-----------------+--------+

Final goal:
I want to list available houses by their final price (after discount applied – discount may not occur as well) for the specific date period.
To achieve that firstly I am looking for available houses for the given date, after that I am sending the following parameters: sum, minimum start date, house id, and day_count to the calculate function.

Now, I was wondering is it possible to optimize this query. I thought, maybe instead of passing the min start date to calculate function, passing interval id directly would be much efficient.

How would you implement the same functionality in a better way? What would you say about the performance of this query?

You can view DDL and DML statements via this link

CREATE FUNCTION `calculate`(`sum` INTEGER, `minStartDate` DATE, house_id INTEGER, `day_count` INTEGER) RETURNS INTEGER
    DETERMINISTIC
BEGIN
    DECLARE  interval_id INTEGER;
    set interval_id = (select id from intervals s where s.start_date = minStartDate  and s.house_id = house_id);

    SET sum = IFNULL((SELECT sum - sum * d.discount_amount / 100
                      FROM discounts d
                      WHERE d.interval_id = interval_id and d.discount_type = 1), sum);
    SET sum = IFNULL((SELECT sum - day_count * d.discount_amount
                      FROM discounts d
                      WHERE d.interval_id = interval_id and d.discount_type = 2), sum);
    return sum;
END;


select house_id,
       sum(a.price) as price,
       count(a.id)  as day_count,
       min(i.start_date),
       calculate(sum(a.price), min(i.start_date), house_id, count(a.id)) as final_price
from availability a
         inner join intervals i on i.id = a.interval_id
where a.date > '2021-02-03'
  and a.date < '2021-02-13'
  and a.is_available = 1
group by i.house_id
having day_count = 9;

Best Answer

With the structure that you have

The CĀ“GROUP CONCAT isd there because there could be more than 1 interval_id, that lies in the minimum date

select house_id,
       sum(a.price) as price,
       count(a.id)  as day_count
       ,(SELECT GROUP_CONCAT(interval_id ORDER BY interval_id) 
          FROM availability 
          WHERE `date` = '2021-02-03'
          AND interval_id IN (SELECT id FROM `intervals` WHERE house_id = i.house_id)
       ) interval_id

from availability a
         inner join intervals i on i.id = a.interval_id
where a.`date` > '2021-02-03'
  and a.`date` < '2021-02-13'
  and a.is_available = 1
group by i.house_id
having day_count = 9
house_id | price | day_count | interval_id
-------: | ----: | --------: | :----------
       1 |  1060 |         9 | 1