Mysql – Selecting N values from each group

greatest-n-per-groupMySQLmysql-5.7

I have 2 tables which have to be joined to get needed data.

Table1 is articles, Table2 is for channels/categories.

For the sake of simplicity, both tables only have 2 fields.

Basically, I need to select N unique IDs for each group (term_id). Values should't repeat between groups.

I can achieve expected results with RANK and PARTITION but it only works with MySQL 8.x, while I need it to work in 5.7.

Fiddle with the table structure, some sample data and current query I have is here

How can this approach be adapted to 5.7 MySQL?

SET GLOBAL sql_mode=(SELECT REPLACE(@@sql_mode,'ONLY_FULL_GROUP_BY',''));
CREATE TABLE `Table1` (
    `ID` BIGINT(20) UNSIGNED NOT NULL,
    `date` DATETIME NOT NULL);
CREATE TABLE `Table2` (
    `object_id` BIGINT(20) UNSIGNED NOT NULL DEFAULT '0',
    `term_id` BIGINT(20) UNSIGNED NOT NULL DEFAULT '0'
);

INSERT INTO `Table1` (`ID`, `date`) VALUES (195512, '2019-02-27 12:28:18'),
VALUES (195260, '2017-01-02 15:26:45'),
VALUES (195245, '2019-01-02 14:22:57'),
VALUES (192628, '2018-12-20 13:44:39'),
VALUES (192563, '2018-12-20 10:53:32'),
VALUES (191224, '2018-12-14 15:32:22'),
VALUES (189468, '2018-12-07 10:25:31'),
VALUES (188520, '2018-12-03 14:59:59'),
VALUES (187808, '2018-11-30 14:03:27'),
VALUES (185682, '2018-11-20 21:20:18'),
VALUES (195473, '2017-01-03 10:52:01'),
VALUES (192982, '2018-12-21 19:55:42'),
VALUES (192695, '2018-12-20 15:22:41'),
VALUES (192611, '2017-12-20 13:19:29'),
VALUES (192320, '2018-12-19 14:06:06'),
VALUES (192026, '2018-12-18 12:16:22'),
VALUES (191218, '2018-12-14 14:37:57'),
VALUES (190677, '2018-12-12 13:22:07'),
VALUES (190661, '2018-12-12 13:08:27'),
VALUES (190147, '2018-12-10 12:43:42'),
VALUES (195260, '2019-01-02 15:26:45'),
VALUES (192676, '2018-12-20 15:30:08'),
VALUES (192409, '2018-12-19 15:44:59'),
VALUES (192057, '2018-12-18 14:46:46'),
VALUES (191825, '2018-12-17 15:21:13'),
VALUES (190968, '2018-12-13 15:59:28'),
VALUES (190701, '2018-12-12 15:21:18'),
VALUES (190450, '2018-12-11 16:03:07'),
VALUES (190188, '2018-12-10 15:08:08'),
VALUES (189484, '2018-12-07 15:43:13'),
VALUES (195295, '2019-01-02 15:23:54'),
VALUES (192876, '2018-12-21 13:25:22'),
VALUES (192537, '2018-12-20 11:59:50'),
VALUES (192359, '2018-12-19 13:48:19'),
VALUES (192050, '2018-12-18 13:18:03'),
VALUES (192059, '2018-12-18 13:03:10'),
VALUES (192051, '2018-12-18 13:00:17'),
VALUES (191581, '2018-12-17 12:00:12'),
VALUES (191260, '2018-12-14 15:55:44'),
VALUES (190729, '2018-12-12 15:12:36'),



INSERT INTO `Table2` (`object_id`, `term_id`) VALUES 
(195295, 568),
(192876, 568),
(192537, 568),
(192359, 568),
(192050, 568),
(192059, 568),
(192051, 568),
(191581, 568),
(191260, 568),
(190729, 568),
(190387, 568),
(190349, 568),
(189122, 568),
(188751, 568),
(187831, 568),
(187273, 568),
(187020, 568),
(187025, 568),
(185320, 568),
(185283, 568),
(195512, 1)  ,
(195260, 1)  ,
(195245, 1)  ,
(192628, 1)  ,
(192563, 1)  ,
(191224, 1)  ,
(189468, 1)  ,
(188520, 1)  ,
(187808, 1)  ,
(185682, 1)  ,
(183886, 1)  ,
(182668, 1)  ,
(182566, 1)  ,
(182194, 1)  ,
(180177, 1)  ,
(179738, 1)  ,
(179181, 1)  ,
(176889, 1)  ,
(176862, 1)  ,
(175258, 1)  ,
(195473, 564),
(192982, 564),
(192695, 564),
(192611, 564),
(192320, 564),
(192026, 564),
(191218, 564),
(190677, 564),
(190661, 564),
(190147, 564),
(189468, 564),
(189190, 564),
(189159, 564),
(189062, 564),
(188732, 564),
(188688, 564),
(188666, 564),
(188609, 564),
(188611, 564),
(188613, 564),
(195260, 91170),
(192676, 91170),
(192409, 91170),
(192057, 91170),
(191825, 91170),
(190968, 91170),
(190701, 91170),
(190450, 91170),
(190188, 91170),
(189484, 91170),
(189224, 91170),
(189011, 91170),
(188716, 91170),
(188522, 91170),
(187585, 91170),
(187297, 91170),
(187094, 91170),
(186788, 91170),
(185769, 91170),
(185577, 91170);

Query:

SELECT `ID`, `term_id`, `date`, `rank`
FROM 
(
    SELECT DISTINCT(`ID`), `term_id`, `date`, RANK() OVER (PARTITION BY `channels`.`term_id` ORDER BY FIELD(`channels`.`term_id`, 1, 564, 91170, 568 ), `main`.`date` DESC) AS `rank`
    FROM `Table1` AS `main`
    INNER JOIN `Table2` AS `channels` ON(`channels`.`object_id` = `main`.`ID`) AND `channels`.`term_id` IN (1, 564, 91170, 568) 
    GROUP BY (`ID`)
) AS x 
WHERE `rank` <= 3

Expected Result:

| ID     | term_id | date                | rank |
|--------|---------|---------------------|------|
| 195512 | 1       | 2019-02-27 12:28:18 | 1    |
| 195245 | 1       | 2019-01-02 14:22:57 | 2    |
| 192628 | 1       | 2018-12-20 13:44:39 | 3    |
| 192982 | 564     | 2018-12-21 19:55:42 | 1    |
| 192695 | 564     | 2018-12-20 15:22:41 | 2    |
| 192320 | 564     | 2018-12-19 14:06:06 | 3    |
| 195295 | 568     | 2019-01-02 15:23:54 | 1    |
| 192876 | 568     | 2018-12-21 13:25:22 | 2    |
| 192537 | 568     | 2018-12-20 11:59:50 | 3    |
| 192676 | 91170   | 2018-12-20 15:30:08 | 1    |
| 192409 | 91170   | 2018-12-19 15:44:59 | 2    |
| 192057 | 91170   | 2018-12-18 14:46:46 | 3    |

Best Answer

Functionally equivalent but not very efficient way is the next:

SELECT q.*
  FROM ( SELECT z.* 
              , IF( @prev_tid = (@prev_tid := term_id)
                  , @cnt := @cnt + 1                  
                  , @cnt := 1                                 
                  ) AS rank
           FROM ( SELECT DISTINCT(id)
                       , term_id
                       , `date`
                    FROM table1 AS m
                    JOIN table2 AS c ON c.object_id = m.id
                                    AND c.term_id IN (1, 564, 91170, 568) 
                   ORDER BY c.term_id ASC
                          , m.date DESC
                ) AS z
          WHERE @cnt := 1                  
            AND (@prev_tid := 0) OR 1     
          ORDER BY term_id ASC
                 , date DESC
       ) AS q
 WHERE q.rank <= 3
 ;