Mysql – What index(es) and queries for multiple column ranges search

indexindex-tuningMySQLmysql-5.6

Based on a simple innodb mysql 5.6 database schema like this:

test diagram

What is the optimal indexing strategy for querying the "info" table where most of the search criteria will be IN and BETWEEN conditions? Here are some constraints/info on the possible queries:

  • The date_created range will always be a part of the queries (BETWEEN);
  • The user_id and the info_category (JOIN) conditions are optional;
  • The results can be ordered by either the date_created or the view_count fields;

Here are some example queries to be executed:

#Fetch 100 active info based on date_created, user and info_type. 
#Ordered by date_created and offset 100000
SELECT i.id,
       i.title,
       i.view_count
FROM info i
WHERE i.date_created BETWEEN '2016-01-08 11:45:32' AND '2017-01-27 11:45:32'
  AND i.info_type_id IN (1,2,3,23)
  AND i.user_id IN (1,5,120,387,45023) #optional
  AND i.id > 100000
  AND i.date_deleted IS NULL
ORDER BY i.date_created DESC
LIMIT 100;


#Fetch 100 active info based on date_created, info_type. 
#Ordered by view_count and offset 100000
SELECT i.id,
       i.title,
       i.view_count
FROM info i
WHERE i.date_created BETWEEN '2016-01-08 11:45:32' AND '2017-01-27 11:45:32'
  AND i.info_type_id IN (1,2,3,23)
  AND i.user_id IN (1,5,120,387,45023) #optional
  AND i.id > 100000
  AND i.date_deleted IS NULL
ORDER BY i.view_count DESC
LIMIT 100;


#Fetch 100 active info based on date_created, info_type, user and category. 
#Ordered by view_count and offset 100000
SELECT i.id,
       i.title,
       i.view_count
FROM info i
  JOIN info_category ic on ic.info_id = i.id AND ic.category_id IN (1,2,3,4,10)
WHERE i.date_created BETWEEN '2016-01-28 11:45:32' AND '2017-01-27 11:45:32'
  AND i.info_type_id IN (1,2,3,23)
  AND i.user_id IN (1,5,120,387,45023) #optional
  AND i.id > 500000
  AND i.date_deleted IS NULL
GROUP BY i.id
ORDER BY i.view_count DESC
LIMIT 100;

Here is the schema creation sql:

--
-- Schema test
--
DROP SCHEMA IF EXISTS `test` ;
CREATE SCHEMA IF NOT EXISTS `test` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci ;
SHOW WARNINGS;

-- 
-- Set default database
--
USE test;

--
-- Definition for table info_type
--
DROP TABLE IF EXISTS info_type;
CREATE TABLE info_type (
  id tinyint(1) UNSIGNED NOT NULL AUTO_INCREMENT,
  name varchar(20) NOT NULL,
  PRIMARY KEY (id)
)
ENGINE = INNODB
CHARACTER SET utf8
COLLATE utf8_general_ci;

--
-- Definition for table category
--
DROP TABLE IF EXISTS category;
CREATE TABLE category (
  id int(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  name varchar(20) NOT NULL,
  PRIMARY KEY (id)
)
ENGINE = INNODB
AUTO_INCREMENT = 1
CHARACTER SET utf8
COLLATE utf8_general_ci;

--
-- Definition for table user
--
DROP TABLE IF EXISTS user;
CREATE TABLE user (
  id int(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  name varchar(32) NOT NULL,
  PRIMARY KEY (id)
)
ENGINE = INNODB
AUTO_INCREMENT = 1
CHARACTER SET utf8
COLLATE utf8_general_ci;

--
-- Definition for table info
--
DROP TABLE IF EXISTS info;
CREATE TABLE info (
  id int(10) UNSIGNED NOT NULL AUTO_INCREMENT,
  title varchar(255) NOT NULL,
  date_created datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
  user_id int(10) UNSIGNED NOT NULL,
  info_type_id tinyint(3) UNSIGNED NOT NULL,
  date_deleted datetime DEFAULT NULL,
  view_count int(11) UNSIGNED NOT NULL DEFAULT 0,
  PRIMARY KEY (id),
  INDEX date_created_idx (date_created),
  INDEX fk_info_type_id_idx (info_type_id),
  INDEX user_id_idx (user_id),
  CONSTRAINT fk_info_type_id FOREIGN KEY (info_type_id)
  REFERENCES info_type (id) ON DELETE RESTRICT ON UPDATE RESTRICT,
  CONSTRAINT fk_user_id FOREIGN KEY (user_id)
  REFERENCES user (id) ON DELETE CASCADE ON UPDATE NO ACTION
)
ENGINE = INNODB
AUTO_INCREMENT = 1
CHARACTER SET utf8
COLLATE utf8_general_ci;

--
-- Definition for table info_category
--
DROP TABLE IF EXISTS info_category;
CREATE TABLE info_category (
  info_id INT(10) UNSIGNED NOT NULL,
  category_id INT(10) UNSIGNED NOT NULL,
  PRIMARY KEY (info_id, category_id),
  INDEX k_category_id_idx (category_id),
  CONSTRAINT fk_info_id FOREIGN KEY (info_id)
    REFERENCES info(id) ON DELETE CASCADE ON UPDATE NO ACTION,
  CONSTRAINT fk_category_id FOREIGN KEY (category_id)
    REFERENCES category(id) ON DELETE CASCADE ON UPDATE NO ACTION
)
ENGINE = INNODB
CHARACTER SET utf8
COLLATE utf8_general_ci;

Here is a script to create dummy data (beware, took almost 3 hours on my crappy VM) :

INSERT INTO info_type(name)
  VALUES ('news'), ('announcement'), ('advice'), ('memo'), ('historic');

DELIMITER //

DROP PROCEDURE IF EXISTS createFakeData//
CREATE PROCEDURE createFakeData ()
BEGIN

  DECLARE userCount int default 50000;
  DECLARE infoCount int default 1000000;
  DECLARE categoryCount int DEFAULT 1000;
  DECLARE infoCategoryCount int;
  DECLARE i int DEFAULT 1;
  DECLARE j int;

  DECLARE CONTINUE HANDLER FOR SQLSTATE '23000' BEGIN END;

  #create fake users
  WHILE i < userCount DO
    INSERT INTO user(name) VALUES (CONCAT('user_', i));
    SET i := i + 1;
  END WHILE;

  SET i = 1;

  #create fake infos
  WHILE i < infoCount DO
    INSERT INTO info(title, 
                     date_created, 
                     user_id, 
                     info_type_id, 
                     view_count)
     VALUES (CONCAT('title_', i), 
             DATE_ADD(NOW(), INTERVAL -(FLOOR(RAND() * 365) + 1) DAY), 
             FLOOR(RAND() * userCount) + 1,
             FLOOR(RAND() * 5) + 1,
             FLOOR(RAND() * 10000) + 1);

    SET i := i + 1;
  END WHILE;

  SET i = 1;

  #create fake categories
  WHILE i < categoryCount DO
    INSERT INTO category(name) VALUES (CONCAT('category_', i));
    SET i = i + 1;
  END WHILE;

  SET i = 1;

  #create fake info-category associations
  WHILE i < infoCount DO
    SET infoCategoryCount = FLOOR(RAND() * 10) + 1;
    SET j = 0;

    WHILE j < infoCategoryCount DO
      INSERT INTO info_category (info_id, category_id)
          VALUES (i, FLOOR(RAND() * categoryCount) + 1);
      SET j = j + 1;
    END WHILE;

    SET i = i + 1;
  END WHILE;
END//

DELIMITER ;

CALL createFakeData();
DROP PROCEDURE createFakeData;

Best Answer

It is essentially impossible to index on two ranges.

Your comment about "offset ... order by ..." does not make sense because the offset is based on one column, the order by, another.

The first query is best done with simply

INDEX(date_created)

which handles one of the ranges, plus the ORDER BY, thereby doing some filtering and avoiding a sort while being able to stop before the end of the table (if 100 rows are found).

The other two are not that clear. Either have an index on the ORDER BY column so that sorting and limiting are handled, and/or have an index on whichever filtering column is the most selective.

The JOIN in the third makes it even more difficult to optimize. The Optimizer can't know whether to start with ic or with i; both have filtering.