I run select on a single table (Engine=InnoDB), not too complex but with many rows.
The first select for an id is slower, it takes a few seconds for 9M rows, the next selects are much faster, even when I change the query.
I tried mysql on Windows and mariadb on Linux.
I run select command like this:
select `id`,count(*), sum(`counts`) from reference
where `id`=848
and `started`<= '2000-01-04 00:00:00'
and `ended` >= '2000-01-03 00:00:00';
or like this:
select min(`counts`),max(`counts`) from reference where `id`=848 ;
it doesn't matter which query was first, the first is slower.
when I run on mariadb on linux consequent id was sometimes fast, but on mysql on windows it was also slow in the first time. this made me think that maybe I missing something.
tests, result and time measure and can be found below.
thanks for any help!
This is my db:
create database my_test_db default char set utf8 ;
use my_test_db;
create table items (
`id` int(11) not null auto_increment,
`name` varchar(50),
`description` varchar(250) default '',
primary key (`id`),
unique key item_name_unique(`name`)
);
create table reference (
`id` int(11) not null,
`started` datetime not null,
`ended` datetime not null,
`counts` int(11) not null,
key fk_item_id_idx (`id`),
key idx_started (`started`),
key idx_ended (`ended`),
constraint fk_item_id foreign key (`id`) references items(`id`)
on delete no action on update no action
);
A grafic display:
MariaDB [my_test_db]> describe items;
+-------------+--------------+------+-----+---------+----------------+
| Field | Type | Null | Key | Default | Extra |
+-------------+--------------+------+-----+---------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| name | varchar(50) | YES | UNI | NULL | |
| description | varchar(250) | YES | | | |
+-------------+--------------+------+-----+---------+----------------+
3 rows in set (0.00 sec)
MariaDB [my_test_db]> describe reference;
+---------+----------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+---------+----------+------+-----+---------+-------+
| id | int(11) | NO | MUL | NULL | |
| started | datetime | NO | MUL | NULL | |
| ended | datetime | NO | MUL | NULL | |
| counts | int(11) | NO | | NULL | |
+---------+----------+------+-----+---------+-------+
4 rows in set (0.00 sec)
I used the following program to create 2 infiles:
#include <iostream>
#include <fstream>
#include <string>
#include <ctime>
#include <sstream>
#include <cstring>
#include <random>
#define TM_BUF_SIZE 32
#ifndef WIN32
#define localtime_s(PTM,PTIME_T) localtime_r(PTIME_T,PTM)
#endif
int main(int argc, char** argv)
{
int id_max, count_iterations, time_frame;
if(argc!=4)
{
std::cerr<<"Missing Arguments!!"<<std::endl;
std::cerr<<"Usage: DataGen item_count time_iteration time_frame"<<std::endl;
return -1;
}
id_max = (int)strtol(argv[1],nullptr,0);
count_iterations = (int)strtol(argv[2],nullptr,0);
time_frame = (int)strtol(argv[3],nullptr,0);
std::random_device r;
std::default_random_engine re(r());
std::uniform_int_distribution<int> uni_dist(0, 15);
std::tm temp, tmStart,tmEnd;
char bufStart[TM_BUF_SIZE], bufEnd[TM_BUF_SIZE];
std::memset(&temp, 0, sizeof(tm));
std::ofstream fitems("items.dat");
for (int id = 1; id <= id_max; id++)
{
fitems << id << "\tid-" << id << "\titem.number." << id << std::endl;
}
temp.tm_year = 100;
temp.tm_mday = 1;
time_t ts_start = mktime(&temp);
time_t ts_end;
int iteration_left = count_iterations;
std::ofstream frefs("references.dat");
while(iteration_left--)
{
ts_end = ts_start + time_frame;
localtime_s(&tmStart, &ts_start);
localtime_s(&tmEnd, &ts_end);
std::strftime(bufStart, TM_BUF_SIZE, "%Y-%m-%d %H:%M:%S.0", &tmStart);
std::strftime(bufEnd, TM_BUF_SIZE, "%Y-%m-%d %H:%M:%S.0", &tmEnd);
for (int id = 1; id <= id_max; id++)
{
int count = uni_dist(re);
frefs << id << "\t" << bufStart << "\t" << bufEnd << "\t"<<count<< std::endl;
}
ts_start = ts_end;
if(iteration_left && 0 == iteration_left % 100)
{
std::cout<<iteration_left<<" iterations left"<<std::endl;
}
}
std::cout<<"Done!"<<std::endl;
return 0;
}
copiled it on linux using:
g++ -std=c++0x dataGen.cpp -o DataGen
run the DataGen program like this:
DataGen 3000 3000 60
The program create 2 files: "items.dat" and "references.dat"
load the data into DB:
use my_test_db;
load data infile '/root/items.dat' into table items;
load data infile '/root/references.dat' into table reference;
So I filled the tables with many rows: items with 3K rows and reference with 9M rows.
now I'm running selects on the reference
table:
Here is the results:
#first time for this id:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference where `id`=848 and `started`<= '2000-01-03 00:00:00' and `ended`>='2000-01-02 00:00:00';
+------+----------+---------------+
| id | count(*) | sum(`counts`) |
+------+----------+---------------+
| 848 | 1442 | 10640 |
+------+----------+---------------+
1 row in set (3.31 sec)
#next query for same id change time filters:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference where `id`=848 and `started`<= '2000-01-04 00:00:00' and `ended`>='2000-01-03 00:00:00';
+------+----------+---------------+
| id | count(*) | sum(`counts`) |
+------+----------+---------------+
| 848 | 121 | 944 |
+------+----------+---------------+
1 row in set (0.03 sec)
#next query for same id change time filters again:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference
where `id`=848
and `started`<= '2000-01-02 00:00:00'
and `ended` >= '2000-01-01 00:00:00';
+------+----------+---------------+
| id | count(*) | sum(`counts`) |
+------+----------+---------------+
| 848 | 1441 | 10848 |
+------+----------+---------------+
1 row in set (0.06 sec)
— change only id:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference
where `id`=1848
and `started`<= '2000-01-02 00:00:00'
and `ended` >= '2000-01-01 00:00:00';
+------+----------+---------------+
| id | count(*) | sum(`counts`) |
+------+----------+---------------+
| 1848 | 1441 | 10576 |
+------+----------+---------------+
1 row in set (2.63 sec)
#use same id change time filters:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference
where `id`=1848
and `started`<= '2000-01-02 12:00:00'
and `ended` >= '2000-01-01 12:00:00';
+------+----------+---------------+
| id | count(*) | sum(`counts`) |
+------+----------+---------------+
| 1848 | 1442 | 10780 |
+------+----------+---------------+
1 row in set (0.03 sec)
#use consequent id is also fast:
MariaDB [my_test_db]> select `id`,count(*), sum(`counts`) from reference
where `id`=1849
and `started`<= '2000-01-02 12:00:00'
and `ended` >= '2000-01-01 12:00:00';
+------+----------+---------------+
| id | count(*) | sum(`counts`) |
+------+----------+---------------+
| 1849 | 1442 | 11001 |
+------+----------+---------------+
1 row in set (0.11 sec)
— other query – same id – fast
MariaDB [my_test_db]> select min(counts),max(counts) from reference where `id`=1849 ;
+-------------+-------------+
| min(counts) | max(counts) |
+-------------+-------------+
| 0 | 15 |
+-------------+-------------+
1 row in set (0.03 sec)
#again it is slow for other id
MariaDB [my_test_db]> select min(counts),max(counts) from reference where `id`=1800 ;
+-------------+-------------+
| min(counts) | max(counts) |
+-------------+-------------+
| 0 | 15 |
+-------------+-------------+
1 row in set (2.36 sec)
— describe query:
MariaDB [my_test_db]> describe select `id`,count(*), sum(`counts`) from reference where `id`=1849 and `started`<= '2000-01-02 12:00:00' and `ended`>='2000-01-01 12:00:00';
+------+-------------+-----------+------+--------------------------------------+----------------+---------+-------+------+-------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+------+-------------+-----------+------+--------------------------------------+----------------+---------+-------+------+-------------+
| 1 | SIMPLE | reference | ref | fk_item_id_idx,idx_started,idx_ended | fk_item_id_idx | 4 | const | 2999 | Using where |
+------+-------------+-----------+------+--------------------------------------+----------------+---------+-------+------+-------------+
1 row in set (0.00 sec)
Best Answer
That's because of loading the data from disk, as Gerald explained.
The first query would benefit from
for the second:
will make it very fast.
InnoDB does not like it when you fail to provide a
PRIMARY KEY
. Ponder what makes sense for the tableReference
.Notice how
DESCRIBE
is less descriptive thanSHOW CREATE TABLE
.It also sounds like you have an unreasonably small value for
innodb_buffer_pool_size
-- this controls the caching. If you have more than 4GB of RAM, I recommend 70% of available RAM be given to that setting.