Improve Query Performance

performancequery-performancesqlite

I have the following sqlite3-Tables:

db.execute("""CREATE TABLE IF NOT EXISTS STATIONS
       (StationUID INTEGER PRIMARY KEY AUTOINCREMENT,
       StationNumber           TEXT    NOT NULL,
       StationName             TEXT    NOT NULL,
       StationLongitude        REAL    NOT NULL,
       StationAltitude         REAL    NOT NULL);""")

print "Created table STATIONS successfully"

db.execute("""CREATE TABLE IF NOT EXISTS TRAVELDAYS
       (TrainDaysUID           INT  NOT NULL,
       Day                     INT  NOT NULL,
       Value                   INT  NOT NULL,
       PRIMARY KEY (TrainDaysUID, Day));""")

print "Created table TRAVELDAYS successfully"

db.execute("""CREATE TABLE IF NOT EXISTS TRAINS
       (TrainUID INTEGER PRIMARY KEY AUTOINCREMENT,
       TrainNumber           TEXT    NOT NULL,
       TrainDaysUID          INT     NOT NULL,
       TrainType             TEXT    NOT NULL,
       TrainZone             TEXT    NOT NULL,
       FOREIGN KEY(TrainDaysUID) REFERENCES TRAVELDAYS(TrainDaysUID));""")

print "Created table TRAINS successfully"


db.execute("""CREATE TABLE IF NOT EXISTS ROUTES
       (RouteUID INTEGER PRIMARY KEY AUTOINCREMENT,
       StartStationUID       INT     NOT NULL,
       EndStationUID         INT     NOT NULL,
       FOREIGN KEY(StartStationUID) REFERENCES STATIONS(StationUID),
       FOREIGN KEY(EndStationUID) REFERENCES STATIONS(StationUID),
       UNIQUE (StartStationUID, EndStationUID) ON CONFLICT IGNORE);""")

print "Created table ROUTES successfully"

db.execute("""CREATE TABLE IF NOT EXISTS LEGS
       (LegUID INTEGER PRIMARY KEY AUTOINCREMENT,
       TrainUID            INT     NOT NULL,
       RouteUID            INT     NOT NULL,
       StartTime           INT     NOT NULL,
       EndTime             INT     NOT NULL,
       TravelTime          INT     NOT NULL,
       DwellTime           INT             ,
       FOREIGN KEY(TrainUID) REFERENCES TRAINS(TrainUID),
       FOREIGN KEY(RouteUID) REFERENCES ROUTES(RouteUID));""")

print "Created table LEGS successfully"
  • Table STATIONS has 10965 entries
  • Table ROUTES has 13026 entries
  • Table LEGS has 610593 entries
  • Table TRAINS has 44920 entries

I need to improve the performance of this query:

cr.execute("SELECT l.TrainUID, l.RouteUID, l.LegUID, l.StartTime, l.EndTime, l.TravelTime, l.DwellTime, s1.StationName, s2.StationName "
           "FROM LEGS AS l "
           "INNER JOIN ROUTES as r ON (r.RouteUID = l.RouteUID) "
           "INNER JOIN STATIONS AS s1 ON (r.StartStationUID = s1.StationUID) "
           "INNER JOIN STATIONS AS s2 ON (r.EndStationUID = s2.StationUID) "
           "WHERE l.StartTime >= 800 AND l.EndTime <= 1200 AND "
           "l.TrainUID IN ( "
                "SELECT DISTINCT l.TrainUID "
                "FROM LEGS AS l "
                "INNER JOIN ROUTES as r ON (r.RouteUID = l.RouteUID) "
                "INNER JOIN STATIONS AS s1 ON (r.StartStationUID = s1.StationUID) "
                "INNER JOIN STATIONS AS s2 ON (r.EndStationUID = s2.StationUID) "
                "INNER JOIN TRAINS AS t ON (t.TrainUID = l.TrainUID) "
                "INNER JOIN TRAVELDAYS as d ON (t.TrainDaysUID = d.TrainDaysUID) "
                "WHERE t.TrainDaysUID == d.TrainDaysUID AND d.Day == 300 AND d.Value == 1 AND (s1.StationNumber = 8000105 OR s2.StationNumber = 8000105)) ")

What can you recommend?

EDIT
I created now those indexes, but it is not really faster.

db.execute("""CREATE INDEX Idx1 ON TRAVELDAYS(TrainDaysUID, Day, Value);""")
db.execute("""CREATE INDEX Idx0 ON STATIONS(StationNumber);""")

Best Answer

Running EXPLAIN QUERY PLAN on your query gives this:

0|0|0|SCAN TABLE LEGS AS l
0|0|0|EXECUTE LIST SUBQUERY 1
1|0|5|SEARCH TABLE TRAVELDAYS AS d USING AUTOMATIC PARTIAL COVERING INDEX (Day=? AND Value=?)
1|1|4|SEARCH TABLE TRAINS AS t USING AUTOMATIC COVERING INDEX (TrainDaysUID=?)
1|2|0|SEARCH TABLE LEGS AS l USING AUTOMATIC COVERING INDEX (TrainUID=?)
1|3|1|SEARCH TABLE ROUTES AS r USING INTEGER PRIMARY KEY (rowid=?)
1|4|2|SEARCH TABLE STATIONS AS s1 USING INTEGER PRIMARY KEY (rowid=?)
1|5|3|SEARCH TABLE STATIONS AS s2 USING INTEGER PRIMARY KEY (rowid=?)
1|0|0|USE TEMP B-TREE FOR DISTINCT
0|1|1|SEARCH TABLE ROUTES AS r USING INTEGER PRIMARY KEY (rowid=?)
0|2|2|SEARCH TABLE STATIONS AS s1 USING INTEGER PRIMARY KEY (rowid=?)
0|3|3|SEARCH TABLE STATIONS AS s2 USING INTEGER PRIMARY KEY (rowid=?)

Any "AUTOMATIC" index is an index that SQLite thinks is worth creating temporarily; try this:

CREATE INDEX x1 ON TRAVELDAYS(Day, Value);
CREATE INDEX x2 ON TRAINS(TrainDaysUID);
CREATE INDEX x3 ON LEGS(TrainUID);