Mysql – Selecting Without Repititions

graphMySQL

I have table with observations of objects moving along edges in a graph, this table has the following form:

PK | TIMESTAMP | object_id | from_id | to_id

where object_id is the id of some object and from_id and to_id are vertices.

Since the movements are observed at a high frequency the tuple

(object_id, from_id, to_id)

is repeated often for different PK and TIMESTAMPS. I'm interested in all the separate edge traversals, so if an object with id 1 moves from vertex 1 to 2, from 2 to 1 and from 1 to 2 I want to have a result:

object_id | from_id | to_id
        1 |       1 |     2
        1 |       2 |     1
        1 |       1 |     2

My question: how to write this query?

Best Answer

The tuples would have to be ordered by from_id, to_id in such a way that when from_id or to_id changes, it would fall under a different grouping. I was thinking of writing this as a Stored Procedure, but I thought of something much more intriguing (I just got the idea from my answering this question Update ranking on table about 3 hours ago)

I'll do it in stages

Stage 1 : Sample Data

DROP DATABASE IF EXISTS bootvis;
CREATE DATABASE bootvis;
USE bootvis
CREATE TABLE graph_edges
(
    pk int not null auto_increment,
    object_id int default null,
    tm timestamp not null default current_timestamp,
    from_id int,
    to_id int,
    primary key (pk)
);
INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
SELECT SLEEP(2);
INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
SELECT SLEEP(1);
INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2);
INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1),(2,1),(2,1),(2,1),(2,1),(2,1),(2,1);
INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
UPDATE graph_edges SET object_id = 1 WHERE object_id IS NULL;
INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2);
INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1),(2,1),(2,1),(2,1),(2,1),(2,1),(2,1);
SELECT SLEEP(2);
INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
UPDATE graph_edges SET object_id = 2 WHERE object_id IS NULL;
SELECT * FROM graph_edges;

Stage 2 : Sample Data Loaded

mysql> DROP DATABASE IF EXISTS bootvis;
Query OK, 1 row affected (0.03 sec)

mysql> CREATE DATABASE bootvis;
Query OK, 1 row affected (0.00 sec)

mysql> USE bootvis
Database changed
mysql> CREATE TABLE graph_edges
    -> (
    ->     pk int not null auto_increment,
    ->     object_id int default null,
    ->     tm timestamp not null default current_timestamp,
    ->     from_id int,
    ->     to_id int,
    ->     primary key (pk)
    -> );
Query OK, 0 rows affected (0.12 sec)

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
Query OK, 4 rows affected (0.05 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql> SELECT SLEEP(2);
+----------+
| SLEEP(2) |
+----------+
|        0 |
+----------+
1 row in set (2.00 sec)

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
Query OK, 2 rows affected (0.05 sec)
Records: 2  Duplicates: 0  Warnings: 0

mysql> SELECT SLEEP(1);
+----------+
| SLEEP(1) |
+----------+
|        0 |
+----------+
1 row in set (1.00 sec)

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2);
Query OK, 2 rows affected (0.07 sec)
Records: 2  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1),(2,1),(2,1),(2,1),(2,1),(2,1),(2,1);
Query OK, 8 rows affected (0.08 sec)
Records: 8  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
Query OK, 4 rows affected (0.13 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
Query OK, 2 rows affected (0.10 sec)
Records: 2  Duplicates: 0  Warnings: 0

mysql> UPDATE graph_edges SET object_id = 1 WHERE object_id IS NULL;
Query OK, 22 rows affected (0.06 sec)
Rows matched: 22  Changed: 22  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2);
Query OK, 2 rows affected (0.05 sec)
Records: 2  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1),(2,1),(2,1),(2,1),(2,1),(2,1),(2,1);
Query OK, 8 rows affected (0.05 sec)
Records: 8  Duplicates: 0  Warnings: 0

mysql> SELECT SLEEP(2);
+----------+
| SLEEP(2) |
+----------+
|        0 |
+----------+
1 row in set (2.00 sec)

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
Query OK, 4 rows affected (0.08 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
Query OK, 2 rows affected (0.05 sec)
Records: 2  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
Query OK, 4 rows affected (0.06 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
Query OK, 2 rows affected (0.05 sec)
Records: 2  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (1,2),(1,2),(1,2),(1,2);
Query OK, 4 rows affected (0.05 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql> INSERT INTO graph_edges (from_id,to_id) VALUES (2,1),(2,1);
Query OK, 2 rows affected (0.06 sec)
Records: 2  Duplicates: 0  Warnings: 0

mysql> UPDATE graph_edges SET object_id = 2 WHERE object_id IS NULL;
Query OK, 28 rows affected (0.06 sec)
Rows matched: 28  Changed: 28  Warnings: 0

mysql> SELECT * FROM graph_edges;
+----+-----------+---------------------+---------+-------+
| pk | object_id | tm                  | from_id | to_id |
+----+-----------+---------------------+---------+-------+
|  1 |         1 | 2012-11-20 17:29:13 |       1 |     2 |
|  2 |         1 | 2012-11-20 17:29:13 |       1 |     2 |
|  3 |         1 | 2012-11-20 17:29:13 |       1 |     2 |
|  4 |         1 | 2012-11-20 17:29:13 |       1 |     2 |
|  5 |         1 | 2012-11-20 17:29:15 |       2 |     1 |
|  6 |         1 | 2012-11-20 17:29:15 |       2 |     1 |
|  7 |         1 | 2012-11-20 17:29:16 |       1 |     2 |
|  8 |         1 | 2012-11-20 17:29:16 |       1 |     2 |
|  9 |         1 | 2012-11-20 17:29:16 |       2 |     1 |
| 10 |         1 | 2012-11-20 17:29:16 |       2 |     1 |
| 11 |         1 | 2012-11-20 17:29:16 |       2 |     1 |
| 12 |         1 | 2012-11-20 17:29:16 |       2 |     1 |
| 13 |         1 | 2012-11-20 17:29:16 |       2 |     1 |
| 14 |         1 | 2012-11-20 17:29:16 |       2 |     1 |
| 15 |         1 | 2012-11-20 17:29:16 |       2 |     1 |
| 16 |         1 | 2012-11-20 17:29:16 |       2 |     1 |
| 17 |         1 | 2012-11-20 17:29:16 |       1 |     2 |
| 18 |         1 | 2012-11-20 17:29:16 |       1 |     2 |
| 19 |         1 | 2012-11-20 17:29:16 |       1 |     2 |
| 20 |         1 | 2012-11-20 17:29:16 |       1 |     2 |
| 21 |         1 | 2012-11-20 17:29:17 |       2 |     1 |
| 22 |         1 | 2012-11-20 17:29:17 |       2 |     1 |
| 23 |         2 | 2012-11-20 17:29:17 |       1 |     2 |
| 24 |         2 | 2012-11-20 17:29:17 |       1 |     2 |
| 25 |         2 | 2012-11-20 17:29:17 |       2 |     1 |
| 26 |         2 | 2012-11-20 17:29:17 |       2 |     1 |
| 27 |         2 | 2012-11-20 17:29:17 |       2 |     1 |
| 28 |         2 | 2012-11-20 17:29:17 |       2 |     1 |
| 29 |         2 | 2012-11-20 17:29:17 |       2 |     1 |
| 30 |         2 | 2012-11-20 17:29:17 |       2 |     1 |
| 31 |         2 | 2012-11-20 17:29:17 |       2 |     1 |
| 32 |         2 | 2012-11-20 17:29:17 |       2 |     1 |
| 33 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 34 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 35 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 36 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 37 |         2 | 2012-11-20 17:29:19 |       2 |     1 |
| 38 |         2 | 2012-11-20 17:29:19 |       2 |     1 |
| 39 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 40 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 41 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 42 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 43 |         2 | 2012-11-20 17:29:19 |       2 |     1 |
| 44 |         2 | 2012-11-20 17:29:19 |       2 |     1 |
| 45 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 46 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 47 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 48 |         2 | 2012-11-20 17:29:19 |       1 |     2 |
| 49 |         2 | 2012-11-20 17:29:19 |       2 |     1 |
| 50 |         2 | 2012-11-20 17:29:19 |       2 |     1 |
+----+-----------+---------------------+---------+-------+
50 rows in set (0.00 sec)

mysql>

Stage 3 : Create Query with Running Counters That Changes When Tuple Changes (call it the Tuple Change Query)

SET @r=0;
SET @old_from=-1;
SET @old_to=-1;
SELECT from_id,to_id,
@inc:=((@old_from<>from_id)||(@old_to<>to_id)),
@old_from:=from_id,@old_to:=to_id,
@r:=(@r+@inc) as group_number
FROM graph_edges;

Stage 4 : Run the Tuple Change Query

mysql> SET @r=0;
Query OK, 0 rows affected (0.00 sec)

mysql> SET @old_from=-1;
Query OK, 0 rows affected (0.00 sec)

mysql> SET @old_to=-1;
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT from_id,to_id,
    -> @inc:=((@old_from<>from_id)||(@old_to<>to_id)),
    -> @old_from:=from_id,@old_to:=to_id,
    -> @r:=(@r+@inc) as group_number
    -> FROM graph_edges;
+---------+-------+------------------------------------------------+--------------------+----------------+--------------+
| from_id | to_id | @inc:=((@old_from<>from_id)||(@old_to<>to_id)) | @old_from:=from_id | @old_to:=to_id | group_number |
+---------+-------+------------------------------------------------+--------------------+----------------+--------------+
|       1 |     2 |                                              1 |                  1 |              2 |            1 |
|       1 |     2 |                                              0 |                  1 |              2 |            1 |
|       1 |     2 |                                              0 |                  1 |              2 |            1 |
|       1 |     2 |                                              0 |                  1 |              2 |            1 |
|       2 |     1 |                                              1 |                  2 |              1 |            2 |
|       2 |     1 |                                              0 |                  2 |              1 |            2 |
|       1 |     2 |                                              1 |                  1 |              2 |            3 |
|       1 |     2 |                                              0 |                  1 |              2 |            3 |
|       2 |     1 |                                              1 |                  2 |              1 |            4 |
|       2 |     1 |                                              0 |                  2 |              1 |            4 |
|       2 |     1 |                                              0 |                  2 |              1 |            4 |
|       2 |     1 |                                              0 |                  2 |              1 |            4 |
|       2 |     1 |                                              0 |                  2 |              1 |            4 |
|       2 |     1 |                                              0 |                  2 |              1 |            4 |
|       2 |     1 |                                              0 |                  2 |              1 |            4 |
|       2 |     1 |                                              0 |                  2 |              1 |            4 |
|       1 |     2 |                                              1 |                  1 |              2 |            5 |
|       1 |     2 |                                              0 |                  1 |              2 |            5 |
|       1 |     2 |                                              0 |                  1 |              2 |            5 |
|       1 |     2 |                                              0 |                  1 |              2 |            5 |
|       2 |     1 |                                              1 |                  2 |              1 |            6 |
|       2 |     1 |                                              0 |                  2 |              1 |            6 |
|       1 |     2 |                                              1 |                  1 |              2 |            7 |
|       1 |     2 |                                              0 |                  1 |              2 |            7 |
|       2 |     1 |                                              1 |                  2 |              1 |            8 |
|       2 |     1 |                                              0 |                  2 |              1 |            8 |
|       2 |     1 |                                              0 |                  2 |              1 |            8 |
|       2 |     1 |                                              0 |                  2 |              1 |            8 |
|       2 |     1 |                                              0 |                  2 |              1 |            8 |
|       2 |     1 |                                              0 |                  2 |              1 |            8 |
|       2 |     1 |                                              0 |                  2 |              1 |            8 |
|       2 |     1 |                                              0 |                  2 |              1 |            8 |
|       1 |     2 |                                              1 |                  1 |              2 |            9 |
|       1 |     2 |                                              0 |                  1 |              2 |            9 |
|       1 |     2 |                                              0 |                  1 |              2 |            9 |
|       1 |     2 |                                              0 |                  1 |              2 |            9 |
|       2 |     1 |                                              1 |                  2 |              1 |           10 |
|       2 |     1 |                                              0 |                  2 |              1 |           10 |
|       1 |     2 |                                              1 |                  1 |              2 |           11 |
|       1 |     2 |                                              0 |                  1 |              2 |           11 |
|       1 |     2 |                                              0 |                  1 |              2 |           11 |
|       1 |     2 |                                              0 |                  1 |              2 |           11 |
|       2 |     1 |                                              1 |                  2 |              1 |           12 |
|       2 |     1 |                                              0 |                  2 |              1 |           12 |
|       1 |     2 |                                              1 |                  1 |              2 |           13 |
|       1 |     2 |                                              0 |                  1 |              2 |           13 |
|       1 |     2 |                                              0 |                  1 |              2 |           13 |
|       1 |     2 |                                              0 |                  1 |              2 |           13 |
|       2 |     1 |                                              1 |                  2 |              1 |           14 |
|       2 |     1 |                                              0 |                  2 |              1 |           14 |
+---------+-------+------------------------------------------------+--------------------+----------------+--------------+
50 rows in set (0.00 sec)

mysql>

Please notice that @inc changes only when the tuple changes !!!

Stage 5 : Put Tuple Change Query in a Subquery, extract Needed Data From Tuple Change Query, Run GROUP BY group_number (call it Duplicate Extraction Query)

SET @r=0;
SET @old_from=-1;
SET @old_to=-1;
SELECT group_number,from_id,to_id FROM
(
    SELECT from_id,to_id,
    @inc:=((@old_from<>from_id)||(@old_to<>to_id)),
    @old_from:=from_id,@old_to:=to_id,
    @r:=(@r+@inc) as group_number
    FROM graph_edges
) g GROUP BY group_number;

Stage 6 : Run Duplicate Extraction Query

mysql> SET @r=0;
Query OK, 0 rows affected (0.00 sec)

mysql> SET @old_from=-1;
Query OK, 0 rows affected (0.00 sec)

mysql> SET @old_to=-1;
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT group_number,from_id,to_id FROM
    -> (
    ->     SELECT from_id,to_id,
    ->     @inc:=((@old_from<>from_id)||(@old_to<>to_id)),
    ->     @old_from:=from_id,@old_to:=to_id,
    ->     @r:=(@r+@inc) as group_number
    ->     FROM graph_edges
    -> ) g GROUP BY group_number;
+--------------+---------+-------+
| group_number | from_id | to_id |
+--------------+---------+-------+
|            1 |       1 |     2 |
|            2 |       2 |     1 |
|            3 |       1 |     2 |
|            4 |       2 |     1 |
|            5 |       1 |     2 |
|            6 |       2 |     1 |
|            7 |       1 |     2 |
|            8 |       2 |     1 |
|            9 |       1 |     2 |
|           10 |       2 |     1 |
|           11 |       1 |     2 |
|           12 |       2 |     1 |
|           13 |       1 |     2 |
|           14 |       2 |     1 |
+--------------+---------+-------+
14 rows in set (0.00 sec)

mysql>

Stage 7 : (OPTIONAL) Show Count for Each group_number

SET @r=0;
SET @old_from=-1;
SET @old_to=-1;
SELECT group_number,from_id,to_id,count(1) group_count FROM
(
    SELECT from_id,to_id,
    @inc:=((@old_from<>from_id)||(@old_to<>to_id)),
    @old_from:=from_id,@old_to:=to_id,
    @r:=(@r+@inc) as group_number
    FROM graph_edges
) g GROUP BY group_number;

Stage 8 : (OPTIONAL) Run the Show Count Query for Each group_number

mysql> SET @r=0;
Query OK, 0 rows affected (0.00 sec)

mysql> SET @old_from=-1;
Query OK, 0 rows affected (0.00 sec)

mysql> SET @old_to=-1;
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT group_number,from_id,to_id,count(1) group_count FROM
    -> (
    ->     SELECT from_id,to_id,
    ->     @inc:=((@old_from<>from_id)||(@old_to<>to_id)),
    ->     @old_from:=from_id,@old_to:=to_id,
    ->     @r:=(@r+@inc) as group_number
    ->     FROM graph_edges
    -> ) g GROUP BY group_number;
+--------------+---------+-------+-------------+
| group_number | from_id | to_id | group_count |
+--------------+---------+-------+-------------+
|            1 |       1 |     2 |           4 |
|            2 |       2 |     1 |           2 |
|            3 |       1 |     2 |           2 |
|            4 |       2 |     1 |           8 |
|            5 |       1 |     2 |           4 |
|            6 |       2 |     1 |           2 |
|            7 |       1 |     2 |           2 |
|            8 |       2 |     1 |           8 |
|            9 |       1 |     2 |           4 |
|           10 |       2 |     1 |           2 |
|           11 |       1 |     2 |           4 |
|           12 |       2 |     1 |           2 |
|           13 |       1 |     2 |           4 |
|           14 |       2 |     1 |           2 |
+--------------+---------+-------+-------------+
14 rows in set (0.00 sec)

mysql>

Stage 9 : Give it a Try !!!