Mysql – Append duplicate records with count (thesql)

MySQL

We need to update a set of titles that have some duplicate entries. Because of the size of the database I would prefer to use direct mysql method, but I am open to any solutions out there.

If we have records (url_title):

a-stoney
a-stoney
ab-dille
ab-dille
ab-dille
ac-gifta
ae-jones
ae-jones

I want to be able to run against the table and end up with ONLY the duplicate url_titles updated as follows:

a-stoney
a-stoney-1
ab-dille
ab-dille-1
ab-dille-2
ac-gifta
ae-jones
ae-jones-1

Or some similar output, where non-duplicate url-titles are not edited, and any duplicates are applied a similar convention

Table syntax:

CREATE TABLE `exp_channel_titles` (
  `entry_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `site_id` int(4) unsigned NOT NULL DEFAULT '1',
  `channel_id` int(4) unsigned NOT NULL,
  `author_id` int(10) unsigned NOT NULL DEFAULT '0',
  `forum_topic_id` int(10) unsigned DEFAULT NULL,
  `ip_address` varchar(45) NOT NULL DEFAULT '0',
  `title` varchar(100) DEFAULT NULL,
  `url_title` varchar(75) NOT NULL,
  `status` varchar(50) NOT NULL,
  `versioning_enabled` char(1) NOT NULL DEFAULT 'n',
  `view_count_one` int(10) unsigned NOT NULL DEFAULT '0',
  `view_count_two` int(10) unsigned NOT NULL DEFAULT '0',
  `view_count_three` int(10) unsigned NOT NULL DEFAULT '0',
  `view_count_four` int(10) unsigned NOT NULL DEFAULT '0',
  `allow_comments` varchar(1) NOT NULL DEFAULT 'y',
  `sticky` varchar(1) NOT NULL DEFAULT 'n',
  `entry_date` int(10) NOT NULL,
  `year` char(4) NOT NULL,
  `month` char(2) NOT NULL,
  `day` char(3) NOT NULL,
  `expiration_date` int(10) NOT NULL DEFAULT '0',
  `comment_expiration_date` int(10) NOT NULL DEFAULT '0',
  `edit_date` bigint(14) DEFAULT NULL,
  `recent_comment_date` int(10) DEFAULT NULL,
  `comment_total` int(4) unsigned NOT NULL DEFAULT '0',
  PRIMARY KEY (`entry_id`),
  KEY `channel_id` (`channel_id`),
  KEY `author_id` (`author_id`),
  KEY `url_title` (`url_title`),
  KEY `status` (`status`),
  KEY `entry_date` (`entry_date`),
  KEY `expiration_date` (`expiration_date`),
  KEY `site_id` (`site_id`)
) ENGINE=MyISAM AUTO_INCREMENT=181332 DEFAULT CHARSET=utf8;
    enter code here

Best Answer

From the sake of this question, I will call the source table aaronlax

PROPOSED SOLUTION

SET @ndx = 0;
SET @cur_hash = '';
SELECT entry_id,CONCAT(url_title,IF(ndx>0,CONCAT('-',ndx),'')) url_title
FROM (SELECT
   @new_hash := MD5(url_title),
   (@ndx := IF(@new_hash=@cur_hash,@ndx+1,0)) ndx,
   @cur_hash := MD5(url_title),
   url_title,entry_id
FROM (SELECT url_title,entry_id FROM aaronlax ORDER BY url_title) AA) A;

SAMPLE DATA

use test
DROP TABLE IF EXISTS aaronlax;
CREATE TABLE aaronlax
(
    entry_id int(10) unsigned NOT NULL AUTO_INCREMENT,
    url_title varchar(75) NOT NULL,
    PRIMARY KEY (entry_id),
    KEY (url_title)    
) ENGINE=MyISAM;
INSERt INTO aaronlax (url_title) VALUES
('a-stoney'),('a-stoney'),
('ab-dille'),('ab-dille'),
('ab-dille'),('ac-gifta'),
('ae-jones'),('ae-jones');

SAMPLE DATA LOADED

mysql> use test
Database changed
mysql> DROP TABLE IF EXISTS aaronlax;
Query OK, 0 rows affected (0.00 sec)

mysql> CREATE TABLE aaronlax
    -> (
    ->     entry_id int(10) unsigned NOT NULL AUTO_INCREMENT,
    ->     url_title varchar(75) NOT NULL,
    ->     PRIMARY KEY (entry_id),
    ->     KEY (url_title)
    -> ) ENGINE=MyISAM;
Query OK, 0 rows affected (0.02 sec)

mysql> INSERt INTO aaronlax (url_title) VALUES
    -> ('a-stoney'),('a-stoney'),
    -> ('ab-dille'),('ab-dille'),
    -> ('ab-dille'),('ac-gifta'),
    -> ('ae-jones'),('ae-jones');
Query OK, 8 rows affected (0.00 sec)
Records: 8  Duplicates: 0  Warnings: 0

mysql> select * from aaronlax;
+----------+-----------+
| entry_id | url_title |
+----------+-----------+
|        1 | a-stoney  |
|        2 | a-stoney  |
|        3 | ab-dille  |
|        4 | ab-dille  |
|        5 | ab-dille  |
|        6 | ac-gifta  |
|        7 | ae-jones  |
|        8 | ae-jones  |
+----------+-----------+
8 rows in set (0.00 sec)

mysql>

PROPOSED SOLUTION EXECUTED

mysql> SET @ndx = 0;
Query OK, 0 rows affected (0.00 sec)

mysql> SET @cur_hash = '';
Query OK, 0 rows affected (0.00 sec)

mysql> SELECT entry_id,CONCAT(url_title,IF(ndx>0,CONCAT('-',ndx),'')) url_title
    -> FROM (SELECT
    ->    @new_hash := MD5(url_title),
    ->    (@ndx := IF(@new_hash=@cur_hash,@ndx+1,0)) ndx,
    ->    @cur_hash := MD5(url_title),
    ->    url_title,entry_id
    -> FROM (SELECT url_title,entry_id FROM aaronlax ORDER BY url_title) AA) A;
+----------+------------+
| entry_id | url_title  |
+----------+------------+
|        1 | a-stoney   |
|        2 | a-stoney-1 |
|        3 | ab-dille   |
|        4 | ab-dille-1 |
|        5 | ab-dille-2 |
|        6 | ac-gifta   |
|        7 | ae-jones   |
|        8 | ae-jones-1 |
+----------+------------+
8 rows in set (0.00 sec)

mysql>

GIVE IT A TRY !!!

CAVEAT

In case you are wondering why the proposed solution works, let's take a look at the subqueries

INNERMOST SUBQUERY

mysql> SELECT url_title,entry_id FROM aaronlax ORDER BY url_title;
+-----------+----------+
| url_title | entry_id |
+-----------+----------+
| a-stoney  |        1 |
| a-stoney  |        2 |
| ab-dille  |        3 |
| ab-dille  |        4 |
| ab-dille  |        5 |
| ac-gifta  |        6 |
| ae-jones  |        7 |
| ae-jones  |        8 |
+-----------+----------+
8 rows in set (0.00 sec)

mysql>

NEXT LEVEL SUBQUERY

mysql> SELECT
    ->    @new_hash := MD5(url_title),
    ->    (@ndx := IF(@new_hash=@cur_hash,@ndx+1,0)) ndx,
    ->    @cur_hash := MD5(url_title),
    ->    url_title,entry_id
    -> FROM (SELECT url_title,entry_id FROM aaronlax ORDER BY url_title) AA;
+----------------------------------+------+----------------------------------+-----------+----------+
| @new_hash := MD5(url_title)      | ndx  | @cur_hash := MD5(url_title)      | url_title | entry_id |
+----------------------------------+------+----------------------------------+-----------+----------+
| 911adc6db360f218da5069b20cfb1917 |    0 | 911adc6db360f218da5069b20cfb1917 | a-stoney  |        1 |
| 911adc6db360f218da5069b20cfb1917 |    1 | 911adc6db360f218da5069b20cfb1917 | a-stoney  |        2 |
| b468c0dd0ef78fde7cf5ae14765f9a5a |    0 | b468c0dd0ef78fde7cf5ae14765f9a5a | ab-dille  |        3 |
| b468c0dd0ef78fde7cf5ae14765f9a5a |    1 | b468c0dd0ef78fde7cf5ae14765f9a5a | ab-dille  |        4 |
| b468c0dd0ef78fde7cf5ae14765f9a5a |    2 | b468c0dd0ef78fde7cf5ae14765f9a5a | ab-dille  |        5 |
| 3fff0367af8bf5d3b3b0089b8f187c04 |    0 | 3fff0367af8bf5d3b3b0089b8f187c04 | ac-gifta  |        6 |
| 1909ba0f7a57ca081c82d2f7ba54c9e5 |    0 | 1909ba0f7a57ca081c82d2f7ba54c9e5 | ae-jones  |        7 |
| 1909ba0f7a57ca081c82d2f7ba54c9e5 |    1 | 1909ba0f7a57ca081c82d2f7ba54c9e5 | ae-jones  |        8 |
+----------------------------------+------+----------------------------------+-----------+----------+
8 rows in set (0.00 sec)

mysql>

From this level, I only need three columns: entry_id, url_title, ndx in the outer most level

Then, I append to the url_title a hyphen and ndx if ndx is nonzero