MYSQL阿里的一个sql优化问题

原创水平有限，如果有误请指出

今天研究了一天innodb事物，临近下班了同事田兴春告诉我有个阿里发出来的优化题，并且把建表和语句给我了，并且告诉我语句里面有隐式转换
昨天群里也有人说这道题但是一直没空看，刚好这会没事就看了一下，整个脚本如下：

点击(此处)折叠或打开

成都创新互联于2013年开始，先为河池等服务建站，河池等地企业，进行企业商务咨询服务。为河池企业网站制作PC+手机+微官网三网同步一站式服务解决您的所有建站问题。

数据准备：
create table a (id int auto_increment,seller_id bigint,seller_name varchar(100) collate utf8_bin ,gmt_create varchar(30),primary key(id));
insert into a (seller_id,seller_name,gmt_create) values (100000,'uniqla','2017-01-01');
insert into a (seller_id,seller_name,gmt_create) values (100001,'uniqlb','2017-02-01');
insert into a (seller_id,seller_name,gmt_create) values (100002,'uniqlc','2017-03-01');
insert into a (seller_id,seller_name,gmt_create) values (100003,'uniqld','2017-04-01');
insert into a (seller_id,seller_name,gmt_create) values (100004,'uniqle','2017-05-01');
insert into a (seller_id,seller_name,gmt_create) values (100005,'uniqlf','2017-06-01');
insert into a (seller_id,seller_name,gmt_create) values (100006,'uniqlg','2017-07-01');
insert into a (seller_id,seller_name,gmt_create) values (100007,'uniqlh','2017-08-01');
insert into a (seller_id,seller_name,gmt_create) values (100008,'uniqli','2017-09-01');
insert into a (seller_id,seller_name,gmt_create) values (100009,'uniqlj','2017-10-01');
insert into a (seller_id,seller_name,gmt_create) values (100010,'uniqlk','2017-11-01');
insert into a (seller_id,seller_name,gmt_create) values (100011,'uniqll','2017-12-01');
insert into a (seller_id,seller_name,gmt_create) values (100012,'uniqlm','2018-01-01');
insert into a (seller_id,seller_name,gmt_create) values (100013,'uniqln','2018-02-01');
insert into a (seller_id,seller_name,gmt_create) values (100014,'uniqlo','2018-03-01');
insert into a (seller_id,seller_name,gmt_create) values (100015,'uniqlp','2018-04-01');
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) select seller_id,seller_name,gmt_create from a;
insert into a (seller_id,seller_name,gmt_create) values (100016,'uniqlq',now());
create table b (id int auto_increment,seller_name varchar(100),user_id varchar(50),user_name varchar(100),sales bigint,gmt_create varchar(30),primary key(id));
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqla','1','a',1,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlb','2','b',3,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlc','3','c',1,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqld','4','d',4,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqle','5','e',5,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlf','6','f',1,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlg','7','g',7,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlh','8','h',1,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqli','9','i',1,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlj','10','j',15,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlk','11','k',61,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqll','12','l',31,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlm','13','m',134,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqln','14','n',1455,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlo','15','o',166,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('niqlp','16','p',15,now());
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) select seller_name,user_id,user_name,sales,gmt_create from b;
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('uniqlq','17','s',109,now());
create table c (id int auto_increment,user_id varchar(50),order_id varchar(100),state bigint,gmt_create varchar(30),primary key(id));
insert into c (user_id,order_id,state,gmt_create) values( 21,1,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 22,2,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 33,3,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 43,4,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 54,5,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 65,6,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 75,7,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 85,8,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 95,8,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 100,8,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) values( 150,8,0 ,now() );
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) select user_id,order_id,state,gmt_create from c;
insert into c (user_id,order_id,state,gmt_create) values( 17,8,0 ,now() );
待优化SQL：
select a.seller_id,a.seller_name,b.user_name,c.state
from a,b,c
where a.seller_name=b.seller_name
and b.user_id=c.user_id
and c.user_id=17
and a.gmt_create BETWEEN DATE_ADD(NOW(), INTERVAL - 600 MINUTE)
AND DATE_ADD(NOW(), INTERVAL 600 MINUTE)
order by a.gmt_create

先说明这个优化题目主要考察下面5点：
1、BNL和NJL的区别
2、NJL的实现
3、DBA对于数据分布的观察
4、隐式转换索引不能使用
5、比较字符集不同索引不能使用

一、我们先来分别描述
1、BNL和NJL的区别
这个区别参考我的文章
http://blog.itpub.net/7728585/viewspace-2129502/
(从顺序随机I/O原理来讨论MySQL MRR NLJ BNL BKA )
简单的说BNL一般用于TYPE=INDEX以及TYPE=ALL的情况，因为被驱动表连接条件没有索引，而需要join buffer 将驱动表中待连接的
数据取出来(物理/逻辑读取)，放到join buffer，主要目的在于减少被驱动表的驱动次数，从而提高效率，因为没有索引的情况
被驱动表扫描一次实在太慢了，这里的B就是BLOCK的意思.
而NJL一般用于被驱动表连接条件有索引的情况，通过索引上的ref或者eq_ref(取决于索引是否唯一)就理所当然的快很多很多，这个时候join buffer是不会
使用的，它只需要读取一条数据(物理/逻辑读取)来驱动一次驱动表，因为驱动表连接条件有索引，自然就快了(索引定位回表)
2、NJL的实现
同样可以参考上面的文章，上面也大概说了一下，就不在废话了
3、DBA对于数据分布的观察
这一点是人为可以达到的，简单的说比如一个表有100条数据 99条为no=1 1条为no=2，那么我们
需要对这个有所警觉，如果这个表示用作驱动表那么no=2的时候效果要远远好于no=1。这道题也有
这个因素
明显and c.user_id='17' 只有一条数据
4、隐式转换索引不能使用
这个不管是MYSQL还是ORACLE都有的问题，
ORACLE会显示给出来to_char(id)='1'之类的
MYSQL中会有如下类似的警告
| Warning | 1739 | Cannot use ref access on index 'user_id' due to type or collation conversion on field 'user_id' |
| Warning | 1739 | Cannot use range access on index 'user_id' due to type or collation conversion on field 'user_id'

比如这里的
c.user_id=17
而
user_id 是varchar类型不是int类型
又比如这里的
a.gmt_create BETWEEN DATE_ADD(NOW(), INTERVAL - 600 MINUTE)
AND DATE_ADD(NOW(), INTERVAL 600 MINUTE)
这里
gmt_create varchar(30) 居然也是varchar 擦！！
5、比较字符集不同索引使用异常
这个关于字符串的比较问题我已经在文章里面有所描述
http://blog.itpub.net/7728585/viewspace-2141914/
简单的说这里
a.seller_name=b.seller_name
a.seller_name 比较字符集是utf8_bin 区分大小写
而
b.seller_name 是不区分大小写的这是默认的。
他们之间做join必然被驱动表用不到索引使用异常。(innodb 可以icp)
也会有类似如下的警告：
Cannot use ref access on index 'seller_name' due to type or collation conversion on field 'seller_name'

二、优化原则问题
我们知道基本所有的语句执行算法逻辑都在MYSQL层次，INNODB只是负责将数据通过几种方式
(PAGE_CUR_G，PAGE_CUR_GE，PAGE_CUR_L，PAGE_CUR_LE)扫描出来，递送给MYSQL层次进行处理，这之间存在扫描拿到
innodb record-->innodb tuple-->mysql record的一个转换的过程，这个步骤大部分被标记为sending data过程
(update/delete为updating)，那么我们就有必要减少中间结果集的产生，来减少整个从innodb拿数据到MYSQL层的
整个数据量。这里以NJL的优化原则为列解释，因为这道题就是这个目的

1、减少NJL驱动结果集的数据
这事显而易见的，减少驱动次数自然就减少了数据在innodb和mysql之间的传递
2、被驱动表的索引唯一性要尽量好
这个问题稍微难理解一点，但是仔细想一下也没什么，如果被驱动表索引唯一性更好，那么通过索引回表的次数就少了，
这里可以通过rows和filter进行大概判断，大概是因为他们本来就不准。
曾经我们就有一个列子也是同事田兴春和我一起看的。一个被驱动表有两个链接条件，一个索引唯一性很差，而唯一性好的连接
列上没有索引，我们在唯一性好的列上建立了索引性能马上提升了。

三、关于本题

我们还是先避免c.user_id=17 隐试转换将17改为'17',a.gmt_create没有必要改他，原因后面会说
select a.seller_id,a.seller_name,b.user_name,c.state
from a,b,c
where a.seller_name=b.seller_name
and b.user_id=c.user_id
and c.user_id='17'
and a.gmt_create BETWEEN DATE_ADD(NOW(), INTERVAL - 600 MINUTE)
AND DATE_ADD(NOW(), INTERVAL 600 MINUTE)
order by a.gmt_create

然后我们看一下执行计划
+----+-------------+-------+------------+------+---------------+------+---------+------+--------+----------+----------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+--------+----------+----------------------------------------------------+
| 1 | SIMPLE | a | NULL | ALL | NULL | NULL | NULL | NULL | 16108 | 11.11 | Using where; Using temporary; Using filesort |
| 1 | SIMPLE | b | NULL | ALL | NULL | NULL | NULL | NULL | 16173 | 10.00 | Using where; Using join buffer (Block Nested Loop) |
| 1 | SIMPLE | c | NULL | ALL | NULL | NULL | NULL | NULL | 359382 | 1.00 | Using where; Using join buffer (Block Nested Loop) |
+----+-------------+-------+------------+------+---------------+------+---------+------+--------+----------+----------------------------------------------------+
3 rows in set, 1 warning (0.02 sec)

这里使用BNL，这事正常的连接条件没有任何索引，同时我们开始观察数据发现这道题
c表最后插入了
insert into c (user_id,order_id,state,gmt_create) values( 17,8,0 ,now() );
b表最后插入了
insert into b (seller_name,user_id,user_name,sales,gmt_create) values ('uniqlq','17','s',109,now());
a表最后插入了
insert into a (seller_id,seller_name,gmt_create) values (100016,'uniqlq',now());
我们可以发现整个语句不管a,b,c表数据量有多大，整个连接下来只有一条数据，这也是我说的DBA对于数据分布观察的问题
，按照最优化的方法通过c表c.user_id='17'过滤后得到一个驱动结果集(实际上这里b表也可以MYSQL自动转换了)只有一条数据
然后连接b表（b.user_id=c.user_id）自然中间驱动结果集也只有一条数据，最后通过（a.seller_name=b.seller_name）连接
a表自然就只有一条数据了
and a.gmt_create BETWEEN DATE_ADD(NOW(), INTERVAL - 600 MINUTE)
AND DATE_ADD(NOW(), INTERVAL 600 MINUTE)
order by a.gmt_create
这两个都不用管它了。

按照这个思想。
我们可以先在c.user_id建立一个索引，意图在于通过索引过滤掉 c.user_id='17'， b.user_id建立索引意图在于NJL被驱动表使用索引而不是全表的BNL
执行计划变为:

点击(此处)折叠或打开

mysql> desc select a.seller_id,a.seller_name,b.user_name,c.state from a,b,c where a.seller_name=b.seller_name and b.user_id=c.user_id and c.user_id='17' and a.gmt_create BETWEEN DATE_ADD(NOW(), INTERVAL - 600 MINUTE) AND
DATE_ADD(NOW(), INTERVAL 600 MINUTE) order by a.gmt_create;
+----+-------------+-------+------------+------+---------------+---------+---------+-------+-------+----------+----------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+---------+---------+-------+-------+----------+----------------------------------------------------+
| 1 | SIMPLE | b | NULL | ref | user_id | user_id | 153 | const | 1 | 100.00 | Using temporary; Using filesort |
| 1 | SIMPLE | c | NULL | ref | user_id | user_id | 153 | const | 1 | 100.00 | Using index condition |
| 1 | SIMPLE | a | NULL | ALL | NULL | NULL | NULL | NULL | 16108 | 1.11 | Using where; Using join buffer (Block Nested Loop) |
+----+-------------+-------+------------+------+---------------+---------+---------+-------+-------+----------+----------------------------------------------------+

显然这里b.user_id=c.user_id and c.user_id='17' 有一个转换如下我们通过sql trace可以看到

"resulting_condition": "((`a`.`seller_name` = `b`.`seller_name`) and (`c`.`user_id` = '17') and (`a`.`gmt_create` between (now() + interval -(600) minute) and (now() + interval 600 minute)) and multiple equal(`b`.`user_id`, `c`.`user_id`))"
我们可以注意这里的
multiple equal(`b`.`user_id`, `c`.`user_id`))"
这实际上进行了转换因为显然的b.user_id='17'是成立的

剩下的就是解决a表的BNL问题。我们不能让a表进行type=ALL 全表扫描，从而加快速度
我们在a.seller_name和b.seller_name建立索引执行计划变成了

点击(此处)折叠或打开

mysql> desc select a.seller_id,a.seller_name,b.user_name,c.state from a,b,c where a.seller_name=b.seller_name and b.user_id=c.user_id and c.user_id='17' and a.gmt_create BETWEEN DATE_ADD(NOW(), INTERVAL - 600 MINUTE) AND
DATE_ADD(NOW(), INTERVAL 600 MINUTE) order by a.gmt_create;
+----+-------------+-------+------------+------+---------------------+-------------+---------+--------------------+------+----------+----------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------------+-------------+---------+--------------------+------+----------+----------------------------------------------+
| 1 | SIMPLE | b | NULL | ref | user_id,seller_name | user_id | 153 | const | 1 | 100.00 | Using where; Using temporary; Using filesort |
| 1 | SIMPLE | c | NULL | ref | user_id | user_id | 153 | const | 1 | 100.00 | Using index condition |
| 1 | SIMPLE | a | NULL | ref | seller_name | seller_name | 303 | test.b.seller_name | 947 | 11.11 | Using index condition; Using where |
+----+-------------+-------+------------+------+---------------------+-------------+---------+--------------------+------+----------+----------------------------------------------+
3 rows in set, 2 warnings (0.00 sec)

这个时候我们看起来使用到了索引，但是这是ICP的功劳，我们看警告
Cannot use ref access on index 'seller_name' due to type or collation conversion on field 'seller_name'
这也就是我说的比较字符集不同索引使用异常，为了消除这个问题我们不得不更改a表seller_name的比较字符集

最后我们得到执行计划

点击(此处)折叠或打开

+----+-------------+-------+------------+------+---------------------+-------------+---------+--------------------+------+----------+----------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------------+-------------+---------+--------------------+------+----------+----------------------------------------------+
| 1 | SIMPLE | b | NULL | ref | user_id,seller_name | user_id | 153 | const | 1 | 100.00 | Using where; Using temporary; Using filesort |
| 1 | SIMPLE | c | NULL | ref | user_id | user_id | 153 | const | 1 | 100.00 | Using index condition |
| 1 | SIMPLE | a | NULL
当前名称：MYSQL阿里的一个sql优化问题
新闻来源：http://hbruida.cn/article/poddpd.html

MYSQL阿里的一个sql优化问题

其他资讯