[20160706]like % 綁定變量.txt
--最近一直在優化一個項目,程式中存在大量的like模糊查詢,例子:
/* Formatted on 2016/7/6 11:10:55 (QP5 v5.252.13127.32867) */
SELECT PATIENT_ID
,NAME
,RIS_NO
,SEX
,EXAM_ITEM
,DATE_OF_BIRTH
,LASTSAVEUSER
,EXAM_PARA
,PATIENT_LOCAL_ID
,MODALITY
,EXAM_CLASS
,EXAM_SUB_CLASS
,DEVICE
,RESULT_STATUS
,EXAM_DATE_TIME
,STUDY_UID
,LASTSAVETIME
,CHECKUSER
,NORMALNAME
,CHECKSTATE
,OUT_URL
,CHECKUSERID
,CREATEUSER
,CHECKTIME
,PRINT_STATUS
,IS_ABNORMAL
FROM REPORTQUERY
WHERE patient_id LIKE :V001
AND (LASTSAVETIME BETWEEN :V002 AND :V003)
AND (checkstate = :V004)
AND (modality = :V005)
AND ROWNUM < 1001
SQL_ID CHILD_NUMBER WAS NAME POSITION MAX_LENGTH LAST_CAPTURED DATATYPE_STRING VALUE_STRING
------------- ------------ --- -------------------- ---------- ---------- ------------------- --------------- --------------------
222m95hh0kvgh 1 YES :V001 1 128 2016-07-02 10:53:34 VARCHAR2(128) %TJ1431070%
YES :V002 2 32 2016-07-02 10:53:34 VARCHAR2(32) 2016-07-02 00:00:00
YES :V003 3 32 2016-07-02 10:53:34 VARCHAR2(32) 2016-07-02 23:59:59
YES :V004 4 2000 2016-07-02 10:53:34 VARCHAR2(2000) 40
YES :V005 5 128 2016-07-02 10:53:34 VARCHAR2(128) US
--//REPORTQUERY是視圖,本來選擇patient_id是很快的查詢,由于前面的%,導緻執行計劃選擇LASTSAVETIME,而這個範圍是1天(有時候查詢1個月),導緻
--//大量的邏輯讀。我自己做一些測試想看看使用綁定變量的情況。
1.環境:
SCOTT@book> @ &r/ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
2.建立測試例子:
create table tx (id number,v varchar2(20),nv nvarchar2(20),pad varchar2(200));
insert into tx select rownum,lpad(rownum,6,'0'),lpad(rownum,6,'0'),lpad('x',200,'x') from dual connect by level<=1e5;
commit ;
create index i_tx_v on tx(v) PCTFREE 50;
create index i_tx_nv on tx(nv) PCTFREE 50;
execute sys.dbms_stats.gather_table_stats ( OwnName => user,TabName => 'tx',Estimate_Percent => NULL,Method_Opt => 'FOR ALL COLUMNS SIZE 1 ',Cascade => True ,No_Invalidate => false);
3.測試:
SCOTT@book> column pad noprint
SCOTT@book> alter session set statistics_level=all;
Session altered.
SCOTT@book> select * from tx where v like '%000042%';
ID V NV
---------- -------------------- ----------------------------------------
42 000042 000042
SCOTT@book> @ &r/dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID c9z5cg37cu8yz, child number 0
select * from tx where v like '%000042%'
Plan hash value: 40191160
--------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
| 0 | SELECT STATEMENT | | 1 | | | 888 (100)| | 1 |00:00:00.06 | 3275 |
|* 1 | TABLE ACCESS FULL| TX | 1 | 5000 | 1103K| 888 (1)| 00:00:11 | 1 |00:00:00.06 | 3275 |
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1 / TX@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter(("V" LIKE '%000042%' AND "V" IS NOT NULL))
--如果查詢select * from tx where v like '000042%';是可以使用索引的。
4.如果使用綁定變量呢?
SCOTT@book> variable x varchar2(20);
SCOTT@book> exec :x := '%000042%';
PL/SQL procedure successfully completed.
SCOTT@book> select * from tx where v like :x;
SQL_ID 87rcszfzcz5gu, child number 0
select * from tx where v like :x
Plan hash value: 3964412060
-----------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers | Reads |
| 0 | SELECT STATEMENT | | 1 | | | 187 (100)| | 1 |00:00:00.07 | 464 | 461 |
| 1 | TABLE ACCESS BY INDEX ROWID| TX | 1 | 5000 | 1103K| 187 (0)| 00:00:03 | 1 |00:00:00.07 | 464 | 461 |
|* 2 | INDEX RANGE SCAN | I_TX_V | 1 | 5000 | | 25 (0)| 00:00:01 | 1 |00:00:00.07 | 463 | 461 |
2 - SEL$1 / TX@SEL$1
Peeked Binds (identified by position):
--------------------------------------
1 - (VARCHAR2(30), CSID=852): '%000042%'
2 - access("V" LIKE :X)
filter("V" LIKE :X)
--可以發現綁定變量是使用索引的。有點奇怪的是估計傳回相同的行相同,而帶入參數選擇索引,估計索引群集因子很小。
--把索引建立大一些看看。
SCOTT@book> alter index i_tx_v rebuild pctfree 90;
Index altered.
SCOTT@book> execute sys.dbms_stats.gather_table_stats ( OwnName => user,TabName => 'tx',Estimate_Percent => NULL,Method_Opt => 'FOR ALL COLUMNS SIZE 1 ',Cascade => True ,No_Invalidate => false);
--------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
| 0 | SELECT STATEMENT | | 1 | | | 307 (100)| | 1 |00:00:00.07 | 2862 |
| 1 | TABLE ACCESS BY INDEX ROWID| TX | 1 | 5000 | 1103K| 307 (0)| 00:00:04 | 1 |00:00:00.07 | 2862 |
|* 2 | INDEX RANGE SCAN | I_TX_V | 1 | 5000 | | 145 (0)| 00:00:02 | 1 |00:00:00.07 | 2861 |
filter("V" LIKE :X)
--//這樣并沒有改變群集因子,cost增加不大。
--//oracle 估計傳回5000行,占5000/100000=5%.oracle也是選擇索引範圍掃描。
--//這種情況是選擇索引好還是全表掃描還呢?主要看傳回的行數量。
--//oracle很奇怪,前面的直接打入文字變量,估計傳回也是5000,但是執行計劃選擇的是全表掃描。
SCOTT@book> exec :x := '%00%';
Plan hash value: 3964412060
| 0 | SELECT STATEMENT | | 1 | | | 307 (100)| | 12520 |00:00:00.08 | 4242 |
| 1 | TABLE ACCESS BY INDEX ROWID| TX | 1 | 5000 | 1103K| 307 (0)| 00:00:04 | 12520 |00:00:00.08 | 4242 |
|* 2 | INDEX RANGE SCAN | I_TX_V | 1 | 5000 | | 145 (0)| 00:00:02 | 12520 |00:00:00.07 | 2923 |
--------------------------------------------------------------------------------------------------------------------------------
--執行計劃不變。重建立立表ty。大亂順序,提高索引的群集因子。
SCOTT@book> create table ty as select * from tx order by DBMS_RANDOM.VALUE;
Table created.
create index i_ty_v on ty(v) PCTFREE 10;
create index i_ty_nv on ty(nv) PCTFREE 10;
--分析表,忽略。
SCOTT@book> exec :x := '%00%';
SQL_ID 50t5atjp1pjh9, child number 0
select * from ty where v like :x
Plan hash value: 1260447134
-----------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers | Reads |
| 0 | SELECT STATEMENT | | 1 | | | 893 (100)| | 12520 |00:00:00.07 | 3284 | 3226 |
|* 1 | TABLE ACCESS FULL| TY | 1 | 5000 | 1103K| 893 (1)| 00:00:11 | 12520 |00:00:00.07 | 3284 | 3226 |
1 - SEL$1 / TY@SEL$1
1 - (VARCHAR2(30), CSID=852): '%00%'
1 - filter("V" LIKE :X)
--可以看到群集因子對執行計劃影響很大。實際上這種執行計劃如果傳回行很少,使用索引更佳。
--//我個人的建議最好少用模糊查詢,開發往往說這個是使用者要求,實際上如果最後很慢了,使用者往往抱怨不斷。我自己很久以前就遇到類似的情況。
--//我還建議開發可以做一個按鈕,預設做精确查詢,使用者要模糊查詢必須選中它。如果模糊很慢,以後使用者自然會少用,可惜大部分開發不願意做.....
--//當然現在有許多方法來穩定執行計劃。sql profile ,spm都可以實作。但是實際上這種是以掃描整個索引為代價,而且是單塊讀。
--//我以前的做法就是人為修改統計資訊,lock統計,增加表的塊數量,導緻執行計劃選擇索引,因為大多數情況傳回的行很少,很多查詢基本沒有意義。
--//比如你查詢 name like '%李%',這樣傳回很多行,使用者不大可能在裡面查找需要的資訊。
SCOTT@book> EXECUTE SYS.DBMS_STATS.set_table_stats (OWNNAME=>user, TABNAME=>'ty', numblks=> 8888888);
SCOTT@book> select * from ty where v like :x;
SQL_ID 50t5atjp1pjh9, child number 1
Plan hash value: 1299836486
| 0 | SELECT STATEMENT | | 1 | | | 5014 (100)| | 1 |00:00:00.06 | 255 | 1 |
| 1 | TABLE ACCESS BY INDEX ROWID| TY | 1 | 5000 | 1103K| 5014 (1)| 00:01:01 | 1 |00:00:00.06 | 255 | 1 |
|* 2 | INDEX RANGE SCAN | I_TY_V | 1 | 5000 | | 14 (0)| 00:00:01 | 1 |00:00:00.06 | 254 | 0 |
2 - SEL$1 / TY@SEL$1
--總之,開發寫代碼不要太随意了,認真思考可能産生的結果。看問題看得長遠一些。
--當使用模糊like時,裡面的查詢條件一定要注意。