Skip to content

Commit f18abc9

Browse files
eurekaka, sre-bot
authored and committed
planner: fix row count estimation for unique composite IndexScan of IndexJoin (pingcap#14167)
1 parent 980f72d commit f18abc9

6 files changed

+101
-18
lines changed

cmd/explaintest/r/explain_complex.result

+7-7
Original file line number | Diff line number | Diff line change
@@ -118,11 +118,11 @@ id count task operator info
118118
Projection_13 1.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
119119
└─Limit_16 1.00 root offset:0, count:2500
120120
└─HashAgg_19 1.00 root group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t
121-
└─IndexMergeJoin_30 0.00 root inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.dd.ip, test.st.ip), gt(test.dd.t, test.st.t)
122-
├─IndexLookUp_28 0.00 root
123-
│ ├─IndexScan_25 1.00 cop[tikv] table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
124-
│ └─Selection_27 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
125-
│ └─TableScan_26 1.00 cop[tikv] table:dd, keep order:false, stats:pseudo
121+
└─HashRightJoin_34 0.00 root inner join, inner:IndexLookUp_52, equal:[eq(test.dd.aid, test.st.aid) eq(test.dd.ip, test.st.ip)], other cond:gt(test.dd.t, test.st.t)
122+
├─IndexLookUp_52 0.00 root
123+
│ ├─IndexScan_49 3333.33 cop[tikv] table:dd, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
124+
│ └─Selection_51 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), not(isnull(test.dd.ip))
125+
│ └─TableScan_50 3333.33 cop[tikv] table:dd, keep order:false, stats:pseudo
126126
└─IndexLookUp_41 3.33 root
127127
├─IndexScan_38 3333.33 cop[tikv] table:gad, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
128128
└─Selection_40 3.33 cop[tikv] eq(test.st.pt, "android"), not(isnull(test.st.ip))
@@ -137,9 +137,9 @@ Projection_10 0.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd
137137
│ └─Selection_34 0.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), not(isnull(test.st.dic))
138138
│ └─TableScan_33 3333.33 cop[tikv] table:gad, keep order:false, stats:pseudo
139139
└─IndexLookUp_22 0.00 root
140-
├─IndexScan_19 1.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
140+
├─IndexScan_19 10000.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
141141
└─Selection_21 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t))
142-
└─TableScan_20 1.00 cop[tikv] table:sdk, keep order:false, stats:pseudo
142+
└─TableScan_20 10000.00 cop[tikv] table:sdk, keep order:false, stats:pseudo
143143
explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
144144
id count task operator info
145145
Projection_5 1.00 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21

cmd/explaintest/r/explain_complex_stats.result

+6-7
Original file line number | Diff line number | Diff line change
@@ -128,14 +128,13 @@ id count task operator info
128128
Projection_13 424.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
129129
└─Limit_16 424.00 root offset:0, count:2500
130130
└─HashAgg_19 424.00 root group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t
131-
└─IndexMergeJoin_30 424.00 root inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.st.ip, test.dd.ip), gt(test.dd.t, test.st.t)
131+
└─HashRightJoin_34 424.00 root inner join, inner:TableReader_37, equal:[eq(test.st.aid, test.dd.aid) eq(test.st.ip, test.dd.ip)], other cond:gt(test.dd.t, test.st.t)
132132
├─TableReader_37 424.00 root data:Selection_36
133133
│ └─Selection_36 424.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.pt, "android"), gt(test.st.t, 1478143908), not(isnull(test.st.ip))
134134
│ └─TableScan_35 1999.00 cop[tikv] table:gad, range:[0,+inf], keep order:false
135-
└─IndexLookUp_28 1.00 root
136-
├─IndexScan_25 1.00 cop[tikv] table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
137-
└─Selection_27 1.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
138-
└─TableScan_26 1.00 cop[tikv] table:dd, keep order:false
135+
└─TableReader_44 455.80 root data:Selection_43
136+
└─Selection_43 455.80 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
137+
└─TableScan_42 2000.00 cop[tikv] table:dd, range:[0,+inf], keep order:false
139138
explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000;
140139
id count task operator info
141140
Projection_10 170.34 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext
@@ -145,9 +144,9 @@ Projection_10 170.34 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.
145144
│ └─Selection_30 170.34 cop[tikv] eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), gt(test.st.t, 1477971479), not(isnull(test.st.dic))
146145
│ └─TableScan_29 1999.00 cop[tikv] table:gad, range:[0,+inf], keep order:false
147146
└─IndexLookUp_22 1.00 root
148-
├─IndexScan_19 1.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
147+
├─IndexScan_19 3.93 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
149148
└─Selection_21 1.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t))
150-
└─TableScan_20 1.00 cop[tikv] table:sdk, keep order:false
149+
└─TableScan_20 3.93 cop[tikv] table:sdk, keep order:false
151150
explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
152151
id count task operator info
153152
Projection_5 39.28 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21

planner/core/exhaust_physical_plans.go

+13-3
Original file line number | Diff line number | Diff line change
@@ -594,15 +594,25 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
594594
}
595595
joins = make([]PhysicalPlan, 0, 3)
596596
rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.IdxCols, outerJoinKeys)
597-
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt)
597+
maxOneRow := false
598+
if helper.chosenPath.Index.Unique && helper.maxUsedCols == len(helper.chosenPath.FullIdxCols) {
599+
l := len(helper.chosenAccess)
600+
if l == 0 {
601+
maxOneRow = true
602+
} else {
603+
sf, ok := helper.chosenAccess[l-1].(*expression.ScalarFunction)
604+
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
605+
}
606+
}
607+
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
598608

599609
joins = append(joins, p.constructIndexJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
600610
// The index merge join's inner plan is different from index join, so we
601611
// should construct another inner plan for it.
602612
// Because we can't keep order for union scan, if there is a union scan in inner task,
603613
// we can't construct index merge join.
604614
if us == nil {
605-
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt)
615+
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt, maxOneRow)
606616
joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
607617
}
608618
// We can reuse the `innerTask` here since index nested loop hash join
@@ -742,6 +752,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
742752
keepOrder bool,
743753
desc bool,
744754
rowCount float64,
755+
maxOneRow bool,
745756
) task {
746757
is := PhysicalIndexScan{
747758
Table: ds.tableInfo,
@@ -793,7 +804,6 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
793804
if rowCount <= 0 {
794805
rowCount = ds.tableStats.RowCount
795806
}
796-
maxOneRow := path.Index.Unique && len(outerJoinKeys) == len(path.FullIdxCols)
797807
if maxOneRow {
798808
// Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger
799809
// than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect

planner/core/integration_test.go

+26-1
Original file line number | Diff line number | Diff line change
@@ -303,5 +303,30 @@ func (s *testIntegrationSuite) TestINLJHintSmallTable(c *C) {
303303
tk.MustExec("insert into t2 values(1,1),(2,2),(3,3),(4,4),(5,5)")
304304
tk.MustExec("analyze table t1, t2")
305305
tk.MustExec("explain select /*+ TIDB_INLJ(t1) */ * from t1 join t2 on t1.a = t2.a")
306-
tk.MustQuery("show warnings").Check(testkit.Rows())
306+
}
307+
308+
func (s *testIntegrationSuite) TestIndexJoinUniqueCompositeIndex(c *C) {
309+
tk := testkit.NewTestKit(c, s.store)
310+
311+
tk.MustExec("use test")
312+
tk.MustExec("drop table if exists t1, t2")
313+
tk.MustExec("create table t1(a int not null, c int not null)")
314+
tk.MustExec("create table t2(a int not null, b int not null, c int not null, primary key(a,b))")
315+
tk.MustExec("insert into t1 values(1,1)")
316+
tk.MustExec("insert into t2 values(1,1,1),(1,2,1)")
317+
tk.MustExec("analyze table t1,t2")
318+
319+
var input []string
320+
var output []struct {
321+
SQL string
322+
Plan []string
323+
}
324+
s.testData.GetTestCases(c, &input, &output)
325+
for i, tt := range input {
326+
s.testData.OnRecord(func() {
327+
output[i].SQL = tt
328+
output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery(tt).Rows())
329+
})
330+
tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...))
331+
}
307332
}

planner/core/testdata/integration_suite_in.json

+11
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,17 @@
2626
"explain select * from t t1 left join t t2 on t1.a = t2.a where cast(t1.b as date) >= '2019-01-01'"
2727
]
2828
},
29+
{
30+
"name": "TestIndexJoinUniqueCompositeIndex",
31+
"cases": [
32+
// Row count of IndexScan should be 2.
33+
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c",
34+
// Row count of IndexScan should be 2.
35+
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b",
36+
// Row count of IndexScan should be 1.
37+
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1"
38+
]
39+
},
2940
{
3041
"name": "TestPartitionTableStats",
3142
"cases": [

planner/core/testdata/integration_suite_out.json

+38
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,44 @@
7878
}
7979
]
8080
},
81+
{
82+
"Name": "TestIndexJoinUniqueCompositeIndex",
83+
"Cases": [
84+
{
85+
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c",
86+
"Plan": [
87+
"IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:eq(test.t1.c, test.t2.c)",
88+
"├─TableReader_19 1.00 root data:TableScan_18",
89+
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
90+
"└─IndexLookUp_8 2.00 root ",
91+
" ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false",
92+
" └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false"
93+
]
94+
},
95+
{
96+
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b",
97+
"Plan": [
98+
"IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:le(test.t1.c, test.t2.b)",
99+
"├─TableReader_19 1.00 root data:TableScan_18",
100+
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
101+
"└─IndexLookUp_8 2.00 root ",
102+
" ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) le(test.t1.c, test.t2.b)], keep order:false",
103+
" └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false"
104+
]
105+
},
106+
{
107+
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1",
108+
"Plan": [
109+
"IndexJoin_9 1.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a",
110+
"├─TableReader_19 1.00 root data:TableScan_18",
111+
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
112+
"└─IndexLookUp_8 1.00 root ",
113+
" ├─IndexScan_6 1.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) eq(test.t2.b, 1)], keep order:false",
114+
" └─TableScan_7 1.00 cop[tikv] table:t2, keep order:false"
115+
]
116+
}
117+
]
118+
},
81119
{
82120
"Name": "TestPartitionTableStats",
83121
"Cases": [

0 commit comments

Comments
 (0)