Skip to content

Commit 22e88af

Browse files
set AP query for tables with large row size and vector indexes (#22704)
set AP query for tables with large row size and vector indexes. Approved by: @ouyuanning, @aunjgr, @XuPeng-SH
1 parent e6eecb6 commit 22e88af

File tree

6 files changed

+176
-11
lines changed

6 files changed

+176
-11
lines changed

pkg/pb/plan/plan.pb.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright 2025 Matrix Origin
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package explain
16+
17+
import (
18+
"bytes"
19+
"context"
20+
"strings"
21+
"testing"
22+
23+
planpb "github.com/matrixorigin/matrixone/pkg/pb/plan"
24+
)
25+
26+
func TestCostDescribeImpl_IncludesRowsizeWhenPositive(t *testing.T) {
27+
stats := &planpb.Stats{
28+
Cost: 10,
29+
Outcnt: 5,
30+
Selectivity: 0.5,
31+
Dop: 2,
32+
BlockNum: 3,
33+
Rowsize: 128.0,
34+
}
35+
impl := &CostDescribeImpl{Stats: stats}
36+
buf := new(bytes.Buffer)
37+
if err := impl.GetDescription(context.Background(), NewExplainDefaultOptions(), buf); err != nil {
38+
t.Fatalf("GetDescription error: %v", err)
39+
}
40+
got := buf.String()
41+
if !strings.Contains(got, "rowsize=128.00") {
42+
t.Fatalf("expected rowsize to be printed, got: %s", got)
43+
}
44+
}
45+
46+
func TestCostDescribeImpl_OmitsRowsizeWhenZero(t *testing.T) {
47+
stats := &planpb.Stats{
48+
Cost: 1,
49+
Outcnt: 1,
50+
Selectivity: 1,
51+
Dop: 1,
52+
BlockNum: 1,
53+
Rowsize: 0,
54+
}
55+
impl := &CostDescribeImpl{Stats: stats}
56+
buf := new(bytes.Buffer)
57+
if err := impl.GetDescription(context.Background(), NewExplainDefaultOptions(), buf); err != nil {
58+
t.Fatalf("GetDescription error: %v", err)
59+
}
60+
got := buf.String()
61+
if strings.Contains(got, "rowsize=") {
62+
t.Fatalf("did not expect rowsize to be printed when zero, got: %s", got)
63+
}
64+
}

pkg/sql/plan/explain/explain_node.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1180,11 +1180,15 @@ func (c *CostDescribeImpl) GetDescription(ctx context.Context, options *ExplainO
11801180
if c.Stats.HashmapStats != nil && c.Stats.HashmapStats.HashmapSize > 1 {
11811181
hashmapSizeStr = " hashmapSize=" + strconv.FormatFloat(c.Stats.HashmapStats.HashmapSize, 'f', 2, 64)
11821182
}
1183+
var rowsizeStr string
1184+
if c.Stats.Rowsize > 0 {
1185+
rowsizeStr = " rowsize=" + strconv.FormatFloat(c.Stats.Rowsize, 'f', 2, 64)
1186+
}
11831187
buf.WriteString(" (cost=" + strconv.FormatFloat(c.Stats.Cost, 'f', 2, 64) +
11841188
" outcnt=" + strconv.FormatFloat(c.Stats.Outcnt, 'f', 2, 64) +
11851189
" selectivity=" + strconv.FormatFloat(c.Stats.Selectivity, 'f', 4, 64) +
11861190
" dop=" + strconv.FormatInt(int64(c.Stats.Dop), 10) +
1187-
blockNumStr + hashmapSizeStr + ")")
1191+
blockNumStr + hashmapSizeStr + rowsizeStr + ")")
11881192
}
11891193
return nil
11901194
}

pkg/sql/plan/stats.go

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ const highNDVcolumnThreshHold = 0.95
4949
const statsCacheInitSize = 128
5050
const statsCacheMaxSize = 8192
5151

52+
// RowSizeThreshold is the average row size above which rows are treated as wide.
53+
// Regardless of the table, the minimum row size is 100.
54+
// However, because the statistical information is inaccurate,
55+
// the threshold is tentatively set at 128,
56+
// and it is only used for tables with vector indexes.
57+
const RowSizeThreshold = 128
58+
const LargeBlockThresholdForOneCN = 4
59+
const LargeBlockThresholdForMultiCN = 32
60+
5261
// for test
5362
var ForceScanOnMultiCN atomic.Bool
5463

@@ -1426,6 +1435,19 @@ func calcScanStats(node *plan.Node, builder *QueryBuilder) *plan.Stats {
14261435
stats.Outcnt = stats.Selectivity * stats.TableCnt
14271436
stats.Cost = stats.TableCnt * blockSel
14281437
stats.BlockNum = int32(float64(s.BlockNumber)*blockSel) + 1
1438+
// estimate average row size from collected table stats: sum(SizeMap)/TableCnt
1439+
// SizeMap stores approximate persisted bytes per column; divide by total rows to get bytes/row
1440+
{
1441+
var totalSize uint64
1442+
for _, v := range s.SizeMap {
1443+
totalSize += v
1444+
}
1445+
if stats.TableCnt > 0 {
1446+
stats.Rowsize = float64(totalSize) / stats.TableCnt
1447+
} else {
1448+
stats.Rowsize = 0
1449+
}
1450+
}
14291451
return stats
14301452
}
14311453

@@ -1648,18 +1670,42 @@ func HasShuffleInPlan(qry *plan.Query) bool {
16481670
return false
16491671
}
16501672

1651-
func calcDOP(ncpu, blocks int32, isPrepare bool) int32 {
1652-
if ncpu <= 0 || blocks <= 16 {
1673+
// dop tuning constants
1674+
const (
1675+
// base block-to-core mapping for dop estimation
1676+
dopBlocksBaseUnit int32 = 16 // default: every ~16 blocks add a core
1677+
dopBlocksPrepareUnit int32 = 64 // prepare: more conservative
1678+
)
1679+
1680+
func calcDOP(ncpu int32, stats *plan.Stats, isPrepare bool) int32 {
1681+
if ncpu <= 0 {
16531682
return 1
16541683
}
1655-
ret := blocks/16 + 1
1684+
1685+
baseUnit := dopBlocksBaseUnit
16561686
if isPrepare {
1657-
ret = blocks/64 + 1
1687+
baseUnit = dopBlocksPrepareUnit
16581688
}
1659-
if ret <= ncpu {
1660-
return ret
1689+
1690+
blocks := stats.BlockNum
1691+
var ret int32 = 1
1692+
if blocks > 0 {
1693+
ret = blocks/baseUnit + 1
16611694
}
1662-
return ncpu
1695+
1696+
rs := stats.Rowsize
1697+
if rs >= RowSizeThreshold {
1698+
// very wide rows: be aggressive and request one core per block (still capped at ncpu below)
1699+
ret = stats.BlockNum
1700+
}
1701+
1702+
if ret > ncpu {
1703+
ret = ncpu
1704+
}
1705+
if ret < 1 {
1706+
ret = 1
1707+
}
1708+
return ret
16631709
}
16641710

16651711
// set node dop and left child recursively
@@ -1693,7 +1739,7 @@ func CalcNodeDOP(p *plan.Plan, rootID int32, ncpu int32, lencn int) {
16931739
setNodeDOP(p, rootID, dop)
16941740
}
16951741
} else {
1696-
node.Stats.Dop = calcDOP(ncpu, node.Stats.BlockNum, p.IsPrepare)
1742+
node.Stats.Dop = calcDOP(ncpu, node.Stats, p.IsPrepare)
16971743
}
16981744
}
16991745

@@ -1737,6 +1783,16 @@ func GetExecType(qry *plan.Query, txnHaveDDL bool, isPrepare bool) ExecType {
17371783
ret = ExecTypeAP_ONECN
17381784
}
17391785
}
1786+
if node.NodeType == plan.Node_TABLE_SCAN &&
1787+
// because stats.Rowsize is inaccurate, this rule currently applies only to vector index tables
1788+
(node.TableDef.TableType == catalog.SystemSI_IVFFLAT_TblType_Entries || node.TableDef.TableType == catalog.Hnsw_TblType_Storage) &&
1789+
stats.Rowsize > RowSizeThreshold &&
1790+
stats.BlockNum > LargeBlockThresholdForOneCN {
1791+
ret = ExecTypeAP_ONECN
1792+
if stats.BlockNum > LargeBlockThresholdForMultiCN {
1793+
ret = ExecTypeAP_MULTICN
1794+
}
1795+
}
17401796
if node.NodeType != plan.Node_TABLE_SCAN && stats.HashmapStats != nil && stats.HashmapStats.Shuffle {
17411797
ret = ExecTypeAP_ONECN
17421798
}

pkg/sql/plan/stats_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,3 +266,44 @@ func TestUpdateStatsInfo_Decimal_DifferentScales(t *testing.T) {
266266
})
267267
}
268268
}
269+
270+
func makeQueryWithScan(tableType string, rowsize float64, blockNum int32) *plan.Query {
271+
n := &plan.Node{
272+
NodeType: plan.Node_TABLE_SCAN,
273+
TableDef: &plan.TableDef{TableType: tableType},
274+
Stats: &plan.Stats{
275+
Rowsize: rowsize,
276+
BlockNum: blockNum,
277+
},
278+
}
279+
return &plan.Query{
280+
Nodes: []*plan.Node{n},
281+
Steps: []int32{0},
282+
}
283+
}
284+
285+
func TestGetExecType_VectorIndex_WideRows_OneCN(t *testing.T) {
286+
// rowsize just above threshold, blockNum between oneCN and multiCN thresholds
287+
q := makeQueryWithScan(catalog.SystemSI_IVFFLAT_TblType_Entries, float64(RowSizeThreshold+1), LargeBlockThresholdForOneCN+1)
288+
got := GetExecType(q, false, false)
289+
if got != ExecTypeAP_ONECN {
290+
t.Fatalf("expected ExecTypeAP_ONECN, got %v", got)
291+
}
292+
}
293+
294+
func TestGetExecType_VectorIndex_WideRows_MultiCN(t *testing.T) {
295+
q := makeQueryWithScan(catalog.Hnsw_TblType_Storage, float64(RowSizeThreshold+1), LargeBlockThresholdForMultiCN+1)
296+
got := GetExecType(q, false, false)
297+
if got != ExecTypeAP_MULTICN {
298+
t.Fatalf("expected ExecTypeAP_MULTICN, got %v", got)
299+
}
300+
}
301+
302+
func TestGetExecType_NonVectorTable_NotForcedByRowsize(t *testing.T) {
303+
// Non-vector tables should not trigger the rowsize shortcut; with a small blockNum, expect TP
304+
q := makeQueryWithScan("normal_table", float64(RowSizeThreshold+10), LargeBlockThresholdForOneCN)
305+
got := GetExecType(q, false, false)
306+
if got != ExecTypeTP {
307+
t.Fatalf("expected ExecTypeTP for non-vector table, got %v", got)
308+
}
309+
}

proto/plan.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ message Stats {
498498
double cost = 2;
499499
//number of output lines
500500
double outcnt = 3;
501-
// average size of one row, currently not used
501+
// average size of one row
502502
double rowsize = 4;
503503
//for scan, this means total count of all table, before filtering
504504
double table_cnt = 5;

0 commit comments

Comments
 (0)