Skip to content

Commit

Permalink
[#23709] YSQL: Tune heuristic costs in base scans cost model
Browse files Browse the repository at this point in the history
Summary:
The heuristic cost factors in base scans cost model affect the costs of various
operations such as seek and next operations and remote filtering overhead.

To tune these costs, we created a  benchmark with several queries that
isolate the impact of different tuning parameters. For instance, we created
multiple queries on a table with varying selectivity of the filters to see the
time taken to transfer results of varying sizes and tuned the costs such that
they correlate with the execution times. Earlier, we tuned the costs by
taking the client-side execution times into consideration. This includes
the time taken to send the result to the client.

However, we realized that it is better to use server-side execution times
to tune the costs. If we pick the best plan using these costs, it should
automatically be the best plan for the client.

Moreover, PG costs also seemed to be tuned to server-side execution
times. This difference in approach seemed to cause a disparity between
the costs that YB specific cost models were assigning to the base scans,
versus the costs that PG cost models were assigning to operations such
as joins and sort. By tuning with client-side execution times, we were
able to achieve better plans in more complex queries which required PG
side processing.
Jira: DB-12619

Test Plan:
 - TAQO runs show improved plan choices in benchmarks like Join Order Benchmark and TPC-H and other internal benchmarks
 - ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressTAQO'
 - ./yb_build.sh --java-test 'org.yb.pgsql.TestPgCostModelSeekNextEstimation'
 - ./yb_build.sh release --java-test 'org.yb.pgsql.TestPgRegressJoin'
 - ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressPlanner'

Reviewers: mihnea, mtakahara, telgersma

Reviewed By: mtakahara

Subscribers: tnayak, yql

Differential Revision: https://phorge.dev.yugabyte.com/D41734
  • Loading branch information
gauravk-in committed Mar 4, 2025
1 parent 378f9ba commit c24c294
Show file tree
Hide file tree
Showing 16 changed files with 2,429 additions and 966 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,16 @@ private void testSeekAndNextEstimationIndexScanHelper(
NODE_INDEX_SCAN, expected_seeks, expected_nexts, expected_docdb_result_width);
}

private void testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(
Statement stmt, String query,
String table_name, String index_name,
double expected_seeks,
double expected_nexts,
Integer expected_docdb_result_width) throws Exception {
testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt, query, table_name, index_name,
NODE_INDEX_SCAN, expected_seeks, expected_nexts, expected_docdb_result_width);
}

private void testSeekAndNextEstimationIndexOnlyScanHelper(
Statement stmt, String query,
String table_name, String index_name,
Expand All @@ -107,6 +117,16 @@ private void testSeekAndNextEstimationIndexOnlyScanHelper(
NODE_INDEX_ONLY_SCAN, expected_seeks, expected_nexts, expected_docdb_result_width);
}

private void testSeekAndNextEstimationIndexOnlyScanHelper_IgnoreActualResults(
Statement stmt, String query,
String table_name, String index_name,
double expected_seeks,
double expected_nexts,
Integer expected_docdb_result_width) throws Exception {
testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt, query, table_name, index_name,
NODE_INDEX_ONLY_SCAN, expected_seeks, expected_nexts, expected_docdb_result_width);
}

private void testSeekAndNextEstimationIndexScanHelper(
Statement stmt, String query,
String table_name, String index_name,
Expand Down Expand Up @@ -221,14 +241,15 @@ private void testSeekAndNextEstimationBitmapScanHelper_IgnoreActualResults(
private void testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(
Statement stmt, String query,
String table_name, String index_name,
String node_type,
double expected_seeks,
double expected_nexts,
Integer expected_docdb_result_width) throws Exception {
try {
testExplainDebug(stmt, query,
makeTopLevelBuilder()
.plan(makePlanBuilder()
.nodeType(NODE_INDEX_SCAN)
.nodeType(node_type)
.relationName(table_name)
.indexName(index_name)
.estimatedSeeks(expectedSeeksRange(expected_seeks))
Expand Down Expand Up @@ -270,6 +291,30 @@ private void testSeekAndNextEstimationSeqScanHelper(
}
}

private void testSeekAndNextEstimationSeqScanHelper_IgnoreActualResults(
Statement stmt, String query,
String table_name, double expected_seeks,
double expected_nexts,
long expected_docdb_result_width) throws Exception {
try {
testExplainDebug(stmt, query,
makeTopLevelBuilder()
.plan(makePlanBuilder()
.nodeType(NODE_SEQ_SCAN)
.relationName(table_name)
.estimatedSeeks(expectedSeeksRange(expected_seeks))
.estimatedNexts(expectedNextsRange(expected_nexts))
.estimatedDocdbResultWidth(Checkers.equal(expected_docdb_result_width))
.build())
.build());
}
catch (AssertionError e) {
LOG.info("Failed Query: " + query);
LOG.info(e.toString());
throw e;
}
}

@Before
public void setUp() throws Exception {
try (Statement stmt = connection.createStatement()) {
Expand Down Expand Up @@ -777,8 +822,8 @@ public void testSeekNextEstimationBitmapScanWithAnd() throws Exception {

testSeekAndNextEstimationBitmapScanHelper(stmt,
String.format(query, T_NO_PKEY_NAME, "k1 <= 40 AND k2 <= 40"),
T_NO_PKEY_NAME, 4000, 12000, 10,
makePlanBuilder().nodeType(NODE_BITMAP_INDEX_SCAN).build());
T_NO_PKEY_NAME, 1600, 4800, 10,
makePlanBuilder().nodeType(NODE_BITMAP_AND).build());

testSeekAndNextEstimationBitmapScanHelper(stmt,
String.format(query, T_NO_PKEY_NAME, "k1 <= 80 AND k2 <= 80"),
Expand Down Expand Up @@ -1004,11 +1049,11 @@ public void testSeekNextEstimationSeekForwardOptimization() throws Exception {
"/*+IndexScan(t4)*/ SELECT * FROM t4 WHERE k4 IN (4, 5, 6, 7)",
T4_NAME, T4_INDEX_NAME, 40031, 80000, 20);

testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt,
testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt,
"/*+IndexScan(t4)*/ SELECT * FROM t4 WHERE k4 IN (4, 6, 8, 10)",
T4_NAME, T4_INDEX_NAME, 40031, 80000, 20);

testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt,
testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt,
"/*+IndexScan(t4)*/ SELECT * FROM t4 WHERE k4 IN (4, 7, 10, 13)",
T4_NAME, T4_INDEX_NAME, 40031, 80000, 20);

Expand All @@ -1017,4 +1062,39 @@ public void testSeekNextEstimationSeekForwardOptimization() throws Exception {
T4_NAME, T4_INDEX_NAME, 40031, 80000, 20);
}
}

@Test
public void testSeekNextEstimation25862IntegerOverflow() throws Exception {
/*
* #25862 : Estimated seeks and nexts value can overflow in a large table
*
* This test case checks that estimated seeks and nexts values do not overflow
* when the table has a large number of rows.
*/
try (Statement stmt = this.connection2.createStatement()) {
stmt.execute("CREATE TABLE t_25862 (k1 INT, v1 INT, PRIMARY KEY (k1 ASC))");
stmt.execute("CREATE INDEX t_25862_idx on t_25862 (v1 ASC)");
/* Simluate a large table by setting reltuples in pg_class to 4B rows. */
stmt.execute("SET yb_non_ddl_txn_for_sys_tables_allowed = ON");
stmt.execute("UPDATE pg_class SET reltuples=4000000000 WHERE relname LIKE '%t_25862%'");
stmt.execute("UPDATE pg_yb_catalog_version SET current_version=current_version+1 "
+ "WHERE db_oid=1");
stmt.execute("SET yb_non_ddl_txn_for_sys_tables_allowed = OFF");

stmt.execute("SET yb_enable_base_scans_cost_model = ON");

testSeekAndNextEstimationSeqScanHelper_IgnoreActualResults(stmt,
"/*+ SeqScan(t_25862) */ SELECT * FROM t_25862 WHERE k1 > 0",
"t_25862", 1302084.0, 4001302082.0, 2);
testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt,
"/*+ IndexScan(t_25862 t_25862_pkey) */ SELECT * FROM t_25862 WHERE k1 > 0",
"t_25862", "t_25862_pkey", 1302084.0, 1333333334.0, 2);
testSeekAndNextEstimationIndexScanHelper_IgnoreActualResults(stmt,
"/*+ IndexScan(t_25862 t_25862_idx) */ SELECT * FROM t_25862 WHERE v1 > 0",
"t_25862", "t_25862_idx", 1334635417.0, 1333333334.0, 2);
testSeekAndNextEstimationIndexOnlyScanHelper_IgnoreActualResults(stmt,
"/*+ IndexOnlyScan(t_25862 t_25862_idx) */ SELECT v1 FROM t_25862 WHERE v1 > 0",
"t_25862", "t_25862_idx", 1302084.0, 1333333334.0, 1);
}
}
}
14 changes: 7 additions & 7 deletions src/postgres/src/include/optimizer/cost.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@
#define YB_DEFAULT_DOCDB_BLOCK_SIZE 32768

/* LSM Lookup costs */
#define YB_DEFAULT_DOCDB_NEXT_CPU_CYCLES 50
#define YB_DEFAULT_SEEK_COST_FACTOR 50
#define YB_DEFAULT_BACKWARD_SEEK_COST_FACTOR 10
#define YB_DEFAULT_DOCDB_NEXT_CPU_CYCLES 5
#define YB_DEFAULT_SEEK_COST_FACTOR 0.4
#define YB_DEFAULT_BACKWARD_SEEK_COST_FACTOR 1

/*
* The value for the fast backward scan seek cost factor has been selected based on the smallest
Expand All @@ -59,14 +59,14 @@
#define YB_DEFAULT_FAST_BACKWARD_SEEK_COST_FACTOR (YB_DEFAULT_BACKWARD_SEEK_COST_FACTOR / 3.0)

/* DocDB row decode and process cost */
#define YB_DEFAULT_DOCDB_MERGE_CPU_CYCLES 50
#define YB_DEFAULT_DOCDB_MERGE_CPU_CYCLES 5

/* DocDB storage filter cost */
#define YB_DEFAULT_DOCDB_REMOTE_FILTER_OVERHEAD_CYCLES 20
#define YB_DEFAULT_DOCDB_REMOTE_FILTER_OVERHEAD_CYCLES 3

/* Network transfer cost */
#define YB_DEFAULT_LOCAL_LATENCY_COST 180.0
#define YB_DEFAULT_LOCAL_THROUGHPUT_COST 80000.0
#define YB_DEFAULT_LOCAL_LATENCY_COST 10.0
#define YB_DEFAULT_LOCAL_THROUGHPUT_COST 800.0

/*
* TODO : Since we cannot currently estimate the number of key value pairs per
Expand Down
80 changes: 46 additions & 34 deletions src/postgres/src/test/regress/expected/yb.orig.bitmap_scans.out
Original file line number Diff line number Diff line change
Expand Up @@ -1818,81 +1818,93 @@ ANALYZE test_and;
(7 rows)

/*+ BitmapScan(t) */ EXPLAIN (ANALYZE, SUMMARY OFF, COSTS OFF) SELECT * FROM test_and t WHERE a < 10 AND b < 10;
QUERY PLAN
----------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------
YB Bitmap Table Scan on test_and t (actual rows=1620 loops=1)
Storage Filter: (a < 10)
-> Bitmap Index Scan on test_and_b_idx (actual rows=3600 loops=1)
Index Cond: (b < 10)
(4 rows)
-> BitmapAnd (actual rows=1620 loops=1)
-> Bitmap Index Scan on test_and_b_idx (actual rows=3600 loops=1)
Index Cond: (b < 10)
-> Bitmap Index Scan on test_and_a_idx (actual rows=3600 loops=1)
Index Cond: (a < 10)
(6 rows)

/*+ BitmapScan(t) */ EXPLAIN (ANALYZE, SUMMARY OFF, COSTS OFF) SELECT * FROM test_and t WHERE a < 10 AND c < 10;
QUERY PLAN
----------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------
YB Bitmap Table Scan on test_and t (actual rows=1620 loops=1)
Storage Filter: (a < 10)
-> Bitmap Index Scan on test_and_c_idx (actual rows=3600 loops=1)
Index Cond: (c < 10)
(4 rows)
-> BitmapAnd (actual rows=1620 loops=1)
-> Bitmap Index Scan on test_and_c_idx (actual rows=3600 loops=1)
Index Cond: (c < 10)
-> Bitmap Index Scan on test_and_a_idx (actual rows=3600 loops=1)
Index Cond: (a < 10)
(6 rows)

/*+ BitmapScan(t) */ EXPLAIN (ANALYZE, SUMMARY OFF, COSTS OFF) SELECT * FROM test_and t WHERE b < 10 AND c < 10;
QUERY PLAN
----------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------
YB Bitmap Table Scan on test_and t (actual rows=1620 loops=1)
Storage Filter: (b < 10)
-> Bitmap Index Scan on test_and_c_idx (actual rows=3600 loops=1)
Index Cond: (c < 10)
(4 rows)
-> BitmapAnd (actual rows=1620 loops=1)
-> Bitmap Index Scan on test_and_c_idx (actual rows=3600 loops=1)
Index Cond: (c < 10)
-> Bitmap Index Scan on test_and_b_idx (actual rows=3600 loops=1)
Index Cond: (b < 10)
(6 rows)

/*+ BitmapScan(t) */ EXPLAIN (ANALYZE, SUMMARY OFF, COSTS OFF) SELECT * FROM test_and t WHERE a < 10 AND b < 10 AND c < 10;
QUERY PLAN
----------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------
YB Bitmap Table Scan on test_and t (actual rows=729 loops=1)
Storage Filter: ((a < 10) AND (b < 10))
-> Bitmap Index Scan on test_and_c_idx (actual rows=3600 loops=1)
Index Cond: (c < 10)
(4 rows)
Storage Filter: (a < 10)
-> BitmapAnd (actual rows=1620 loops=1)
-> Bitmap Index Scan on test_and_c_idx (actual rows=3600 loops=1)
Index Cond: (c < 10)
-> Bitmap Index Scan on test_and_b_idx (actual rows=3600 loops=1)
Index Cond: (b < 10)
(7 rows)

-- complex nested queries
/*+ BitmapScan(t) */ EXPLAIN (ANALYZE, SUMMARY OFF, COSTS OFF) SELECT * FROM test_and t WHERE a < 5 AND (b < 3 OR b > 16);
QUERY PLAN
----------------------------------------------------------------------------------
YB Bitmap Table Scan on test_and t (actual rows=480 loops=1)
-> BitmapAnd (actual rows=480 loops=1)
-> Bitmap Index Scan on test_and_a_idx (actual rows=1600 loops=1)
Index Cond: (a < 5)
-> BitmapOr (actual rows=2400 loops=1)
-> Bitmap Index Scan on test_and_b_idx (actual rows=800 loops=1)
Index Cond: (b < 3)
-> Bitmap Index Scan on test_and_b_idx (actual rows=1600 loops=1)
Index Cond: (b > 16)
-> Bitmap Index Scan on test_and_a_idx (actual rows=1600 loops=1)
Index Cond: (a < 5)
(9 rows)

/*+ BitmapScan(t) */ EXPLAIN (ANALYZE, SUMMARY OFF, COSTS OFF) SELECT * FROM test_and t WHERE (b < 3 AND a < 5) OR (b > 16 AND a < 5);
QUERY PLAN
----------------------------------------------------------------------------------
YB Bitmap Table Scan on test_and t (actual rows=480 loops=1)
-> BitmapAnd (actual rows=480 loops=1)
-> Bitmap Index Scan on test_and_a_idx (actual rows=1600 loops=1)
Index Cond: (a < 5)
-> BitmapOr (actual rows=2400 loops=1)
-> Bitmap Index Scan on test_and_b_idx (actual rows=800 loops=1)
Index Cond: (b < 3)
-> Bitmap Index Scan on test_and_b_idx (actual rows=1600 loops=1)
Index Cond: (b > 16)
-> Bitmap Index Scan on test_and_a_idx (actual rows=1600 loops=1)
Index Cond: (a < 5)
(9 rows)

/*+ BitmapScan(t) */ EXPLAIN (ANALYZE, SUMMARY OFF, COSTS OFF) SELECT * FROM test_and t WHERE (b < 3 AND a < 5) OR (b > 16 AND a < 6);
QUERY PLAN
----------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------
YB Bitmap Table Scan on test_and t (actual rows=560 loops=1)
Storage Filter: (((b < 3) AND (a < 5)) OR ((b > 16) AND (a < 6)))
-> BitmapOr (actual rows=2400 loops=1)
-> BitmapOr (actual rows=1200 loops=1)
-> Bitmap Index Scan on test_and_b_idx (actual rows=800 loops=1)
Index Cond: (b < 3)
-> Bitmap Index Scan on test_and_b_idx (actual rows=1600 loops=1)
Index Cond: (b > 16)
(7 rows)
-> BitmapAnd (actual rows=400 loops=1)
-> Bitmap Index Scan on test_and_b_idx (actual rows=1600 loops=1)
Index Cond: (b > 16)
-> Bitmap Index Scan on test_and_a_idx (actual rows=2000 loops=1)
Index Cond: (a < 6)
(10 rows)

RESET yb_enable_base_scans_cost_model;
RESET yb_explain_hide_non_deterministic_fields;
Expand Down
Loading

0 comments on commit c24c294

Please sign in to comment.