Skip to content

Commit

Permalink
[#26262] DocDB: Fix load balancing for rewritten tables in a colocate…
Browse files Browse the repository at this point in the history
…d database

Summary:
`ReadPgClassInfo` in `sys_catalog.cc` constructs a map of tables to their tablespaces.

There is a check performed exclusively on colocated databases to skip over
primary key indexes:
```
    if (is_colocated_database) {
      ...
      auto table_info =
          master_->catalog_manager()->GetTableInfo(GetPgsqlTableId(database_oid, oid));

      if (!table_info) {
        // Primary key indexes are a separate entry in pg_class but they do not have
        // their own entry in YugaByte's catalog manager. So, we skip them here.
        continue;
      }
    }
```

This check is incorrect because it uses the relation's oid instead of the relation's
relfilenode to look up table info. This causes the lookup to fail for rewritten tables
(in a colocated database) because the old table is dropped during the rewrite.
After a master restart, the table info for the dropped table is cleared from memory,
causing the rewritten table to be incorrectly skipped.

This revision moves this check after the computation of the table_id, ensuring it
uses the correct oid or relfilenode as necessary. Also, perform the check for all
relations, as we also want to skip adding primary key indexes that are not in
colocated databases.
Jira: DB-15606

Test Plan:
Jenkins: urgent
./yb_build.sh release --java-test 'org.yb.pgsql.TestTablespaceProperties#testTableRewrite'

Reviewers: asrivastava, slingam, smishra, hsunder, kfranz

Reviewed By: hsunder, kfranz

Subscribers: kfranz, ybase

Differential Revision: https://phorge.dev.yugabyte.com/D42236
  • Loading branch information
fizaaluthra committed Mar 4, 2025
1 parent ca4c58b commit 2c4972f
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -461,4 +461,13 @@ public static void tearDownAfterClass() throws Exception {
protected static String maxQueryLayerRetriesConf(int max_retries) {
return "yb_max_query_layer_retries=" + max_retries;
}

public void findAndKillMasterLeader() throws Exception {
HostAndPort masterHostAndPort = miniCluster.getClient().getLeaderMasterHostAndPort();
miniCluster.killMasterOnHostPort(masterHostAndPort);
}

public void waitForMasterLeader(long timeout) throws Exception {
miniCluster.getClient().waitForMasterLeader(timeout);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ public abstract class BaseTablespaceTest extends BasePgSQLTest {

private static final int LOAD_BALANCER_MAX_CONCURRENT = 10;

protected static final int MASTER_LEADER_TIMEOUT_MS = 90000;

private static String placementUuid = "";

// Custom tablespace to be used commonly across tests.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,65 @@ public void testTablesOptOutOfColocation() throws Exception {
verifyTablePlacement(nonColocatedIndex, ts1);
}

@Test
public void testTableRewrite() throws Exception {
markClusterNeedsRecreation();
final String dbname = "testdatabase";
try (Statement stmt = connection.createStatement()) {
stmt.execute(String.format("CREATE DATABASE %s COLOCATED=TRUE", dbname));
}
final String nonColocatedTable = "colocation_opt_out_table";
final String nonColocatedIndex = "colocation_opt_out_idx";
final String regularTable = "testtable";
final String regularIndex = "testindex";
final String tablespaceClause = "TABLESPACE " + tablespaceName;

String[] databases = {dbname, "yugabyte"};
String[] tables = {nonColocatedTable, regularTable};
String[] indexes = {nonColocatedIndex, regularIndex};

for (int i = 0; i < databases.length; i++) {
try (Connection connection2 = getConnectionBuilder().withDatabase(databases[i]).connect();
Statement stmt = connection2.createStatement()) {
stmt.execute(
String.format(
"CREATE TABLE %s (h INT, a INT) WITH (colocated = false) %s",
tables[i], tablespaceClause));
stmt.execute(
String.format(
"CREATE INDEX %s ON %s (a) %s",
indexes[i], tables[i], tablespaceClause));
// Perform table rewrite.
stmt.execute(
String.format(
"ALTER TABLE %s ADD PRIMARY KEY (h)",
tables[i], tables[i]));
}
verifyTablePlacement(tables[i], customTablespace);
verifyTablePlacement(indexes[i], customTablespace);
}

findAndKillMasterLeader();
waitForMasterLeader(MASTER_LEADER_TIMEOUT_MS);

// Test that ALTER ... SET TABLESPACE works after table rewrite + master failover.
Tablespace ts2 = new Tablespace("tablespace_2", Collections.singletonList(1));
ts2.create(connection);

for (int i = 0; i < databases.length; i++) {
try (Connection connection2 = getConnectionBuilder().withDatabase(databases[i]).connect();
Statement stmt = connection2.createStatement()) {
stmt.execute(String.format("ALTER TABLE %s SET TABLESPACE %s", tables[i], ts2.name));
stmt.execute(String.format("ALTER INDEX %s SET TABLESPACE %s", indexes[i], ts2.name));
}
// Wait for load balancer to run.
waitForLoadBalancer();

verifyTablePlacement(tables[i], ts2);
verifyTablePlacement(indexes[i], ts2);
}
}

@Test
public void readReplicaWithTablespaces() throws Exception {
markClusterNeedsRecreation();
Expand Down
26 changes: 11 additions & 15 deletions src/yb/master/sys_catalog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1320,28 +1320,13 @@ Status SysCatalogTable::ReadPgClassInfo(
continue;
}

if (is_colocated_database) {
// A table in a colocated database is colocated unless it opted out
// of colocation.
auto table_info =
master_->catalog_manager()->GetTableInfo(GetPgsqlTableId(database_oid, oid));

if (!table_info) {
// Primary key indexes are a separate entry in pg_class but they do not have
// their own entry in YugaByte's catalog manager. So, we skip them here.
continue;
}
}

// Process the tablespace oid for this table/index.
const auto& tablespace_oid_col = row.GetValue(tablespace_col_id);
if (!tablespace_oid_col) {
return STATUS(Corruption, "Could not read tablespace column from pg_class");
}

const uint32 tablespace_oid = tablespace_oid_col->uint32_value();
VLOG(1) << "Table { oid: " << oid << ", name: " << table_name << " }"
<< " has tablespace oid " << tablespace_oid;

boost::optional<TablespaceId> tablespace_id = boost::none;
// If the tablespace oid is kInvalidOid then it means this table was created
Expand All @@ -1367,6 +1352,17 @@ Status SysCatalogTable::ReadPgClassInfo(
} else {
table_id = GetPgsqlTableId(database_oid, relfilenode_oid);
}

auto table_info = master_->catalog_manager()->GetTableInfo(table_id);

if (!table_info) {
// Some relations (eg: primary key indexes) may exist in pg_class but not in
// YugaByte's catalog manager. So, we skip them here.
continue;
}

VLOG(1) << "Table { uuid: " << table_id << ", name: " << table_name << " }"
<< " has tablespace oid " << tablespace_oid;
const auto& ret = table_to_tablespace_map->emplace(table_id, tablespace_id);
// The map should not have a duplicate entry with the same oid.
DCHECK(ret.second);
Expand Down

0 comments on commit 2c4972f

Please sign in to comment.