Skip to content

Commit

Permalink
[#25767] DocDB: Inconsistent logic in CleanUpDeletedTables and CheckT…
Browse files Browse the repository at this point in the history
…ableDeleted

Summary:
The final table deletion (hiding) step could be performed by one of 2 functions.
Depending on race conditions it could be `CleanUpDeletedTables` or `CheckTableDeleted`.
But their logic is a little bit different.
`CheckTableDeleted` cleans `ysql_ddl_txn_verifier_state` and `transaction` fields in table persistent info.
While `CleanUpDeletedTables` does not do it.

As result, if table was deleted by `CleanUpDeletedTables`, it could fall into incorrect state.
In this state we cannot perform clone on DB with such table.

Fixed by moving shared logic to `PrepareTableDeletion` that is invoked by both methods.

Also increased history retention interval in YbAdminSnapshotScheduleTestWithYsqlColocationRestoreParam.
Jira: DB-15055

Test Plan: ./yb_build.sh release -n 40 --gtest_filter ColocationAndRestoreType/YbAdminSnapshotScheduleTestWithYsqlColocationRestoreParam.PgsqlSequenceVerifyPartialRestore/4 --cxx-test yb-admin-snapshot-schedule-test -- -p 8

Reviewers: asrivastava

Reviewed By: asrivastava

Subscribers: myang, ybase

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D41499
  • Loading branch information
spolitov committed Jan 28, 2025
1 parent 7367fa0 commit c85b22b
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 55 deletions.
67 changes: 38 additions & 29 deletions src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6943,25 +6943,41 @@ bool CatalogManager::ShouldDeleteTable(const TableInfoPtr& table) {
return all_tablets_done || table->is_system() || table->IsColocatedUserTable();
}

TableInfo::WriteLock CatalogManager::PrepareTableDeletion(const TableInfoPtr& table) {
std::pair<TableInfo::WriteLock, TransactionId> CatalogManager::PrepareTableDeletion(
const TableInfoPtr& table) {
auto lock = table->LockForWrite();
if (lock->is_hiding()) {
LOG(INFO) << "Marking table as HIDDEN: " << table->ToString();
lock.mutable_data()->pb.set_hide_state(SysTablesEntryPB::HIDDEN);
lock.mutable_data()->pb.set_hide_hybrid_time(master_->clock()->Now().ToUint64());
// Don't erase hidden tablets from partitions_ as they are needed for CLONE, PITR, SELECT AS-OF.
return lock;
}
if (lock->is_deleting()) {
} else if (lock->is_deleting()) {
// Update the metadata for the on-disk state.
LOG(INFO) << "Marking table as DELETED: " << table->ToString();
lock.mutable_data()->set_state(SysTablesEntryPB::DELETED,
Substitute("Deleted with tablets at $0", LocalTimeAsString()));
// Erase all the tablets from tablets_ and partitions_ structures.
table->ClearTabletMaps();
return lock;
} else {
return {TableInfo::WriteLock(), TransactionId::Nil()};
}
return TableInfo::WriteLock();

auto transaction_id_res = lock->GetCurrentDdlTransactionId();
TransactionId transaction_id;
if (transaction_id_res.ok()) {
transaction_id = *transaction_id_res;
} else {
LOG(INFO) << "Failed to get current DDL transaction for table " + table->ToString() << ": "
<< transaction_id_res.status();
transaction_id = TransactionId::Nil();
}
if (transaction_id != TransactionId::Nil()) {
VLOG(3) << "Check table deleted " << table->id();
lock.mutable_data()->pb.clear_ysql_ddl_txn_verifier_state();
lock.mutable_data()->pb.clear_transaction();
}

return {std::move(lock), transaction_id};
}

void CatalogManager::CleanUpDeletedTables(const LeaderEpoch& epoch) {
Expand All @@ -6977,13 +6993,13 @@ void CatalogManager::CleanUpDeletedTables(const LeaderEpoch& epoch) {
}
std::sort(tables.begin(), tables.end(), IdLess());
// Mark the tables as DELETED and remove them from the in-memory maps.
vector<TableInfo*> tables_to_update_on_disk;
vector<TableInfo::WriteLock> table_locks;
std::vector<TableInfo*> tables_to_update_on_disk;
std::vector<std::pair<TableInfo::WriteLock, TransactionId>> table_lock_and_transaction_ids;
for (const auto& table : tables) {
if (ShouldDeleteTable(table)) {
auto lock = PrepareTableDeletion(table);
if (lock.locked()) {
table_locks.push_back(std::move(lock));
auto lock_and_transaction_id = PrepareTableDeletion(table);
if (lock_and_transaction_id.first.locked()) {
table_lock_and_transaction_ids.push_back(std::move(lock_and_transaction_id));
tables_to_update_on_disk.push_back(table.get());
}
}
Expand All @@ -6995,19 +7011,19 @@ void CatalogManager::CleanUpDeletedTables(const LeaderEpoch& epoch) {
return;
}
// Update the table in-memory info as DELETED after we've removed them from the maps.
for (auto& lock : table_locks) {
for (auto& [lock, _] : table_lock_and_transaction_ids) {
lock.Commit();
}
size_t i = 0;
for (auto table : tables_to_update_on_disk) {
auto transaction_id = table_lock_and_transaction_ids[i].second;
++i;
// Clean up any DDL verification state that is waiting for this table to start deleting.
auto res = table->LockForRead()->GetCurrentDdlTransactionId();
WARN_NOT_OK(
res, Format("Failed to get current DDL transaction for table $0", table->ToString()));
if (!res.ok() || res.get() == TransactionId::Nil()) {
if (transaction_id.IsNil()) {
continue;
}
VLOG(3) << "Cleanup deleted table " << table->id();
RemoveDdlTransactionState(table->id(), {res.get()});
RemoveDdlTransactionState(table->id(), {transaction_id});
}
// TODO: Check if we want to delete the totally deleted table from the sys_catalog here.
// TODO: SysCatalog::DeleteItem() if we've DELETED all user tables in a DELETING namespace.
Expand Down Expand Up @@ -7516,6 +7532,7 @@ Status CatalogManager::AlterTable(const AlterTableRequestPB* req,
}
} else if (req->has_transaction() && table->GetTableType() == PGSQL_TABLE_TYPE) {
if (!l->has_ysql_ddl_txn_verifier_state()) {
LOG_WITH_FUNC(INFO) << "Add ysql_ddl_txn_verifier_state to " << table->id();
table_pb.mutable_transaction()->CopyFrom(req->transaction());
auto *ddl_state = table_pb.add_ysql_ddl_txn_verifier_state();
SchemaToPB(previous_schema, ddl_state->mutable_previous_schema());
Expand Down Expand Up @@ -12604,16 +12621,12 @@ void CatalogManager::CheckTableDeleted(const TableInfoPtr& table, const LeaderEp
if (table->is_index()) {
indexed_table = GetTableInfo(table->indexed_table_id());
}
auto lock = PrepareTableDeletion(table);
auto [lock, transaction_id] = PrepareTableDeletion(table);
if (!lock.locked()) {
return;
}
// Clean up any DDL verification state that is waiting for this table to be deleted.
auto res = lock->GetCurrentDdlTransactionId();
WARN_NOT_OK(
res, "Failed to get current DDL transaction for table " + table->ToString());
bool need_remove_ddl_state = false;
if (res.ok() && res.get() != TransactionId::Nil()) {
if (!transaction_id.IsNil()) {
// When deleting an index, we also need to update the indexed table
// to remove this index from it. Updating the indexed table involves
// setting up a fully_applied_schema and incrementing its schema version.
Expand Down Expand Up @@ -12641,10 +12654,6 @@ void CatalogManager::CheckTableDeleted(const TableInfoPtr& table, const LeaderEp
1.0 /* delay_multiplier */),
Format("Fully_applied_schema of $0 fail to clear", *indexed_table));
}
VLOG(3) << "Check table deleted " << table->id();
need_remove_ddl_state = true;
lock.mutable_data()->pb.clear_ysql_ddl_txn_verifier_state();
lock.mutable_data()->pb.clear_transaction();
}
Status s = sys_catalog_->Upsert(epoch, table);
if (!s.ok()) {
Expand All @@ -12654,8 +12663,8 @@ void CatalogManager::CheckTableDeleted(const TableInfoPtr& table, const LeaderEp
return;
}
lock.Commit();
if (need_remove_ddl_state) {
RemoveDdlTransactionState(table->id(), {res.get()});
if (!transaction_id.IsNil()) {
RemoveDdlTransactionState(table->id(), {transaction_id});
}
}), "Failed to submit update table task");
}
Expand Down
2 changes: 1 addition & 1 deletion src/yb/master/catalog_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,7 @@ class CatalogManager : public CatalogManagerIf, public SnapshotCoordinatorContex
//
// If all conditions are met, returns a locked write lock on this table.
// Otherwise lock is default constructed, i.e. not locked.
TableInfo::WriteLock PrepareTableDeletion(const TableInfoPtr& table);
std::pair<TableInfo::WriteLock, TransactionId> PrepareTableDeletion(const TableInfoPtr& table);
bool ShouldDeleteTable(const TableInfoPtr& table);

// Used by ConsensusService to retrieve the TabletPeer for a system
Expand Down
42 changes: 25 additions & 17 deletions src/yb/tools/yb-admin-snapshot-schedule-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,18 +261,19 @@ class YbAdminSnapshotScheduleTest : public AdminTestBase {
std::string source_namespace_id{VERIFY_RESULT(GetMemberAsStr(out, "source_namespace_id"))};
std::string seq_no{VERIFY_RESULT(GetMemberAsStr(out, "seq_no"))};

RETURN_NOT_OK(WaitFor([&]() -> Result<bool> {
auto out = VERIFY_RESULT(
CallJsonAdmin("list_clones", source_namespace_id, seq_no));
return WaitFor([&]() -> Result<bool> {
auto out = VERIFY_RESULT(CallJsonAdmin("list_clones", source_namespace_id, seq_no));
const auto entries = out.GetArray();
SCHECK_EQ(entries.Size(), 1, IllegalState, "Wrong number of entries. Expected 1");
master::SysCloneStatePB::State state = master::SysCloneStatePB::CLONE_SCHEMA_STARTED;
auto state = master::SysCloneStatePB::CLONE_SCHEMA_STARTED;
master::SysCloneStatePB::State_Parse(
std::string(VERIFY_RESULT(GetMemberAsStr(entries[0], "aggregate_state"))), &state);
return state == master::SysCloneStatePB::ABORTED ||
state == master::SysCloneStatePB::COMPLETE;
}, timeout, "Wait for clone to complete"));
return Status::OK();
if (state == master::SysCloneStatePB::ABORTED) {
return STATUS_FORMAT(
Aborted, "Clone aborted: $0", common::PrettyWriteRapidJsonToString(entries[0]));
}
return state == master::SysCloneStatePB::COMPLETE;
}, timeout, "Wait for clone to complete");
}

Status PrepareCommon() {
Expand All @@ -287,13 +288,17 @@ class YbAdminSnapshotScheduleTest : public AdminTestBase {
}

virtual std::vector<std::string> ExtraTSFlags() {
return { Format("--timestamp_history_retention_interval_sec=$0", kHistoryRetentionIntervalSec),
return { Format("--timestamp_history_retention_interval_sec=$0", HistoryRetentionIntervalSec()),
"--history_cutoff_propagation_interval_ms=1000",
"--enable_automatic_tablet_splitting=true",
Format("--cleanup_split_tablets_interval_sec=$0",
MonoDelta(kCleanupSplitTabletsInterval).ToSeconds()) };
}

virtual int HistoryRetentionIntervalSec() {
return kHistoryRetentionIntervalSec;
}

virtual std::vector<std::string> ExtraMasterFlags() {
// To speed up tests.
return {
Expand Down Expand Up @@ -1213,16 +1218,14 @@ class YbAdminSnapshotScheduleTestWithYsqlParam
return PgConnect(GetRestoredDbName());
}

virtual Status RestoreSnapshotSchedule(
Status RestoreSnapshotSchedule(
const std::string& schedule_id, Timestamp restore_at) override {
if (GetRestoreType() == RestoreType::kClone) {
RETURN_NOT_OK(CloneAndWait(
"ysql." + kTableName.namespace_name(), GetRestoredDbName(), 2min /* timeout */,
restore_at.ToFormattedString()));
return Status::OK();
} else {
if (GetRestoreType() != RestoreType::kClone) {
return YbAdminSnapshotScheduleTest::RestoreSnapshotSchedule(schedule_id, restore_at);
}
return CloneAndWait(
"ysql." + kTableName.namespace_name(), GetRestoredDbName(), 2min /* timeout */,
restore_at.ToFormattedString());
}

void ExecuteOnTables(std::string non_colo_prefix,
Expand Down Expand Up @@ -1286,7 +1289,12 @@ INSTANTIATE_TEST_CASE_P(

// Test for PITR and DB clone against colocated and non-colocated databases.
class YbAdminSnapshotScheduleTestWithYsqlColocationRestoreParam:
public YbAdminSnapshotScheduleTestWithYsqlParam {};
public YbAdminSnapshotScheduleTestWithYsqlParam {
int HistoryRetentionIntervalSec() override {
return 30;
}
};

INSTANTIATE_TEST_CASE_P(
ColocationAndRestoreType, YbAdminSnapshotScheduleTestWithYsqlColocationRestoreParam,
::testing::Values(
Expand Down
16 changes: 8 additions & 8 deletions src/yb/tools/yb-admin_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2962,21 +2962,21 @@ Status ClusterAdminClient::CreateSnapshotMetaFile(
}));

if (resp.snapshots_size() > 1) {
LOG(WARNING) << "Requested snapshot metadata for snapshot '" << snapshot_id << "', but got "
<< resp.snapshots_size() << " snapshots in the response";
LOG(WARNING) << "Requested snapshot metadata for snapshot '" << snapshot_id << "', but got "
<< resp.snapshots_size() << " snapshots in the response";
}

SnapshotInfoPB* snapshot = nullptr;
for (SnapshotInfoPB& snapshot_entry : *resp.mutable_snapshots()) {
if (SnapshotIdToString(snapshot_entry.id()) == snapshot_id) {
if (SnapshotIdToString(snapshot_entry.id()) == snapshot_id) {
snapshot = &snapshot_entry;
break;
}
}
}
if (!snapshot) {
return STATUS_FORMAT(
InternalError, "Response contained $0 entries but no entry for snapshot '$1'",
resp.snapshots_size(), snapshot_id);
return STATUS_FORMAT(
InternalError, "Response contained $0 entries but no entry for snapshot '$1'",
resp.snapshots_size(), snapshot_id);
}

if (FLAGS_TEST_metadata_file_format_version == -1) {
Expand All @@ -2988,7 +2988,7 @@ Status ClusterAdminClient::CreateSnapshotMetaFile(
meta.clear_namespace_name();
entry.set_data(meta.SerializeAsString());
}
}
}
}

cout << "Exporting snapshot " << snapshot_id << " (" << snapshot->entry().state() << ") to file "
Expand Down

0 comments on commit c85b22b

Please sign in to comment.