Skip to content

Commit

Permalink
started changes to build facts only on the rhs
Browse files Browse the repository at this point in the history
  • Loading branch information
gropaul committed Oct 30, 2024
1 parent 6197a74 commit 1914472
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 58 deletions.
44 changes: 1 addition & 43 deletions src/execution/fact_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,52 +102,10 @@ static void GetChainData(Vector &pointers_v, TupleDataCollection *data_collectio
}
}

// Decides which of the two sides acts as the build side (the one whose hash table is used)
// and which one probes it. All four arguments are references to pointers, so any swap done
// here is visible to the caller; the result pointers are always swapped together with their side.
//
// - Exactly one side already has its HT built: that side ends up as build_side.
// - Both or neither side built: the sides are swapped when the incoming build side has the
//   shorter chain; if neither was built, BuildHT() is called on the resulting build side.
//
// NOTE(review): earlier comments on this function stated that the *smaller* side should be
// built, but the comparison below swaps when the current build side is the shorter one, which
// leaves the longer chain as build_side. Behaviour is kept as-is here -- confirm the intent.
static void DetermineSidesAndBuild(fact_data_t *&build_side, fact_data_t *&probe_side, data_ptr_t *&build_res,
                                   data_ptr_t *&probe_res) {

    const bool build_ready = build_side->IsHTBuild();
    const bool probe_ready = probe_side->IsHTBuild();

    if (build_ready != probe_ready) {
        // Exactly one hash table exists: make sure it sits on the build side.
        if (probe_ready) {
            std::swap(build_side, probe_side);
            std::swap(build_res, probe_res);
        }
    } else {
        // Tie (both built or neither built): decide by chain length.
        const idx_t build_len = build_side->chain_length;
        const idx_t probe_len = probe_side->chain_length;

        if (build_len < probe_len) {
            std::swap(build_side, probe_side);
            std::swap(build_res, probe_res);
        }

        // In this branch both flags are equal, so build_ready also describes the side that
        // is the build side after a possible swap.
        if (!build_ready) {
            build_side->BuildHT();
        }
    }

    D_ASSERT(build_side->IsHTBuild());
}

// We always have to return the rhs pointers to make sure that we can expand on the rhs

void __attribute__((noinline)) Intersect(fact_data_t *left_ptr, fact_data_t *right_ptr, data_ptr_t *lhs_pointers_res,
static void inline Intersect(fact_data_t *left_ptr, fact_data_t *right_ptr, data_ptr_t *lhs_pointers_res,
data_ptr_t *rhs_pointers_res, idx_t &intersection_count) {

// build on the lhs to probe with the rhs
DetermineSidesAndBuild(left_ptr, right_ptr, lhs_pointers_res, rhs_pointers_res);

auto left = *left_ptr;
auto right = *right_ptr;

Expand Down
1 change: 0 additions & 1 deletion src/execution/join_hashtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,6 @@ void JoinHashTable::FinalizeFactDatas() {
fact_ptr_data = buffer_manager.GetBufferAllocator().Allocate(ht_elements_count * sizeof(data_ptr_t));
fact_ptr = reinterpret_cast<data_ptr_t *>(fact_ptr_data.get());


idx_t full_entry_count = 0;

// 1st run: Get total capacity needed and mark the entries that are occupied in the fact_keys array
Expand Down
14 changes: 0 additions & 14 deletions src/include/duckdb/execution/fact_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@ struct fact_data_t { // NOLINT
pointers = pointers_p;
keys = keys_p;

ht_build = false;
keys_gathered = false;

ht_capacity = ht_capacity_p;
ht_bitmask = ht_capacity - 1;
}
Expand All @@ -56,19 +53,8 @@ struct fact_data_t { // NOLINT
uint64_t *chain_ht;
// the capacity of the hashtable

// whether the map has been built
bool ht_build;
// keys gathered from the chain
bool keys_gathered;

// Returns the current value of the ht_build flag (set to true by BuildHT()).
inline bool IsHTBuild() const {
return ht_build;
}

// Builds the hash table for this chain by calling FillHtWithIndex on keys / chain_length,
// writing into chain_ht with ht_bitmask.
// Must only be called while the table is not yet built: this is asserted, not checked, so a
// second call is not a silent no-op (D_ASSERT is presumably active in debug builds only -- confirm).
void BuildHT() {
D_ASSERT(!ht_build);
// The flag is set before the table is actually filled.
ht_build = true;
FillHtWithIndex(keys, chain_length, chain_ht, ht_bitmask);
}

Expand Down

0 comments on commit 1914472

Please sign in to comment.