Skip to content

Commit

Permalink
improvements: Made GetRowPointers even leaner
Browse files Browse the repository at this point in the history
  • Loading branch information
gropaul committed Mar 13, 2024
1 parent 4da57ad commit 3914227
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 16 deletions.
33 changes: 17 additions & 16 deletions src/execution/join_hashtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using ProbeSpill = JoinHashTable::ProbeSpill;
using ProbeSpillLocalState = JoinHashTable::ProbeSpillLocalAppendState;

// Default-constructs the per-probe scratch state: the offset and salt vectors are typed UBIGINT, the
// insert-pointer vector is typed POINTER, and each selection vector is constructed with STANDARD_VECTOR_SIZE.
// NOTE(review): two ':' initializer lines appear below; presumably this is a diff rendering in which the first
// is the removed line and the second (which adds salt_v) is its replacement -- confirm against the real file.
// NOTE(review): salt_v is initialized here between ht_offsets_v and ht_offsets_dense_v, while the header hunk
// lists it after row_ptr_insert_to_v. C++ initializes members in declaration order, so this ordering may
// trigger -Wreorder -- confirm against the full header.
JoinHashTable::ProbeState::ProbeState()
: ht_offsets_v(LogicalType::UBIGINT), ht_offsets_dense_v(LogicalType::UBIGINT),
: ht_offsets_v(LogicalType::UBIGINT), salt_v(LogicalType::UBIGINT), ht_offsets_dense_v(LogicalType::UBIGINT),
row_ptr_insert_to_v(LogicalType::POINTER), non_empty_sel(STANDARD_VECTOR_SIZE),
key_no_match_sel(STANDARD_VECTOR_SIZE), salt_match_sel(STANDARD_VECTOR_SIZE) {
}
Expand Down Expand Up @@ -157,6 +157,7 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta

auto hashes = UnifiedVectorFormat::GetData<hash_t>(hashes_v_unified);

auto salts = FlatVector::GetData<hash_t>(state.salt_v);
auto ht_offsets = FlatVector::GetData<idx_t>(state.ht_offsets_v);
auto ht_offsets_dense = FlatVector::GetData<idx_t>(state.ht_offsets_dense_v);

Expand Down Expand Up @@ -184,6 +185,11 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta
idx_t dense_index = state.non_empty_sel.get_index(i);
const auto row_index = sel.get_index(dense_index);
state.non_empty_sel.set_index(i, row_index);

auto uvf_index = hashes_v_unified.sel->get_index(row_index);
auto hash = hashes[uvf_index];
hash_t row_salt = aggr_ht_entry_t::ExtractSalt(hash);
salts[row_index] = row_salt;
}

auto pointers_result = FlatVector::GetData<data_ptr_t>(pointers_result_v);
Expand All @@ -208,43 +214,38 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta
auto &ht_offset = ht_offsets[row_index];

idx_t increment;
bool salt_match;
aggr_ht_entry_t entry;

// increment the ht_offset of the entry as long as next entry is occupied and salt does not match
do {

auto &entry = entries[ht_offset];
entry = entries[ht_offset];
bool occupied = entry.IsOccupied();

// no need to do anything, as the vector is zeroed
if (!occupied) {
break;
}
auto uvf_index = hashes_v_unified.sel->get_index(row_index);
auto hash = hashes[uvf_index];
hash_t row_salt = aggr_ht_entry_t::ExtractSalt(hash);
bool salt_match = entry.GetSalt() == row_salt;

// the entries we need to process in the next iteration are the ones that are occupied and the row_salt
// does not match, the ones that are empty need no further processing
state.salt_match_sel.set_index(salt_match_count, row_index);
salt_match_count += salt_match;
salt_match = entry.GetSalt() == salts[row_index];

// condition for incrementing the ht_offset: occupied and row_salt does not match -> move to next entry
increment = !salt_match;
IncrementAndWrap(ht_offset, increment, bitmask);

} while (increment);

// the entries we need to compare in the next step are the ones that are occupied and whose row_salt
// matches; the ones that are empty need no further processing
state.salt_match_sel.set_index(salt_match_count, row_index);
salt_match_count += salt_match;
row_ptr_insert_to[row_index] = entry.GetPointer();
}

if (salt_match_count == 0) {
break;
} else {
// Get the pointers_result_v to the rows that need to be compared
for (idx_t need_compare_idx = 0; need_compare_idx < salt_match_count; need_compare_idx++) {
const auto row_index = state.salt_match_sel.get_index(need_compare_idx);
const auto &entry = entries[ht_offsets[row_index]];
row_ptr_insert_to[row_index] = entry.GetPointer();
}

// Perform row comparisons; after the function call, salt_match_sel will point to the keys that match
idx_t key_no_match_count = 0;
Expand Down
1 change: 1 addition & 0 deletions src/include/duckdb/execution/join_hashtable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ class JoinHashTable {
Vector ht_offsets_v;
Vector ht_offsets_dense_v;
Vector row_ptr_insert_to_v;
Vector salt_v;

SelectionVector non_empty_sel;

Expand Down

0 comments on commit 3914227

Please sign in to comment.