Skip to content

Commit

Permalink
improvement: mainly iterate over non empty entries
Browse files Browse the repository at this point in the history
  • Loading branch information
gropaul committed Mar 12, 2024
1 parent 32a462e commit 23048a7
Showing 1 changed file with 16 additions and 19 deletions.
35 changes: 16 additions & 19 deletions src/execution/join_hashtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,51 +180,42 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta
auto pointers_result = FlatVector::GetData<data_ptr_t>(pointers_result_v);
auto row_ptr_insert_to = FlatVector::GetData<data_ptr_t>(state.row_ptr_insert_to_v);

const SelectionVector *remaining_sel = &state.non_empty_sel;
idx_t remaining_count = non_empty_count;
const SelectionVector *remaining_non_empty_sel = &state.non_empty_sel;

idx_t &match_count = count;
match_count = 0;

while (remaining_count > 0) {
while (non_empty_count > 0) {

idx_t salt_match_count = 0;

// for each entry, linear probing until
// a) an empty entry is found -> return nullptr (do nothing, as vector is zeroed)
// for each non-empty entry, linear probing until
// b) an entry is found where the salt matches -> need to compare the keys
for (idx_t i = 0; i < remaining_count; i++) {
const auto row_index = remaining_sel->get_index(i);
for (idx_t i = 0; i < non_empty_count; i++) {
const auto row_index = remaining_non_empty_sel->get_index(i);
auto uvf_index = hashes_v_unified.sel->get_index(row_index);

auto &ht_offset = ht_offsets[uvf_index];

idx_t increment;

// increment the ht_offset of the entry as long as next entry is occupied and salt does not match
// increment the ht_offset of the entry as long as the salt does not match
do {

auto &entry = entries[ht_offset];
bool occupied = entry.IsOccupied();

// no need to do anything, as the vector is zeroed
if (!occupied) {
break;
}

auto hash = hashes[uvf_index];
hash_t row_salt = aggr_ht_entry_t::ExtractSalt(hash);
bool salt_match = entry.GetSalt() == row_salt;
bool salt_match = entry.GetSalt() == row_salt && entry.IsOccupied();

// the entries we need to process in the next iteration are the ones that are occupied and the row_salt
// does not match, the ones that are empty need no further processing
state.salt_match_sel.set_index(salt_match_count, row_index);
salt_match_count += salt_match;

// condition for incrementing the ht_offset: occupied and row_salt does not match -> move to next entry
increment = !salt_match;
increment = !salt_match && entry.IsOccupied();
IncrementAndWrap(ht_offset, increment, bitmask);

} while (increment);
}

Expand Down Expand Up @@ -256,17 +247,23 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta
match_count++;
}

non_empty_count = 0;

// update the ht_offset to point to the next entry for the ones that did not match
for (idx_t i = 0; i < key_no_match_count; i++) {
const auto row_index = state.key_no_match_sel.get_index(i);
const auto uvf_index = hashes_v_unified.sel->get_index(row_index);
auto &ht_offset = ht_offsets[uvf_index];

IncrementAndWrap(ht_offset, 1, bitmask);

// if the entry is occupied, we need to keep it in the non_empty_sel
const auto &entry = entries[ht_offset];
state.non_empty_sel.set_index(non_empty_count, row_index);
non_empty_count += entry.IsOccupied();
}

remaining_sel = &state.key_no_match_sel;
remaining_count = key_no_match_count;
remaining_non_empty_sel = &state.non_empty_sel;
}
}
}
Expand Down

0 comments on commit 23048a7

Please sign in to comment.