From 23048a722c693753e16495f890786e8e9b22ac08 Mon Sep 17 00:00:00 2001 From: PGross Date: Tue, 12 Mar 2024 12:18:44 +0100 Subject: [PATCH] improvement: mainly iterate over non empty entries --- src/execution/join_hashtable.cpp | 35 +++++++++++++++----------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/execution/join_hashtable.cpp b/src/execution/join_hashtable.cpp index bedd6e00802a..20e46c30eacf 100644 --- a/src/execution/join_hashtable.cpp +++ b/src/execution/join_hashtable.cpp @@ -180,41 +180,33 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta auto pointers_result = FlatVector::GetData(pointers_result_v); auto row_ptr_insert_to = FlatVector::GetData(state.row_ptr_insert_to_v); - const SelectionVector *remaining_sel = &state.non_empty_sel; - idx_t remaining_count = non_empty_count; + const SelectionVector *remaining_non_empty_sel = &state.non_empty_sel; idx_t &match_count = count; match_count = 0; - while (remaining_count > 0) { + while (non_empty_count > 0) { idx_t salt_match_count = 0; - // for each entry, linear probing until - // a) an empty entry is found -> return nullptr (do nothing, as vector is zeroed) + // for each non-empty entry, linear probing until // b) an entry is found where the salt matches -> need to compare the keys - for (idx_t i = 0; i < remaining_count; i++) { - const auto row_index = remaining_sel->get_index(i); + for (idx_t i = 0; i < non_empty_count; i++) { + const auto row_index = remaining_non_empty_sel->get_index(i); auto uvf_index = hashes_v_unified.sel->get_index(row_index); auto &ht_offset = ht_offsets[uvf_index]; idx_t increment; - // increment the ht_offset of the entry as long as next entry is occupied and salt does not match + // increment the ht_offset of the entry as long as the entry is occupied and the salt does not match do { auto &entry = entries[ht_offset]; - bool occupied = entry.IsOccupied(); - - // no need to do anything, as the vector is zeroed - if (!occupied) { - break; - } auto hash 
= hashes[uvf_index]; hash_t row_salt = aggr_ht_entry_t::ExtractSalt(hash); - bool salt_match = entry.GetSalt() == row_salt; + bool salt_match = entry.GetSalt() == row_salt && entry.IsOccupied(); // the entries we need to process in the next iteration are the ones that are occupied and the row_salt // does not match, the ones that are empty need no further processing @@ -222,9 +214,8 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta salt_match_count += salt_match; // condition for incrementing the ht_offset: occupied and row_salt does not match -> move to next entry - increment = !salt_match; + increment = !salt_match && entry.IsOccupied(); IncrementAndWrap(ht_offset, increment, bitmask); - } while (increment); } @@ -256,6 +247,8 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta match_count++; } + non_empty_count = 0; + // update the ht_offset to point to the next entry for the ones that did not match for (idx_t i = 0; i < key_no_match_count; i++) { const auto row_index = state.key_no_match_sel.get_index(i); @@ -263,10 +256,14 @@ void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_sta auto &ht_offset = ht_offsets[uvf_index]; IncrementAndWrap(ht_offset, 1, bitmask); + + // if the entry is occupied, we need to keep it in the non_empty_sel + const auto &entry = entries[ht_offset]; + state.non_empty_sel.set_index(non_empty_count, row_index); + non_empty_count += entry.IsOccupied(); } - remaining_sel = &state.key_no_match_sel; - remaining_count = key_no_match_count; + remaining_non_empty_sel = &state.non_empty_sel; } } }