Skip to content

Commit

Permalink
Apply CRoaring bitmap library (infiniflow#1816)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Update Bitmask class

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Breaking Change (fix or feature that could cause existing
functionality not to work as expected)
- [x] Refactoring
  • Loading branch information
yangzq50 authored Sep 10, 2024
1 parent 17abc21 commit 248fcf9
Show file tree
Hide file tree
Showing 66 changed files with 464 additions and 1,333 deletions.
37 changes: 11 additions & 26 deletions src/executor/expression/expression_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ import data_block;
import base_expression;
import expression_state;
import selection;
import bitmask;
import roaring_bitmap;
import logical_type;
import bitmask_buffer;
import column_vector;
import expression_evaluator;
import internal_types;
Expand Down Expand Up @@ -90,34 +89,20 @@ void ExpressionSelector::Select(const SharedPtr<BaseExpression> &expr,

void ExpressionSelector::Select(const SharedPtr<ColumnVector> &bool_column, SizeT count, SharedPtr<Selection> &output_true_select, bool nullable) {
if (bool_column->vector_type() != ColumnVectorType::kCompactBit || bool_column->data_type()->type() != LogicalType::kBoolean) {
String error_message = "Attempting to select non-boolean expression";
UnrecoverableError(error_message);
UnrecoverableError("Attempting to select non-boolean expression");
}
const auto &boolean_buffer = *(bool_column->buffer_);
const auto &null_mask = bool_column->nulls_ptr_;
if (nullable && !(null_mask->IsAllTrue())) {
const u64 *result_null_data = null_mask->GetData();
SizeT unit_count = BitmaskBuffer::UnitCount(count);
for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) {
end_index = std::min(end_index, count);
if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) {
// all data of 64 rows are not null
for (; start_index < end_index; ++start_index) {
if (boolean_buffer.GetCompactBit(start_index)) {
output_true_select->Append(start_index);
}
}
} else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) {
// all data of 64 rows are null
start_index = end_index;
} else {
for (; start_index < end_index; ++start_index) {
if ((null_mask->IsTrue(start_index)) && boolean_buffer.GetCompactBit(start_index)) {
output_true_select->Append(start_index);
}
}
if (nullable) {
null_mask->RoaringBitmapApplyFunc([&](const u32 idx) -> bool {
if (idx >= count) [[unlikely]] {
return false;
}
}
if (boolean_buffer.GetCompactBit(idx)) {
output_true_select->Append(idx);
}
return idx + 1 < count;
});
} else {
for (SizeT idx = 0; idx < count; ++idx) {
if (boolean_buffer.GetCompactBit(idx)) {
Expand Down
1 change: 0 additions & 1 deletion src/executor/expression/expression_selector.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import base_expression;
import expression_state;
import data_block;
import selection;
import bitmask;

export module expression_selector;

Expand Down
4 changes: 2 additions & 2 deletions src/executor/operator/physical_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ import column_index_reader;
import filter_value_type_classification;
import common_analyzer;
import analyzer_pool;
import bitmask;
import roaring_bitmap;
import segment_entry;
import knn_filter;

Expand Down Expand Up @@ -127,7 +127,7 @@ struct FilterQueryNode final : public QueryNode {
// filter info
CommonQueryFilter *common_query_filter_;
const SizeT filter_result_count_ = common_query_filter_->filter_result_count_;
const Map<SegmentID, std::variant<Vector<u32>, Bitmask>> *filter_result_ptr_ = &common_query_filter_->filter_result_;
const Map<SegmentID, Bitmask> *filter_result_ptr_ = &common_query_filter_->filter_result_;
const BaseExpression *secondary_index_filter_ = common_query_filter_->secondary_index_filter_qualified_.get();

explicit FilterQueryNode(CommonQueryFilter *common_query_filter, UniquePtr<QueryNode> &&query_tree)
Expand Down
40 changes: 17 additions & 23 deletions src/executor/operator/physical_scan/physical_filter_scan_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,44 +22,38 @@ module physical_filter_scan_base;
import stl;
import common_query_filter;
import base_table_ref;
import bitmask;
import roaring_bitmap;
import default_values;

namespace infinity {

// Fills `bitmask` with the filter result for the rows of block `block_id` inside segment
// `segment_id`, using common_query_filter_->filter_result_.
// Returns false when the segment has no entry in filter_result_; every other path returns true.
//
// NOTE(review): this span is a rendered diff hunk (header "-22,44 +22,38") without +/- markers,
// so lines from the pre-change and post-change implementations appear together -- for example
// `filter_result` is declared twice below. Reconcile against the real file before treating the
// statement order here as a single function body.
bool PhysicalFilterScanBase::CalculateFilterBitmask(SegmentID segment_id, BlockID block_id, BlockOffset row_count, Bitmask &bitmask) const {
// Replace the caller's bitmask with a fresh one constructed from row_count.
bitmask = Bitmask(row_count);
// Filter accepts everything: no per-segment lookup is done.
if (common_query_filter_->AlwaysTrue()) {
bitmask.SetAllTrue();
return true;
}
// Look up this segment's filter result.
auto it_filter = common_query_filter_->filter_result_.find(segment_id);
if (it_filter == common_query_filter_->filter_result_.end()) {
// No entry for this segment: report that nothing in this block passes.
bitmask.SetAllFalse();
return false;
}
// not skipped after common_query_filter
// Variant form of the per-segment result: either a vector of row offsets or a Bitmask.
const std::variant<Vector<u32>, Bitmask> &filter_result = it_filter->second;
// Sizes the bitmask with row_count rounded up to a power of two (std::bit_ceil).
bitmask.Initialize(std::bit_ceil(row_count));
// Bitmask-only form of the per-segment result (same name as the variant declaration above).
const Bitmask &filter_result = it_filter->second;
// Early exit when the whole segment result is all-true; `bitmask` is returned as it stands.
if (filter_result.IsAllTrue()) {
return true;
}
bitmask.SetAllFalse();
// Segment-relative row offsets covered by this block: start = block_id * DEFAULT_BLOCK_CAPACITY,
// end = start + row_count.
const u32 block_start_offset = block_id * DEFAULT_BLOCK_CAPACITY;
const u32 block_end_offset = block_start_offset + row_count;
if (std::holds_alternative<Vector<u32>>(filter_result)) {
// Vector alternative: pick the offsets in [block_start_offset, block_end_offset) and set the
// corresponding block-local bits.
// assumes the offset vector is sorted ascending (std::lower_bound requires it) -- TODO confirm
const Vector<u32> &filter_result_vector = std::get<Vector<u32>>(filter_result);
const auto it1 = std::lower_bound(filter_result_vector.begin(), filter_result_vector.end(), block_start_offset);
const auto it2 = std::lower_bound(filter_result_vector.begin(), filter_result_vector.end(), block_end_offset);
bitmask.SetAllFalse();
for (auto it = it1; it < it2; ++it) {
bitmask.SetTrue(*it - block_start_offset);
}
} else {
// Bitmask alternative: copy the 64-bit words that cover this block's offset range.
// The end word index is computed from the last row of the block (block_end_offset - 1).
u32 u64_start_offset = block_start_offset / 64;
u32 u64_end_offset = (block_end_offset - 1) / 64;
// GetData() returning null is handled by skipping the copy entirely.
if (const u64 *filter_data = std::get<Bitmask>(filter_result).GetData(); filter_data) {
bitmask.SetAllFalse();
u64 *data = bitmask.GetData();
for (u32 i = u64_start_offset; i <= u64_end_offset; ++i) {
data[i - u64_start_offset] = filter_data[i];
}
}
}
// Build a mask over the segment result's range with only this block's offsets set, then AND it
// with the segment result so only this block's passing rows remain.
Bitmask middle_bitmask(filter_result.count());
middle_bitmask.SetAllFalse();
middle_bitmask.SetTrueRange(block_start_offset, block_end_offset);
middle_bitmask.MergeAnd(filter_result);
// Rebase each remaining segment offset to a block-local index and set it in the output.
// NOTE(review): presumably the callback runs once per set bit and returning true continues the
// iteration -- confirm against the roaring_bitmap module.
middle_bitmask.RoaringBitmapApplyFunc([&](const u32 offset) {
bitmask.SetTrue(offset - block_start_offset);
return true;
});
bitmask.RunOptimize();
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import common_query_filter;
import physical_operator_type;
import base_table_ref;
import load_meta;
import bitmask;
import roaring_bitmap;

namespace infinity {

Expand Down
Loading

0 comments on commit 248fcf9

Please sign in to comment.