From 6e07c7cd43bad6886b4d85e91f8bfab8a736df15 Mon Sep 17 00:00:00 2001 From: yzq <58433399+yangzq50@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:13:00 +0800 Subject: [PATCH] Update bool type to store values as compact bits --- .../expression/expression_selector.cpp | 58 +- .../expression/expression_selector.cppm | 7 +- src/executor/expression/expression_state.cpp | 4 +- src/executor/operator/physical_knn_scan.cpp | 21 +- src/executor/operator/physical_sort.cpp | 11 +- src/executor/physical_operator.cpp | 4 +- src/function/aggregate_function.cppm | 28 +- src/function/scalar/and.cpp | 11 +- src/function/scalar/equals.cpp | 9 +- src/function/scalar/inequals.cpp | 9 +- src/function/scalar/not.cpp | 9 +- src/function/scalar/or.cpp | 13 +- src/parser/type/data_type.cpp | 5 +- src/storage/buffer/column_buffer.cpp | 6 + src/storage/column_vector/column_vector.cpp | 81 ++- src/storage/column_vector/column_vector.cppm | 48 ++ src/storage/column_vector/null_value.cppm | 7 +- .../operator/binary_operator.cppm | 550 +++++++++++++++++- .../operator/unary_operator.cppm | 102 +++- src/storage/column_vector/vector_buffer.cpp | 146 ++++- src/storage/column_vector/vector_buffer.cppm | 16 +- src/storage/data_block.cpp | 9 +- src/storage/meta/entry/block_column_entry.cpp | 20 +- .../column_vector/column_vector_bool.cpp | 16 +- src/unit_test/storage/definition/table.cpp | 2 +- test/sql/dql/select.slt | 104 ---- test/sql/dql/type/boolean.slt | 130 +++++ .../dql/type/date_time_datetime_timestamp.slt | 95 +++ 28 files changed, 1302 insertions(+), 219 deletions(-) create mode 100644 test/sql/dql/type/boolean.slt create mode 100644 test/sql/dql/type/date_time_datetime_timestamp.slt diff --git a/src/executor/expression/expression_selector.cpp b/src/executor/expression/expression_selector.cpp index bdad4efbf2..63cd67d493 100644 --- a/src/executor/expression/expression_selector.cpp +++ b/src/executor/expression/expression_selector.cpp @@ -76,51 +76,47 @@ void 
ExpressionSelector::Select(const SharedPtr &expr, SizeT count, SharedPtr &output_true_select) { SharedPtr bool_column = MakeShared(MakeShared(LogicalType::kBoolean)); - bool_column->Initialize(); + bool_column->Initialize(ColumnVectorType::kCompactBit); ExpressionEvaluator expr_evaluator; expr_evaluator.Init(input_data_); expr_evaluator.Execute(expr, state, bool_column); - const auto *bool_column_ptr = (const u8 *)(bool_column->data()); - SharedPtr &null_mask = bool_column->nulls_ptr_; - - Select(bool_column_ptr, null_mask, count, output_true_select, true); + Select(bool_column, count, output_true_select, true); } -void ExpressionSelector::Select(const u8 *__restrict bool_column, - const SharedPtr &null_mask, - SizeT count, - SharedPtr &output_true_select, - bool nullable) { +void ExpressionSelector::Select(const SharedPtr &bool_column, SizeT count, SharedPtr &output_true_select, bool nullable) { + if (bool_column->vector_type() != ColumnVectorType::kCompactBit || bool_column->data_type()->type() != LogicalType::kBoolean) { + Error("Attempting to select non-boolean expression"); + } + const auto &boolean_buffer = *(bool_column->buffer_); + const auto &null_mask = bool_column->nulls_ptr_; if (nullable && !(null_mask->IsAllTrue())) { - const u64 *result_null_data = null_mask->GetData(); - SizeT unit_count = BitmaskBuffer::UnitCount(count); - for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) { - end_index = Min(end_index, count); - if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { - // all data of 64 rows are not null - while (start_index < end_index) { - if (bool_column[start_index] > 0) { - output_true_select->Append(start_index); - } - ++start_index; + const u64 *result_null_data = null_mask->GetData(); + SizeT unit_count = BitmaskBuffer::UnitCount(count); + for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += 
BitmaskBuffer::UNIT_BITS) { + end_index = Min(end_index, count); + if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { + // all data of 64 rows are not null + for (; start_index < end_index; ++start_index) { + if (boolean_buffer.GetCompactBit(start_index)) { + output_true_select->Append(start_index); } - } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { - // all data of 64 rows are null - start_index = end_index; - } else { - while (start_index < end_index) { - if ((null_mask->IsTrue(start_index)) && (bool_column[start_index] > 0)) { - output_true_select->Append(start_index); - } - ++start_index; + } + } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { + // all data of 64 rows are null + start_index = end_index; + } else { + for (; start_index < end_index; ++start_index) { + if ((null_mask->IsTrue(start_index)) && boolean_buffer.GetCompactBit(start_index)) { + output_true_select->Append(start_index); } } } + } } else { for (SizeT idx = 0; idx < count; ++idx) { - if (bool_column[idx] > 0) { + if (boolean_buffer.GetCompactBit(idx)) { output_true_select->Append(idx); } } diff --git a/src/executor/expression/expression_selector.cppm b/src/executor/expression/expression_selector.cppm index 28215b9fc2..659ff9d202 100644 --- a/src/executor/expression/expression_selector.cppm +++ b/src/executor/expression/expression_selector.cppm @@ -25,6 +25,7 @@ class ExpressionState; class DataBlock; class Selection; class Bitmask; +class ColumnVector; export class ExpressionSelector { public: @@ -43,11 +44,7 @@ public: void Select(const SharedPtr &expr, SharedPtr &state, SizeT count, SharedPtr &output_true_select); - static void Select(const u8 *__restrict bool_column, - const SharedPtr &null_mask, - SizeT count, - SharedPtr &output_true_select, - bool nullable); + static void Select(const SharedPtr &bool_column, SizeT count, SharedPtr &output_true_select, bool nullable); private: const DataBlock *input_data_{nullptr}; diff --git 
a/src/executor/expression/expression_state.cpp b/src/executor/expression/expression_state.cpp index 9b5810a22b..4bdc17a795 100644 --- a/src/executor/expression/expression_state.cpp +++ b/src/executor/expression/expression_state.cpp @@ -158,7 +158,9 @@ SharedPtr ExpressionState::CreateState(const SharedPtrcolumn_vector_->Initialize(ColumnVectorType::kConstant, DEFAULT_VECTOR_SIZE); } else { - result->column_vector_->Initialize(ColumnVectorType::kFlat, DEFAULT_VECTOR_SIZE); + auto column_vector_type = + (function_expr_data_type->type() == LogicalType::kBoolean) ? ColumnVectorType::kCompactBit : ColumnVectorType::kFlat; + result->column_vector_->Initialize(column_vector_type, DEFAULT_VECTOR_SIZE); } // result->output_data_block_.Init({function_expr->Type()}); diff --git a/src/executor/operator/physical_knn_scan.cpp b/src/executor/operator/physical_knn_scan.cpp index bc0bd52f2c..1027547a72 100644 --- a/src/executor/operator/physical_knn_scan.cpp +++ b/src/executor/operator/physical_knn_scan.cpp @@ -27,6 +27,7 @@ import base_table_ref; import block_entry; import knn_scan_data; import column_buffer; +import vector_buffer; import block_column_entry; import knn_distance; import third_party; @@ -92,7 +93,7 @@ void ReadDataBlock(DataBlock *output, output->Finalize(); } -void MergeIntoBitmask(const u8 *__restrict input_bool_column, +void MergeIntoBitmask(const VectorBuffer *input_bool_column_buffer, const SharedPtr &input_null_mask, const SizeT count, Bitmask &bitmask, @@ -100,7 +101,7 @@ void MergeIntoBitmask(const u8 *__restrict input_bool_column, SizeT bitmask_offset = 0) { if ((!nullable) || (input_null_mask->IsAllTrue())) { for (SizeT idx = 0; idx < count; ++idx) { - if (input_bool_column[idx] == 0) { + if (!(input_bool_column_buffer->GetCompactBit(idx))) { bitmask.SetFalse(idx + bitmask_offset); } } @@ -115,7 +116,7 @@ void MergeIntoBitmask(const u8 *__restrict input_bool_column, if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { // all data of 64 rows are not null 
for (; start_index < end_index; ++start_index) { - if (input_bool_column[start_index] == 0) { + if (!(input_bool_column_buffer->GetCompactBit(start_index))) { bitmask.SetFalse(start_index + bitmask_offset); } } @@ -134,7 +135,7 @@ void MergeIntoBitmask(const u8 *__restrict input_bool_column, } } else { for (; start_index < end_index; ++start_index) { - if (!(input_null_mask->IsTrue(start_index)) || (input_bool_column[start_index] == 0)) { + if (!(input_null_mask->IsTrue(start_index)) || !(input_bool_column_buffer->GetCompactBit(start_index))) { bitmask.SetFalse(start_index + bitmask_offset); } } @@ -265,13 +266,13 @@ void PhysicalKnnScan::ExecuteInternal(QueryContext *query_context, KnnScanOperat // filter and build bitmask, if filter_expression_ != nullptr db_for_filter->Reset(row_count); ReadDataBlock(db_for_filter, buffer_mgr, row_count, block_entry, base_table_ref_->column_ids_); - bool_column->Initialize(ColumnVectorType::kFlat, row_count); + bool_column->Initialize(ColumnVectorType::kCompactBit, row_count); ExpressionEvaluator expr_evaluator; expr_evaluator.Init(db_for_filter); expr_evaluator.Execute(filter_expression_, filter_state_, bool_column); - const auto *bool_column_ptr = (const u8 *)(bool_column->data()); + const VectorBuffer *bool_column_buffer = bool_column->buffer_.get(); SharedPtr &null_mask = bool_column->nulls_ptr_; - MergeIntoBitmask(bool_column_ptr, null_mask, row_count, bitmask, true); + MergeIntoBitmask(bool_column_buffer, null_mask, row_count, bitmask, true); bool_column->Reset(); } @@ -314,12 +315,12 @@ void PhysicalKnnScan::ExecuteInternal(QueryContext *query_context, KnnScanOperat auto row_count = block_entry->row_count_; db_for_filter->Reset(row_count); ReadDataBlock(db_for_filter, buffer_mgr, row_count, block_entry.get(), base_table_ref_->column_ids_); - bool_column->Initialize(ColumnVectorType::kFlat, row_count); + bool_column->Initialize(ColumnVectorType::kCompactBit, row_count); expr_evaluator.Init(db_for_filter); 
expr_evaluator.Execute(filter_expression_, filter_state_, bool_column); - const auto *bool_column_ptr = (const u8 *)(bool_column->data()); + const VectorBuffer *bool_column_buffer = bool_column->buffer_.get(); SharedPtr &null_mask = bool_column->nulls_ptr_; - MergeIntoBitmask(bool_column_ptr, null_mask, row_count, bitmask, true, segment_row_count_real); + MergeIntoBitmask(bool_column_buffer, null_mask, row_count, bitmask, true, segment_row_count_real); segment_row_count_real += row_count; bool_column->Reset(); } diff --git a/src/executor/operator/physical_sort.cpp b/src/executor/operator/physical_sort.cpp index 934e63b5c2..a7c4b9f287 100644 --- a/src/executor/operator/physical_sort.cpp +++ b/src/executor/operator/physical_sort.cpp @@ -101,7 +101,16 @@ class Comparator { switch (type.type()) { case kBoolean: { - COMPARE(BooleanT) + auto bool_left = left_result_vector->buffer_->GetCompactBit(left_index.offset); + auto bool_right = right_result_vector->buffer_->GetCompactBit(right_index.offset); /* compact-bit booleans have no addressable element, so each side is fetched by bit index; the right row must use its own offset, not the left row's */ + if (bool_left == bool_right) { + continue; + } + if (order_type == OrderType::kAsc) { + return bool_left < bool_right; + } else { + return bool_left > bool_right; + } } case kTinyInt: { COMPARE(TinyIntT) diff --git a/src/executor/physical_operator.cpp b/src/executor/physical_operator.cpp index 3bec3d8660..f9685f5ddf 100644 --- a/src/executor/physical_operator.cpp +++ b/src/executor/physical_operator.cpp @@ -59,7 +59,9 @@ void PhysicalOperator::InputLoad(QueryContext *query_context, OperatorState *ope // Filling ColumnVector for (SizeT j = 0; j < load_column_count; ++j) { SharedPtr column_vector = ColumnVector::Make(load_metas[j].type_); - column_vector->Initialize(ColumnVectorType::kFlat, capacity); + auto column_vector_type = + (load_metas[j].type_->type() == LogicalType::kBoolean) ? 
ColumnVectorType::kCompactBit : ColumnVectorType::kFlat; + column_vector->Initialize(column_vector_type, capacity); input_block->InsertVector(column_vector, load_metas[j].index_); } diff --git a/src/function/aggregate_function.cppm b/src/function/aggregate_function.cppm index 8be567b9fa..b3f5947333 100644 --- a/src/function/aggregate_function.cppm +++ b/src/function/aggregate_function.cppm @@ -13,12 +13,12 @@ // limitations under the License. module; - +#include import stl; import function; import function_data; import column_vector; - +import vector_buffer; import infinity_exception; import base_expression; import parser; @@ -43,6 +43,21 @@ public: // Loop execute state update according to the input column vector switch (input_column_vector->vector_type()) { + case ColumnVectorType::kCompactBit: { + if constexpr (!std::is_same_v) { + Error("kCompactBit column vector only support Boolean type"); + } else { + // only for count, min, max + SizeT row_count = input_column_vector->Size(); + BooleanT value; + const VectorBuffer *buffer = input_column_vector->buffer_.get(); + for (SizeT idx = 0; idx < row_count; ++idx) { + value = buffer->GetCompactBit(idx); + ((AggregateState *)state)->Update(&value, 0); + } + } + break; + } case ColumnVectorType::kFlat: { SizeT row_count = input_column_vector->Size(); auto *input_ptr = (InputType *)(input_column_vector->data()); @@ -52,6 +67,15 @@ public: break; } case ColumnVectorType::kConstant: { + if (input_column_vector->data_type()->type() == LogicalType::kBoolean) { + if constexpr (!std::is_same_v) { + Error("types do not match"); + } else { + BooleanT value = input_column_vector->buffer_->GetCompactBit(0); + ((AggregateState *)state)->Update(&value, 0); + } + break; + } auto *input_ptr = (InputType *)(input_column_vector->data()); ((AggregateState *)state)->Update(input_ptr, 0); break; diff --git a/src/function/scalar/and.cpp b/src/function/scalar/and.cpp index be5d501507..f88a9a3000 100644 --- a/src/function/scalar/and.cpp +++ 
b/src/function/scalar/and.cpp @@ -14,6 +14,7 @@ module; +#include import stl; import new_catalog; @@ -30,7 +31,15 @@ namespace infinity { struct AndFunction { template static inline void Run(TA left, TB right, TC &result) { - result = left and right; + if constexpr (std::is_same_v, u8> && std::is_same_v, u8> && + std::is_same_v, u8>) { + result = left & right; + } else if constexpr (std::is_same_v, BooleanT> && std::is_same_v, BooleanT> && + std::is_same_v, BooleanT>) { + result = left and right; + } else { + Error("AND function accepts only u8 and BooleanT."); + } } }; diff --git a/src/function/scalar/equals.cpp b/src/function/scalar/equals.cpp index 4ccc613da5..3347487fa4 100644 --- a/src/function/scalar/equals.cpp +++ b/src/function/scalar/equals.cpp @@ -13,7 +13,7 @@ // limitations under the License. module; - +#include import stl; import new_catalog; @@ -30,7 +30,12 @@ namespace infinity { struct EqualsFunction { template static inline void Run(TA left, TB right, TC &result) { - result = (left == right); + if constexpr (std::is_same_v, u8> && std::is_same_v, u8> && + std::is_same_v, u8>) { + result = ~(left ^ right); + } else { + result = (left == right); + } } }; diff --git a/src/function/scalar/inequals.cpp b/src/function/scalar/inequals.cpp index 52b1d15ba9..ebfac9b766 100644 --- a/src/function/scalar/inequals.cpp +++ b/src/function/scalar/inequals.cpp @@ -13,8 +13,8 @@ // limitations under the License. 
module; - #include +#include import stl; import new_catalog; @@ -32,7 +32,12 @@ namespace infinity { struct InEqualsFunction { template static inline void Run(TA left, TB right, TC &result) { - result = (left != right); + if constexpr (std::is_same_v, u8> && std::is_same_v, u8> && + std::is_same_v, u8>) { + result = (left ^ right); + } else { + result = (left != right); + } } }; diff --git a/src/function/scalar/not.cpp b/src/function/scalar/not.cpp index 8945e7cf59..ca9f18a8d5 100644 --- a/src/function/scalar/not.cpp +++ b/src/function/scalar/not.cpp @@ -14,6 +14,7 @@ module; +#include import stl; import new_catalog; @@ -30,7 +31,13 @@ namespace infinity { struct NotFunction { template static inline void Run(TA input, TB &result) { - result = !input; + if constexpr (std::is_same_v, u8> && std::is_same_v, u8>) { + result = ~input; + } else if constexpr (std::is_same_v, BooleanT> && std::is_same_v, BooleanT>) { + result = !input; + } else { + Error("NOT function accepts only u8 and BooleanT."); + } } }; diff --git a/src/function/scalar/or.cpp b/src/function/scalar/or.cpp index 9cd0538fb9..153031dc59 100644 --- a/src/function/scalar/or.cpp +++ b/src/function/scalar/or.cpp @@ -14,6 +14,7 @@ module; +#include import stl; import new_catalog; @@ -21,7 +22,7 @@ import infinity_exception; import scalar_function; import scalar_function_set; import parser; -import third_party; +// import third_party; module or_func; @@ -30,7 +31,15 @@ namespace infinity { struct OrFunction { template static inline void Run(TA left, TB right, TC &result) { - result = left or right; + if constexpr (std::is_same_v, u8> && std::is_same_v, u8> && + std::is_same_v, u8>) { + result = left | right; + } else if constexpr (std::is_same_v, BooleanT> && std::is_same_v, BooleanT> && + std::is_same_v, BooleanT>) { + result = left or right; + } else { + Error("OR function accepts only u8 and BooleanT."); + } } }; diff --git a/src/parser/type/data_type.cpp b/src/parser/type/data_type.cpp index 
202fd5406f..75396c0e56 100644 --- a/src/parser/type/data_type.cpp +++ b/src/parser/type/data_type.cpp @@ -26,7 +26,10 @@ namespace infinity { DataType::DataType(LogicalType logical_type, std::shared_ptr type_info_ptr) : type_(logical_type), type_info_(std::move(type_info_ptr)) { switch (logical_type) { - case kBoolean: + case kBoolean: { + plain_type_ = false; + break; + } case kTinyInt: case kSmallInt: case kInteger: diff --git a/src/storage/buffer/column_buffer.cpp b/src/storage/buffer/column_buffer.cpp index 9eb2ea41fc..c203b02f0b 100644 --- a/src/storage/buffer/column_buffer.cpp +++ b/src/storage/buffer/column_buffer.cpp @@ -78,6 +78,9 @@ const_ptr_t ColumnBuffer::GetValueAt(SizeT row_idx, const DataType &data_type) { return static_cast(inline_col_.GetData()) + data_type.Size() * row_idx; } else { switch (data_type.type()) { + case LogicalType::kBoolean: { + Error("Can't return a pointer for compact Boolean type"); + } case LogicalType::kVarchar: case LogicalType::kArray: case LogicalType::kTuple: @@ -145,6 +148,9 @@ ptr_t ColumnBuffer::GetValueAtMut(SizeT row_idx, const DataType &data_type) { return static_cast(inline_col_.GetDataMut()) + data_type.Size() * row_idx; } else { switch (data_type.type()) { + case LogicalType::kBoolean: { + Error("Can't return a pointer for compact Boolean type"); + } case kVarchar: case kArray: case kTuple: diff --git a/src/storage/column_vector/column_vector.cpp b/src/storage/column_vector/column_vector.cpp index 85dc07e7b3..d28b564711 100644 --- a/src/storage/column_vector/column_vector.cpp +++ b/src/storage/column_vector/column_vector.cpp @@ -61,9 +61,12 @@ void ColumnVector::Initialize(const ColumnVector &other, const Selection &input_ // case LogicalType::kBitmap: // case LogicalType::kPolygon: // case LogicalType::kPath: + case LogicalType::kBoolean: { + vector_buffer_type = VectorBufferType::kCompactBit; + break; + } case LogicalType::kVarchar: { vector_buffer_type = VectorBufferType::kHeap; - break; } case 
LogicalType::kInvalid: @@ -242,6 +245,13 @@ void ColumnVector::Initialize(ColumnVectorType vector_type, SizeT capacity) { if (vector_type == ColumnVectorType::kInvalid) { Error("Attempt to initialize column vector to invalid type."); } + + // require BooleanT vector to be initialized with ColumnVectorType::kConstant or ColumnVectorType::kCompactBit + // if ColumnVectorType::kFlat is used, change it to ColumnVectorType::kCompactBit + if (data_type_->type() == LogicalType::kBoolean && vector_type == ColumnVectorType::kFlat) { + vector_type = ColumnVectorType::kCompactBit; + } + // TODO: No check on capacity value. vector_type_ = vector_type; @@ -255,9 +265,12 @@ void ColumnVector::Initialize(ColumnVectorType vector_type, SizeT capacity) { // case LogicalType::kBitmap: // case LogicalType::kPolygon: // case LogicalType::kPath: + case LogicalType::kBoolean: { + vector_buffer_type = VectorBufferType::kCompactBit; + break; + } case LogicalType::kVarchar: { vector_buffer_type = VectorBufferType::kHeap; - break; } case LogicalType::kInvalid: @@ -314,9 +327,12 @@ void ColumnVector::Initialize(ColumnVectorType vector_type, const ColumnVector & // case LogicalType::kBitmap: // case LogicalType::kPolygon: // case LogicalType::kPath: + case LogicalType::kBoolean: { + vector_buffer_type = VectorBufferType::kCompactBit; + break; + } case LogicalType::kVarchar: { vector_buffer_type = VectorBufferType::kHeap; - break; } case LogicalType::kInvalid: @@ -652,7 +668,7 @@ String ColumnVector::ToString(SizeT row_index) const { switch (data_type_->type()) { case kBoolean: { - return ((BooleanT *)data_ptr_)[row_index] ? "true" : "false"; + return buffer_->GetCompactBit(row_index) ? 
"true" : "false"; } case kTinyInt: { return ToStr(((TinyIntT *)data_ptr_)[row_index]); @@ -782,7 +798,7 @@ Value ColumnVector::GetValue(SizeT index) const { switch (data_type_->type()) { case kBoolean: { - return Value::MakeBool(((BooleanT *)data_ptr_)[index]); + return Value::MakeBool(buffer_->GetCompactBit(index)); } case kTinyInt: { return Value::MakeTinyInt(((TinyIntT *)data_ptr_)[index]); @@ -902,7 +918,7 @@ void ColumnVector::SetValue(SizeT index, const Value &value) { switch (data_type_->type()) { case kBoolean: { - ((BooleanT *)data_ptr_)[index] = value.GetValue(); + buffer_->SetCompactBit(index, value.GetValue()); break; } case kTinyInt: { @@ -1062,7 +1078,7 @@ void ColumnVector::SetByRawPtr(SizeT index, const_ptr_t raw_ptr) { switch (data_type_->type()) { case kBoolean: { - ((BooleanT *)data_ptr_)[index] = *(BooleanT *)(raw_ptr); + buffer_->SetCompactBit(index, *(BooleanT *)(raw_ptr)); break; } case kTinyInt: { @@ -1401,7 +1417,20 @@ SizeT ColumnVector::AppendWith(ColumnBuffer &column_buffer, SizeT start_row, Siz } switch (data_type_->type()) { - case kBoolean: + case kBoolean: { + auto src_ptr = reinterpret_cast(column_buffer.GetAll()); + auto get_boolean_at = [src_ptr](SizeT row_idx) -> BooleanT { + SizeT byte_idx = row_idx / 8; + SizeT bit_idx = row_idx % 8; + return (src_ptr[byte_idx] >> bit_idx) & u8(1); + }; + auto dst_buffer = buffer_.get(); + for (SizeT row_idx = 0; row_idx < appended_rows; ++row_idx) { + dst_buffer->SetCompactBit(tail_index_ + row_idx, get_boolean_at(start_row + row_idx)); + } + this->tail_index_ += appended_rows; + break; + } case kTinyInt: case kSmallInt: case kInteger: @@ -1586,6 +1615,9 @@ bool ColumnVector::operator==(const ColumnVector &other) const { return false; } } + } else if (data_type_->type() == LogicalType::kBoolean) { + return other.data_type_->type() == LogicalType::kBoolean && + VectorBuffer::CompactBitIsSame(this->buffer_, this->tail_index_, other.buffer_, other.tail_index_); } else { return 0 == 
Memcmp(this->data_ptr_, other.data_ptr_, this->tail_index_ * this->data_type_size_); } @@ -1596,11 +1628,16 @@ i32 ColumnVector::GetSizeInBytes() const { if (!initialized) { Error("Column vector isn't initialized."); } - if (vector_type_ != ColumnVectorType::kFlat && vector_type_ != ColumnVectorType::kConstant) { + if (vector_type_ != ColumnVectorType::kFlat && vector_type_ != ColumnVectorType::kConstant && vector_type_ != ColumnVectorType::kCompactBit) { Error(Format("Not supported vector_type {}", int(vector_type_))); } i32 size = this->data_type_->GetSizeInBytes() + sizeof(ColumnVectorType); - size += sizeof(i32) + this->tail_index_ * this->data_type_size_; + size += sizeof(i32); + if (vector_type_ == ColumnVectorType::kCompactBit) { + size += (this->tail_index_ + 7) / 8; + } else { + size += this->tail_index_ * this->data_type_size_; + } if (data_type_->type() == kVarchar) { size += sizeof(i32) + buffer_->fix_heap_mgr_->total_size(); } @@ -1612,15 +1649,21 @@ void ColumnVector::WriteAdv(char *&ptr) const { if (!initialized) { Error("Column vector isn't initialized."); } - if (vector_type_ != ColumnVectorType::kFlat && vector_type_ != ColumnVectorType::kConstant) { + if (vector_type_ != ColumnVectorType::kFlat && vector_type_ != ColumnVectorType::kConstant && vector_type_ != ColumnVectorType::kCompactBit) { Error(Format("Not supported vector_type {}", int(vector_type_))); } this->data_type_->WriteAdv(ptr); WriteBufAdv(ptr, this->vector_type_); // write fixed part WriteBufAdv(ptr, tail_index_); - Memcpy(ptr, this->data_ptr_, this->tail_index_ * this->data_type_size_); - ptr += this->tail_index_ * this->data_type_size_; + if (vector_type_ == ColumnVectorType::kCompactBit) { + SizeT byte_size = (this->tail_index_ + 7) / 8; + Memcpy(ptr, this->data_ptr_, byte_size); + ptr += byte_size; + } else { + Memcpy(ptr, this->data_ptr_, this->tail_index_ * this->data_type_size_); + ptr += this->tail_index_ * this->data_type_size_; + } // write variable part if 
(data_type_->type() == kVarchar) { i32 heap_len = buffer_->fix_heap_mgr_->total_size(); @@ -1641,9 +1684,15 @@ SharedPtr ColumnVector::ReadAdv(char *&ptr, i32 maxbytes) { // read fixed part i32 tail_index = ReadBufAdv(ptr); column_vector->tail_index_ = tail_index; - i32 data_type_size = data_type->Size(); - Memcpy((void *)column_vector->data_ptr_, ptr, tail_index * data_type_size); - ptr += tail_index * data_type_size; + if (vector_type == ColumnVectorType::kCompactBit) { + SizeT byte_size = (tail_index + 7) / 8; + Memcpy((void *)column_vector->data_ptr_, ptr, byte_size); + ptr += byte_size; + } else { + i32 data_type_size = data_type->Size(); + Memcpy((void *)column_vector->data_ptr_, ptr, tail_index * data_type_size); + ptr += tail_index * data_type_size; + } // read variable part if (data_type->type() == kVarchar) { i32 heap_len = ReadBufAdv(ptr); diff --git a/src/storage/column_vector/column_vector.cppm b/src/storage/column_vector/column_vector.cppm index 1b096b5727..37409ddfb4 100644 --- a/src/storage/column_vector/column_vector.cppm +++ b/src/storage/column_vector/column_vector.cppm @@ -36,6 +36,7 @@ export enum class ColumnVectorType : i8 { kInvalid, kFlat, // Stand without any encode kConstant, // All vector has same type and value + kCompactBit, // Compact bit encoding // kDictionary, // There is a dictionary zone // kRLE, // Run length encoding // kSequence, @@ -190,6 +191,23 @@ private: } } + template <> + static void CopyValue(const ColumnVector &dst, const ColumnVector &src, SizeT from, SizeT count) { + auto dst_tail = dst.tail_index_; + const VectorBuffer *src_buffer = src.buffer_.get(); + auto dst_buffer = dst.buffer_.get(); + if (dst_tail % 8 == 0 && from % 8 == 0) { + SizeT dst_byte_offset = dst_tail / 8; + SizeT src_byte_offset = from / 8; + SizeT byte_count = (count + 7) / 8; // copy to tail + Memcpy(dst_buffer->GetData() + dst_byte_offset, src_buffer->GetData() + src_byte_offset, byte_count); + } else { + for (SizeT idx = 0; idx < count; ++idx) 
{ + dst_buffer->SetCompactBit(dst_tail + idx, src_buffer->GetCompactBit(from + idx)); + } + } + } + // Used by Append by Ptr void SetByRawPtr(SizeT index, const_ptr_t raw_ptr); @@ -231,6 +249,17 @@ ColumnVector::CopyFrom(const VectorBuffer *__restrict src_buf, VectorBuffer *__r } } +template <> +inline void ColumnVector::CopyFrom(const VectorBuffer *__restrict src_buf, + VectorBuffer *__restrict dst_buf, + SizeT count, + const Selection &input_select) { + for (SizeT idx = 0; idx < count; ++idx) { + SizeT row_id = input_select[idx]; + dst_buf->SetCompactBit(idx, src_buf->GetCompactBit(row_id)); + } +} + template <> inline void ColumnVector::CopyFrom(const VectorBuffer *__restrict src_buf, VectorBuffer *__restrict dst_buf, @@ -384,6 +413,19 @@ inline void ColumnVector::CopyFrom(const VectorBuffer *__restrict src_buf, std::copy(((const DataT *)(src)) + source_start_idx, ((const DataT *)(src)) + source_end_idx, ((DataT *)(dst)) + dest_start_idx); } +template <> +inline void ColumnVector::CopyFrom(const VectorBuffer *__restrict src_buf, + VectorBuffer *__restrict dst_buf, + SizeT source_start_idx, + SizeT dest_start_idx, + SizeT count) { + VectorBuffer::CopyCompactBits(reinterpret_cast(dst_buf->GetData()), + reinterpret_cast(src_buf->GetData()), + dest_start_idx, + source_start_idx, + count); +} + template <> inline void ColumnVector::CopyFrom(const VectorBuffer *__restrict src_buf, VectorBuffer *__restrict dst_buf, @@ -546,6 +588,12 @@ inline void ColumnVector::CopyRowFrom(const VectorBuffer *__restrict src_buf, Si ((DataT *)(dst))[dst_idx] = ((const DataT *)(src))[src_idx]; } +template <> +inline void +ColumnVector::CopyRowFrom(const VectorBuffer *__restrict src_buf, SizeT src_idx, VectorBuffer *__restrict dst_buf, SizeT dst_idx) { + dst_buf->SetCompactBit(dst_idx, src_buf->GetCompactBit(src_idx)); +} + template <> inline void ColumnVector::CopyRowFrom(const VectorBuffer *__restrict src_buf, SizeT src_idx, VectorBuffer *__restrict dst_buf, SizeT dst_idx) { diff --git 
a/src/storage/column_vector/null_value.cppm b/src/storage/column_vector/null_value.cppm index 8373bb1f8d..c5f23f1fee 100644 --- a/src/storage/column_vector/null_value.cppm +++ b/src/storage/column_vector/null_value.cppm @@ -13,11 +13,10 @@ // limitations under the License. module; - +#include import parser; import stl; -// -//import infinity_exception; +import infinity_exception; export module null_value; @@ -25,7 +24,7 @@ export namespace infinity { template inline ValueType NullValue() { - TypeError(DataType::TypeToString() + " doesn't have null value."); + TypeException(DataType::TypeToString() + " doesn't have null value."); } template <> diff --git a/src/storage/column_vector/operator/binary_operator.cppm b/src/storage/column_vector/operator/binary_operator.cppm index 1557a219d2..9ae52c2cce 100644 --- a/src/storage/column_vector/operator/binary_operator.cppm +++ b/src/storage/column_vector/operator/binary_operator.cppm @@ -14,9 +14,11 @@ module; +#include import stl; import column_vector; - +import vector_buffer; +import parser; import infinity_exception; import bitmask; import bitmask_buffer; @@ -25,6 +27,24 @@ export module binary_operator; namespace infinity { +class BooleanPointer { + VectorBuffer *buffer_ = nullptr; + SizeT index_ = {}; + +public: + explicit BooleanPointer(VectorBuffer *buffer) : buffer_(buffer) {} + + BooleanPointer &operator[](SizeT index) { + index_ = index; + return *this; + } + + BooleanPointer &operator=(const BooleanT &value) { + buffer_->SetCompactBit(index_, value); + return *this; + } +}; + export class BinaryOperator { public: template @@ -34,11 +54,26 @@ public: SizeT count, void *state_ptr, bool nullable) { + if constexpr (std::is_same_v, BooleanT>) { + if constexpr (std::is_same_v, BooleanT> ^ std::is_same_v, BooleanT>) { + Error("Type mismatch: BinaryOperator applied to boolean and non-boolean type."); + } else if constexpr (std::is_same_v, BooleanT> && std::is_same_v, BooleanT>) { + // (boolean (and / or) boolean) -> 
boolean + ExecuteBooleanSourceBinary(left, right, result, count, state_ptr, nullable); + } else { + // (x (> / >= / < / <= / == / !=) y) -> boolean + ExecuteBooleanResultBinary(left, right, result, count, state_ptr, nullable); + } + return; + } switch (left->vector_type()) { case ColumnVectorType::kInvalid: { Error("Invalid column vector type."); } + case ColumnVectorType::kCompactBit: { + Error("kCompactBit should not be in this branch."); + } case ColumnVectorType::kFlat: { return ExecuteFlat(left, right, result, count, state_ptr, nullable); } @@ -355,9 +390,9 @@ private: static void inline ExecuteFlatHeterogeneous(const SharedPtr &, const SharedPtr &, SharedPtr &, - SizeT , + SizeT, void *, - bool ) { + bool) { Error("Not implemented."); } @@ -506,16 +541,16 @@ private: result_null->SetAllTrue(); Operator::template Execute(left_ptr[0], right_ptr[0], result_ptr[0], result_null.get(), 0, state_ptr); } - result->Finalize(1) ; + result->Finalize(1); } template static void inline ExecuteConstantHeterogeneous(const SharedPtr &, const SharedPtr &, SharedPtr &, - SizeT , + SizeT, void *, - bool ) { + bool) { Error("Not implemented."); } @@ -523,9 +558,9 @@ private: static void inline ExecuteHeterogeneousFlat(const SharedPtr &, const SharedPtr &, SharedPtr &, - SizeT , + SizeT, void *, - bool ) { + bool) { Error("Not implemented."); } @@ -533,9 +568,9 @@ private: static void inline ExecuteHeterogeneousConstant(const SharedPtr &, const SharedPtr &, SharedPtr &, - SizeT , + SizeT, void *, - bool ) { + bool) { Error("Not implemented."); } @@ -543,11 +578,502 @@ private: static void inline ExecuteHeterogeneousHeterogeneous(const SharedPtr &, const SharedPtr &, SharedPtr &, - SizeT , + SizeT, void *, - bool ) { + bool) { Error("Not implemented."); } + + // function with Boolean type + template + static void inline AllBooleanExecuteWithNull(const SharedPtr &left, + const SharedPtr &right, + SharedPtr &result, + SizeT count, + void *state_ptr) { + const SharedPtr &left_null = 
left->nulls_ptr_; + const SharedPtr &right_null = right->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + + if (left_null->IsAllTrue()) { + result_null->DeepCopy(*right_null); + } else { + result_null->DeepCopy(*left_null); + if (!(right_null->IsAllTrue())) { + result_null->Merge(*right_null); + } + } + + const u64 *result_null_data = result_null->GetData(); + SizeT unit_count = BitmaskBuffer::UnitCount(count); + auto left_u8 = reinterpret_cast(left->data()); + auto right_u8 = reinterpret_cast(right->data()); + auto result_u8 = reinterpret_cast(result->data()); + static_assert(BitmaskBuffer::UNIT_BITS % 8 == 0, "static_assert: BitmaskBuffer::UNIT_BITS % 8 == 0"); + for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) { + end_index = Min(end_index, count); + if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { + // all data of 64 rows are not null + const SizeT e = end_index / 8, tail = end_index % 8; + for (SizeT b = start_index / 8; b < e; ++b) { + Operator::template Execute(left_u8[b], right_u8[b], result_u8[b], result_null.get(), 0, state_ptr); + } + if (tail) { + u8 tail_result; + Operator::template Execute(left_u8[e], right_u8[e], tail_result, result_null.get(), 0, state_ptr); + const u8 mask_keep = u8(0xff) << tail; + result_u8[e] = (result_u8[e] & mask_keep) | (tail_result & ~mask_keep); + } + start_index = end_index; + } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { + // all data of 64 rows are null + start_index = end_index; + } else { + for (BooleanT answer; start_index < end_index; ++start_index) { + if (result_null->IsTrue(start_index)) { + // This row isn't null + Operator::template Execute(left->buffer_->GetCompactBit(start_index), + right->buffer_->GetCompactBit(start_index), + answer, + result_null.get(), + start_index, + state_ptr); + result->buffer_->SetCompactBit(start_index, answer); + } + } + } + } + } + + template + static void inline 
AllBooleanExecuteWithNull(const SharedPtr &left, + const u8 right_u8, + SharedPtr &result, + SizeT count, + void *state_ptr) { + const SharedPtr &left_null = left->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + + result_null->DeepCopy(*left_null); + + const u64 *result_null_data = result_null->GetData(); + SizeT unit_count = BitmaskBuffer::UnitCount(count); + auto left_u8 = reinterpret_cast(left->data()); + BooleanT right_boolean = (right_u8 & (u8(1))); + auto result_u8 = reinterpret_cast(result->data()); + static_assert(BitmaskBuffer::UNIT_BITS % 8 == 0, "static_assert: BitmaskBuffer::UNIT_BITS % 8 == 0"); + for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) { + end_index = Min(end_index, count); + if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { + // all data of 64 rows are not null + const SizeT e = end_index / 8, tail = end_index % 8; + for (SizeT b = start_index / 8; b < e; ++b) { + Operator::template Execute(left_u8[b], right_u8, result_u8[b], result_null.get(), 0, state_ptr); + } + if (tail) { + u8 tail_result; + Operator::template Execute(left_u8[e], right_u8, tail_result, result_null.get(), 0, state_ptr); + const u8 mask_keep = u8(0xff) << tail; + result_u8[e] = (result_u8[e] & mask_keep) | (tail_result & ~mask_keep); + } + start_index = end_index; + } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { + // all data of 64 rows are null + start_index = end_index; + } else { + for (BooleanT answer; start_index < end_index; ++start_index) { + if (result_null->IsTrue(start_index)) { + // This row isn't null + Operator::template Execute(left->buffer_->GetCompactBit(start_index), + right_boolean, + answer, + result_null.get(), + start_index, + state_ptr); + result->buffer_->SetCompactBit(start_index, answer); + } + } + } + } + } + + template + static void inline AllBooleanExecuteWithNull(const u8 left_u8, + const SharedPtr &right, + SharedPtr &result, + 
SizeT count, + void *state_ptr) { + const SharedPtr &right_null = right->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + + result_null->DeepCopy(*right_null); + + const u64 *result_null_data = result_null->GetData(); + SizeT unit_count = BitmaskBuffer::UnitCount(count); + auto right_u8 = reinterpret_cast(right->data()); + BooleanT left_boolean = (left_u8 & (u8(1))); + auto result_u8 = reinterpret_cast(result->data()); + static_assert(BitmaskBuffer::UNIT_BITS % 8 == 0, "static_assert: BitmaskBuffer::UNIT_BITS % 8 == 0"); + for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) { + end_index = Min(end_index, count); + if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { + // all data of 64 rows are not null + const SizeT e = end_index / 8, tail = end_index % 8; + for (SizeT b = start_index / 8; b < e; ++b) { + Operator::template Execute(left_u8, right_u8[b], result_u8[b], result_null.get(), 0, state_ptr); + } + if (tail) { + u8 tail_result; + Operator::template Execute(left_u8, right_u8[e], tail_result, result_null.get(), 0, state_ptr); + const u8 mask_keep = u8(0xff) << tail; + result_u8[e] = (result_u8[e] & mask_keep) | (tail_result & ~mask_keep); + } + start_index = end_index; + } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { + // all data of 64 rows are null + start_index = end_index; + } else { + for (BooleanT answer; start_index < end_index; ++start_index) { + if (result_null->IsTrue(start_index)) { + // This row isn't null + Operator::template Execute(left_boolean, + right->buffer_->GetCompactBit(start_index), + answer, + result_null.get(), + start_index, + state_ptr); + result->buffer_->SetCompactBit(start_index, answer); + } + } + } + } + } + + template + static void inline ExecuteBooleanSourceBinary(const SharedPtr &left, + const SharedPtr &right, + SharedPtr &result, + SizeT count, + void *state_ptr, + bool nullable) { + const SharedPtr &left_null = 
left->nulls_ptr_; + const SharedPtr &right_null = right->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + // check constants + auto left_vector_type = left->vector_type(); + auto right_vector_type = right->vector_type(); + // now only support ColumnVectorType::kConstant and ColumnVectorType::kCompactBit for Boolean + if (left_vector_type == ColumnVectorType::kConstant && right_vector_type == ColumnVectorType::kConstant) { + if (!nullable || (left_null->IsAllTrue() && right_null->IsAllTrue())) { + bool answer; + Operator::template Execute(left->buffer_->GetCompactBit(0), + right->buffer_->GetCompactBit(0), + answer, + result_null.get(), + 0, + state_ptr); + result->buffer_->SetCompactBit(0, answer); + result_null->SetAllTrue(); + } else { + result_null->SetAllFalse(); + } + result->Finalize(1); + } else if (left_vector_type == ColumnVectorType::kCompactBit && right_vector_type == ColumnVectorType::kCompactBit) { + if (!nullable || (left_null->IsAllTrue() && right_null->IsAllTrue())) { + result_null->SetAllTrue(); + SizeT count_bytes = count / 8; + SizeT count_tail = count % 8; + auto left_u8 = reinterpret_cast(left->data()); + auto right_u8 = reinterpret_cast(right->data()); + auto result_u8 = reinterpret_cast(result->data()); + for (SizeT i = 0; i < count_bytes; ++i) { + Operator::template Execute(left_u8[i], right_u8[i], result_u8[i], result_null.get(), 0, state_ptr); + } + if (count_tail > 0) { + u8 &tail_u8 = result_u8[count_bytes]; + u8 ans; + Operator::template Execute(left_u8[count_bytes], right_u8[count_bytes], ans, result_null.get(), 0, state_ptr); + u8 keep_mask = u8(0xff) << count_tail; + tail_u8 = (tail_u8 & keep_mask) | (ans & ~keep_mask); + } + } else { + AllBooleanExecuteWithNull(left, right, result, count, state_ptr); + } + result->Finalize(count); + } else if (left_vector_type == ColumnVectorType::kConstant && right_vector_type == ColumnVectorType::kCompactBit) { + bool left_value = left->buffer_->GetCompactBit(0); + u8 left_u8 = 
left_value ? u8(0xff) : u8(0x00); + if (nullable && !(left_null->IsAllTrue())) { + result_null->SetAllFalse(); + } else if (!nullable || (left_null->IsAllTrue() && right_null->IsAllTrue())) { + result_null->SetAllTrue(); + SizeT count_bytes = count / 8; + SizeT count_tail = count % 8; + auto right_u8 = reinterpret_cast(right->data()); + auto result_u8 = reinterpret_cast(result->data()); + for (SizeT i = 0; i < count_bytes; ++i) { + Operator::template Execute(left_u8, right_u8[i], result_u8[i], result_null.get(), 0, state_ptr); + } + if (count_tail > 0) { + u8 &tail_u8 = result_u8[count_bytes]; + u8 ans; + Operator::template Execute(left_u8, right_u8[count_bytes], ans, result_null.get(), 0, state_ptr); + u8 keep_mask = u8(0xff) << count_tail; + tail_u8 = (tail_u8 & keep_mask) | (ans & ~keep_mask); + } + } else { + AllBooleanExecuteWithNull(left_u8, right, result, count, state_ptr); + } + result->Finalize(count); + } else if (left_vector_type == ColumnVectorType::kCompactBit && right_vector_type == ColumnVectorType::kConstant) { + bool right_value = right->buffer_->GetCompactBit(0); + u8 right_u8 = right_value ? 
u8(0xff) : u8(0x00); + if (nullable && !(right_null->IsAllTrue())) { + result_null->SetAllFalse(); + } else if (!nullable || (left_null->IsAllTrue() && right_null->IsAllTrue())) { + result_null->SetAllTrue(); + SizeT count_bytes = count / 8; + SizeT count_tail = count % 8; + auto left_u8 = reinterpret_cast(left->data()); + auto result_u8 = reinterpret_cast(result->data()); + for (SizeT i = 0; i < count_bytes; ++i) { + Operator::template Execute(left_u8[i], right_u8, result_u8[i], result_null.get(), 0, state_ptr); + } + if (count_tail > 0) { + u8 &tail_u8 = result_u8[count_bytes]; + u8 ans; + Operator::template Execute(left_u8[count_bytes], right_u8, ans, result_null.get(), 0, state_ptr); + u8 keep_mask = u8(0xff) << count_tail; + tail_u8 = (tail_u8 & keep_mask) | (ans & ~keep_mask); + } + } else { + AllBooleanExecuteWithNull(left, right_u8, result, count, state_ptr); + } + result->Finalize(count); + } else { + Error("Wrong boolean operation."); + } + } + + template + static inline void ResultBooleanExecuteWithNull(const SharedPtr &left, + const SharedPtr &right, + SharedPtr &result, + SizeT count, + void *state_ptr) { + const SharedPtr &left_null = left->nulls_ptr_; + const SharedPtr &right_null = right->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + + if (left_null->IsAllTrue()) { + result_null->DeepCopy(*right_null); + } else { + result_null->DeepCopy(*left_null); + if (!(right_null->IsAllTrue())) { + result_null->Merge(*right_null); + } + } + + const u64 *result_null_data = result_null->GetData(); + SizeT unit_count = BitmaskBuffer::UnitCount(count); + auto left_ptr = reinterpret_cast(left->data()); + auto right_ptr = reinterpret_cast(right->data()); + BooleanPointer result_ptr(result->buffer_.get()); + static_assert(BitmaskBuffer::UNIT_BITS % 8 == 0, "static_assert: BitmaskBuffer::UNIT_BITS % 8 == 0"); + for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) { + 
end_index = Min(end_index, count); + if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { + // all data of 64 rows are not null + for (SizeT b = start_index; b < end_index; ++b) { + bool answer; + Operator::template Execute(left_ptr[b], right_ptr[b], answer, result_null.get(), 0, state_ptr); + result_ptr[b] = answer; + } + start_index = end_index; + } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { + // all data of 64 rows are null + start_index = end_index; + } else { + for (bool answer; start_index < end_index; ++start_index) { + if (result_null->IsTrue(start_index)) { + // This row isn't null + Operator::template Execute(left_ptr[start_index], right_ptr[start_index], answer, result_null.get(), start_index, state_ptr); + result_ptr[start_index] = answer; + } + } + } + } + } + + template + static inline void ResultBooleanExecuteWithNull(const LeftType left_constant, + const SharedPtr &right, + SharedPtr &result, + SizeT count, + void *state_ptr) { + const SharedPtr &right_null = right->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + + result_null->DeepCopy(*right_null); + + const u64 *result_null_data = result_null->GetData(); + SizeT unit_count = BitmaskBuffer::UnitCount(count); + auto right_ptr = reinterpret_cast(right->data()); + BooleanPointer result_ptr(result->buffer_.get()); + static_assert(BitmaskBuffer::UNIT_BITS % 8 == 0, "static_assert: BitmaskBuffer::UNIT_BITS % 8 == 0"); + for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) { + end_index = Min(end_index, count); + if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { + // all data of 64 rows are not null + for (SizeT b = start_index; b < end_index; ++b) { + bool answer; + Operator::template Execute(left_constant, right_ptr[b], answer, result_null.get(), 0, state_ptr); + result_ptr[b] = answer; + } + start_index = end_index; + } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { + // all 
data of 64 rows are null + start_index = end_index; + } else { + for (bool answer; start_index < end_index; ++start_index) { + if (result_null->IsTrue(start_index)) { + // This row isn't null + Operator::template Execute(left_constant, right_ptr[start_index], answer, result_null.get(), start_index, state_ptr); + result_ptr[start_index] = answer; + } + } + } + } + } + + template + static inline void ResultBooleanExecuteWithNull(const SharedPtr &left, + const RightType right_constant, + SharedPtr &result, + SizeT count, + void *state_ptr) { + const SharedPtr &left_null = left->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + + result_null->DeepCopy(*left_null); + + const u64 *result_null_data = result_null->GetData(); + SizeT unit_count = BitmaskBuffer::UnitCount(count); + auto left_ptr = reinterpret_cast(left->data()); + BooleanPointer result_ptr(result->buffer_.get()); + static_assert(BitmaskBuffer::UNIT_BITS % 8 == 0, "static_assert: BitmaskBuffer::UNIT_BITS % 8 == 0"); + for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) { + end_index = Min(end_index, count); + if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { + // all data of 64 rows are not null + for (SizeT b = start_index; b < end_index; ++b) { + bool answer; + Operator::template Execute(left_ptr[b], right_constant, answer, result_null.get(), 0, state_ptr); + result_ptr[b] = answer; + } + start_index = end_index; + } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { + // all data of 64 rows are null + start_index = end_index; + } else { + for (bool answer; start_index < end_index; ++start_index) { + if (result_null->IsTrue(start_index)) { + // This row isn't null + Operator::template Execute(left_ptr[start_index], right_constant, answer, result_null.get(), start_index, state_ptr); + result_ptr[start_index] = answer; + } + } + } + } + } + + template + static void inline ExecuteBooleanResultBinary(const 
SharedPtr &left, + const SharedPtr &right, + SharedPtr &result, + SizeT count, + void *state_ptr, + bool nullable) { + static_assert(!std::is_same_v, BooleanT> && !std::is_same_v, BooleanT>, + "static_assert: ExecuteBooleanResultBinary should not be called with BooleanT input."); + auto left_vector_type = left->vector_type(); + auto right_vector_type = right->vector_type(); + auto check_vector_type_valid = [](ColumnVectorType vector_type) { + // only support kFlat and kConstant + return vector_type == ColumnVectorType::kFlat || vector_type == ColumnVectorType::kConstant; + }; + if (!check_vector_type_valid(left_vector_type) || !check_vector_type_valid(right_vector_type)) { + Error("ExecuteBooleanResultBinary: Invalid input type."); + } + const SharedPtr &left_null = left->nulls_ptr_; + const SharedPtr &right_null = right->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + if (left_vector_type == ColumnVectorType::kConstant && right_vector_type == ColumnVectorType::kConstant) { + if (!nullable || (left_null->IsAllTrue() && right_null->IsAllTrue())) { + bool answer; + Operator::template Execute(*(reinterpret_cast(left->data())), + *(reinterpret_cast(right->data())), + answer, + result_null.get(), + 0, + state_ptr); + result->buffer_->SetCompactBit(0, answer); + result_null->SetAllTrue(); + } else { + result_null->SetAllFalse(); + } + result->Finalize(1); + } else if (left_vector_type == ColumnVectorType::kFlat && right_vector_type == ColumnVectorType::kFlat) { + if (!nullable || (left_null->IsAllTrue() && right_null->IsAllTrue())) { + result_null->SetAllTrue(); + auto left_ptr = reinterpret_cast(left->data()); + auto right_ptr = reinterpret_cast(right->data()); + BooleanPointer result_ptr(result->buffer_.get()); + for (SizeT i = 0; i < count; ++i) { + bool answer; + Operator::template Execute(left_ptr[i], right_ptr[i], answer, result_null.get(), 0, state_ptr); + result_ptr[i] = answer; + } + } else { + ResultBooleanExecuteWithNull(left, right, result, 
count, state_ptr); + } + result->Finalize(count); + } else if (left_vector_type == ColumnVectorType::kConstant && right_vector_type == ColumnVectorType::kFlat) { + auto left_c = *(reinterpret_cast(left->data())); + if (nullable && !(left_null->IsAllTrue())) { + result_null->SetAllFalse(); + } else if (!nullable || (left_null->IsAllTrue() && right_null->IsAllTrue())) { + result_null->SetAllTrue(); + auto right_ptr = reinterpret_cast(right->data()); + BooleanPointer result_ptr(result->buffer_.get()); + for (SizeT i = 0; i < count; ++i) { + bool answer; + Operator::template Execute(left_c, right_ptr[i], answer, result_null.get(), 0, state_ptr); + result_ptr[i] = answer; + } + } else { + ResultBooleanExecuteWithNull(left_c, right, result, count, state_ptr); + } + result->Finalize(count); + } else if (left_vector_type == ColumnVectorType::kFlat && right_vector_type == ColumnVectorType::kConstant) { + auto right_c = *(reinterpret_cast(right->data())); + if (nullable && !(right_null->IsAllTrue())) { + result_null->SetAllFalse(); + } else if (!nullable || (left_null->IsAllTrue() && right_null->IsAllTrue())) { + result_null->SetAllTrue(); + auto left_ptr = reinterpret_cast(left->data()); + BooleanPointer result_ptr(result->buffer_.get()); + for (SizeT i = 0; i < count; ++i) { + bool answer; + Operator::template Execute(left_ptr[i], right_c, answer, result_null.get(), 0, state_ptr); + result_ptr[i] = answer; + } + } else { + ResultBooleanExecuteWithNull(left, right_c, result, count, state_ptr); + } + result->Finalize(count); + } + } }; } // namespace infinity diff --git a/src/storage/column_vector/operator/unary_operator.cppm b/src/storage/column_vector/operator/unary_operator.cppm index 72b7c3dbfa..001b3cd6d9 100644 --- a/src/storage/column_vector/operator/unary_operator.cppm +++ b/src/storage/column_vector/operator/unary_operator.cppm @@ -14,9 +14,10 @@ module; +#include import stl; import column_vector; - +import parser; import infinity_exception; import bitmask; import 
bitmask_buffer; @@ -39,10 +40,29 @@ public: case ColumnVectorType::kInvalid: { Error("Invalid column vector type."); } + case ColumnVectorType::kCompactBit: { + if (result->vector_type() != ColumnVectorType::kCompactBit) { + Error("Target vector type isn't kCompactBit."); + } + if constexpr (!std::is_same_v, BooleanT> || !std::is_same_v, BooleanT>) { + Error("kCompactBit should match with BooleanT."); + } + if (nullable && !(input_null->IsAllTrue())) { + ExecuteBooleanWithNull(input, result, count, state_ptr); + } else { + ExecuteBoolean(input, result, count, state_ptr); + } + // Result tail_index need to update. + result->Finalize(count); + return; + } case ColumnVectorType::kFlat: { if (result->vector_type() != ColumnVectorType::kFlat) { Error("Target vector type isn't flat."); } + if constexpr (std::is_same_v, BooleanT> || std::is_same_v, BooleanT>) { + Error("BooleanT type should not be kFlat."); + } if (nullable) { ExecuteFlatWithNull(input_ptr, input_null, result_ptr, result_null, count, state_ptr); @@ -57,15 +77,21 @@ public: if (count != 1) { Error("Attempting to execute more than one row of the constant column vector."); } - if (nullable) { - if (input->nulls_ptr_->IsAllTrue()) { - result_null->SetAllTrue(); - Operator::template Execute(input_ptr[0], result_ptr[0], result_null.get(), 0, state_ptr); + if (nullable && !(input_null->IsAllTrue())) { + result_null->SetFalse(0); + } else { + result_null->SetAllTrue(); + if constexpr (std::is_same_v, BooleanT> || std::is_same_v, BooleanT>) { + if constexpr (!std::is_same_v, BooleanT> || + !std::is_same_v, BooleanT>) { + Error("Unary operator input and output type error."); + } + BooleanT result_value; + Operator::template Execute(input->buffer_->GetCompactBit(0), result_value, result_null.get(), 0, state_ptr); + result->buffer_->SetCompactBit(0, result_value); } else { - result_null->SetFalse(0); + Operator::template Execute(input_ptr[0], result_ptr[0], result_null.get(), 0, state_ptr); } - } else { - 
Operator::template Execute(input_ptr[0], result_ptr[0], result_null.get(), 0, state_ptr); } result->Finalize(1); return; @@ -151,6 +177,66 @@ private: Operator::template Execute(input_ptr[i], result_ptr[i], result_null.get(), i, state_ptr); } } + + template + static void inline ExecuteBoolean(const SharedPtr &input, SharedPtr &result, SizeT count, void *state_ptr) { + SharedPtr &result_null = result->nulls_ptr_; + result_null->SetAllTrue(); + SizeT count_bytes = count / 8; + SizeT count_tail = count % 8; + auto input_u8 = reinterpret_cast(input->data()); + auto result_u8 = reinterpret_cast(result->data()); + for (SizeT i = 0; i < count_bytes; ++i) { + Operator::template Execute(input_u8[i], result_u8[i], result_null.get(), 0, state_ptr); + } + if (count_tail > 0) { + u8 &tail_u8 = result_u8[count_bytes]; + u8 ans; + Operator::template Execute(input_u8[count_bytes], ans, result_null.get(), 0, state_ptr); + u8 keep_mask = u8(0xff) << count_tail; + tail_u8 = (tail_u8 & keep_mask) | (ans & ~keep_mask); + } + } + + template + static void inline ExecuteBooleanWithNull(const SharedPtr &input, SharedPtr &result, SizeT count, void *state_ptr) { + const SharedPtr &input_null = input->nulls_ptr_; + SharedPtr &result_null = result->nulls_ptr_; + result_null->DeepCopy(*input_null); + const u64 *result_null_data = result_null->GetData(); + SizeT unit_count = BitmaskBuffer::UnitCount(count); + auto input_u8 = reinterpret_cast(input->data()); + auto result_u8 = reinterpret_cast(result->data()); + static_assert(BitmaskBuffer::UNIT_BITS % 8 == 0, "static_assert: BitmaskBuffer::UNIT_BITS % 8 == 0"); + for (SizeT i = 0, start_index = 0, end_index = BitmaskBuffer::UNIT_BITS; i < unit_count; ++i, end_index += BitmaskBuffer::UNIT_BITS) { + end_index = Min(end_index, count); + if (result_null_data[i] == BitmaskBuffer::UNIT_MAX) { + // all data of 64 rows are not null + const SizeT e = end_index / 8, tail = end_index % 8; + for (SizeT b = start_index / 8; b < e; ++b) { + Operator::template 
Execute(input_u8[b], result_u8[b], result_null.get(), 0, state_ptr); + } + if (tail) { + u8 tail_result; + Operator::template Execute(input_u8[e], tail_result, result_null.get(), 0, state_ptr); + const u8 mask_keep = u8(0xff) << tail; + result_u8[e] = (result_u8[e] & mask_keep) | (tail_result & ~mask_keep); + } + start_index = end_index; + } else if (result_null_data[i] == BitmaskBuffer::UNIT_MIN) { + // all data of 64 rows are null + start_index = end_index; + } else { + for (BooleanT answer; start_index < end_index; ++start_index) { + if (result_null->IsTrue(start_index)) { + // This row isn't null + Operator::template Execute(input->buffer_->GetCompactBit(start_index), answer, result_null.get(), start_index, state_ptr); + result->buffer_->SetCompactBit(start_index, answer); + } + } + } + } + } }; } // namespace infinity diff --git a/src/storage/column_vector/vector_buffer.cpp b/src/storage/column_vector/vector_buffer.cpp index 0d58606a27..774d25b66a 100644 --- a/src/storage/column_vector/vector_buffer.cpp +++ b/src/storage/column_vector/vector_buffer.cpp @@ -26,10 +26,33 @@ namespace infinity { SharedPtr VectorBuffer::Make(SizeT data_type_size, SizeT capacity, VectorBufferType buffer_type) { SharedPtr buffer_ptr = MakeShared(); buffer_ptr->buffer_type_ = buffer_type; - buffer_ptr->Initialize(data_type_size, capacity); + switch (buffer_type) { + case VectorBufferType::kCompactBit: { + buffer_ptr->InitializeCompactBit(capacity); + break; + } + default: { + buffer_ptr->Initialize(data_type_size, capacity); + break; + } + } return buffer_ptr; } +void VectorBuffer::InitializeCompactBit(SizeT capacity) { + if (initialized_) { + Error("Vector buffer is already initialized."); + } + SizeT data_size = (capacity + 7) / 8; + if (data_size > 0) { + // data_ = MakeUnique(data_size); + data_ = new char[data_size]; + } + initialized_ = true; + data_size_ = data_size; + capacity_ = capacity; +} + void VectorBuffer::Initialize(SizeT type_size, SizeT capacity) { if (initialized_) 
{ Error("Vector buffer is already initialized."); @@ -61,4 +84,125 @@ void VectorBuffer::Copy(ptr_t input, SizeT size) { Memcpy(data_, input, size); } +bool VectorBuffer::RawPointerGetCompactBit(const u8 *src_ptr_u8, SizeT idx) { + SizeT byte_idx = idx / 8; + SizeT bit_idx = idx % 8; + return (src_ptr_u8[byte_idx] & (u8(1) << bit_idx)) != 0; +} + +bool VectorBuffer::GetCompactBit(SizeT idx) const { + if (idx >= capacity_) { + Error("Index out of range."); + } + return VectorBuffer::RawPointerGetCompactBit(reinterpret_cast(data_), idx); +} + +void VectorBuffer::RawPointerSetCompactBit(u8 *dst_ptr_u8, SizeT idx, bool val) { + SizeT byte_idx = idx / 8; + SizeT bit_idx = idx % 8; + if (val) { + dst_ptr_u8[byte_idx] |= (u8(1) << bit_idx); + } else { + dst_ptr_u8[byte_idx] &= ~(u8(1) << bit_idx); + } +} + +void VectorBuffer::SetCompactBit(SizeT idx, bool val) { + if (idx >= capacity_) { + Error("Index out of range."); + } + VectorBuffer::RawPointerSetCompactBit(reinterpret_cast(data_), idx, val); +} + +bool VectorBuffer::CompactBitIsSame(const SharedPtr &lhs, SizeT lhs_cnt, const SharedPtr &rhs, SizeT rhs_cnt) { + if (lhs_cnt != rhs_cnt) { + return false; + } + if (lhs_cnt == 0) { + return true; + } + SizeT full_byte_cnt = lhs_cnt / 8; + SizeT last_byte_cnt = lhs_cnt % 8; + auto lhs_data = reinterpret_cast(lhs->data_); + auto rhs_data = reinterpret_cast(rhs->data_); + for (SizeT idx = 0; idx < full_byte_cnt; ++idx) { + if (lhs_data[idx] != rhs_data[idx]) { + return false; + } + } + if (last_byte_cnt > 0) { + // las_byte_cnt is in [1, 7] + u8 mask = (1 << last_byte_cnt) - 1; + if ((lhs_data[full_byte_cnt] & mask) != (rhs_data[full_byte_cnt] & mask)) { + return false; + } + } + return true; +} + +// TODO: Verify this function by unittests. 
+void VectorBuffer::CopyCompactBits(u8 *dst_ptr_u8, const u8 *src_ptr_u8, SizeT dest_start_idx, SizeT source_start_idx, SizeT count) { + if ((source_start_idx % 8) == (dest_start_idx % 8)) { + SizeT start_offset = source_start_idx % 8; + SizeT start_todo = 8 - start_offset; + SizeT extra; + if (start_offset == 0) { + // Copy by byte when the start index is aligned. + Memcpy(dst_ptr_u8 + dest_start_idx / 8, src_ptr_u8 + source_start_idx / 8, count / 8); + // Copy the last bits. + extra = count % 8; + } else { + u8 src_1 = src_ptr_u8[source_start_idx / 8]; + u8 &dst_1 = dst_ptr_u8[dest_start_idx / 8]; + if (count <= start_todo) { + u8 source_mask = (u8(0xff) << start_offset) & (~(u8(0xff) << (start_offset + count))); + dst_1 = (dst_1 & (~source_mask)) | (src_1 & source_mask); + return; + } else { + u8 source_mask = u8(0xff) << start_offset; + dst_1 = (dst_1 & (~source_mask)) | (src_1 & source_mask); + Memcpy(dst_ptr_u8 + (dest_start_idx / 8) + 1, src_ptr_u8 + (source_start_idx / 8) + 1, (count - start_todo) / 8); + extra = (count - start_todo) % 8; + } + } + if (extra != 0) { + u8 src = src_ptr_u8[(source_start_idx + count) / 8]; + u8 &dst = dst_ptr_u8[(dest_start_idx + count) / 8]; + u8 extra_mask = ~(u8(0xff) << extra); + dst = (dst & (~extra_mask)) | (src & extra_mask); + } + } else { + if (SizeT start_offset = dest_start_idx % 8; start_offset != 0) { + // head part + SizeT start_todo = Min(count, 8 - start_offset); + for (SizeT i = 0; i < start_todo; ++i) { + RawPointerSetCompactBit(dst_ptr_u8, dest_start_idx++, RawPointerGetCompactBit(src_ptr_u8, source_start_idx++)); + } + count -= start_todo; + } + SizeT byte_count = count / 8; + SizeT dst_byte_start_idx = dest_start_idx / 8; + SizeT source_byte_start_idx = source_start_idx / 8; + SizeT source_bit_start_idx = source_start_idx % 8; + u8 mask_1 = u8(0xff) << source_bit_start_idx; + u8 mask_2 = ~mask_1; + for (SizeT i = 0; i < byte_count; ++i) { + // dst should get 2 parts from 2 different src bytes. 
+ u8 src_1 = src_ptr_u8[source_byte_start_idx + i]; + u8 src_2 = src_ptr_u8[source_byte_start_idx + i + 1]; + dst_ptr_u8[dst_byte_start_idx + i] = (src_1 & mask_1) | (src_2 & mask_2); + } + // tail part + if (SizeT tail_count = count % 8; tail_count) { + u8 src_1 = src_ptr_u8[source_byte_start_idx + byte_count]; + u8 src_2 = src_ptr_u8[source_byte_start_idx + byte_count + 1]; + u8 tail_full = (src_1 & mask_1) | (src_2 & mask_2); + u8 tail_keep = u8(0xff) << tail_count; + u8 tail_set = ~tail_keep; + u8 &dst = dst_ptr_u8[dst_byte_start_idx + byte_count]; + dst = (dst & tail_keep) | (tail_full & tail_set); + } + } +} + } // namespace infinity diff --git a/src/storage/column_vector/vector_buffer.cppm b/src/storage/column_vector/vector_buffer.cppm index 331e20b249..aa4c7952fa 100644 --- a/src/storage/column_vector/vector_buffer.cppm +++ b/src/storage/column_vector/vector_buffer.cppm @@ -23,7 +23,7 @@ export module vector_buffer; namespace infinity { -export enum class VectorBufferType { kInvalid, kStandard, kHeap }; +export enum class VectorBufferType { kInvalid, kStandard, kHeap, kCompactBit }; export class VectorBuffer { public: @@ -41,6 +41,8 @@ public: void Initialize(SizeT type_size, SizeT capacity); + void InitializeCompactBit(SizeT capacity); + void ResetToInit(); void Copy(ptr_t input, SizeT size); @@ -50,6 +52,18 @@ public: return data_; } + [[nodiscard]] bool GetCompactBit(SizeT idx) const; + + void SetCompactBit(SizeT idx, bool val); + + [[nodiscard]] static bool RawPointerGetCompactBit(const u8 *src_ptr_u8, SizeT idx); + + static void RawPointerSetCompactBit(u8 *dst_ptr_u8, SizeT idx, bool val); + + static bool CompactBitIsSame(const SharedPtr &lhs, SizeT lhs_cnt, const SharedPtr &rhs, SizeT rhs_cnt); + + static void CopyCompactBits(u8 *dst, const u8 *src, SizeT dst_start_id, SizeT src_start_id, SizeT count); + public: bool initialized_{false}; // UniquePtr data_{nullptr}; diff --git a/src/storage/data_block.cpp b/src/storage/data_block.cpp index 
df1f66d570..75a94f85d2 100644 --- a/src/storage/data_block.cpp +++ b/src/storage/data_block.cpp @@ -105,7 +105,8 @@ void DataBlock::Init(const Vector> &types, SizeT capacity) { column_vectors.reserve(column_count_); for (SizeT idx = 0; idx < column_count_; ++idx) { column_vectors.emplace_back(MakeShared(types[idx])); - column_vectors[idx]->Initialize(ColumnVectorType::kFlat, capacity); + auto column_vector_type = (types[idx]->type() == LogicalType::kBoolean) ? ColumnVectorType::kCompactBit : ColumnVectorType::kFlat; + column_vectors[idx]->Initialize(column_vector_type, capacity); } capacity_ = capacity; initialized = true; @@ -142,8 +143,9 @@ void DataBlock::Reset() { // No data is appended into any column. for (SizeT i = 0; i < column_count_; ++i) { + ColumnVectorType old_vector_type = column_vectors[i]->vector_type(); column_vectors[i]->Reset(); - column_vectors[i]->Initialize(); + column_vectors[i]->Initialize(old_vector_type); } row_count_ = 0; @@ -156,8 +158,9 @@ void DataBlock::Reset(SizeT capacity) { // Reset each column into just initialized status. // No data is appended into any column. 
for (SizeT i = 0; i < column_count_; ++i) { + ColumnVectorType old_vector_type = column_vectors[i]->vector_type(); column_vectors[i]->Reset(); - column_vectors[i]->Initialize(ColumnVectorType::kFlat, capacity); + column_vectors[i]->Initialize(old_vector_type, capacity); } row_count_ = 0; capacity_ = capacity; diff --git a/src/storage/meta/entry/block_column_entry.cpp b/src/storage/meta/entry/block_column_entry.cpp index a4f6ea02f1..e2b7789434 100644 --- a/src/storage/meta/entry/block_column_entry.cpp +++ b/src/storage/meta/entry/block_column_entry.cpp @@ -87,6 +87,16 @@ void BlockColumnEntry::Append(BlockColumnEntry *column_entry, if (column_entry->buffer_ == nullptr) { Error("Not initialize buffer handle"); } + if (column_entry->column_type_->type() == kBoolean) { + BufferHandle buffer_handle = column_entry->buffer_->Load(); + auto dst_p = reinterpret_cast(buffer_handle.GetDataMut()); + VectorBuffer::CopyCompactBits(dst_p, + reinterpret_cast(input_column_vector->data()), + column_entry_offset, + input_column_vector_offset, + append_rows); + return; + } SizeT data_type_size = input_column_vector->data_type_size_; ptr_t src_ptr = input_column_vector->data() + input_column_vector_offset * data_type_size; @@ -105,7 +115,15 @@ void BlockColumnEntry::AppendRaw(BlockColumnEntry *block_column_entry, // ptr_t dst_ptr = column_data_entry->buffer_handle_->LoadData() + dst_offset; DataType *column_type = block_column_entry->column_type_.get(); switch (column_type->type()) { - case kBoolean: + case kBoolean: { + auto src_boolean = reinterpret_cast(src_p); + SizeT data_count = data_size / sizeof(BooleanT); + auto dst_ptr_u8 = reinterpret_cast(buffer_handle.GetDataMut()); + for (SizeT i = 0; i < data_count; ++i) { + VectorBuffer::RawPointerSetCompactBit(dst_ptr_u8, dst_offset + i, src_boolean[i]); + } + break; + } case kDate: case kTime: case kDateTime: diff --git a/src/unit_test/storage/column_vector/column_vector_bool.cpp 
b/src/unit_test/storage/column_vector/column_vector_bool.cpp index ea0492c4b2..037c63ac49 100644 --- a/src/unit_test/storage/column_vector/column_vector_bool.cpp +++ b/src/unit_test/storage/column_vector/column_vector_bool.cpp @@ -37,7 +37,7 @@ TEST_F(ColumnVectorBoolTest, flat_boolean) { ColumnVector column_vector(data_type); column_vector.Initialize(); - EXPECT_THROW(column_vector.SetVectorType(ColumnVectorType::kFlat), TypeException); + EXPECT_THROW(column_vector.SetVectorType(ColumnVectorType::kCompactBit), TypeException); EXPECT_EQ(column_vector.capacity(), DEFAULT_VECTOR_SIZE); EXPECT_EQ(column_vector.Size(), 0); @@ -45,9 +45,9 @@ TEST_F(ColumnVectorBoolTest, flat_boolean) { EXPECT_THROW(column_vector.GetValue(0), TypeException); EXPECT_EQ(column_vector.data_type_size_, 1); EXPECT_NE(column_vector.data(), nullptr); - EXPECT_EQ(column_vector.vector_type(), ColumnVectorType::kFlat); + EXPECT_EQ(column_vector.vector_type(), ColumnVectorType::kCompactBit); EXPECT_EQ(column_vector.data_type(), data_type); - EXPECT_EQ(column_vector.buffer_->buffer_type_, VectorBufferType::kStandard); + EXPECT_EQ(column_vector.buffer_->buffer_type_, VectorBufferType::kCompactBit); EXPECT_NE(column_vector.buffer_, nullptr); EXPECT_NE(column_vector.nulls_ptr_, nullptr); @@ -102,7 +102,7 @@ TEST_F(ColumnVectorBoolTest, flat_boolean) { // EXPECT_THROW(column_vector.Initialize(), TypeException); // column_vector.SetVectorType(ColumnVectorType::kFlat); column_vector.Initialize(); - EXPECT_THROW(column_vector.SetVectorType(ColumnVectorType::kFlat), TypeException); + EXPECT_THROW(column_vector.SetVectorType(ColumnVectorType::kCompactBit), TypeException); EXPECT_EQ(column_vector.capacity(), DEFAULT_VECTOR_SIZE); EXPECT_EQ(column_vector.Size(), 0); @@ -110,9 +110,9 @@ TEST_F(ColumnVectorBoolTest, flat_boolean) { EXPECT_THROW(column_vector.GetValue(0), TypeException); EXPECT_EQ(column_vector.data_type_size_, 1); EXPECT_NE(column_vector.data(), nullptr); - EXPECT_EQ(column_vector.vector_type(), 
ColumnVectorType::kFlat); + EXPECT_EQ(column_vector.vector_type(), ColumnVectorType::kCompactBit); EXPECT_EQ(column_vector.data_type(), data_type); - EXPECT_EQ(column_vector.buffer_->buffer_type_, VectorBufferType::kStandard); + EXPECT_EQ(column_vector.buffer_->buffer_type_, VectorBufferType::kCompactBit); EXPECT_NE(column_vector.buffer_, nullptr); EXPECT_NE(column_vector.nulls_ptr_, nullptr); @@ -157,7 +157,7 @@ TEST_F(ColumnVectorBoolTest, contant_bool) { EXPECT_NE(column_vector.data(), nullptr); EXPECT_EQ(column_vector.vector_type(), ColumnVectorType::kConstant); EXPECT_EQ(column_vector.data_type(), data_type); - EXPECT_EQ(column_vector.buffer_->buffer_type_, VectorBufferType::kStandard); + EXPECT_EQ(column_vector.buffer_->buffer_type_, VectorBufferType::kCompactBit); EXPECT_NE(column_vector.buffer_, nullptr); EXPECT_NE(column_vector.nulls_ptr_, nullptr); @@ -199,7 +199,7 @@ TEST_F(ColumnVectorBoolTest, contant_bool) { EXPECT_NE(column_vector.data(), nullptr); EXPECT_EQ(column_vector.vector_type(), ColumnVectorType::kConstant); EXPECT_EQ(column_vector.data_type(), data_type); - EXPECT_EQ(column_vector.buffer_->buffer_type_, VectorBufferType::kStandard); + EXPECT_EQ(column_vector.buffer_->buffer_type_, VectorBufferType::kCompactBit); EXPECT_NE(column_vector.buffer_, nullptr); EXPECT_NE(column_vector.nulls_ptr_, nullptr); diff --git a/src/unit_test/storage/definition/table.cpp b/src/unit_test/storage/definition/table.cpp index 5bd0e91f8f..a85bcab748 100644 --- a/src/unit_test/storage/definition/table.cpp +++ b/src/unit_test/storage/definition/table.cpp @@ -81,7 +81,7 @@ TEST_F(TableTest, test1) { SharedPtr column1 = order_by_table->GetDataBlockById(block_id)->column_vectors[0]; EXPECT_EQ(column1->data_type()->type(), LogicalType::kBoolean); for (SizeT row_id = 0; row_id < row_count; ++row_id) { - EXPECT_EQ(((BooleanT *)column1->data())[row_id], row_id % 2 == 0); + EXPECT_EQ(column1->buffer_->GetCompactBit(row_id), row_id % 2 == 0); } // Check Column2 data diff 
--git a/test/sql/dql/select.slt b/test/sql/dql/select.slt index 77d12d5799..3f5748f137 100644 --- a/test/sql/dql/select.slt +++ b/test/sql/dql/select.slt @@ -23,107 +23,3 @@ DROP TABLE select1; statement ok DROP TABLE select2; - -statement ok -DROP TABLE IF EXISTS date1selectwhere; - -statement ok -CREATE TABLE date1selectwhere (i INTEGER, d1 DATE, d2 DATE); - -statement ok -INSERT INTO date1selectwhere VALUES (1, DATE '1970-1-1', DATE '2970-1-1'); - -statement ok -INSERT INTO date1selectwhere VALUES (11, DATE '1870-11-1', DATE '2570-1-1'); - -statement ok -INSERT INTO date1selectwhere VALUES (111, DATE '6570-11-1', DATE '5570-6-21'); - -query I -SELECT * FROM date1selectwhere; ----- -1 1970-01-01 2970-01-01 -11 1870-11-01 2570-01-01 -111 6570-11-01 5570-06-21 - -query II -SELECT * FROM date1selectwhere WHERE d1 < d2; ----- -1 1970-01-01 2970-01-01 -11 1870-11-01 2570-01-01 - -query III -SELECT * FROM date1selectwhere WHERE d2 >= DATE '2970-1-1'; ----- -1 1970-01-01 2970-01-01 -111 6570-11-01 5570-06-21 - -query IV -SELECT * FROM date1selectwhere WHERE d1 = DATE '1970-1-1' + INTERVAL -100 YEAR + INTERVAL 10 MONTH; ----- -11 1870-11-01 2570-01-01 - -query V -SELECT * FROM date1selectwhere WHERE i <= 22 - 11 AND d1 < DATE '1900-1-1'; ----- -11 1870-11-01 2570-01-01 - -statement ok -INSERT INTO date1selectwhere VALUES (2222, DATE '2022-1-31', DATE '2023-1-31'); - -# add 1 month, date should remain valid -query VI -SELECT i, d1 + INTERVAL 1 MONTH, d2 + INTERVAL 1 MONTH FROM date1selectwhere WHERE i = 2222; ----- -2222 2022-02-28 2023-02-28 - -statement ok -DROP TABLE date1selectwhere; - -statement ok -DROP TABLE IF EXISTS datetimeselectwhere; - -statement ok -CREATE TABLE datetimeselectwhere (t TIME, dt DATETIME, ts TIMESTAMP); - -statement ok -INSERT INTO datetimeselectwhere VALUES (TIME '0:0:0', DATETIME '1970-1-1 0:0:0', TIMESTAMP '2970-1-31 0:0:0'); - -statement ok -INSERT INTO datetimeselectwhere VALUES (TIME '23:59:59', DATETIME '2570-1-31 23:59:59', TIMESTAMP 
'1870-11-1 0:0:0'); - -statement ok -INSERT INTO datetimeselectwhere VALUES (TIME '12:0:0', DATETIME '5570-8-31 0:0:0', TIMESTAMP '6570-12-31 12:0:0'); - -query I -SELECT * FROM datetimeselectwhere; ----- -00:00:00 1970-01-01 00:00:00 2970-01-31 00:00:00 -23:59:59 2570-01-31 23:59:59 1870-11-01 00:00:00 -12:00:00 5570-08-31 00:00:00 6570-12-31 12:00:00 - -query II -SELECT dt + INTERVAL 10 SECONDS FROM datetimeselectwhere; ----- -1970-01-01 00:00:10 -2570-02-01 00:00:09 -5570-08-31 00:00:10 - -# add 1 month, date should remain valid -query III -SELECT ts + INTERVAL 1 MONTH FROM datetimeselectwhere WHERE t < TIME '12:0:0'; ----- -2970-02-28 00:00:00 - -query IV -SELECT EXTRACT('hour' FROM t + (INTERVAL 3 HOURS)) FROM datetimeselectwhere WHERE ts - INTERVAL 12 HOURS = TIMESTAMP '6570-12-31 0:0:0'; ----- -15 - -query V -SELECT EXTRACT('year' FROM ts + (INTERVAL 1 DAY)) FROM datetimeselectwhere WHERE EXTRACT('hour' FROM t) = 12; ----- -6571 - -statement ok -DROP TABLE datetimeselectwhere; \ No newline at end of file diff --git a/test/sql/dql/type/boolean.slt b/test/sql/dql/type/boolean.slt new file mode 100644 index 0000000000..4ec431e308 --- /dev/null +++ b/test/sql/dql/type/boolean.slt @@ -0,0 +1,130 @@ +statement ok +DROP TABLE IF EXISTS boolean1; + +statement ok +CREATE TABLE boolean1 (i INTEGER, b1 BOOLEAN, b2 BOOLEAN); + +statement ok +INSERT INTO boolean1 VALUES (1, TRUE, FALSE), (2, FALSE, TRUE), (3, TRUE, TRUE), (4, FALSE, FALSE), (5, TRUE, FALSE), + (6, FALSE, TRUE), (7, TRUE, TRUE), (8, FALSE, FALSE), (9, TRUE, FALSE), (10, FALSE, TRUE); + +query I +SELECT * FROM boolean1; +---- +1 true false +2 false true +3 true true +4 false false +5 true false +6 false true +7 true true +8 false false +9 true false +10 false true + +query II +SELECT * FROM boolean1 WHERE b1; +---- +1 true false +3 true true +5 true false +7 true true +9 true false + +query III +SELECT * FROM boolean1 WHERE b1 and b2; +---- +3 true true +7 true true + +query IV +SELECT * FROM boolean1 
WHERE b1 and not b2; +---- +1 true false +5 true false +9 true false + +query V +SELECT * FROM boolean1 WHERE (i > 5) and (not b1); +---- +6 false true +8 false false +10 false true + +query VI +SELECT * FROM boolean1 WHERE (i > 5) and (not b1) and (not b2); +---- +8 false false + +query VII +SELECT * FROM boolean1 WHERE b1 = b2; +---- +3 true true +4 false false +7 true true +8 false false + +query VIII +SELECT * FROM boolean1 WHERE b1 <> b2; +---- +1 true false +2 false true +5 true false +6 false true +9 true false +10 false true + +statement ok +DROP TABLE boolean1; + +statement ok +DROP TABLE IF EXISTS boolean2; + +statement ok +CREATE TABLE boolean2 (i INTEGER, b1 BOOLEAN, b2 BOOLEAN); + +statement ok +INSERT INTO boolean2 VALUES (1, TRUE, FALSE), (2, FALSE, TRUE); + +query I +SELECT * FROM boolean2; +---- +1 true false +2 false true + +query II +SELECT * FROM boolean2 WHERE b1; +---- +1 true false + +query III +SELECT * FROM boolean2 WHERE b2; +---- +2 false true + +query IV +SELECT * FROM boolean2 WHERE b1 and not b2; +---- +1 true false + +query V +SELECT * FROM boolean2 WHERE (not b1) and b2; +---- +2 false true + +query VI +SELECT * FROM boolean2 WHERE (not b1) and (not b2); +---- + +query VII +SELECT * FROM boolean2 WHERE b1 = b2; +---- + +query VIII +SELECT * FROM boolean2 WHERE b1 <> b2; +---- +1 true false +2 false true + +statement ok +DROP TABLE boolean2; diff --git a/test/sql/dql/type/date_time_datetime_timestamp.slt b/test/sql/dql/type/date_time_datetime_timestamp.slt new file mode 100644 index 0000000000..eb5ac888d0 --- /dev/null +++ b/test/sql/dql/type/date_time_datetime_timestamp.slt @@ -0,0 +1,95 @@ +statement ok +DROP TABLE IF EXISTS date1selectwhere; + +statement ok +CREATE TABLE date1selectwhere (i INTEGER, d1 DATE, d2 DATE); + +statement ok +INSERT INTO date1selectwhere VALUES (1, DATE '1970-1-1', DATE '2970-1-1'), + (11, DATE '1870-11-1', DATE '2570-1-1'), + (111, DATE '6570-11-1', DATE '5570-6-21'); + +query I +SELECT * FROM 
date1selectwhere; +---- +1 1970-01-01 2970-01-01 +11 1870-11-01 2570-01-01 +111 6570-11-01 5570-06-21 + +query II +SELECT * FROM date1selectwhere WHERE d1 < d2; +---- +1 1970-01-01 2970-01-01 +11 1870-11-01 2570-01-01 + +query III +SELECT * FROM date1selectwhere WHERE d2 >= DATE '2970-1-1'; +---- +1 1970-01-01 2970-01-01 +111 6570-11-01 5570-06-21 + +query IV +SELECT * FROM date1selectwhere WHERE d1 = DATE '1970-1-1' + INTERVAL -100 YEAR + INTERVAL 10 MONTH; +---- +11 1870-11-01 2570-01-01 + +query V +SELECT * FROM date1selectwhere WHERE i <= 22 - 11 AND d1 < DATE '1900-1-1'; +---- +11 1870-11-01 2570-01-01 + +statement ok +INSERT INTO date1selectwhere VALUES (2222, DATE '2022-1-31', DATE '2023-1-31'); + +# add 1 month, date should remain valid +query VI +SELECT i, d1 + INTERVAL 1 MONTH, d2 + INTERVAL 1 MONTH FROM date1selectwhere WHERE i = 2222; +---- +2222 2022-02-28 2023-02-28 + +statement ok +DROP TABLE date1selectwhere; + +statement ok +DROP TABLE IF EXISTS datetimeselectwhere; + +statement ok +CREATE TABLE datetimeselectwhere (t TIME, dt DATETIME, ts TIMESTAMP); + +statement ok +INSERT INTO datetimeselectwhere VALUES (TIME '0:0:0', DATETIME '1970-1-1 0:0:0', TIMESTAMP '2970-1-31 0:0:0'), + (TIME '23:59:59', DATETIME '2570-1-31 23:59:59', TIMESTAMP '1870-11-1 0:0:0'), + (TIME '12:0:0', DATETIME '5570-8-31 0:0:0', TIMESTAMP '6570-12-31 12:0:0'); + +query I +SELECT * FROM datetimeselectwhere; +---- +00:00:00 1970-01-01 00:00:00 2970-01-31 00:00:00 +23:59:59 2570-01-31 23:59:59 1870-11-01 00:00:00 +12:00:00 5570-08-31 00:00:00 6570-12-31 12:00:00 + +query II +SELECT dt + INTERVAL 10 SECONDS FROM datetimeselectwhere; +---- +1970-01-01 00:00:10 +2570-02-01 00:00:09 +5570-08-31 00:00:10 + +# add 1 month, date should remain valid +query III +SELECT ts + INTERVAL 1 MONTH FROM datetimeselectwhere WHERE t < TIME '12:0:0'; +---- +2970-02-28 00:00:00 + +query IV +SELECT EXTRACT('hour' FROM t + (INTERVAL 3 HOURS)) FROM datetimeselectwhere WHERE ts - INTERVAL 12 HOURS = 
TIMESTAMP '6570-12-31 0:0:0'; +---- +15 + +query V +SELECT EXTRACT('year' FROM ts + (INTERVAL 1 DAY)) FROM datetimeselectwhere WHERE EXTRACT('hour' FROM t) = 12; +---- +6571 + +statement ok +DROP TABLE datetimeselectwhere;