From d9490e6d06a632f2f3b5cf4115899e27e0a0f5ac Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Mon, 16 Dec 2024 14:05:15 -0500 Subject: [PATCH 1/8] Expand zone map to work with null checks and logical conjunction --- src/common/expression_type.cpp | 4 ++ .../storage/predicate/column_predicate.h | 14 ++-- .../storage/predicate/constant_predicate.h | 4 +- .../storage/predicate/null_predicate.h | 34 +++++++++ .../storage/store/column_chunk_stats.h | 7 +- src/optimizer/filter_push_down_optimizer.cpp | 6 +- src/storage/predicate/CMakeLists.txt | 1 + src/storage/predicate/column_predicate.cpp | 72 +++++++++++++++++-- src/storage/predicate/constant_predicate.cpp | 3 +- src/storage/predicate/null_predicate.cpp | 15 ++++ src/storage/store/column_chunk_data.cpp | 14 ++-- src/storage/store/column_chunk_stats.cpp | 17 +++-- 12 files changed, 157 insertions(+), 34 deletions(-) create mode 100644 src/include/storage/predicate/null_predicate.h create mode 100644 src/storage/predicate/null_predicate.cpp diff --git a/src/common/expression_type.cpp b/src/common/expression_type.cpp index 9a8be5636ae..cc104e9adab 100644 --- a/src/common/expression_type.cpp +++ b/src/common/expression_type.cpp @@ -122,6 +122,10 @@ std::string ExpressionTypeUtil::toParsableString(ExpressionType type) { return "<"; case ExpressionType::LESS_THAN_EQUALS: return "<="; + case ExpressionType::IS_NULL: + return "IS NULL"; + case ExpressionType::IS_NOT_NULL: + return "IS NOT NULL"; default: throw RuntimeException(stringFormat( "ExpressionTypeUtil::toParsableString not implemented for {}", toString(type))); diff --git a/src/include/storage/predicate/column_predicate.h b/src/include/storage/predicate/column_predicate.h index 55cb4577e90..d0fb9a84c71 100644 --- a/src/include/storage/predicate/column_predicate.h +++ b/src/include/storage/predicate/column_predicate.h @@ -15,9 +15,10 @@ class KUZU_API ColumnPredicateSet { ColumnPredicateSet() = default; EXPLICIT_COPY_DEFAULT_MOVE(ColumnPredicateSet); - void addPredicate(std::unique_ptr predicate) { + void addColumnPredicate(std::unique_ptr predicate) { predicates.push_back(std::move(predicate)); } + void tryAddPredicate(const binder::Expression& column, const binder::Expression& predicate); bool isEmpty() const { return predicates.empty(); } common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const; @@ -34,13 +35,14 @@ class KUZU_API ColumnPredicateSet { class KUZU_API ColumnPredicate { public: - explicit ColumnPredicate(std::string columnName) : columnName{std::move(columnName)} {} + ColumnPredicate(std::string columnName, common::ExpressionType expressionType) + : columnName{std::move(columnName)}, expressionType(expressionType) {} virtual ~ColumnPredicate() = default; virtual common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const = 0; - virtual std::string toString() = 0; + virtual std::string toString(); virtual std::unique_ptr copy() const = 0; @@ -51,11 +53,7 @@ class KUZU_API ColumnPredicate { protected: std::string columnName; -}; - -struct ColumnPredicateUtil { - static std::unique_ptr tryConvert(const binder::Expression& column, - const binder::Expression& predicate); + common::ExpressionType expressionType; }; } // namespace storage diff --git a/src/include/storage/predicate/constant_predicate.h b/src/include/storage/predicate/constant_predicate.h index 425bacb72eb..09b29183edf 100644 --- a/src/include/storage/predicate/constant_predicate.h +++ b/src/include/storage/predicate/constant_predicate.h @@ -11,8 +11,7 @@ class ColumnConstantPredicate : public ColumnPredicate { public: ColumnConstantPredicate(std::string columnName, common::ExpressionType expressionType, common::Value value) - : ColumnPredicate{std::move(columnName)}, expressionType{expressionType}, - value{std::move(value)} {} + : ColumnPredicate{std::move(columnName), expressionType}, value{std::move(value)} {} common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const override; @@ -23,7 +22,6 @@ class ColumnConstantPredicate : public ColumnPredicate { } private: - common::ExpressionType expressionType; common::Value value; }; diff --git a/src/include/storage/predicate/null_predicate.h b/src/include/storage/predicate/null_predicate.h new file mode 100644 index 00000000000..c175b633c54 --- /dev/null +++ b/src/include/storage/predicate/null_predicate.h @@ -0,0 +1,34 @@ +#pragma once + +#include "column_predicate.h" +#include "common/enums/expression_type.h" + +namespace kuzu { +namespace storage { + +class ColumnIsNullPredicate : public ColumnPredicate { +public: + explicit ColumnIsNullPredicate(std::string columnName) + : ColumnPredicate{std::move(columnName), common::ExpressionType::IS_NULL} {} + + common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const override; + + std::unique_ptr copy() const override { + return std::make_unique(columnName); + } +}; + +class ColumnIsNotNullPredicate : public ColumnPredicate { +public: + explicit ColumnIsNotNullPredicate(std::string columnName) + : ColumnPredicate{std::move(columnName), common::ExpressionType::IS_NOT_NULL} {} + + common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const override; + + std::unique_ptr copy() const override { + return std::make_unique(columnName); + } +}; + +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/store/column_chunk_stats.h b/src/include/storage/store/column_chunk_stats.h index 67c3b0f2589..cc7b1c89a6b 100644 --- a/src/include/storage/store/column_chunk_stats.h +++ b/src/include/storage/store/column_chunk_stats.h @@ -6,13 +6,16 @@ namespace kuzu::storage { struct ColumnChunkStats { std::optional max; std::optional min; + bool mayHaveNulls = false; - void update(std::optional min, std::optional max, + void update(std::optional min, std::optional max, bool mayHaveNulls, common::PhysicalTypeID dataType); void update(StorageValue val, common::PhysicalTypeID dataType); - void update(uint8_t* data, uint64_t offset, uint64_t numValues, + void update(uint8_t* data, uint64_t offset, uint64_t numValues, bool mayHaveNulls, common::PhysicalTypeID physicalType); void reset(); + + void updateMayHaveNulls(bool newMayHaveNulls); }; } // namespace kuzu::storage diff --git a/src/optimizer/filter_push_down_optimizer.cpp b/src/optimizer/filter_push_down_optimizer.cpp index 00ea2b187c7..ed66df28bea 100644 --- a/src/optimizer/filter_push_down_optimizer.cpp +++ b/src/optimizer/filter_push_down_optimizer.cpp @@ -138,11 +138,7 @@ static ColumnPredicateSet getPredicateSet(const Expression& column, const binder::expression_vector& predicates) { auto predicateSet = ColumnPredicateSet(); for (auto& predicate : predicates) { - auto columnPredicate = ColumnPredicateUtil::tryConvert(column, *predicate); - if (columnPredicate == nullptr) { - continue; - } - predicateSet.addPredicate(std::move(columnPredicate)); + predicateSet.tryAddPredicate(column, *predicate); } return predicateSet; } diff --git a/src/storage/predicate/CMakeLists.txt b/src/storage/predicate/CMakeLists.txt index 07df3fda0b8..45aa6d4ed8f 100644 --- a/src/storage/predicate/CMakeLists.txt +++ b/src/storage/predicate/CMakeLists.txt @@ -1,5 +1,6 @@ add_library(kuzu_storage_predicate OBJECT + null_predicate.cpp column_predicate.cpp constant_predicate.cpp) diff --git a/src/storage/predicate/column_predicate.cpp b/src/storage/predicate/column_predicate.cpp index 5e8445589fa..a12bb41fd6b 100644 --- a/src/storage/predicate/column_predicate.cpp +++ b/src/storage/predicate/column_predicate.cpp @@ -3,6 +3,7 @@ #include "binder/expression/literal_expression.h" #include "binder/expression/scalar_function_expression.h" #include "storage/predicate/constant_predicate.h" +#include "storage/predicate/null_predicate.h" using namespace kuzu::binder; using namespace kuzu::common; @@ -10,6 +11,9 @@ using namespace kuzu::common; namespace kuzu { namespace storage { +static void tryAddConjunction(const Expression& column, const Expression& predicate, + std::vector>& predicateSetToAddTo); + ZoneMapCheckResult ColumnPredicateSet::checkZoneMap(const ColumnChunkStats& stats) const { for (auto& predicate : predicates) { if (predicate->checkZoneMap(stats) == ZoneMapCheckResult::SKIP_SCAN) { @@ -45,9 +49,12 @@ static bool isCastedColumnRef(const Expression& expr) { return false; } +static bool isSimpleExpr(const Expression& expr) { + return isColumnRef(expr.expressionType) || isCastedColumnRef(expr); +} + static bool isColumnRefConstantPair(const Expression& left, const Expression& right) { - return (isColumnRef(left.expressionType) || isCastedColumnRef(left)) && - right.expressionType == ExpressionType::LITERAL; + return isSimpleExpr(left) && right.expressionType == ExpressionType::LITERAL; } static bool columnMatchesExprChild(const Expression& column, const Expression& expr) { @@ -78,13 +85,68 @@ static std::unique_ptr tryConvertToConstColumnPredicate(const E return nullptr; } -std::unique_ptr ColumnPredicateUtil::tryConvert(const Expression& property, +static std::unique_ptr tryConvertToIsNull(const Expression& column, const Expression& predicate) { - if (ExpressionTypeUtil::isComparison(predicate.expressionType)) { - return tryConvertToConstColumnPredicate(property, predicate); + // we only convert simple predicates + if (isSimpleExpr(*predicate.getChild(0))) { + return std::make_unique(column.toString()); + } + return nullptr; +} + +static std::unique_ptr tryConvertToIsNotNull(const Expression& column, + const Expression& predicate) { + if (isSimpleExpr(*predicate.getChild(0))) { + return std::make_unique(column.toString()); } return nullptr; } +static void emplaceIfNotNull(std::unique_ptr columnPredicate, + std::vector>& predicateSetToAddTo) { + if (columnPredicate) { + predicateSetToAddTo.emplace_back(std::move(columnPredicate)); + } +} + +static void tryAddColumnPredicate(const Expression& property, const Expression& predicate, + std::vector>& predicateSetToAddTo) { + if (ExpressionTypeUtil::isComparison(predicate.expressionType)) { + emplaceIfNotNull(tryConvertToConstColumnPredicate(property, predicate), + predicateSetToAddTo); + return; + } + switch (predicate.expressionType) { + case common::ExpressionType::AND: + tryAddConjunction(property, predicate, predicateSetToAddTo); + break; + case common::ExpressionType::IS_NULL: + emplaceIfNotNull(tryConvertToIsNull(property, predicate), predicateSetToAddTo); + break; + case common::ExpressionType::IS_NOT_NULL: + emplaceIfNotNull(tryConvertToIsNotNull(property, predicate), predicateSetToAddTo); + break; + default: + break; + } + return; +} + +static void tryAddConjunction(const Expression& column, const Expression& predicate, + std::vector>& predicateSetToAddTo) { + KU_ASSERT(predicate.expressionType == common::ExpressionType::AND); + tryAddColumnPredicate(column, *predicate.getChild(0), predicateSetToAddTo); + tryAddColumnPredicate(column, *predicate.getChild(1), predicateSetToAddTo); +} + +void ColumnPredicateSet::tryAddPredicate(const binder::Expression& column, + const binder::Expression& predicate) { + tryAddColumnPredicate(column, predicate, predicates); +} + +std::string ColumnPredicate::toString() { + return stringFormat("{} {}", columnName, ExpressionTypeUtil::toParsableString(expressionType)); +} + } // namespace storage } // namespace kuzu diff --git a/src/storage/predicate/constant_predicate.cpp b/src/storage/predicate/constant_predicate.cpp index c3e0a809515..170bf16615a 100644 --- a/src/storage/predicate/constant_predicate.cpp +++ b/src/storage/predicate/constant_predicate.cpp @@ -84,8 +84,7 @@ std::string ColumnConstantPredicate::toString() { } else { valStr = value.toString(); } - return stringFormat("{} {} {}", columnName, - ExpressionTypeUtil::toParsableString(expressionType), valStr); + return stringFormat("{} {}", ColumnPredicate::toString(), valStr); } } // namespace storage diff --git a/src/storage/predicate/null_predicate.cpp b/src/storage/predicate/null_predicate.cpp new file mode 100644 index 00000000000..49e465690fe --- /dev/null +++ b/src/storage/predicate/null_predicate.cpp @@ -0,0 +1,15 @@ +#include "storage/predicate/null_predicate.h" + +#include "storage/store/column_chunk_stats.h" + +namespace kuzu::storage { +common::ZoneMapCheckResult ColumnIsNullPredicate::checkZoneMap( + const ColumnChunkStats& stats) const { + return stats.mayHaveNulls ? common::ZoneMapCheckResult::ALWAYS_SCAN : + common::ZoneMapCheckResult::SKIP_SCAN; +} + +common::ZoneMapCheckResult ColumnIsNotNullPredicate::checkZoneMap(const ColumnChunkStats&) const { + return common::ZoneMapCheckResult::ALWAYS_SCAN; +} +} // namespace kuzu::storage diff --git a/src/storage/store/column_chunk_data.cpp b/src/storage/store/column_chunk_data.cpp index 2bc9a259817..6230b9497fb 100644 --- a/src/storage/store/column_chunk_data.cpp +++ b/src/storage/store/column_chunk_data.cpp @@ -81,7 +81,8 @@ ColumnChunkData::ColumnChunkData(MemoryManager& mm, LogicalType dataType, uint64 bool enableCompression, ResidencyState residencyState, bool hasNullData, bool initializeToZero) : residencyState{residencyState}, dataType{std::move(dataType)}, enableCompression{enableCompression}, - numBytesPerValue{getDataTypeSizeInChunk(this->dataType)}, capacity{capacity}, numValues{0} { + numBytesPerValue{getDataTypeSizeInChunk(this->dataType)}, capacity{capacity}, numValues{0}, + inMemoryStats() { if (hasNullData) { nullData = std::make_unique(mm, capacity, enableCompression, residencyState); } @@ -180,20 +181,25 @@ static void updateInMemoryStats(ColumnChunkStats& stats, const ValueVector& valu uint64_t offset = 0, uint64_t numValues = std::numeric_limits::max()) { const auto physicalType = values.dataType.getPhysicalType(); const auto numValuesToCheck = std::min(numValues, values.state->getSelSize()); - stats.update(values.getData(), offset, numValuesToCheck, physicalType); + // we pessimistically set mayHaveNulls to check the entire vector instead of the selected range + stats.update(values.getData(), offset, numValuesToCheck, values.hasNoNullsGuarantee(), + physicalType); } static void updateInMemoryStats(ColumnChunkStats& stats, const ColumnChunkData* values, uint64_t offset = 0, uint64_t numValues = std::numeric_limits::max()) { const auto physicalType = values->getDataType().getPhysicalType(); const auto numValuesToCheck = std::min(values->getNumValues(), numValues); - stats.update(values->getData(), offset, numValuesToCheck, physicalType); + const bool mayHaveNulls = values->hasNullData() && values->getNullData().mayHaveNull(); + stats.update(values->getData(), offset, numValuesToCheck, mayHaveNulls, physicalType); } ColumnChunkStats ColumnChunkData::getMergedColumnChunkStats() const { const CompressionMetadata& onDiskMetadata = metadata.compMeta; auto ret = inMemoryStats; - ret.update(onDiskMetadata.min, onDiskMetadata.max, getDataType().getPhysicalType()); + const bool mayHaveNulls = nullData && nullData->mayHaveNull(); + ret.update(onDiskMetadata.min, onDiskMetadata.max, mayHaveNulls, + getDataType().getPhysicalType()); return ret; } diff --git a/src/storage/store/column_chunk_stats.cpp b/src/storage/store/column_chunk_stats.cpp index 265718a8c2e..9595dadc922 100644 --- a/src/storage/store/column_chunk_stats.cpp +++ b/src/storage/store/column_chunk_stats.cpp @@ -5,33 +5,40 @@ namespace kuzu { namespace storage { -void ColumnChunkStats::update(uint8_t* data, uint64_t offset, uint64_t numValues, +void ColumnChunkStats::update(uint8_t* data, uint64_t offset, uint64_t numValues, bool mayHaveNulls, common::PhysicalTypeID physicalType) { const bool isStorageValueType = common::TypeUtils::visit(physicalType, [](T) { return StorageValueType; }); if (isStorageValueType) { auto [minVal, maxVal] = getMinMaxStorageValue(data, offset, numValues, physicalType, nullptr); - update(minVal, maxVal, physicalType); + update(minVal, maxVal, mayHaveNulls, physicalType); + } else { + updateMayHaveNulls(mayHaveNulls); } } void ColumnChunkStats::update(std::optional newMin, - std::optional newMax, common::PhysicalTypeID dataType) { + std::optional newMax, bool mayHaveNulls, common::PhysicalTypeID dataType) { if (!min.has_value() || (newMin.has_value() && min->gt(*newMin, dataType))) { min = newMin; } if (!max.has_value() || (newMax.has_value() && newMax->gt(*max, dataType))) { max = newMax; } + updateMayHaveNulls(mayHaveNulls); } void ColumnChunkStats::update(StorageValue val, common::PhysicalTypeID dataType) { - update(val, val, dataType); + update(val, val, false, dataType); +} + +void ColumnChunkStats::updateMayHaveNulls(bool newMayHaveNulls) { + mayHaveNulls = mayHaveNulls || newMayHaveNulls; } void ColumnChunkStats::reset() { - *this = {}; + *this = ColumnChunkStats{}; } } // namespace storage From 260229b2a1eca8f2fcab25240a3301269444bcca Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Mon, 16 Dec 2024 16:37:01 -0500 Subject: [PATCH 2/8] Rework column chunk stats + handle nulls --- .../storage/predicate/column_predicate.h | 6 ++--- .../storage/predicate/constant_predicate.h | 2 +- .../storage/predicate/null_predicate.h | 4 +-- src/include/storage/store/column_chunk.h | 3 +++ src/include/storage/store/column_chunk_data.h | 2 +- .../storage/store/column_chunk_stats.h | 15 ++++++++--- src/storage/predicate/column_predicate.cpp | 2 +- src/storage/predicate/constant_predicate.cpp | 13 ++++----- src/storage/predicate/null_predicate.cpp | 9 ++++--- src/storage/store/chunked_node_group.cpp | 8 +++--- src/storage/store/column_chunk.cpp | 15 +++++++++++ src/storage/store/column_chunk_data.cpp | 27 ++++++++++--------- src/storage/store/column_chunk_stats.cpp | 21 +++++++-------- 13 files changed, 77 insertions(+), 50 deletions(-) diff --git a/src/include/storage/predicate/column_predicate.h b/src/include/storage/predicate/column_predicate.h index d0fb9a84c71..a7b595d0beb 100644 --- a/src/include/storage/predicate/column_predicate.h +++ b/src/include/storage/predicate/column_predicate.h @@ -7,7 +7,7 @@ namespace kuzu { namespace storage { -struct ColumnChunkStats; +struct MergedColumnChunkStats; class ColumnPredicate; class KUZU_API ColumnPredicateSet { @@ -21,7 +21,7 @@ class KUZU_API ColumnPredicateSet { void tryAddPredicate(const binder::Expression& column, const binder::Expression& predicate); bool isEmpty() const { return predicates.empty(); } - common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const; + common::ZoneMapCheckResult checkZoneMap(const MergedColumnChunkStats& stats) const; std::string toString() const; @@ -40,7 +40,7 @@ class KUZU_API ColumnPredicate { virtual ~ColumnPredicate() = default; - virtual common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const = 0; + virtual common::ZoneMapCheckResult checkZoneMap(const MergedColumnChunkStats& stats) const = 0; virtual std::string toString(); diff --git a/src/include/storage/predicate/constant_predicate.h b/src/include/storage/predicate/constant_predicate.h index 09b29183edf..1fae05e6721 100644 --- a/src/include/storage/predicate/constant_predicate.h +++ b/src/include/storage/predicate/constant_predicate.h @@ -13,7 +13,7 @@ class ColumnConstantPredicate : public ColumnPredicate { common::Value value) : ColumnPredicate{std::move(columnName), expressionType}, value{std::move(value)} {} - common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const override; + common::ZoneMapCheckResult checkZoneMap(const MergedColumnChunkStats& stats) const override; std::string toString() override; diff --git a/src/include/storage/predicate/null_predicate.h b/src/include/storage/predicate/null_predicate.h index c175b633c54..4926918e492 100644 --- a/src/include/storage/predicate/null_predicate.h +++ b/src/include/storage/predicate/null_predicate.h @@ -11,7 +11,7 @@ class ColumnIsNullPredicate : public ColumnPredicate { explicit ColumnIsNullPredicate(std::string columnName) : ColumnPredicate{std::move(columnName), common::ExpressionType::IS_NULL} {} - common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const override; + common::ZoneMapCheckResult checkZoneMap(const MergedColumnChunkStats& stats) const override; std::unique_ptr copy() const override { return std::make_unique(columnName); @@ -23,7 +23,7 @@ class ColumnIsNotNullPredicate : public ColumnPredicate { explicit ColumnIsNotNullPredicate(std::string columnName) : ColumnPredicate{std::move(columnName), common::ExpressionType::IS_NOT_NULL} {} - common::ZoneMapCheckResult checkZoneMap(const ColumnChunkStats& stats) const override; + common::ZoneMapCheckResult checkZoneMap(const MergedColumnChunkStats& stats) const override; std::unique_ptr copy() const override { return std::make_unique(columnName); diff --git a/src/include/storage/store/column_chunk.h b/src/include/storage/store/column_chunk.h index 75912ec9399..9b94555ff37 100644 --- a/src/include/storage/store/column_chunk.h +++ b/src/include/storage/store/column_chunk.h @@ -102,6 +102,9 @@ class ColumnChunk { void loadFromDisk() { data->loadFromDisk(); } uint64_t spillToDisk() { return data->spillToDisk(); } + MergedColumnChunkStats getMergedColumnChunkStats( + const transaction::Transaction* transaction) const; + private: void scanCommittedUpdates(const transaction::Transaction* transaction, ColumnChunkData& output, common::offset_t startOffsetInOutput, common::row_idx_t startRowScanned, diff --git a/src/include/storage/store/column_chunk_data.h b/src/include/storage/store/column_chunk_data.h index bbc12fa866c..978fb2eed4c 100644 --- a/src/include/storage/store/column_chunk_data.h +++ b/src/include/storage/store/column_chunk_data.h @@ -229,7 +229,7 @@ class ColumnChunkData { void loadFromDisk(); uint64_t spillToDisk(); - ColumnChunkStats getMergedColumnChunkStats() const; + MergedColumnChunkStats getMergedColumnChunkStats() const; void updateStats(const common::ValueVector* vector, const common::SelectionVector& selVector); diff --git a/src/include/storage/store/column_chunk_stats.h b/src/include/storage/store/column_chunk_stats.h index cc7b1c89a6b..c10f2fdf362 100644 --- a/src/include/storage/store/column_chunk_stats.h +++ b/src/include/storage/store/column_chunk_stats.h @@ -6,16 +6,23 @@ namespace kuzu::storage { struct ColumnChunkStats { std::optional max; std::optional min; - bool mayHaveNulls = false; - void update(std::optional min, std::optional max, bool mayHaveNulls, + void update(std::optional min, std::optional max, common::PhysicalTypeID dataType); void update(StorageValue val, common::PhysicalTypeID dataType); - void update(uint8_t* data, uint64_t offset, uint64_t numValues, bool mayHaveNulls, + void update(uint8_t* data, uint64_t offset, uint64_t numValues, common::PhysicalTypeID physicalType); void reset(); +}; + +struct MergedColumnChunkStats { + MergedColumnChunkStats(ColumnChunkStats stats, bool mayContainNulls) + : stats(stats), mayHaveNulls(mayContainNulls) {} + + ColumnChunkStats stats; + bool mayHaveNulls; - void updateMayHaveNulls(bool newMayHaveNulls); + void merge(const MergedColumnChunkStats& o, common::PhysicalTypeID dataType); }; } // namespace kuzu::storage diff --git a/src/storage/predicate/column_predicate.cpp b/src/storage/predicate/column_predicate.cpp index a12bb41fd6b..6fdd3cf1781 100644 --- a/src/storage/predicate/column_predicate.cpp +++ b/src/storage/predicate/column_predicate.cpp @@ -14,7 +14,7 @@ namespace storage { static void tryAddConjunction(const Expression& column, const Expression& predicate, std::vector>& predicateSetToAddTo); -ZoneMapCheckResult ColumnPredicateSet::checkZoneMap(const ColumnChunkStats& stats) const { +ZoneMapCheckResult ColumnPredicateSet::checkZoneMap(const MergedColumnChunkStats& stats) const { for (auto& predicate : predicates) { if (predicate->checkZoneMap(stats) == ZoneMapCheckResult::SKIP_SCAN) { return ZoneMapCheckResult::SKIP_SCAN; diff --git a/src/storage/predicate/constant_predicate.cpp b/src/storage/predicate/constant_predicate.cpp index 170bf16615a..fcbb90df394 100644 --- a/src/storage/predicate/constant_predicate.cpp +++ b/src/storage/predicate/constant_predicate.cpp @@ -19,11 +19,11 @@ bool inRange(T min, T max, T val) { } template -ZoneMapCheckResult checkZoneMapSwitch(const ColumnChunkStats& stats, ExpressionType expressionType, - const Value& value) { - KU_ASSERT(stats.min.has_value() && stats.max.has_value()); - auto max = stats.max->get(); - auto min = stats.min->get(); +ZoneMapCheckResult checkZoneMapSwitch(const MergedColumnChunkStats& mergedStats, + ExpressionType expressionType, const Value& value) { + KU_ASSERT(mergedStats.stats.min.has_value() && mergedStats.stats.max.has_value()); + auto max = mergedStats.stats.max->get(); + auto min = mergedStats.stats.min->get(); auto constant = value.getValue(); switch (expressionType) { case ExpressionType::EQUALS: { @@ -62,7 +62,8 @@ ZoneMapCheckResult checkZoneMapSwitch(const ColumnChunkStats& stats, ExpressionT return ZoneMapCheckResult::ALWAYS_SCAN; } -ZoneMapCheckResult ColumnConstantPredicate::checkZoneMap(const ColumnChunkStats& stats) const { +ZoneMapCheckResult ColumnConstantPredicate::checkZoneMap( + const MergedColumnChunkStats& stats) const { auto physicalType = value.getDataType().getPhysicalType(); return TypeUtils::visit( physicalType, diff --git a/src/storage/predicate/null_predicate.cpp b/src/storage/predicate/null_predicate.cpp index 49e465690fe..6a4e791bbf7 100644 --- a/src/storage/predicate/null_predicate.cpp +++ b/src/storage/predicate/null_predicate.cpp @@ -4,12 +4,13 @@ namespace kuzu::storage { common::ZoneMapCheckResult ColumnIsNullPredicate::checkZoneMap( - const ColumnChunkStats& stats) const { - return stats.mayHaveNulls ? common::ZoneMapCheckResult::ALWAYS_SCAN : - common::ZoneMapCheckResult::SKIP_SCAN; + const MergedColumnChunkStats& mergedStats) const { + return mergedStats.mayHaveNulls ? common::ZoneMapCheckResult::ALWAYS_SCAN : + common::ZoneMapCheckResult::SKIP_SCAN; } -common::ZoneMapCheckResult ColumnIsNotNullPredicate::checkZoneMap(const ColumnChunkStats&) const { +common::ZoneMapCheckResult ColumnIsNotNullPredicate::checkZoneMap( + const MergedColumnChunkStats&) const { return common::ZoneMapCheckResult::ALWAYS_SCAN; } } // namespace kuzu::storage diff --git a/src/storage/store/chunked_node_group.cpp b/src/storage/store/chunked_node_group.cpp index 9bb4bbbbc6b..bcea84075ff 100644 --- a/src/storage/store/chunked_node_group.cpp +++ b/src/storage/store/chunked_node_group.cpp @@ -195,8 +195,8 @@ void ChunkedNodeGroup::write(const ChunkedNodeGroup& data, column_id_t offsetCol } } -static ZoneMapCheckResult getZoneMapResult(const TableScanState& scanState, - const std::vector>& chunks) { +static ZoneMapCheckResult getZoneMapResult(const Transaction* transaction, + const TableScanState& scanState, const std::vector>& chunks) { if (!scanState.columnPredicateSets.empty()) { for (auto i = 0u; i < scanState.columnIDs.size(); i++) { const auto columnID = scanState.columnIDs[i]; @@ -206,7 +206,7 @@ static ZoneMapCheckResult getZoneMapResult(const TableScanState& scanState, KU_ASSERT(i < scanState.columnPredicateSets.size()); const auto columnZoneMapResult = scanState.columnPredicateSets[i].checkZoneMap( - chunks[columnID]->getData().getMergedColumnChunkStats()); + chunks[columnID]->getMergedColumnChunkStats(transaction)); RUNTIME_CHECK(const bool columnHasStorageValueType = TypeUtils::visit(chunks[columnID]->getDataType().getPhysicalType(), [](T) { return StorageValueType; })); @@ -225,7 +225,7 @@ void ChunkedNodeGroup::scan(const Transaction* transaction, const TableScanState length_t numRowsToScan) const { KU_ASSERT(rowIdxInGroup + numRowsToScan <= numRows); auto& anchorSelVector = scanState.outState->getSelVectorUnsafe(); - if (getZoneMapResult(scanState, chunks) == common::ZoneMapCheckResult::SKIP_SCAN) { + if (getZoneMapResult(transaction, scanState, chunks) == common::ZoneMapCheckResult::SKIP_SCAN) { anchorSelVector.setToFiltered(0); return; } diff --git a/src/storage/store/column_chunk.cpp b/src/storage/store/column_chunk.cpp index 13d93f75260..dabbf3558a6 100644 --- a/src/storage/store/column_chunk.cpp +++ b/src/storage/store/column_chunk.cpp @@ -201,6 +201,21 @@ void ColumnChunk::update(const Transaction* transaction, offset_t offsetInChunk, transaction->pushVectorUpdateInfo(*updateInfo, vectorIdx, *vectorUpdateInfo); } +MergedColumnChunkStats ColumnChunk::getMergedColumnChunkStats( + const transaction::Transaction* transaction) const { + auto baseStats = data->getMergedColumnChunkStats(); + if (updateInfo) { + for (idx_t i = 0; i < updateInfo->getNumVectors(); ++i) { + const auto* vectorInfo = updateInfo->getVectorInfo(transaction, i); + if (vectorInfo) { + baseStats.merge(vectorInfo->data->getMergedColumnChunkStats(), + getDataType().getPhysicalType()); + } + } + } + return baseStats; +} + void ColumnChunk::serialize(Serializer& serializer) const { serializer.writeDebuggingInfo("enable_compression"); serializer.write(enableCompression); diff --git a/src/storage/store/column_chunk_data.cpp b/src/storage/store/column_chunk_data.cpp index 6230b9497fb..733f8125c6b 100644 --- a/src/storage/store/column_chunk_data.cpp +++ b/src/storage/store/column_chunk_data.cpp @@ -182,25 +182,22 @@ static void updateInMemoryStats(ColumnChunkStats& stats, const ValueVector& valu const auto physicalType = values.dataType.getPhysicalType(); const auto numValuesToCheck = std::min(numValues, values.state->getSelSize()); // we pessimistically set mayHaveNulls to check the entire vector instead of the selected range - stats.update(values.getData(), offset, numValuesToCheck, values.hasNoNullsGuarantee(), - physicalType); + stats.update(values.getData(), offset, numValuesToCheck, physicalType); } static void updateInMemoryStats(ColumnChunkStats& stats, const ColumnChunkData* values, uint64_t offset = 0, uint64_t numValues = std::numeric_limits::max()) { const auto physicalType = values->getDataType().getPhysicalType(); const auto numValuesToCheck = std::min(values->getNumValues(), numValues); - const bool mayHaveNulls = values->hasNullData() && values->getNullData().mayHaveNull(); - stats.update(values->getData(), offset, numValuesToCheck, mayHaveNulls, physicalType); + const auto nullMask = values->getNullMask(); + stats.update(values->getData(), offset, numValuesToCheck, physicalType); } -ColumnChunkStats ColumnChunkData::getMergedColumnChunkStats() const { +MergedColumnChunkStats ColumnChunkData::getMergedColumnChunkStats() const { const CompressionMetadata& onDiskMetadata = metadata.compMeta; - auto ret = inMemoryStats; - const bool mayHaveNulls = nullData && nullData->mayHaveNull(); - ret.update(onDiskMetadata.min, onDiskMetadata.max, mayHaveNulls, - getDataType().getPhysicalType()); - return ret; + ColumnChunkStats stats = inMemoryStats; + stats.update(onDiskMetadata.min, onDiskMetadata.max, getDataType().getPhysicalType()); + return MergedColumnChunkStats{stats, nullData && nullData->mayHaveNull()}; } void ColumnChunkData::updateStats(const common::ValueVector* vector, @@ -213,8 +210,10 @@ void ColumnChunkData::updateStats(const common::ValueVector* vector, [this, vector, &selVector](T) { for (idx_t i = 0; i < selVector.getSelSize(); ++i) { auto pos = selVector.getSelectedPositions()[i]; - auto val = vector->getValue(pos); - inMemoryStats.update(StorageValue{val}, getDataType().getPhysicalType()); + if (!vector->isNull(pos)) { + const auto val = vector->getValue(pos); + inMemoryStats.update(StorageValue{val}, getDataType().getPhysicalType()); + } } }, [](T) { static_assert(!StorageValueType); }); @@ -668,7 +667,9 @@ void BoolChunkData::write(const ValueVector* vector, offset_t offsetInVector, nullData->write(vector, offsetInVector, offsetInChunk); } numValues = offsetInChunk >= numValues ? offsetInChunk + 1 : numValues; - inMemoryStats.update(StorageValue{valueToSet}, dataType.getPhysicalType()); + if (!vector->isNull(offsetInVector)) { + inMemoryStats.update(StorageValue{valueToSet}, dataType.getPhysicalType()); + } } void BoolChunkData::write(ColumnChunkData* srcChunk, offset_t srcOffsetInChunk, diff --git a/src/storage/store/column_chunk_stats.cpp b/src/storage/store/column_chunk_stats.cpp index 9595dadc922..a8b1d389b83 100644 --- a/src/storage/store/column_chunk_stats.cpp +++ b/src/storage/store/column_chunk_stats.cpp @@ -5,40 +5,39 @@ namespace kuzu { namespace storage { -void ColumnChunkStats::update(uint8_t* data, uint64_t offset, uint64_t numValues, bool mayHaveNulls, +void ColumnChunkStats::update(uint8_t* data, uint64_t offset, uint64_t numValues, common::PhysicalTypeID physicalType) { const bool isStorageValueType = common::TypeUtils::visit(physicalType, [](T) { return StorageValueType; }); if (isStorageValueType) { auto [minVal, maxVal] = getMinMaxStorageValue(data, offset, numValues, physicalType, nullptr); - update(minVal, maxVal, mayHaveNulls, physicalType); - } else { - updateMayHaveNulls(mayHaveNulls); + update(minVal, maxVal, physicalType); } } void ColumnChunkStats::update(std::optional newMin, - std::optional newMax, bool mayHaveNulls, common::PhysicalTypeID dataType) { + std::optional newMax, common::PhysicalTypeID dataType) { if (!min.has_value() || (newMin.has_value() && min->gt(*newMin, dataType))) { min = newMin; } if (!max.has_value() || (newMax.has_value() && newMax->gt(*max, dataType))) { max = newMax; } - updateMayHaveNulls(mayHaveNulls); } void ColumnChunkStats::update(StorageValue val, common::PhysicalTypeID dataType) { - update(val, val, false, dataType); + update(val, val, dataType); } -void ColumnChunkStats::updateMayHaveNulls(bool newMayHaveNulls) { - mayHaveNulls = mayHaveNulls || newMayHaveNulls; +void ColumnChunkStats::reset() { + *this = {}; } -void ColumnChunkStats::reset() { - *this = ColumnChunkStats{}; +void MergedColumnChunkStats::merge(const MergedColumnChunkStats& o, + common::PhysicalTypeID dataType) { + stats.update(o.stats.min, o.stats.max, dataType); + mayHaveNulls = mayHaveNulls || o.mayHaveNulls; } } // namespace storage From 5c819214a6c7dbc2766d333a751299ae1177f252 Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Tue, 17 Dec 2024 09:51:05 -0500 Subject: [PATCH 3/8] Fix handling of nulls --- src/include/storage/store/column_chunk_data.h | 1 + .../storage/store/column_chunk_stats.h | 2 +- src/storage/store/column_chunk_data.cpp | 11 ++++-- src/storage/store/column_chunk_stats.cpp | 4 +-- test/test_files/call/call.test | 22 ++++++------ test/test_files/filter/node.test | 34 ++++++++++++++++++- 6 files changed, 56 insertions(+), 18 deletions(-) diff --git a/src/include/storage/store/column_chunk_data.h b/src/include/storage/store/column_chunk_data.h index 978fb2eed4c..25e13f1805a 100644 --- a/src/include/storage/store/column_chunk_data.h +++ b/src/include/storage/store/column_chunk_data.h @@ -337,6 +337,7 @@ class NullChunkData final : public BoolChunkData { void setNull(common::offset_t pos, bool isNull); bool mayHaveNull() const { return mayHaveNullValue; } + bool mayHaveNullOnDisk() const; void resetToEmpty() override { resetToNoNull(); diff --git a/src/include/storage/store/column_chunk_stats.h b/src/include/storage/store/column_chunk_stats.h index c10f2fdf362..8ea85d18255 100644 --- a/src/include/storage/store/column_chunk_stats.h +++ b/src/include/storage/store/column_chunk_stats.h @@ -11,7 +11,7 @@ struct ColumnChunkStats { common::PhysicalTypeID dataType); void update(StorageValue val, common::PhysicalTypeID dataType); void update(uint8_t* data, uint64_t offset, uint64_t numValues, - common::PhysicalTypeID physicalType); + const common::NullMask* nullMask, common::PhysicalTypeID physicalType); void reset(); }; diff --git a/src/storage/store/column_chunk_data.cpp b/src/storage/store/column_chunk_data.cpp index 733f8125c6b..bc9936ecc56 100644 --- a/src/storage/store/column_chunk_data.cpp +++ b/src/storage/store/column_chunk_data.cpp @@ -182,7 +182,7 @@ static void updateInMemoryStats(ColumnChunkStats& stats, const ValueVector& valu const auto physicalType = values.dataType.getPhysicalType(); const auto numValuesToCheck = std::min(numValues, values.state->getSelSize()); // we pessimistically set mayHaveNulls to check the entire vector instead of the selected range - stats.update(values.getData(), offset, numValuesToCheck, physicalType); + stats.update(values.getData(), offset, numValuesToCheck, &values.getNullMask(), physicalType); } static void updateInMemoryStats(ColumnChunkStats& stats, const ColumnChunkData* values, @@ -190,14 +190,15 @@ static void updateInMemoryStats(ColumnChunkStats& stats, const ColumnChunkData* const auto physicalType = values->getDataType().getPhysicalType(); const auto numValuesToCheck = std::min(values->getNumValues(), numValues); const auto nullMask = values->getNullMask(); - stats.update(values->getData(), offset, numValuesToCheck, physicalType); + stats.update(values->getData(), offset, numValuesToCheck, + nullMask ? &nullMask.value() : nullptr, physicalType); } MergedColumnChunkStats ColumnChunkData::getMergedColumnChunkStats() const { const CompressionMetadata& onDiskMetadata = metadata.compMeta; ColumnChunkStats stats = inMemoryStats; stats.update(onDiskMetadata.min, onDiskMetadata.max, getDataType().getPhysicalType()); - return MergedColumnChunkStats{stats, nullData && nullData->mayHaveNull()}; + return MergedColumnChunkStats{stats, nullData && (nullData->mayHaveNullOnDisk())}; } void ColumnChunkData::updateStats(const common::ValueVector* vector, @@ -726,6 +727,10 @@ void NullChunkData::append(ColumnChunkData* other, offset_t startOffsetInOtherCh updateInMemoryStats(inMemoryStats, other, startOffsetInOtherChunk, numValuesToAppend); } +bool NullChunkData::mayHaveNullOnDisk() const { + return mayHaveNull() || metadata.compMeta.max.get(); +} + void NullChunkData::serialize(Serializer& serializer) const { KU_ASSERT(residencyState == ResidencyState::ON_DISK); serializer.writeDebuggingInfo("null_chunk_metadata"); diff --git a/src/storage/store/column_chunk_stats.cpp b/src/storage/store/column_chunk_stats.cpp index a8b1d389b83..f3f3b84e570 100644 --- a/src/storage/store/column_chunk_stats.cpp +++ b/src/storage/store/column_chunk_stats.cpp @@ -6,12 +6,12 @@ namespace kuzu { namespace storage { void ColumnChunkStats::update(uint8_t* data, uint64_t offset, uint64_t numValues, - common::PhysicalTypeID physicalType) { + const common::NullMask* nullMask, common::PhysicalTypeID physicalType) { const bool isStorageValueType = common::TypeUtils::visit(physicalType, [](T) { return StorageValueType; }); if (isStorageValueType) { auto [minVal, maxVal] = - getMinMaxStorageValue(data, offset, numValues, physicalType, nullptr); + getMinMaxStorageValue(data, offset, numValues, physicalType, nullMask); update(minVal, maxVal, physicalType); } } diff --git a/test/test_files/call/call.test b/test/test_files/call/call.test index c1ca67e9e34..d4367404a44 100644 --- a/test/test_files/call/call.test +++ b/test/test_files/call/call.test @@ -134,17 +134,17 @@ True ---- 1 False -# -LOG ZoneMapConfig -# -STATEMENT CALL enable_zone_map=true -# ---- ok -# -STATEMENT CALL current_setting('enable_zone_map') RETURN * -# ---- 1 -# True -# -STATEMENT CALL enable_zone_map=false -# ---- ok -# -STATEMENT CALL current_setting('enable_zone_map') RETURN * -# ---- 1 -# False +-LOG ZoneMapConfig +-STATEMENT CALL enable_zone_map=true +---- ok +-STATEMENT CALL current_setting('enable_zone_map') RETURN * +---- 1 +True +-STATEMENT CALL enable_zone_map=false +---- ok +-STATEMENT CALL current_setting('enable_zone_map') RETURN * +---- 1 +False -LOG NodeTableInfo -STATEMENT CALL table_info('person') RETURN * diff --git a/test/test_files/filter/node.test b/test/test_files/filter/node.test index cc1aa4a992f..5bad64ef6c6 100644 --- a/test/test_files/filter/node.test +++ b/test/test_files/filter/node.test @@ -16,6 +16,7 @@ 77|123456789.123000 -CASE ZoneMapUpdateThenQueryWithoutCheckpoint +-LOG Comparison -STATEMENT CALL enable_zone_map=true; ---- ok -STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.age=40 + 50 @@ -25,12 +26,43 @@ 0|90 -STATEMENT MATCH (a:person) WHERE a.age > 95 RETURN a.ID, a.age ---- 0 --STATEMENT CREATE (a:person {id: 100, age: 100}) +-STATEMENT CREATE (a:person {id: 100, age: 100, isStudent: true}) ---- ok -STATEMENT MATCH (a:person) WHERE a.age > 95 RETURN a.ID, a.age ---- 1 100|100 +-LOG NullCheck +-STATEMENT MATCH (a:person) WHERE a.isStudent IS NULL RETURN COUNT(*) +---- 1 +0 +-STATEMENT MATCH (a:person) WHERE a.isStudent IS NOT NULL RETURN COUNT(*) +---- 1 +9 +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.isStudent=null +---- ok +-STATEMENT MATCH (a:person) WHERE a.isStudent IS NULL RETURN COUNT(*) +---- 1 +1 +-STATEMENT MATCH (a:person) WHERE a.isStudent IS NOT NULL RETURN COUNT(*) +---- 1 +8 +-RELOADDB +-STATEMENT MATCH (a:person) WHERE a.isStudent IS NULL RETURN COUNT(*) +---- 1 +1 +-STATEMENT MATCH (a:person) WHERE a.isStudent IS NOT NULL RETURN COUNT(*) +---- 1 +8 +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.isStudent=true +---- ok +-STATEMENT MATCH (a:person) WHERE a.isStudent IS NULL RETURN COUNT(*) +---- 1 +0 +-STATEMENT MATCH (a:person) WHERE a.isStudent IS NOT NULL RETURN COUNT(*) +---- 1 +9 + -CASE FilterNode -LOG PersonNodesAgeFilteredTest1 From 130a677671d9ae3f05c4dd9cf880adcf5e18b77b Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Tue, 17 Dec 2024 10:42:33 -0500 Subject: [PATCH 4/8] Don't populate min/max stats for non-storage value types --- src/storage/store/chunked_node_group.cpp | 5 ----- src/storage/store/column_chunk_data.cpp | 7 ++++++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/storage/store/chunked_node_group.cpp b/src/storage/store/chunked_node_group.cpp index bcea84075ff..e1589316111 100644 --- a/src/storage/store/chunked_node_group.cpp +++ b/src/storage/store/chunked_node_group.cpp @@ -207,11 +207,6 @@ static ZoneMapCheckResult getZoneMapResult(const Transaction* transaction, KU_ASSERT(i < scanState.columnPredicateSets.size()); const auto columnZoneMapResult = scanState.columnPredicateSets[i].checkZoneMap( chunks[columnID]->getMergedColumnChunkStats(transaction)); - RUNTIME_CHECK(const bool columnHasStorageValueType = - TypeUtils::visit(chunks[columnID]->getDataType().getPhysicalType(), - [](T) { return StorageValueType; })); - KU_ASSERT(columnHasStorageValueType || - columnZoneMapResult == common::ZoneMapCheckResult::ALWAYS_SCAN); if (columnZoneMapResult == common::ZoneMapCheckResult::SKIP_SCAN) { return common::ZoneMapCheckResult::SKIP_SCAN; } diff --git a/src/storage/store/column_chunk_data.cpp b/src/storage/store/column_chunk_data.cpp index bc9936ecc56..d022a9a7196 100644 --- a/src/storage/store/column_chunk_data.cpp +++ b/src/storage/store/column_chunk_data.cpp @@ -197,7 +197,12 @@ static void updateInMemoryStats(ColumnChunkStats& stats, const ColumnChunkData* MergedColumnChunkStats ColumnChunkData::getMergedColumnChunkStats() const { const CompressionMetadata& onDiskMetadata = metadata.compMeta; ColumnChunkStats stats = inMemoryStats; - stats.update(onDiskMetadata.min, onDiskMetadata.max, getDataType().getPhysicalType()); + const auto physicalType = getDataType().getPhysicalType(); + const bool isStorageValueType = + common::TypeUtils::visit(physicalType, [](T) { return StorageValueType; }); + if (isStorageValueType) { + stats.update(onDiskMetadata.min, onDiskMetadata.max, physicalType); + } return MergedColumnChunkStats{stats, nullData && (nullData->mayHaveNullOnDisk())}; } From 2e1e1f89d11ba16bab05fb572dad783cecb8586d Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Tue, 17 Dec 2024 11:06:39 -0500 Subject: [PATCH 5/8] Remove unused conjunction add and add more tests for null --- .../storage/predicate/column_predicate.h | 8 +++- src/optimizer/filter_push_down_optimizer.cpp | 6 ++- src/storage/predicate/column_predicate.cpp | 42 +++---------------- test/test_files/filter/node.test | 33 ++++++++++++++- 4 files changed, 50 insertions(+), 39 deletions(-) diff --git a/src/include/storage/predicate/column_predicate.h b/src/include/storage/predicate/column_predicate.h index a7b595d0beb..fc62b538a94 100644 --- a/src/include/storage/predicate/column_predicate.h +++ b/src/include/storage/predicate/column_predicate.h @@ -15,7 +15,7 @@ class KUZU_API ColumnPredicateSet { ColumnPredicateSet() = default; EXPLICIT_COPY_DEFAULT_MOVE(ColumnPredicateSet); - void addColumnPredicate(std::unique_ptr predicate) { + void addPredicate(std::unique_ptr predicate) { predicates.push_back(std::move(predicate)); } void tryAddPredicate(const binder::Expression& column, const binder::Expression& predicate); @@ -56,5 +56,11 @@ class KUZU_API ColumnPredicate { common::ExpressionType expressionType; }; +struct ColumnPredicateUtil { + static std::unique_ptr tryConvert(const binder::Expression& column, + const binder::Expression& predicate); + common::ExpressionType expressionType; +}; + } // namespace storage } // namespace kuzu diff --git a/src/optimizer/filter_push_down_optimizer.cpp b/src/optimizer/filter_push_down_optimizer.cpp index ed66df28bea..00ea2b187c7 100644 --- a/src/optimizer/filter_push_down_optimizer.cpp +++ b/src/optimizer/filter_push_down_optimizer.cpp @@ -138,7 +138,11 @@ static ColumnPredicateSet getPredicateSet(const Expression& column, const binder::expression_vector& predicates) { auto predicateSet = ColumnPredicateSet(); for (auto& predicate : predicates) { - predicateSet.tryAddPredicate(column, *predicate); + auto columnPredicate = ColumnPredicateUtil::tryConvert(column, *predicate); + if (columnPredicate == nullptr) { + continue; + } + predicateSet.addPredicate(std::move(columnPredicate)); } return predicateSet; } diff --git a/src/storage/predicate/column_predicate.cpp b/src/storage/predicate/column_predicate.cpp index 6fdd3cf1781..6255f46c994 100644 --- a/src/storage/predicate/column_predicate.cpp +++ b/src/storage/predicate/column_predicate.cpp @@ -11,9 +11,6 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -static void tryAddConjunction(const Expression& column, const Expression& predicate, - std::vector>& predicateSetToAddTo); - ZoneMapCheckResult ColumnPredicateSet::checkZoneMap(const MergedColumnChunkStats& stats) const { for (auto& predicate : predicates) { if (predicate->checkZoneMap(stats) == ZoneMapCheckResult::SKIP_SCAN) { @@ -102,46 +99,19 @@ static std::unique_ptr tryConvertToIsNotNull(const Expression& return nullptr; } -static void emplaceIfNotNull(std::unique_ptr columnPredicate, - std::vector>& predicateSetToAddTo) { - if (columnPredicate) { - predicateSetToAddTo.emplace_back(std::move(columnPredicate)); - } -} - -static void tryAddColumnPredicate(const Expression& property, const Expression& predicate, - std::vector>& predicateSetToAddTo) { +std::unique_ptr ColumnPredicateUtil::tryConvert(const Expression& property, + const Expression& predicate) { if (ExpressionTypeUtil::isComparison(predicate.expressionType)) { - emplaceIfNotNull(tryConvertToConstColumnPredicate(property, predicate), - predicateSetToAddTo); - return; + return tryConvertToConstColumnPredicate(property, predicate); } switch (predicate.expressionType) { - case common::ExpressionType::AND: - tryAddConjunction(property, predicate, predicateSetToAddTo); - break; case common::ExpressionType::IS_NULL: - emplaceIfNotNull(tryConvertToIsNull(property, predicate), predicateSetToAddTo); - break; + return tryConvertToIsNull(property, predicate); case common::ExpressionType::IS_NOT_NULL: - emplaceIfNotNull(tryConvertToIsNotNull(property, predicate), predicateSetToAddTo); - break; + return tryConvertToIsNotNull(property, predicate); default: - break; + return nullptr; } - return; -} - -static void tryAddConjunction(const Expression& column, const Expression& predicate, - std::vector>& predicateSetToAddTo) { - KU_ASSERT(predicate.expressionType == common::ExpressionType::AND); - tryAddColumnPredicate(column, *predicate.getChild(0), predicateSetToAddTo); - tryAddColumnPredicate(column, *predicate.getChild(1), predicateSetToAddTo); -} - -void ColumnPredicateSet::tryAddPredicate(const binder::Expression& column, - const binder::Expression& predicate) { - tryAddColumnPredicate(column, predicate, predicates); } std::string ColumnPredicate::toString() { diff --git a/test/test_files/filter/node.test b/test/test_files/filter/node.test index 5bad64ef6c6..92c6247a45b 100644 --- a/test/test_files/filter/node.test +++ b/test/test_files/filter/node.test @@ -32,7 +32,7 @@ ---- 1 100|100 --LOG NullCheck +-LOG NullCheckBool -STATEMENT MATCH (a:person) WHERE a.isStudent IS NULL RETURN COUNT(*) ---- 1 0 @@ -63,6 +63,37 @@ ---- 1 9 +-LOG NullCheckStruct +-STATEMENT MATCH (a:organisation) WHERE a.state IS NULL RETURN COUNT(*) +---- 1 +0 +-STATEMENT MATCH (a:organisation) WHERE a.state IS NOT NULL RETURN COUNT(*) +---- 1 +3 +-STATEMENT MATCH (a:organisation) WHERE a.ID=4 SET a.state=null +---- ok +-STATEMENT MATCH (a:organisation) WHERE a.state IS NULL RETURN COUNT(*) +---- 1 +1 +-STATEMENT MATCH (a:organisation) WHERE a.state IS NOT NULL RETURN COUNT(*) +---- 1 +2 +-RELOADDB +-STATEMENT MATCH (a:organisation) WHERE a.state IS NULL RETURN COUNT(*) +---- 1 +1 +-STATEMENT MATCH (a:organisation) WHERE a.state IS NOT NULL RETURN COUNT(*) +---- 1 +2 +-STATEMENT MATCH (a:organisation) WHERE a.ID=4 SET a.state={revenue: CAST(1, "INT16"), location: ["aaa"], stock: {price: [1], volumn: 1}} +---- ok +-STATEMENT MATCH (a:organisation) WHERE a.state IS NULL RETURN COUNT(*) +---- 1 +0 +-STATEMENT MATCH (a:organisation) WHERE a.state IS NOT NULL RETURN COUNT(*) +---- 1 +3 + -CASE FilterNode -LOG PersonNodesAgeFilteredTest1 From a1429920437b62283fc3dfb3748cd263a2fa75b2 Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Tue, 17 Dec 2024 12:22:19 -0500 Subject: [PATCH 6/8] Merge null chunk no/all nulls guaranteed with in memory stats for null chunk + other refactoring --- src/include/storage/store/column_chunk_data.h | 31 +++++++++++-------- .../storage/store/column_chunk_stats.h | 8 +++-- src/storage/predicate/null_predicate.cpp | 9 +++--- src/storage/store/column_chunk_data.cpp | 14 +++++---- src/storage/store/column_chunk_stats.cpp | 3 +- src/storage/store/string_chunk_data.cpp | 2 +- test/test_files/filter/node.test | 6 ---- 7 files changed, 39 insertions(+), 34 deletions(-) diff --git a/src/include/storage/store/column_chunk_data.h b/src/include/storage/store/column_chunk_data.h index 25e13f1805a..5026bd79392 100644 --- a/src/include/storage/store/column_chunk_data.h +++ b/src/include/storage/store/column_chunk_data.h @@ -325,19 +325,23 @@ class BoolChunkData : public ColumnChunkData { class NullChunkData final : public BoolChunkData { public: NullChunkData(MemoryManager& mm, uint64_t capacity, bool enableCompression, ResidencyState type) - : BoolChunkData(mm, capacity, enableCompression, type, false /*hasNullData*/), - mayHaveNullValue{false} {} + : BoolChunkData(mm, capacity, enableCompression, type, false /*hasNullData*/) {} NullChunkData(MemoryManager& mm, bool enableCompression, const ColumnChunkMetadata& metadata) - : BoolChunkData{mm, enableCompression, metadata, false /*hasNullData*/}, - mayHaveNullValue{false} {} + : BoolChunkData{mm, enableCompression, metadata, false /*hasNullData*/} {} // Maybe this should be combined with BoolChunkData if the only difference is these // functions? bool isNull(common::offset_t pos) const { return getValue(pos); } void setNull(common::offset_t pos, bool isNull); - bool mayHaveNull() const { return mayHaveNullValue; } - bool mayHaveNullOnDisk() const; + bool noNullsGuaranteedInMem() const { + return !inMemoryStats.max || !inMemoryStats.max->get(); + } + bool allNullsGuaranteedInMem() const { + return !inMemoryStats.min || inMemoryStats.min->get(); + } + bool haveNoNullsGuaranteed() const; + bool haveAllNullsGuaranteed() const; void resetToEmpty() override { resetToNoNull(); @@ -345,18 +349,21 @@ class NullChunkData final : public BoolChunkData { } void resetToNoNull() { memset(getData(), 0 /* non null */, getBufferSize()); - mayHaveNullValue = false; + inMemoryStats.min = inMemoryStats.max = false; } void resetToAllNull() override { memset(getData(), 0xFF /* null */, getBufferSize()); - mayHaveNullValue = true; + inMemoryStats.min = inMemoryStats.max = true; } void copyFromBuffer(uint64_t* srcBuffer, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBits, bool invert = false) { if (common::NullMask::copyNullMask(srcBuffer, srcOffset, getData(), dstOffset, numBits, invert)) { - mayHaveNullValue = true; + // we pessimistically assume that the buffer contains both true/false values + // so the zone map won't skip scans + inMemoryStats.min = false; + inMemoryStats.max = true; } } @@ -377,11 +384,9 @@ class NullChunkData final : public BoolChunkData { common::Deserializer& deSer); common::NullMask getNullMask() const { - return common::NullMask(std::span(getData(), capacity / 64), mayHaveNullValue); + return common::NullMask(std::span(getData(), capacity / 64), + !noNullsGuaranteedInMem()); } - -protected: - bool mayHaveNullValue; }; class InternalIDChunkData final : public ColumnChunkData { diff --git a/src/include/storage/store/column_chunk_stats.h b/src/include/storage/store/column_chunk_stats.h index 8ea85d18255..1d25510bee3 100644 --- a/src/include/storage/store/column_chunk_stats.h +++ b/src/include/storage/store/column_chunk_stats.h @@ -16,11 +16,13 @@ struct ColumnChunkStats { }; struct MergedColumnChunkStats { - MergedColumnChunkStats(ColumnChunkStats stats, bool mayContainNulls) - : stats(stats), mayHaveNulls(mayContainNulls) {} + MergedColumnChunkStats(ColumnChunkStats stats, bool guaranteedNoNulls, bool guaranteedAllNulls) + : stats(stats), guaranteedNoNulls(guaranteedNoNulls), + guaranteedAllNulls(guaranteedAllNulls) {} ColumnChunkStats stats; - bool mayHaveNulls; + bool guaranteedNoNulls; + bool guaranteedAllNulls; void merge(const MergedColumnChunkStats& o, common::PhysicalTypeID dataType); }; diff --git a/src/storage/predicate/null_predicate.cpp b/src/storage/predicate/null_predicate.cpp index 6a4e791bbf7..143653ca42f 100644 --- a/src/storage/predicate/null_predicate.cpp +++ b/src/storage/predicate/null_predicate.cpp @@ -5,12 +5,13 @@ namespace kuzu::storage { common::ZoneMapCheckResult ColumnIsNullPredicate::checkZoneMap( const MergedColumnChunkStats& mergedStats) const { - return mergedStats.mayHaveNulls ? common::ZoneMapCheckResult::ALWAYS_SCAN : - common::ZoneMapCheckResult::SKIP_SCAN; + return mergedStats.guaranteedNoNulls ? common::ZoneMapCheckResult::SKIP_SCAN : + common::ZoneMapCheckResult::ALWAYS_SCAN; } common::ZoneMapCheckResult ColumnIsNotNullPredicate::checkZoneMap( - const MergedColumnChunkStats&) const { - return common::ZoneMapCheckResult::ALWAYS_SCAN; + const MergedColumnChunkStats& mergedStats) const { + return mergedStats.guaranteedAllNulls ? common::ZoneMapCheckResult::SKIP_SCAN : + common::ZoneMapCheckResult::ALWAYS_SCAN; } } // namespace kuzu::storage diff --git a/src/storage/store/column_chunk_data.cpp b/src/storage/store/column_chunk_data.cpp index d022a9a7196..e039bbc8b37 100644 --- a/src/storage/store/column_chunk_data.cpp +++ b/src/storage/store/column_chunk_data.cpp @@ -203,7 +203,8 @@ MergedColumnChunkStats ColumnChunkData::getMergedColumnChunkStats() const { if (isStorageValueType) { stats.update(onDiskMetadata.min, onDiskMetadata.max, physicalType); } - return MergedColumnChunkStats{stats, nullData && (nullData->mayHaveNullOnDisk())}; + return MergedColumnChunkStats{stats, !nullData || nullData->haveNoNullsGuaranteed(), + nullData && nullData->haveAllNullsGuaranteed()}; } void ColumnChunkData::updateStats(const common::ValueVector* vector, @@ -694,9 +695,6 @@ void BoolChunkData::write(ColumnChunkData* srcChunk, offset_t srcOffsetInChunk, void NullChunkData::setNull(offset_t pos, bool isNull) { setValue(isNull, pos); - if (isNull) { - mayHaveNullValue = true; - } // TODO(Guodong): Better let NullChunkData also support `append` a // vector. if (pos >= numValues) { @@ -732,8 +730,12 @@ void NullChunkData::append(ColumnChunkData* other, offset_t startOffsetInOtherCh updateInMemoryStats(inMemoryStats, other, startOffsetInOtherChunk, numValuesToAppend); } -bool NullChunkData::mayHaveNullOnDisk() const { - return mayHaveNull() || metadata.compMeta.max.get(); +bool NullChunkData::haveNoNullsGuaranteed() const { + return noNullsGuaranteedInMem() && !metadata.compMeta.max.get(); +} + +bool NullChunkData::haveAllNullsGuaranteed() const { + return allNullsGuaranteedInMem() && metadata.compMeta.min.get(); } void NullChunkData::serialize(Serializer& serializer) const { diff --git a/src/storage/store/column_chunk_stats.cpp b/src/storage/store/column_chunk_stats.cpp index f3f3b84e570..e1d5401a360 100644 --- a/src/storage/store/column_chunk_stats.cpp +++ b/src/storage/store/column_chunk_stats.cpp @@ -37,7 +37,8 @@ void ColumnChunkStats::reset() { void MergedColumnChunkStats::merge(const MergedColumnChunkStats& o, common::PhysicalTypeID dataType) { stats.update(o.stats.min, o.stats.max, dataType); - mayHaveNulls = mayHaveNulls || o.mayHaveNulls; + guaranteedNoNulls = guaranteedNoNulls && o.guaranteedNoNulls; + guaranteedAllNulls = guaranteedAllNulls && o.guaranteedAllNulls; } } // namespace storage diff --git a/src/storage/store/string_chunk_data.cpp b/src/storage/store/string_chunk_data.cpp index e04a061073a..d9ccdf8d261 100644 --- a/src/storage/store/string_chunk_data.cpp +++ b/src/storage/store/string_chunk_data.cpp @@ -108,7 +108,7 @@ void StringChunkData::scan(ValueVector& output, offset_t offset, length_t length sel_t posInOutputVector) const { KU_ASSERT(offset + length <= numValues && nullData); nullData->scan(output, offset, length, posInOutputVector); - if (nullData->mayHaveNull()) { + if (!nullData->noNullsGuaranteedInMem()) { for (auto i = 0u; i < length; i++) { if (!nullData->isNull(offset + i)) { output.setValue(posInOutputVector + i, diff --git a/test/test_files/filter/node.test b/test/test_files/filter/node.test index 92c6247a45b..5e95ee578d9 100644 --- a/test/test_files/filter/node.test +++ b/test/test_files/filter/node.test @@ -33,12 +33,6 @@ 100|100 -LOG NullCheckBool --STATEMENT MATCH (a:person) WHERE a.isStudent IS NULL RETURN COUNT(*) ----- 1 -0 --STATEMENT MATCH (a:person) WHERE a.isStudent IS NOT NULL RETURN COUNT(*) ----- 1 -9 -STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.isStudent=null ---- ok -STATEMENT MATCH (a:person) WHERE a.isStudent IS NULL RETURN COUNT(*) From 0e97a9687978139a8246c8bf15bc0d498b9ce8eb Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Thu, 19 Dec 2024 16:02:33 -0500 Subject: [PATCH 7/8] Address review comments --- .../filter/zonemap-node-null.benchmark | 3 +++ .../storage/predicate/column_predicate.h | 1 - .../storage/predicate/null_predicate.h | 21 ++++++------------- src/storage/predicate/column_predicate.cpp | 5 +++-- src/storage/predicate/null_predicate.cpp | 14 ++++++------- src/storage/store/column_chunk_data.cpp | 14 +++++-------- 6 files changed, 23 insertions(+), 35 deletions(-) create mode 100644 benchmark/queries/ldbc-sf100/filter/zonemap-node-null.benchmark diff --git a/benchmark/queries/ldbc-sf100/filter/zonemap-node-null.benchmark b/benchmark/queries/ldbc-sf100/filter/zonemap-node-null.benchmark new file mode 100644 index 00000000000..d35d3c6a453 --- /dev/null +++ b/benchmark/queries/ldbc-sf100/filter/zonemap-node-null.benchmark @@ -0,0 +1,3 @@ +-NAME zonemap-node-null +-QUERY MATCH (c:Comment) WHERE c.length IS NULL RETURN *; +---- 0 diff --git a/src/include/storage/predicate/column_predicate.h b/src/include/storage/predicate/column_predicate.h index fc62b538a94..96db60db4c5 100644 --- a/src/include/storage/predicate/column_predicate.h +++ b/src/include/storage/predicate/column_predicate.h @@ -59,7 +59,6 @@ class KUZU_API ColumnPredicate { struct ColumnPredicateUtil { static std::unique_ptr tryConvert(const binder::Expression& column, const binder::Expression& predicate); - common::ExpressionType expressionType; }; } // namespace storage diff --git a/src/include/storage/predicate/null_predicate.h b/src/include/storage/predicate/null_predicate.h index 4926918e492..8646c59930e 100644 --- a/src/include/storage/predicate/null_predicate.h +++ b/src/include/storage/predicate/null_predicate.h @@ -6,27 +6,18 @@ namespace kuzu { namespace storage { -class ColumnIsNullPredicate : public ColumnPredicate { +class ColumnNullPredicate : public ColumnPredicate { public: - explicit ColumnIsNullPredicate(std::string columnName) - : ColumnPredicate{std::move(columnName), common::ExpressionType::IS_NULL} {} - - common::ZoneMapCheckResult checkZoneMap(const MergedColumnChunkStats& stats) const override; - - std::unique_ptr copy() const override { - return std::make_unique(columnName); + explicit ColumnNullPredicate(std::string columnName, common::ExpressionType type) + : ColumnPredicate{std::move(columnName), type} { + KU_ASSERT( + type == common::ExpressionType::IS_NULL || type == common::ExpressionType::IS_NOT_NULL); } -}; - -class ColumnIsNotNullPredicate : public ColumnPredicate { -public: - explicit ColumnIsNotNullPredicate(std::string columnName) - : ColumnPredicate{std::move(columnName), common::ExpressionType::IS_NOT_NULL} {} common::ZoneMapCheckResult checkZoneMap(const MergedColumnChunkStats& stats) const override; std::unique_ptr copy() const override { - return std::make_unique(columnName); + return std::make_unique(columnName, expressionType); } }; diff --git a/src/storage/predicate/column_predicate.cpp b/src/storage/predicate/column_predicate.cpp index 6255f46c994..ccf8072c1e1 100644 --- a/src/storage/predicate/column_predicate.cpp +++ b/src/storage/predicate/column_predicate.cpp @@ -86,7 +86,7 @@ static std::unique_ptr tryConvertToIsNull(const Expression& col const Expression& predicate) { // we only convert simple predicates if (isSimpleExpr(*predicate.getChild(0))) { - return std::make_unique(column.toString()); + return std::make_unique(column.toString(), ExpressionType::IS_NULL); } return nullptr; } @@ -94,7 +94,8 @@ static std::unique_ptr tryConvertToIsNull(const Expression& col static std::unique_ptr tryConvertToIsNotNull(const Expression& column, const Expression& predicate) { if (isSimpleExpr(*predicate.getChild(0))) { - return std::make_unique(column.toString()); + return std::make_unique(column.toString(), + ExpressionType::IS_NOT_NULL); } return nullptr; } diff --git a/src/storage/predicate/null_predicate.cpp b/src/storage/predicate/null_predicate.cpp index 143653ca42f..3cf1eace6bf 100644 --- a/src/storage/predicate/null_predicate.cpp +++ b/src/storage/predicate/null_predicate.cpp @@ -3,15 +3,13 @@ #include "storage/store/column_chunk_stats.h" namespace kuzu::storage { -common::ZoneMapCheckResult ColumnIsNullPredicate::checkZoneMap( +common::ZoneMapCheckResult ColumnNullPredicate::checkZoneMap( const MergedColumnChunkStats& mergedStats) const { - return mergedStats.guaranteedNoNulls ? common::ZoneMapCheckResult::SKIP_SCAN : - common::ZoneMapCheckResult::ALWAYS_SCAN; + const bool statToCheck = (expressionType == common::ExpressionType::IS_NULL) ? + mergedStats.guaranteedNoNulls : + mergedStats.guaranteedAllNulls; + return statToCheck ? common::ZoneMapCheckResult::SKIP_SCAN : + common::ZoneMapCheckResult::ALWAYS_SCAN; } -common::ZoneMapCheckResult ColumnIsNotNullPredicate::checkZoneMap( - const MergedColumnChunkStats& mergedStats) const { - return mergedStats.guaranteedAllNulls ? common::ZoneMapCheckResult::SKIP_SCAN : - common::ZoneMapCheckResult::ALWAYS_SCAN; -} } // namespace kuzu::storage diff --git a/src/storage/store/column_chunk_data.cpp b/src/storage/store/column_chunk_data.cpp index e039bbc8b37..40b7029833a 100644 --- a/src/storage/store/column_chunk_data.cpp +++ b/src/storage/store/column_chunk_data.cpp @@ -181,7 +181,6 @@ static void updateInMemoryStats(ColumnChunkStats& stats, const ValueVector& valu uint64_t offset = 0, uint64_t numValues = std::numeric_limits::max()) { const auto physicalType = values.dataType.getPhysicalType(); const auto numValuesToCheck = std::min(numValues, values.state->getSelSize()); - // we pessimistically set mayHaveNulls to check the entire vector instead of the selected range stats.update(values.getData(), offset, numValuesToCheck, &values.getNullMask(), physicalType); } @@ -214,14 +213,11 @@ void ColumnChunkData::updateStats(const common::ValueVector* vector, } else { TypeUtils::visit( getDataType().getPhysicalType(), - [this, vector, &selVector](T) { - for (idx_t i = 0; i < selVector.getSelSize(); ++i) { - auto pos = selVector.getSelectedPositions()[i]; - if (!vector->isNull(pos)) { - const auto val = vector->getValue(pos); - inMemoryStats.update(StorageValue{val}, getDataType().getPhysicalType()); - } - } + [this, vector](T) { + vector->forEachNonNull([this, vector](auto pos) { + const auto val = vector->getValue(pos); + inMemoryStats.update(StorageValue{val}, getDataType().getPhysicalType()); + }); }, [](T) { static_assert(!StorageValueType); }); } From 11a990d27915622539a818d0d7813215dfab08a1 Mon Sep 17 00:00:00 2001 From: Royi Luo Date: Fri, 20 Dec 2024 10:02:19 -0500 Subject: [PATCH 8/8] Rename isSimpleExpr --- src/storage/predicate/column_predicate.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/storage/predicate/column_predicate.cpp b/src/storage/predicate/column_predicate.cpp index ccf8072c1e1..f00ac2334af 100644 --- a/src/storage/predicate/column_predicate.cpp +++ b/src/storage/predicate/column_predicate.cpp @@ -46,12 +46,12 @@ static bool isCastedColumnRef(const Expression& expr) { return false; } -static bool isSimpleExpr(const Expression& expr) { +static bool isColumnOrCastedColumnRef(const Expression& expr) { return isColumnRef(expr.expressionType) || isCastedColumnRef(expr); } static bool isColumnRefConstantPair(const Expression& left, const Expression& right) { - return isSimpleExpr(left) && right.expressionType == ExpressionType::LITERAL; + return isColumnOrCastedColumnRef(left) && right.expressionType == ExpressionType::LITERAL; } static bool columnMatchesExprChild(const Expression& column, const Expression& expr) { @@ -85,7 +85,7 @@ static std::unique_ptr tryConvertToConstColumnPredicate(const E static std::unique_ptr tryConvertToIsNull(const Expression& column, const Expression& predicate) { // we only convert simple predicates - if (isSimpleExpr(*predicate.getChild(0))) { + if (isColumnOrCastedColumnRef(*predicate.getChild(0))) { return std::make_unique(column.toString(), ExpressionType::IS_NULL); } return nullptr; @@ -93,7 +93,7 @@ static std::unique_ptr tryConvertToIsNull(const Expression& col static std::unique_ptr tryConvertToIsNotNull(const Expression& column, const Expression& predicate) { - if (isSimpleExpr(*predicate.getChild(0))) { + if (isColumnOrCastedColumnRef(*predicate.getChild(0))) { return std::make_unique(column.toString(), ExpressionType::IS_NOT_NULL); }