From 57d83c2f83e99940caf1b57d3facc4eed4605d36 Mon Sep 17 00:00:00 2001 From: Wang Fenjin Date: Mon, 25 Apr 2022 23:03:03 +0800 Subject: [PATCH] upgrade to 0.3.4 (#51) Change-Id: Ifacbe328c99f96fd42e8e87b492da0c0aeb7a26d --- .github/workflows/rust.yaml | 2 +- Cargo.toml | 4 +- libduckdb-sys/Cargo.toml | 2 +- libduckdb-sys/duckdb/duckdb.cpp | 121 +++++++++++++++----------------- libduckdb-sys/duckdb/duckdb.hpp | 32 +++++++-- libduckdb-sys/upgrade.sh | 2 +- 6 files changed, 89 insertions(+), 74 deletions(-) diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index f8de720e..b5256f30 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -37,7 +37,7 @@ jobs: - run: cargo fmt --all -- --check - name: Download DuckDB run: | - wget https://github.com/duckdb/duckdb/releases/download/v0.3.3/libduckdb-linux-amd64.zip -O libduckdb.zip + wget https://github.com/duckdb/duckdb/releases/download/v0.3.4/libduckdb-linux-amd64.zip -O libduckdb.zip unzip libduckdb.zip -d libduckdb # - run: cargo clippy --all-targets --workspace --features bundled --features modern-full -- -D warnings -A clippy::redundant-closure - run: cargo clippy --all-targets --workspace --features buildtime_bindgen --features modern-full -- -D warnings -A clippy::redundant-closure diff --git a/Cargo.toml b/Cargo.toml index 88929380..caf693dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "duckdb" -version = "0.3.3" +version = "0.3.4" authors = ["wangfenjin "] edition = "2021" description = "Ergonomic wrapper for DuckDB" @@ -69,7 +69,7 @@ tempdir = "0.3.7" [dependencies.libduckdb-sys] path = "libduckdb-sys" -version = "0.3.3" +version = "0.3.4" [package.metadata.docs.rs] features = [] diff --git a/libduckdb-sys/Cargo.toml b/libduckdb-sys/Cargo.toml index 056db952..6af3a197 100644 --- a/libduckdb-sys/Cargo.toml +++ b/libduckdb-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libduckdb-sys" -version = "0.3.3" +version = "0.3.4" authors = ["wangfenjin "] edition = "2021" build = "build.rs" diff --git a/libduckdb-sys/duckdb/duckdb.cpp b/libduckdb-sys/duckdb/duckdb.cpp index f83ee6be..be7086e2 100644 --- a/libduckdb-sys/duckdb/duckdb.cpp +++ b/libduckdb-sys/duckdb/duckdb.cpp @@ -3484,6 +3484,9 @@ string IndexCatalogEntry::ToSQL() { if (sql.empty()) { throw InternalException("Cannot convert INDEX to SQL because it was not created with a SQL statement"); } + if (sql[sql.size() - 1] != ';') { + sql += ";"; + } return sql; } @@ -63943,6 +63946,7 @@ void PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &result, Globa const idx_t count = ie_lstate.SelectOuterRows(ie_lstate.right_matches); if (!count) { ie_gstate.GetNextPair(context.client, ie_sink, ie_lstate); + continue; } SliceSortedPayload(result, ie_sink.tables[1]->global_sort_state, ie_lstate.right_base, ie_lstate.true_sel, @@ -72020,12 +72024,12 @@ void PartitionableHashTable::Partition() { D_ASSERT(radix_partitioned_hts.size() == 0); D_ASSERT(partition_info.n_partitions > 1); - vector partition_hts; + vector partition_hts(partition_info.n_partitions); for (auto &unpartitioned_ht : unpartitioned_hts) { for (idx_t r = 0; r < partition_info.n_partitions; r++) { radix_partitioned_hts[r].push_back(make_unique( buffer_manager, group_types, payload_types, bindings, HtEntryType::HT_WIDTH_32)); - partition_hts.push_back(radix_partitioned_hts[r].back().get()); + partition_hts[r] = radix_partitioned_hts[r].back().get(); } unpartitioned_ht->Partition(partition_hts, partition_info.radix_mask, partition_info.RADIX_SHIFT); 
unpartitioned_ht.reset(); @@ -72504,6 +72508,8 @@ template <> bool TrySubtractOperator::Operation(int32_t left, int32_t right, int32_t &result); template <> bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result); +template <> +bool TrySubtractOperator::Operation(hugeint_t left, hugeint_t right, hugeint_t &result); struct SubtractOperatorOverflowCheck { template @@ -83750,6 +83756,9 @@ void HistogramFun::RegisterFunction(BuiltinFunctions &set) { fun.AddFunction(GetHistogramFunction(LogicalType::TIMESTAMP_S)); fun.AddFunction(GetHistogramFunction(LogicalType::TIMESTAMP_MS)); fun.AddFunction(GetHistogramFunction(LogicalType::TIMESTAMP_NS)); + fun.AddFunction(GetHistogramFunction(LogicalType::TIME)); + fun.AddFunction(GetHistogramFunction(LogicalType::TIME_TZ)); + fun.AddFunction(GetHistogramFunction(LogicalType::DATE)); set.AddFunction(fun); } @@ -91556,6 +91565,10 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state, VectorData value_data; value_vector.Orrify(count, value_data); + // not required for a comparison of nested types + auto child_value = FlatVector::GetData(child_vector); + auto values = FlatVector::GetData(value_vector); + for (idx_t i = 0; i < count; i++) { auto list_index = list_data.sel->get_index(i); auto value_index = value_data.sel->get_index(i); @@ -91566,23 +91579,18 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state, } const auto &list_entry = list_entries[list_index]; - auto source_idx = child_data.sel->get_index(list_entry.offset); - // not required for a comparison of nested types - auto child_value = FlatVector::GetData(child_vector); - auto values = FlatVector::GetData(value_vector); - - result_entries[list_index] = OP::Initialize(); + result_entries[i] = OP::Initialize(); for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) { - auto child_value_idx = source_idx + child_idx; + auto child_value_idx = child_data.sel->get_index(list_entry.offset + child_idx); if (!child_data.validity.RowIsValid(child_value_idx)) { continue; } if (!is_nested) { if (ValueEqualsOrNot(child_value[child_value_idx], values[value_index])) { - result_entries[list_index] = OP::UpdateResultEntries(child_idx); + result_entries[i] = OP::UpdateResultEntries(child_idx); break; // Found value in list, no need to look further } } else { @@ -91590,7 +91598,7 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state, // to more efficiently compare nested types if (ValueEqualsOrNot(child_vector.GetValue(child_value_idx), value_vector.GetValue(value_index))) { - result_entries[list_index] = OP::UpdateResultEntries(child_idx); + result_entries[i] = OP::UpdateResultEntries(child_idx); break; // Found value in list, no need to look further } } @@ -92009,10 +92017,7 @@ static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vecto continue; } - auto source_idx = child_data.sel->get_index(list_entry.offset); - idx_t child_idx = 0; - - while (child_idx < list_entry.length) { + for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) { // states vector is full, update if (states_idx == STANDARD_VECTOR_SIZE) { @@ -92025,10 +92030,10 @@ static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vecto states_idx = 0; } - sel_vector.set_index(states_idx, source_idx + child_idx); + auto source_idx = child_data.sel->get_index(list_entry.offset + child_idx); + sel_vector.set_index(states_idx, source_idx); states_update[states_idx] = state_ptr; 
states_idx++; - child_idx++; } } @@ -92065,7 +92070,7 @@ static unique_ptr ListAggregateBind(ClientContext &context, Scalar // get the function name Value function_value = ExpressionExecutor::EvaluateScalar(*arguments[1]); - auto function_name = StringValue::Get(function_value); + auto function_name = function_value.ToString(); vector types; types.push_back(list_child_type); @@ -96177,6 +96182,12 @@ bool TrySubtractOperator::Operation(int64_t left, int64_t right, int64_t &result return true; } +template <> +bool TrySubtractOperator::Operation(hugeint_t left, hugeint_t right, hugeint_t &result) { + result = left; + return Hugeint::SubtractInPlace(result, right); +} + //===--------------------------------------------------------------------===// // subtract decimal with overflow check //===--------------------------------------------------------------------===// @@ -133896,28 +133907,25 @@ unique_ptr StatisticsPropagator::PropagateExpression(BoundAggreg namespace duckdb { -unique_ptr CastHugeintToSmallestType(unique_ptr expr, NumericStatistics &num_stats) { - // Compute range - if (num_stats.min.IsNull() || num_stats.max.IsNull()) { - return expr; - } - - auto min_val = num_stats.min.GetValue(); - auto max_val = num_stats.max.GetValue(); - if (max_val < min_val) { - return expr; - } +template +bool GetCastType(T signed_range, LogicalType &cast_type) { + auto range = static_cast::type>(signed_range); - // Prevent overflow - if (min_val < NumericLimits().Minimum() && max_val > NumericLimits().Maximum()) { - return expr; + // Check if this range fits in a smaller type + if (range < NumericLimits::Maximum()) { + cast_type = LogicalType::UTINYINT; + } else if (sizeof(T) > sizeof(uint16_t) && range < NumericLimits::Maximum()) { + cast_type = LogicalType::USMALLINT; + } else if (sizeof(T) > sizeof(uint32_t) && range < NumericLimits::Maximum()) { + cast_type = LogicalType::UINTEGER; + } else { + return false; } + return true; +} - // Compute range - auto range = max_val - min_val; - - // Check if this range fits in a smaller type - LogicalType cast_type; +template <> +bool GetCastType(hugeint_t range, LogicalType &cast_type) { if (range < NumericLimits().Maximum()) { cast_type = LogicalType::UTINYINT; } else if (range < NumericLimits().Maximum()) { @@ -133925,22 +133933,11 @@ unique_ptr CastHugeintToSmallestType(unique_ptr expr, Nu } else if (range < NumericLimits().Maximum()) { cast_type = LogicalType::UINTEGER; } else if (range < NumericLimits().Maximum()) { - cast_type = LogicalTypeId::UBIGINT; + cast_type = LogicalType::UBIGINT; } else { - return expr; + return false; } - - // Create expression to map to a smaller range - auto input_type = expr->return_type; - auto minimum_expr = make_unique(Value::CreateValue(min_val)); - vector> arguments; - arguments.push_back(move(expr)); - arguments.push_back(move(minimum_expr)); - auto minus_expr = make_unique(input_type, SubtractFun::GetFunction(input_type, input_type), - move(arguments), nullptr, true); - - // Cast to smaller type - return make_unique(move(minus_expr), cast_type); + return true; } template @@ -133958,21 +133955,14 @@ unique_ptr TemplatedCastToSmallestType(unique_ptr expr, // Compute range, cast to unsigned to prevent comparing signed with unsigned T signed_range; - if (!TrySubtractOperator::Operation(signed_min_val, signed_max_val, signed_range)) { + if (!TrySubtractOperator::Operation(signed_max_val, signed_min_val, signed_range)) { // overflow in subtraction: cannot do any simplification return expr; } - auto range = 
static_cast::type>(signed_range); // Check if this range fits in a smaller type LogicalType cast_type; - if (range < NumericLimits::Maximum()) { - cast_type = LogicalType::UTINYINT; - } else if (sizeof(T) > sizeof(uint16_t) && range < NumericLimits::Maximum()) { - cast_type = LogicalType::USMALLINT; - } else if (sizeof(T) > sizeof(uint32_t) && range < NumericLimits::Maximum()) { - cast_type = LogicalType::UINTEGER; - } else { + if (!GetCastType(signed_range, cast_type)) { return expr; } @@ -134008,7 +133998,7 @@ unique_ptr CastToSmallestType(unique_ptr expr, NumericSt case PhysicalType::INT64: return TemplatedCastToSmallestType(move(expr), num_stats); case PhysicalType::INT128: - return CastHugeintToSmallestType(move(expr), num_stats); + return TemplatedCastToSmallestType(move(expr), num_stats); default: throw NotImplementedException("Unknown integer type!"); } @@ -134976,6 +134966,8 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq if (join.conditions.size() > 1) { // there are multiple conditions: erase this condition join.conditions.erase(join.conditions.begin() + i); + // remove the corresponding statistics + join.join_stats.clear(); i--; continue; } else { @@ -160505,8 +160497,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt) { auto copy = make_unique(copy_function->function, move(function_data)); copy->file_path = stmt.info->file_path; copy->use_tmp_file = use_tmp_file; - LocalFileSystem fs; - copy->is_file_and_exists = fs.FileExists(copy->file_path); + copy->is_file_and_exists = config.file_system->FileExists(copy->file_path); copy->AddChild(move(select_node.plan)); @@ -175021,6 +175012,10 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co D_ASSERT(removed_column < column_definitions.size()); column_definitions.erase(column_definitions.begin() + removed_column); + for (idx_t i = 0; i < column_definitions.size(); i++) { + column_definitions[i].oid = i; + } + // alter the row_groups and remove the column from each of them this->row_groups = make_shared(); auto current_row_group = (RowGroup *)parent.row_groups->GetRootSegment(); diff --git a/libduckdb-sys/duckdb/duckdb.hpp b/libduckdb-sys/duckdb/duckdb.hpp index fb8c2d91..b411a2db 100644 --- a/libduckdb-sys/duckdb/duckdb.hpp +++ b/libduckdb-sys/duckdb/duckdb.hpp @@ -10,8 +10,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once #define DUCKDB_AMALGAMATION 1 -#define DUCKDB_SOURCE_ID "fe9ba8003" -#define DUCKDB_VERSION "v0.3.3" +#define DUCKDB_SOURCE_ID "662041e2b" +#define DUCKDB_VERSION "v0.3.4" //===----------------------------------------------------------------------===// // DuckDB // @@ -2472,6 +2472,13 @@ struct TemplatedValidityMask { entry_idx = row_idx / BITS_PER_VALUE; idx_in_entry = row_idx % BITS_PER_VALUE; } + //! Get an entry that has first-n bits set as valid and rest set as invalid + static inline V EntryWithValidBits(idx_t n) { + if (n == 0) { + return V(0); + } + return ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - n); + } //! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a //! not-null check @@ -2547,20 +2554,33 @@ struct TemplatedValidityMask { } } - //! Marks "count" entries in the validity mask as invalid (null) + //! 
Marks exactly "count" bits in the validity mask as invalid (null)
 	inline void SetAllInvalid(idx_t count) {
 		EnsureWritable();
-		for (idx_t i = 0; i < ValidityBuffer::EntryCount(count); i++) {
+		if (count == 0) {
+			return;
+		}
+		auto last_entry_index = ValidityBuffer::EntryCount(count) - 1;
+		for (idx_t i = 0; i < last_entry_index; i++) {
 			validity_mask[i] = 0;
 		}
+		auto last_entry_bits = count % static_cast<idx_t>(BITS_PER_VALUE);
+		validity_mask[last_entry_index] = (last_entry_bits == 0) ? 0 : (ValidityBuffer::MAX_ENTRY << (last_entry_bits));
 	}
 
-	//! Marks "count" entries in the validity mask as valid (not null)
+	//! Marks exactly "count" bits in the validity mask as valid (not null)
 	inline void SetAllValid(idx_t count) {
 		EnsureWritable();
-		for (idx_t i = 0; i < ValidityBuffer::EntryCount(count); i++) {
+		if (count == 0) {
+			return;
+		}
+		auto last_entry_index = ValidityBuffer::EntryCount(count) - 1;
+		for (idx_t i = 0; i < last_entry_index; i++) {
 			validity_mask[i] = ValidityBuffer::MAX_ENTRY;
 		}
+		auto last_entry_bits = count % static_cast<idx_t>(BITS_PER_VALUE);
+		validity_mask[last_entry_index] |=
+		    (last_entry_bits == 0) ? ValidityBuffer::MAX_ENTRY : ~(ValidityBuffer::MAX_ENTRY << (last_entry_bits));
 	}
 
 	inline bool IsMaskSet() const {
diff --git a/libduckdb-sys/upgrade.sh b/libduckdb-sys/upgrade.sh
index 17d2dfdf..71e0a1a2 100755
--- a/libduckdb-sys/upgrade.sh
+++ b/libduckdb-sys/upgrade.sh
@@ -10,7 +10,7 @@ export DUCKDB_LIB_DIR="$SCRIPT_DIR/duckdb"
 export DU_INCLUDE_DIR="$DUCKDB_LIB_DIR"
 
 # Download and extract amalgamation
-DUCKDB_VERSION=v0.3.3
+DUCKDB_VERSION=v0.3.4
 wget -T 20 "https://github.com/duckdb/duckdb/releases/download/$DUCKDB_VERSION/libduckdb-src.zip"
 unzip -o libduckdb-src.zip -d duckdb
 rm -f libduckdb-src.zip