From bbc85d701ab879a41bc4ed7dd50b49a79a1d2897 Mon Sep 17 00:00:00 2001 From: Elliana May Date: Mon, 3 Jun 2024 18:33:06 +0800 Subject: [PATCH 01/11] feat: support "large" arrow data types (#307) * feat: add large arrow type support * remove old match entry --- src/row.rs | 27 +++++++-------------- src/test_all_types.rs | 12 +++++++++- src/types/mod.rs | 8 +++---- src/types/value_ref.rs | 54 +++++++++++++++++++++++++++++++++--------- 4 files changed, 65 insertions(+), 36 deletions(-) diff --git a/src/row.rs b/src/row.rs index ac2c5104..d1ac7905 100644 --- a/src/row.rs +++ b/src/row.rs @@ -1,7 +1,7 @@ use std::{convert, sync::Arc}; use super::{Error, Result, Statement}; -use crate::types::{self, EnumType, FromSql, FromSqlError, ValueRef}; +use crate::types::{self, EnumType, FromSql, FromSqlError, ListType, ValueRef}; use arrow::array::DictionaryArray; use arrow::{ @@ -570,22 +570,6 @@ impl<'stmt> Row<'stmt> { _ => unimplemented!("{:?}", unit), }, // TODO: support more data types - // DataType::List(_) => make_string_from_list!(column, row), - // DataType::Dictionary(index_type, _value_type) => match **index_type { - // DataType::Int8 => dict_array_value_to_string::(column, row), - // DataType::Int16 => dict_array_value_to_string::(column, row), - // DataType::Int32 => dict_array_value_to_string::(column, row), - // DataType::Int64 => dict_array_value_to_string::(column, row), - // DataType::UInt8 => dict_array_value_to_string::(column, row), - // DataType::UInt16 => dict_array_value_to_string::(column, row), - // DataType::UInt32 => dict_array_value_to_string::(column, row), - // DataType::UInt64 => dict_array_value_to_string::(column, row), - // _ => Err(ArrowError::InvalidArgumentError(format!( - // "Pretty printing not supported for {:?} due to index type", - // column.data_type() - // ))), - // }, - // NOTE: DataTypes not supported by duckdb // DataType::Date64 => make_string_date!(array::Date64Array, column, row), // DataType::Time32(unit) if *unit == TimeUnit::Second => { @@ -597,10 +581,15 @@ impl<'stmt> Row<'stmt> { // DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => { // make_string_time!(array::Time64NanosecondArray, column, row) // } - DataType::List(_data) => { + DataType::LargeList(..) => { + let arr = column.as_any().downcast_ref::().unwrap(); + + ValueRef::List(ListType::Large(arr), row) + } + DataType::List(..) => { let arr = column.as_any().downcast_ref::().unwrap(); - ValueRef::List(arr, row) + ValueRef::List(ListType::Regular(arr), row) } DataType::Dictionary(key_type, ..) => { let column = column.as_any(); diff --git a/src/test_all_types.rs b/src/test_all_types.rs index 893088ac..1c324751 100644 --- a/src/test_all_types.rs +++ b/src/test_all_types.rs @@ -8,8 +8,18 @@ use crate::{ #[test] fn test_all_types() -> crate::Result<()> { - let database = Connection::open_in_memory()?; + test_with_database(&Connection::open_in_memory()?) 
+} + +#[test] +fn test_large_arrow_types() -> crate::Result<()> { + let cfg = crate::Config::default().with("arrow_large_buffer_size", "true")?; + let database = Connection::open_in_memory_with_flags(cfg)?; + + test_with_database(&database) +} +fn test_with_database(database: &Connection) -> crate::Result<()> { let excluded = vec![ // uhugeint, time_tz, and dec38_10 aren't supported in the duckdb arrow layer "uhugeint", diff --git a/src/types/mod.rs b/src/types/mod.rs index 79a7ad6a..93222b09 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -71,7 +71,7 @@ pub use self::{ from_sql::{FromSql, FromSqlError, FromSqlResult}, to_sql::{ToSql, ToSqlOutput}, value::Value, - value_ref::{EnumType, TimeUnit, ValueRef}, + value_ref::{EnumType, ListType, TimeUnit, ValueRef}, }; use arrow::datatypes::DataType; @@ -181,14 +181,12 @@ impl From<&DataType> for Type { DataType::Binary => Self::Blob, // DataType::FixedSizeBinary(_) => Self::FixedSizeBinary, // DataType::LargeBinary => Self::LargeBinary, - DataType::Utf8 => Self::Text, - // DataType::LargeUtf8 => Self::LargeUtf8, + DataType::LargeUtf8 | DataType::Utf8 => Self::Text, DataType::List(inner) => Self::List(Box::new(Type::from(inner.data_type()))), // DataType::FixedSizeList(field, size) => Self::Array, - // DataType::LargeList(_) => Self::LargeList, + DataType::LargeList(inner) => Self::List(Box::new(Type::from(inner.data_type()))), // DataType::Struct(inner) => Self::Struct, // DataType::Union(_, _) => Self::Union, - // DataType::Dictionary(_, _) => Self::Enum, DataType::Decimal128(..) => Self::Decimal, DataType::Decimal256(..) => Self::Decimal, // DataType::Map(field, ..) => Self::Map, diff --git a/src/types/value_ref.rs b/src/types/value_ref.rs index d520f8f1..60ef8c7d 100644 --- a/src/types/value_ref.rs +++ b/src/types/value_ref.rs @@ -4,7 +4,7 @@ use crate::types::{FromSqlError, FromSqlResult}; use crate::Row; use rust_decimal::prelude::*; -use arrow::array::{Array, DictionaryArray, ListArray}; +use arrow::array::{Array, ArrayRef, DictionaryArray, LargeListArray, ListArray}; use arrow::datatypes::{UInt16Type, UInt32Type, UInt8Type}; /// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. @@ -75,11 +75,20 @@ pub enum ValueRef<'a> { nanos: i64, }, /// The value is a list - List(&'a ListArray, usize), + List(ListType<'a>, usize), /// The value is an enum Enum(EnumType<'a>, usize), } +/// Wrapper type for different list sizes +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum ListType<'a> { + /// The underlying list is a `ListArray` + Regular(&'a ListArray), + /// The underlying list is a `LargeListArray` + Large(&'a LargeListArray), +} + /// Wrapper type for different enum sizes #[derive(Debug, Copy, Clone, PartialEq)] pub enum EnumType<'a> { @@ -116,7 +125,10 @@ impl ValueRef<'_> { ValueRef::Date32(_) => Type::Date32, ValueRef::Time64(..) => Type::Time64, ValueRef::Interval { .. } => Type::Interval, - ValueRef::List(arr, _) => arr.data_type().into(), + ValueRef::List(arr, _) => match arr { + ListType::Large(arr) => arr.data_type().into(), + ListType::Regular(arr) => arr.data_type().into(), + }, ValueRef::Enum(..) 
=> Type::Enum, } } @@ -177,14 +189,26 @@ impl From> for Value { ValueRef::Date32(d) => Value::Date32(d), ValueRef::Time64(t, d) => Value::Time64(t, d), ValueRef::Interval { months, days, nanos } => Value::Interval { months, days, nanos }, - ValueRef::List(items, idx) => { - let offsets = items.offsets(); - let range = offsets[idx]..offsets[idx + 1]; - let map: Vec = range - .map(|row| Row::value_ref_internal(row.try_into().unwrap(), idx, items.values()).to_owned()) - .collect(); - Value::List(map) - } + ValueRef::List(items, idx) => match items { + ListType::Regular(items) => { + let offsets = items.offsets(); + from_list( + offsets[idx].try_into().unwrap(), + offsets[idx + 1].try_into().unwrap(), + idx, + items.values(), + ) + } + ListType::Large(items) => { + let offsets = items.offsets(); + from_list( + offsets[idx].try_into().unwrap(), + offsets[idx + 1].try_into().unwrap(), + idx, + items.values(), + ) + } + }, ValueRef::Enum(items, idx) => { let value = Row::value_ref_internal( idx, @@ -207,6 +231,14 @@ impl From> for Value { } } +fn from_list(start: usize, end: usize, idx: usize, values: &ArrayRef) -> Value { + Value::List( + (start..end) + .map(|row| Row::value_ref_internal(row, idx, values).to_owned()) + .collect(), + ) +} + impl<'a> From<&'a str> for ValueRef<'a> { #[inline] fn from(s: &str) -> ValueRef<'_> { From d7438c7d3e336930ab12fc21d8dcbca225440060 Mon Sep 17 00:00:00 2001 From: yfu Date: Mon, 3 Jun 2024 20:33:46 +1000 Subject: [PATCH 02/11] Fixes the issue ignoring nulls when copy from arrow array to flat vector (#316) * null fix after copying data from array to duckdb chunk * add test to cover null cases --- src/vtab/arrow.rs | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/vtab/arrow.rs b/src/vtab/arrow.rs index 3d6c23c2..b5835cbe 100644 --- a/src/vtab/arrow.rs +++ b/src/vtab/arrow.rs @@ -257,6 +257,13 @@ pub fn record_batch_to_duckdb_data_chunk( fn primitive_array_to_flat_vector(array: &PrimitiveArray, out_vector: &mut FlatVector) { // assert!(array.len() <= out_vector.capacity()); out_vector.copy::(array.values()); + if let Some(nulls) = array.nulls() { + for (i, null) in nulls.into_iter().enumerate() { + if !null { + out_vector.set_null(i); + } + } + } } fn primitive_array_to_flat_vector_cast( @@ -267,6 +274,13 @@ fn primitive_array_to_flat_vector_cast( let array = arrow::compute::kernels::cast::cast(array, &data_type).unwrap(); let out_vector: &mut FlatVector = out_vector.as_mut_any().downcast_mut().unwrap(); out_vector.copy::(array.as_primitive::().values()); + if let Some(nulls) = array.nulls() { + for (i, null) in nulls.iter().enumerate() { + if !null { + out_vector.set_null(i); + } + } + } } fn primitive_array_to_vector(array: &dyn Array, out: &mut dyn Vector) -> Result<(), Box> { @@ -655,7 +669,7 @@ mod test { db.register_table_function::("arrow")?; // Roundtrip a record batch from Rust to DuckDB and back to Rust - let schema = Schema::new(vec![Field::new("a", input_array.data_type().clone(), false)]); + let schema = Schema::new(vec![Field::new("a", input_array.data_type().clone(), true)]); let rb = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(input_array.clone())])?; let param = arrow_recordbatch_to_query_params(rb); @@ -746,6 +760,22 @@ mod test { Ok(()) } + #[test] + fn test_primitive_roundtrip_contains_nulls() -> Result<(), Box> { + let mut builder = arrow::array::PrimitiveBuilder::::new(); + builder.append_value(1); + builder.append_null(); + builder.append_value(3); + builder.append_null(); + 
builder.append_null(); + builder.append_value(6); + let array = builder.finish(); + + check_rust_primitive_array_roundtrip(array.clone(), array)?; + + Ok(()) + } + #[test] fn test_timestamp_roundtrip() -> Result<(), Box> { check_rust_primitive_array_roundtrip(Int32Array::from(vec![1, 2, 3]), Int32Array::from(vec![1, 2, 3]))?; From 6640ff20d739abd692a477943db5c4bdf5a1e1e8 Mon Sep 17 00:00:00 2001 From: era127 <127330949+era127@users.noreply.github.com> Date: Mon, 3 Jun 2024 06:34:14 -0400 Subject: [PATCH 03/11] change SQLite to DuckDB (#308) --- src/cache.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index c5fbc296..cd2d44eb 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -22,7 +22,7 @@ impl Connection { /// stmt.execute(["Joe Smith"])?; /// } /// { - /// // This will return the same underlying SQLite statement handle without + /// // This will return the same underlying DuckDB statement handle without /// // having to prepare it again. /// let mut stmt = conn.prepare_cached("INSERT INTO People (name) VALUES (?)")?; /// stmt.execute(["Bob Jones"])?; @@ -34,7 +34,7 @@ impl Connection { /// # Failure /// /// Will return `Err` if `sql` cannot be converted to a C-compatible string - /// or if the underlying SQLite call fails. + /// or if the underlying DuckDB call fails. #[inline] pub fn prepare_cached(&self, sql: &str) -> Result> { self.cache.get(self, sql) @@ -135,7 +135,7 @@ impl StatementCache { // # Failure // // Will return `Err` if no cached statement can be found and the underlying - // SQLite prepare call fails. + // DuckDB prepare call fails. fn get<'conn>(&'conn self, conn: &'conn Connection, sql: &str) -> Result> { let trimmed = sql.trim(); let mut cache = self.0.borrow_mut(); From 4f6535c99ba5972719c5dac0b98373c4a6c4cb0c Mon Sep 17 00:00:00 2001 From: Swoorup Joshi Date: Wed, 5 Jun 2024 02:42:26 +1000 Subject: [PATCH 04/11] Organise workspace crates and consolidate Cargo.toml manifest properties/dependencies (#285) * Organise workspace crates and unify Cargo.toml manifests * Update code coverage and contributing.md * double choco openssl install timeout * num_integer is not optional * move httpfs later...? 
* before * include dir * add note * fix memory leak in api usage test * specify crates explicitly * swap action * I guess it just doesnt work with workspace members * swap again * swap yet again * ignore unpublished change * typo --------- Co-authored-by: Max Gabrielsson Co-authored-by: Max Gabrielsson --- .github/.codecov.yml | 4 +- .github/workflows/release.yaml | 6 +- .github/workflows/rust.yaml | 6 +- .gitignore | 6 +- .gitmodules | 4 +- CONTRIBUTING.md | 8 +- Cargo.toml | 130 +++++++----------- crates/duckdb-loadable-macros/Cargo.toml | 22 +++ crates/duckdb-loadable-macros/LICENSE | 1 + crates/duckdb-loadable-macros/README.md | 1 + .../duckdb-loadable-macros}/src/lib.rs | 0 crates/duckdb/Cargo.toml | 93 +++++++++++++ .../examples}/Movies_Social_metadata.xlsx | Bin .../duckdb/examples}/appender.rs | 0 {examples => crates/duckdb/examples}/basic.rs | 0 .../duckdb/examples}/date.xlsx | Bin .../duckdb/examples}/hello-ext/main.rs | 0 .../duckdb/examples}/int32_decimal.parquet | Bin .../duckdb/examples}/parquet.rs | 0 {src => crates/duckdb/src}/appender/arrow.rs | 0 {src => crates/duckdb/src}/appender/mod.rs | 0 {src => crates/duckdb/src}/appender_params.rs | 0 {src => crates/duckdb/src}/arrow_batch.rs | 0 {src => crates/duckdb/src}/cache.rs | 0 {src => crates/duckdb/src}/column.rs | 0 {src => crates/duckdb/src}/config.rs | 0 {src => crates/duckdb/src}/error.rs | 0 {src => crates/duckdb/src}/extension.rs | 0 .../duckdb/src}/inner_connection.rs | 0 {src => crates/duckdb/src}/lib.rs | 2 +- {src => crates/duckdb/src}/params.rs | 0 .../duckdb/src}/polars_dataframe.rs | 0 {src => crates/duckdb/src}/pragma.rs | 0 {src => crates/duckdb/src}/r2d2.rs | 0 {src => crates/duckdb/src}/raw_statement.rs | 0 {src => crates/duckdb/src}/row.rs | 3 +- {src => crates/duckdb/src}/statement.rs | 0 {src => crates/duckdb/src}/test_all_types.rs | 0 {src => crates/duckdb/src}/transaction.rs | 0 {src => crates/duckdb/src}/types/chrono.rs | 0 {src => crates/duckdb/src}/types/from_sql.rs | 0 {src => crates/duckdb/src}/types/mod.rs | 0 .../duckdb/src}/types/serde_json.rs | 0 {src => crates/duckdb/src}/types/to_sql.rs | 0 {src => crates/duckdb/src}/types/url.rs | 0 {src => crates/duckdb/src}/types/value.rs | 0 {src => crates/duckdb/src}/types/value_ref.rs | 6 +- {src => crates/duckdb/src}/util/mod.rs | 0 {src => crates/duckdb/src}/util/small_cstr.rs | 0 {src => crates/duckdb/src}/vtab/arrow.rs | 0 {src => crates/duckdb/src}/vtab/data_chunk.rs | 0 {src => crates/duckdb/src}/vtab/excel.rs | 0 {src => crates/duckdb/src}/vtab/function.rs | 0 .../duckdb/src}/vtab/logical_type.rs | 0 {src => crates/duckdb/src}/vtab/mod.rs | 0 {src => crates/duckdb/src}/vtab/value.rs | 0 {src => crates/duckdb/src}/vtab/vector.rs | 0 crates/libduckdb-sys/Cargo.toml | 42 ++++++ crates/libduckdb-sys/LICENSE | 1 + crates/libduckdb-sys/README.md | 1 + .../libduckdb-sys}/bindgen.sh | 0 .../libduckdb-sys}/build.rs | 19 +-- .../libduckdb-sys}/duckdb.tar.gz | Bin .../libduckdb-sys}/openssl/cfgs.rs | 0 .../libduckdb-sys}/openssl/expando.c | 0 .../libduckdb-sys}/openssl/find_normal.rs | 0 .../libduckdb-sys}/openssl/find_vendored.rs | 0 .../libduckdb-sys}/openssl/mod.rs | 0 .../libduckdb-sys}/openssl/run_bindgen.rs | 0 .../src/bindgen_bundled_version.rs | 0 .../libduckdb-sys}/src/error.rs | 0 .../libduckdb-sys}/src/lib.rs | 21 ++- .../libduckdb-sys}/src/raw_statement.rs | 0 .../libduckdb-sys}/update_sources.py | 0 .../libduckdb-sys}/upgrade.sh | 0 .../libduckdb-sys}/wrapper.h | 0 duckdb-loadable-macros/Cargo.toml | 22 --- duckdb-loadable-macros/LICENSE | 1 
- duckdb-loadable-macros/README.md | 1 - libduckdb-sys/Cargo.toml | 42 ------ libduckdb-sys/LICENSE | 1 - libduckdb-sys/README.md | 1 - libduckdb-sys/duckdb-sources | 1 - 83 files changed, 249 insertions(+), 196 deletions(-) create mode 100644 crates/duckdb-loadable-macros/Cargo.toml create mode 120000 crates/duckdb-loadable-macros/LICENSE create mode 120000 crates/duckdb-loadable-macros/README.md rename {duckdb-loadable-macros => crates/duckdb-loadable-macros}/src/lib.rs (100%) create mode 100644 crates/duckdb/Cargo.toml rename {examples => crates/duckdb/examples}/Movies_Social_metadata.xlsx (100%) rename {examples => crates/duckdb/examples}/appender.rs (100%) rename {examples => crates/duckdb/examples}/basic.rs (100%) rename {examples => crates/duckdb/examples}/date.xlsx (100%) rename {examples => crates/duckdb/examples}/hello-ext/main.rs (100%) rename {examples => crates/duckdb/examples}/int32_decimal.parquet (100%) rename {examples => crates/duckdb/examples}/parquet.rs (100%) rename {src => crates/duckdb/src}/appender/arrow.rs (100%) rename {src => crates/duckdb/src}/appender/mod.rs (100%) rename {src => crates/duckdb/src}/appender_params.rs (100%) rename {src => crates/duckdb/src}/arrow_batch.rs (100%) rename {src => crates/duckdb/src}/cache.rs (100%) rename {src => crates/duckdb/src}/column.rs (100%) rename {src => crates/duckdb/src}/config.rs (100%) rename {src => crates/duckdb/src}/error.rs (100%) rename {src => crates/duckdb/src}/extension.rs (100%) rename {src => crates/duckdb/src}/inner_connection.rs (100%) rename {src => crates/duckdb/src}/lib.rs (99%) rename {src => crates/duckdb/src}/params.rs (100%) rename {src => crates/duckdb/src}/polars_dataframe.rs (100%) rename {src => crates/duckdb/src}/pragma.rs (100%) rename {src => crates/duckdb/src}/r2d2.rs (100%) rename {src => crates/duckdb/src}/raw_statement.rs (100%) rename {src => crates/duckdb/src}/row.rs (99%) rename {src => crates/duckdb/src}/statement.rs (100%) rename {src => crates/duckdb/src}/test_all_types.rs (100%) rename {src => crates/duckdb/src}/transaction.rs (100%) rename {src => crates/duckdb/src}/types/chrono.rs (100%) rename {src => crates/duckdb/src}/types/from_sql.rs (100%) rename {src => crates/duckdb/src}/types/mod.rs (100%) rename {src => crates/duckdb/src}/types/serde_json.rs (100%) rename {src => crates/duckdb/src}/types/to_sql.rs (100%) rename {src => crates/duckdb/src}/types/url.rs (100%) rename {src => crates/duckdb/src}/types/value.rs (100%) rename {src => crates/duckdb/src}/types/value_ref.rs (98%) rename {src => crates/duckdb/src}/util/mod.rs (100%) rename {src => crates/duckdb/src}/util/small_cstr.rs (100%) rename {src => crates/duckdb/src}/vtab/arrow.rs (100%) rename {src => crates/duckdb/src}/vtab/data_chunk.rs (100%) rename {src => crates/duckdb/src}/vtab/excel.rs (100%) rename {src => crates/duckdb/src}/vtab/function.rs (100%) rename {src => crates/duckdb/src}/vtab/logical_type.rs (100%) rename {src => crates/duckdb/src}/vtab/mod.rs (100%) rename {src => crates/duckdb/src}/vtab/value.rs (100%) rename {src => crates/duckdb/src}/vtab/vector.rs (100%) create mode 100644 crates/libduckdb-sys/Cargo.toml create mode 120000 crates/libduckdb-sys/LICENSE create mode 120000 crates/libduckdb-sys/README.md rename {libduckdb-sys => crates/libduckdb-sys}/bindgen.sh (100%) rename {libduckdb-sys => crates/libduckdb-sys}/build.rs (98%) rename {libduckdb-sys => crates/libduckdb-sys}/duckdb.tar.gz (100%) rename {libduckdb-sys => crates/libduckdb-sys}/openssl/cfgs.rs (100%) rename {libduckdb-sys => 
crates/libduckdb-sys}/openssl/expando.c (100%) rename {libduckdb-sys => crates/libduckdb-sys}/openssl/find_normal.rs (100%) rename {libduckdb-sys => crates/libduckdb-sys}/openssl/find_vendored.rs (100%) rename {libduckdb-sys => crates/libduckdb-sys}/openssl/mod.rs (100%) rename {libduckdb-sys => crates/libduckdb-sys}/openssl/run_bindgen.rs (100%) rename {libduckdb-sys => crates/libduckdb-sys}/src/bindgen_bundled_version.rs (100%) rename {libduckdb-sys => crates/libduckdb-sys}/src/error.rs (100%) rename {libduckdb-sys => crates/libduckdb-sys}/src/lib.rs (93%) rename {libduckdb-sys => crates/libduckdb-sys}/src/raw_statement.rs (100%) rename {libduckdb-sys => crates/libduckdb-sys}/update_sources.py (100%) rename {libduckdb-sys => crates/libduckdb-sys}/upgrade.sh (100%) rename {libduckdb-sys => crates/libduckdb-sys}/wrapper.h (100%) delete mode 100644 duckdb-loadable-macros/Cargo.toml delete mode 120000 duckdb-loadable-macros/LICENSE delete mode 120000 duckdb-loadable-macros/README.md delete mode 100644 libduckdb-sys/Cargo.toml delete mode 120000 libduckdb-sys/LICENSE delete mode 120000 libduckdb-sys/README.md delete mode 160000 libduckdb-sys/duckdb-sources diff --git a/.github/.codecov.yml b/.github/.codecov.yml index ab126bc2..e8c58cf3 100644 --- a/.github/.codecov.yml +++ b/.github/.codecov.yml @@ -1,6 +1,6 @@ ignore: - - "libduckdb-sys/duckdb" - - "libduckdb-sys/src/bindgen_bundled_version.rs" + - "crates/libduckdb-sys/duckdb" + - "crates/libduckdb-sys/src/bindgen_bundled_version.rs" coverage: status: project: diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 065f1737..91f4c6ae 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -20,13 +20,13 @@ jobs: components: 'rustfmt, clippy' # cargo publish - - uses: wangfenjin/publish-crates@main - name: cargo publish + - name: publish crates + uses: katyo/publish-crates@v2 with: path: './' args: --no-verify --allow-dirty --all-features registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} - ignore-published: true + ignore-unpublished-changes: true # create release - name: "Build Changelog" diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index 0ddf6416..3928a586 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -145,10 +145,10 @@ jobs: # as the other tests have them. 
RUST_BACKTRACE: "0" run: cargo -Z build-std test --features "modern-full extensions-full" --target x86_64-unknown-linux-gnu - - uses: wangfenjin/publish-crates@main - name: cargo publish --dry-run + - name: publish crates --dry-run + uses: katyo/publish-crates@v2 with: path: './' args: --allow-dirty --all-features dry-run: true - ignore-published: true + ignore-unpublished-changes: true diff --git a/.gitignore b/.gitignore index 0b6b9228..02483800 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,6 @@ Cargo.lock *.db -libduckdb-sys/duckdb-sources/ -libduckdb-sys/duckdb/ -libduckdb-sys/._duckdb +crates/libduckdb-sys/duckdb-sources/ +crates/libduckdb-sys/duckdb/ +crates/libduckdb-sys/._duckdb diff --git a/.gitmodules b/.gitmodules index 4754e802..c35364a0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ -[submodule "libduckdb-sys/duckdb-sources"] - path = libduckdb-sys/duckdb-sources +[submodule "crates/libduckdb-sys/duckdb-sources"] + path = crates/libduckdb-sys/duckdb-sources url = https://github.com/duckdb/duckdb update = none diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bac02655..16bc8bed 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -43,24 +43,24 @@ export DUCKDB_INCLUDE_DIR=~/duckdb-lib Use the exported library and header: ```shell -cd ~/github/duckdb-rs/libduckdb-sys +cd ~/github/duckdb-rs/crates/libduckdb-sys cargo test --features buildtime_bindgen ``` Use the bundled header file: ```shell -cd ~/github/duckdb-rs/libduckdb-sys +cd ~/github/duckdb-rs/crates/libduckdb-sys cargo test --features bundled ``` -Currently in [github actions](https://github.com/wangfenjin/duckdb-rs/actions), we always use the bundled file for testing. So if you change the header in duckdb-cpp repo, you need to make the PR merged and updated the [bundled-file](https://github.com/wangfenjin/duckdb-rs/tree/main/libduckdb-sys/duckdb). +Currently in [github actions](https://github.com/wangfenjin/duckdb-rs/actions), we always use the bundled file for testing. So if you change the header in duckdb-cpp repo, you need to make the PR merged and updated the [bundled-file](https://github.com/wangfenjin/duckdb-rs/tree/main/crates/libduckdb-sys/duckdb). 
You can generated the amalgamated file by: ```shell cd ~/github/duckdb mkdir -p build/amaldebug python scripts/amalgamation.py -cp src/amalgamation/duckdb.cpp src/include/duckdb.h src/amalgamation/duckdb.hpp ../duckdb-rs/libduckdb-sys/duckdb/ +cp src/amalgamation/duckdb.cpp src/include/duckdb.h src/amalgamation/duckdb.hpp ../duckdb-rs/crates/libduckdb-sys/duckdb/ ``` ### duckdb-rs diff --git a/Cargo.toml b/Cargo.toml index 99c4f03a..5f176778 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,15 @@ -[package] -name = "duckdb" +[workspace] +resolver = "2" +members = [ + "crates/duckdb", + "crates/libduckdb-sys", + "crates/duckdb-loadable-macros" +] + +[workspace.package] version = "0.10.2" authors = ["wangfenjin "] edition = "2021" -description = "Ergonomic wrapper for DuckDB" repository = "https://github.com/wangfenjin/duckdb-rs" homepage = "https://github.com/wangfenjin/duckdb-rs" documentation = "http://docs.rs/duckdb/" @@ -12,90 +18,46 @@ keywords = ["duckdb", "database", "ffi"] license = "MIT" categories = ["database"] -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[lib] -name = "duckdb" - -[workspace] -members = ["libduckdb-sys", "duckdb-loadable-macros"] - -[features] -default = [] -bundled = ["libduckdb-sys/bundled"] -httpfs = ["libduckdb-sys/httpfs", "bundled"] -json = ["libduckdb-sys/json", "bundled"] -parquet = ["libduckdb-sys/parquet", "bundled"] -vtab = [] -vtab-loadable = ["vtab", "duckdb-loadable-macros"] -vtab-excel = ["vtab", "calamine"] -vtab-arrow = ["vtab", "num"] -appender-arrow = ["vtab-arrow"] -vtab-full = ["vtab-excel", "vtab-arrow", "appender-arrow"] -extensions-full = ["httpfs", "json", "parquet", "vtab-full"] -buildtime_bindgen = ["libduckdb-sys/buildtime_bindgen"] -modern-full = ["chrono", "serde_json", "url", "r2d2", "uuid", "polars"] -polars = ["dep:polars"] -chrono = ["dep:chrono", "num-integer"] - -[dependencies] -# time = { version = "0.3.2", features = ["formatting", "parsing"], optional = true } -hashlink = "0.8" -chrono = { version = "0.4.22", optional = true } -serde_json = { version = "1.0", optional = true } -csv = { version = "1.1", optional = true } -url = { version = "2.1", optional = true } -lazy_static = { version = "1.4", optional = true } -byteorder = { version = "1.3", features = ["i128"], optional = true } +[workspace.dependencies] +duckdb = { version = "0.10.2", path = "crates/duckdb" } +libduckdb-sys = { version = "0.10.2", path = "crates/libduckdb-sys" } +duckdb-loadable-macros = { version = "0.1.1", path = "crates/duckdb-loadable-macros" } +autocfg = "1.0" +bindgen = { version = "0.69", default-features = false } +byteorder = "1.3" +calamine = "0.22.0" +cast = "0.3" +cc = "1.0" +chrono = "0.4.22" +csv = "1.1" +doc-comment = "0.3" fallible-iterator = "0.3" fallible-streaming-iterator = "0.1" +flate2 = "1.0" +hashlink = "0.8" +lazy_static = "1.4" memchr = "2.3" -uuid = { version = "1.0", optional = true } -smallvec = "1.6.1" -cast = { version = "0.3", features = ["std"] } -arrow = { version = "51", default-features = false, features = ["prettyprint", "ffi"] } +num = { version = "0.4", default-features = false } +pkg-config = "0.3.24" +polars = "0.35.4" +polars-core = "0.35.4" +pretty_assertions = "1.4.0" +proc-macro2 = "1.0.56" +quote = "1.0.21" +r2d2 = "0.8.9" +rand = "0.8.3" +regex = "1.6" rust_decimal = "1.14" -strum = { version = "0.25", features = ["derive"] } -r2d2 = { version = "0.8.9", optional = true } -calamine = { version = "0.22.0", optional = true } -num = { version = "0.4", 
optional = true, default-features = false, features = ["std"] } -duckdb-loadable-macros = { version = "0.1.1", path="./duckdb-loadable-macros", optional = true } -polars = { version = "0.35.4", features = ["dtype-full"], optional = true} -num-integer = {version = "0.1.46", optional = true} - -[dev-dependencies] -doc-comment = "0.3" +serde = "1.0" +serde_json = "1.0" +smallvec = "1.6.1" +strum = "0.25" +syn = "2.0.15" +tar = "0.4.38" +tempdir = "0.3.7" tempfile = "3.1.0" -lazy_static = "1.4" -regex = "1.6" -uuid = { version = "1.0", features = ["v4"] } unicase = "2.6.0" -rand = "0.8.3" -tempdir = "0.3.7" -polars-core = "0.35.4" -pretty_assertions = "1.4.0" -# criterion = "0.3" - -# [[bench]] -# name = "data_types" -# harness = false - -[dependencies.libduckdb-sys] -path = "libduckdb-sys" -version = "0.10.2" - - -[package.metadata.docs.rs] -features = ['vtab', 'chrono'] -all-features = false -no-default-features = true -default-target = "x86_64-unknown-linux-gnu" - -[package.metadata.playground] -features = [] -all-features = false - -[[example]] -name = "hello-ext" -crate-type = ["cdylib"] -required-features = ["vtab-loadable"] +url = "2.1" +uuid = "1.0" +vcpkg = "0.2" +arrow = { version = "51", default-features = false } diff --git a/crates/duckdb-loadable-macros/Cargo.toml b/crates/duckdb-loadable-macros/Cargo.toml new file mode 100644 index 00000000..98ccaaf0 --- /dev/null +++ b/crates/duckdb-loadable-macros/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "duckdb-loadable-macros" +version = "0.1.1" +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +homepage.workspace = true +keywords.workspace = true +readme.workspace = true +categories = ["external-ffi-bindings", "database"] +description = "Native bindings to the libduckdb library, C API; build loadable extensions" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +proc-macro2 = { workspace = true } +quote = { workspace = true } +syn = { workspace = true, features = ["extra-traits", "full", "fold", "parsing"] } + +[lib] +proc-macro = true diff --git a/crates/duckdb-loadable-macros/LICENSE b/crates/duckdb-loadable-macros/LICENSE new file mode 120000 index 00000000..30cff740 --- /dev/null +++ b/crates/duckdb-loadable-macros/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/crates/duckdb-loadable-macros/README.md b/crates/duckdb-loadable-macros/README.md new file mode 120000 index 00000000..fe840054 --- /dev/null +++ b/crates/duckdb-loadable-macros/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/duckdb-loadable-macros/src/lib.rs b/crates/duckdb-loadable-macros/src/lib.rs similarity index 100% rename from duckdb-loadable-macros/src/lib.rs rename to crates/duckdb-loadable-macros/src/lib.rs diff --git a/crates/duckdb/Cargo.toml b/crates/duckdb/Cargo.toml new file mode 100644 index 00000000..5e45b701 --- /dev/null +++ b/crates/duckdb/Cargo.toml @@ -0,0 +1,93 @@ +[package] +name = "duckdb" +version = "0.10.2" +authors.workspace = true +edition.workspace = true +repository.workspace = true +homepage.workspace = true +documentation.workspace = true +readme.workspace = true +keywords.workspace = true +license.workspace = true +categories.workspace = true +description = "Ergonomic wrapper for DuckDB" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +name = "duckdb" + +[features] +default = [] +bundled = 
["libduckdb-sys/bundled"] +httpfs = ["libduckdb-sys/httpfs", "bundled"] +json = ["libduckdb-sys/json", "bundled"] +parquet = ["libduckdb-sys/parquet", "bundled"] +vtab = [] +vtab-loadable = ["vtab", "duckdb-loadable-macros"] +vtab-excel = ["vtab", "calamine"] +vtab-arrow = ["vtab", "num"] +appender-arrow = ["vtab-arrow"] +vtab-full = ["vtab-excel", "vtab-arrow", "appender-arrow"] +extensions-full = ["httpfs", "json", "parquet", "vtab-full"] +buildtime_bindgen = ["libduckdb-sys/buildtime_bindgen"] +modern-full = ["chrono", "serde_json", "url", "r2d2", "uuid", "polars"] +polars = ["dep:polars"] + +[dependencies] +libduckdb-sys = { workspace = true } +hashlink = { workspace = true } +chrono = { workspace = true, optional = true } +serde_json = { workspace = true, optional = true } +csv = { workspace = true, optional = true } +url = { workspace = true, optional = true } +lazy_static = { workspace = true, optional = true } +byteorder = { workspace = true, features = ["i128"], optional = true } +fallible-iterator = { workspace = true } +fallible-streaming-iterator = { workspace = true } +memchr = { workspace = true } +uuid = { workspace = true, optional = true } +smallvec = { workspace = true } +cast = { workspace = true, features = ["std"] } +arrow = { workspace = true, features = ["prettyprint", "ffi"] } +rust_decimal = { workspace = true } +strum = { workspace = true, features = ["derive"] } +r2d2 = { workspace = true, optional = true } +calamine = { workspace = true, optional = true } +num = { workspace = true, features = ["std"], optional = true } +duckdb-loadable-macros = { workspace = true, optional = true } +polars = { workspace = true, features = ["dtype-full"], optional = true } +num-integer = {version = "0.1.46"} + +[dev-dependencies] +doc-comment = { workspace = true } +tempfile = { workspace = true } +lazy_static = { workspace = true } +regex = { workspace = true } +uuid = { workspace = true, features = ["v4"] } +unicase = { workspace = true } +rand = { workspace = true } +tempdir = { workspace = true } +polars-core = { workspace = true } +pretty_assertions = { workspace = true } +# criterion = "0.3" + +# [[bench]] +# name = "data_types" +# harness = false + + +[package.metadata.docs.rs] +features = [] +all-features = false +no-default-features = true +default-target = "x86_64-unknown-linux-gnu" + +[package.metadata.playground] +features = [] +all-features = false + +[[example]] +name = "hello-ext" +crate-type = ["cdylib"] +required-features = ["vtab-loadable"] diff --git a/examples/Movies_Social_metadata.xlsx b/crates/duckdb/examples/Movies_Social_metadata.xlsx similarity index 100% rename from examples/Movies_Social_metadata.xlsx rename to crates/duckdb/examples/Movies_Social_metadata.xlsx diff --git a/examples/appender.rs b/crates/duckdb/examples/appender.rs similarity index 100% rename from examples/appender.rs rename to crates/duckdb/examples/appender.rs diff --git a/examples/basic.rs b/crates/duckdb/examples/basic.rs similarity index 100% rename from examples/basic.rs rename to crates/duckdb/examples/basic.rs diff --git a/examples/date.xlsx b/crates/duckdb/examples/date.xlsx similarity index 100% rename from examples/date.xlsx rename to crates/duckdb/examples/date.xlsx diff --git a/examples/hello-ext/main.rs b/crates/duckdb/examples/hello-ext/main.rs similarity index 100% rename from examples/hello-ext/main.rs rename to crates/duckdb/examples/hello-ext/main.rs diff --git a/examples/int32_decimal.parquet b/crates/duckdb/examples/int32_decimal.parquet similarity index 100% 
rename from examples/int32_decimal.parquet rename to crates/duckdb/examples/int32_decimal.parquet diff --git a/examples/parquet.rs b/crates/duckdb/examples/parquet.rs similarity index 100% rename from examples/parquet.rs rename to crates/duckdb/examples/parquet.rs diff --git a/src/appender/arrow.rs b/crates/duckdb/src/appender/arrow.rs similarity index 100% rename from src/appender/arrow.rs rename to crates/duckdb/src/appender/arrow.rs diff --git a/src/appender/mod.rs b/crates/duckdb/src/appender/mod.rs similarity index 100% rename from src/appender/mod.rs rename to crates/duckdb/src/appender/mod.rs diff --git a/src/appender_params.rs b/crates/duckdb/src/appender_params.rs similarity index 100% rename from src/appender_params.rs rename to crates/duckdb/src/appender_params.rs diff --git a/src/arrow_batch.rs b/crates/duckdb/src/arrow_batch.rs similarity index 100% rename from src/arrow_batch.rs rename to crates/duckdb/src/arrow_batch.rs diff --git a/src/cache.rs b/crates/duckdb/src/cache.rs similarity index 100% rename from src/cache.rs rename to crates/duckdb/src/cache.rs diff --git a/src/column.rs b/crates/duckdb/src/column.rs similarity index 100% rename from src/column.rs rename to crates/duckdb/src/column.rs diff --git a/src/config.rs b/crates/duckdb/src/config.rs similarity index 100% rename from src/config.rs rename to crates/duckdb/src/config.rs diff --git a/src/error.rs b/crates/duckdb/src/error.rs similarity index 100% rename from src/error.rs rename to crates/duckdb/src/error.rs diff --git a/src/extension.rs b/crates/duckdb/src/extension.rs similarity index 100% rename from src/extension.rs rename to crates/duckdb/src/extension.rs diff --git a/src/inner_connection.rs b/crates/duckdb/src/inner_connection.rs similarity index 100% rename from src/inner_connection.rs rename to crates/duckdb/src/inner_connection.rs diff --git a/src/lib.rs b/crates/duckdb/src/lib.rs similarity index 99% rename from src/lib.rs rename to crates/duckdb/src/lib.rs index 81e2a5d9..f86ad9c1 100644 --- a/src/lib.rs +++ b/crates/duckdb/src/lib.rs @@ -566,7 +566,7 @@ impl fmt::Debug for Connection { } #[cfg(doctest)] -doc_comment::doctest!("../README.md"); +doc_comment::doctest!("../../../README.md"); #[cfg(test)] mod test { diff --git a/src/params.rs b/crates/duckdb/src/params.rs similarity index 100% rename from src/params.rs rename to crates/duckdb/src/params.rs diff --git a/src/polars_dataframe.rs b/crates/duckdb/src/polars_dataframe.rs similarity index 100% rename from src/polars_dataframe.rs rename to crates/duckdb/src/polars_dataframe.rs diff --git a/src/pragma.rs b/crates/duckdb/src/pragma.rs similarity index 100% rename from src/pragma.rs rename to crates/duckdb/src/pragma.rs diff --git a/src/r2d2.rs b/crates/duckdb/src/r2d2.rs similarity index 100% rename from src/r2d2.rs rename to crates/duckdb/src/r2d2.rs diff --git a/src/raw_statement.rs b/crates/duckdb/src/raw_statement.rs similarity index 100% rename from src/raw_statement.rs rename to crates/duckdb/src/raw_statement.rs diff --git a/src/row.rs b/crates/duckdb/src/row.rs similarity index 99% rename from src/row.rs rename to crates/duckdb/src/row.rs index d1ac7905..cfdec186 100644 --- a/src/row.rs +++ b/crates/duckdb/src/row.rs @@ -3,9 +3,8 @@ use std::{convert, sync::Arc}; use super::{Error, Result, Statement}; use crate::types::{self, EnumType, FromSql, FromSqlError, ListType, ValueRef}; -use arrow::array::DictionaryArray; use arrow::{ - array::{self, Array, ArrayRef, ListArray, StructArray}, + array::{self, Array, ArrayRef, DictionaryArray, 
ListArray, StructArray}, datatypes::*, }; use fallible_iterator::FallibleIterator; diff --git a/src/statement.rs b/crates/duckdb/src/statement.rs similarity index 100% rename from src/statement.rs rename to crates/duckdb/src/statement.rs diff --git a/src/test_all_types.rs b/crates/duckdb/src/test_all_types.rs similarity index 100% rename from src/test_all_types.rs rename to crates/duckdb/src/test_all_types.rs diff --git a/src/transaction.rs b/crates/duckdb/src/transaction.rs similarity index 100% rename from src/transaction.rs rename to crates/duckdb/src/transaction.rs diff --git a/src/types/chrono.rs b/crates/duckdb/src/types/chrono.rs similarity index 100% rename from src/types/chrono.rs rename to crates/duckdb/src/types/chrono.rs diff --git a/src/types/from_sql.rs b/crates/duckdb/src/types/from_sql.rs similarity index 100% rename from src/types/from_sql.rs rename to crates/duckdb/src/types/from_sql.rs diff --git a/src/types/mod.rs b/crates/duckdb/src/types/mod.rs similarity index 100% rename from src/types/mod.rs rename to crates/duckdb/src/types/mod.rs diff --git a/src/types/serde_json.rs b/crates/duckdb/src/types/serde_json.rs similarity index 100% rename from src/types/serde_json.rs rename to crates/duckdb/src/types/serde_json.rs diff --git a/src/types/to_sql.rs b/crates/duckdb/src/types/to_sql.rs similarity index 100% rename from src/types/to_sql.rs rename to crates/duckdb/src/types/to_sql.rs diff --git a/src/types/url.rs b/crates/duckdb/src/types/url.rs similarity index 100% rename from src/types/url.rs rename to crates/duckdb/src/types/url.rs diff --git a/src/types/value.rs b/crates/duckdb/src/types/value.rs similarity index 100% rename from src/types/value.rs rename to crates/duckdb/src/types/value.rs diff --git a/src/types/value_ref.rs b/crates/duckdb/src/types/value_ref.rs similarity index 98% rename from src/types/value_ref.rs rename to crates/duckdb/src/types/value_ref.rs index 60ef8c7d..db06c462 100644 --- a/src/types/value_ref.rs +++ b/crates/duckdb/src/types/value_ref.rs @@ -4,8 +4,10 @@ use crate::types::{FromSqlError, FromSqlResult}; use crate::Row; use rust_decimal::prelude::*; -use arrow::array::{Array, ArrayRef, DictionaryArray, LargeListArray, ListArray}; -use arrow::datatypes::{UInt16Type, UInt32Type, UInt8Type}; +use arrow::{ + array::{Array, ArrayRef, DictionaryArray, LargeListArray, ListArray}, + datatypes::{UInt16Type, UInt32Type, UInt8Type}, +}; /// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds. 
/// Copy from arrow::datatypes::TimeUnit diff --git a/src/util/mod.rs b/crates/duckdb/src/util/mod.rs similarity index 100% rename from src/util/mod.rs rename to crates/duckdb/src/util/mod.rs diff --git a/src/util/small_cstr.rs b/crates/duckdb/src/util/small_cstr.rs similarity index 100% rename from src/util/small_cstr.rs rename to crates/duckdb/src/util/small_cstr.rs diff --git a/src/vtab/arrow.rs b/crates/duckdb/src/vtab/arrow.rs similarity index 100% rename from src/vtab/arrow.rs rename to crates/duckdb/src/vtab/arrow.rs diff --git a/src/vtab/data_chunk.rs b/crates/duckdb/src/vtab/data_chunk.rs similarity index 100% rename from src/vtab/data_chunk.rs rename to crates/duckdb/src/vtab/data_chunk.rs diff --git a/src/vtab/excel.rs b/crates/duckdb/src/vtab/excel.rs similarity index 100% rename from src/vtab/excel.rs rename to crates/duckdb/src/vtab/excel.rs diff --git a/src/vtab/function.rs b/crates/duckdb/src/vtab/function.rs similarity index 100% rename from src/vtab/function.rs rename to crates/duckdb/src/vtab/function.rs diff --git a/src/vtab/logical_type.rs b/crates/duckdb/src/vtab/logical_type.rs similarity index 100% rename from src/vtab/logical_type.rs rename to crates/duckdb/src/vtab/logical_type.rs diff --git a/src/vtab/mod.rs b/crates/duckdb/src/vtab/mod.rs similarity index 100% rename from src/vtab/mod.rs rename to crates/duckdb/src/vtab/mod.rs diff --git a/src/vtab/value.rs b/crates/duckdb/src/vtab/value.rs similarity index 100% rename from src/vtab/value.rs rename to crates/duckdb/src/vtab/value.rs diff --git a/src/vtab/vector.rs b/crates/duckdb/src/vtab/vector.rs similarity index 100% rename from src/vtab/vector.rs rename to crates/duckdb/src/vtab/vector.rs diff --git a/crates/libduckdb-sys/Cargo.toml b/crates/libduckdb-sys/Cargo.toml new file mode 100644 index 00000000..207bdf56 --- /dev/null +++ b/crates/libduckdb-sys/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "libduckdb-sys" +version = "0.10.2" +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +homepage.workspace = true +keywords.workspace = true +readme.workspace = true +build = "build.rs" +categories = ["external-ffi-bindings", "database"] +description = "Native bindings to the libduckdb library, C API" +exclude = ["duckdb-sources"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +default = ["vcpkg", "pkg-config"] +bundled = ["cc"] +buildtime_bindgen = ["bindgen", "pkg-config", "vcpkg"] + +httpfs = ["bundled"] +json = ["bundled"] +parquet = ["bundled"] +extensions-full = ["httpfs", "json", "parquet"] + +[dependencies] + +[build-dependencies] +autocfg = { workspace = true } +bindgen = { workspace = true, features = ["runtime"], optional = true } +flate2 = { workspace = true } +pkg-config = { workspace = true, optional = true } +cc = { workspace = true, features = ["parallel"], optional = true } +vcpkg = { workspace = true, optional = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +tar = { workspace = true } + +[dev-dependencies] +arrow = { workspace = true, features = ["ffi"] } diff --git a/crates/libduckdb-sys/LICENSE b/crates/libduckdb-sys/LICENSE new file mode 120000 index 00000000..30cff740 --- /dev/null +++ b/crates/libduckdb-sys/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/crates/libduckdb-sys/README.md b/crates/libduckdb-sys/README.md new file mode 120000 index 00000000..fe840054 --- /dev/null +++ 
b/crates/libduckdb-sys/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/libduckdb-sys/bindgen.sh b/crates/libduckdb-sys/bindgen.sh similarity index 100% rename from libduckdb-sys/bindgen.sh rename to crates/libduckdb-sys/bindgen.sh diff --git a/libduckdb-sys/build.rs b/crates/libduckdb-sys/build.rs similarity index 98% rename from libduckdb-sys/build.rs rename to crates/libduckdb-sys/build.rs index 92031acb..1ae4533a 100644 --- a/libduckdb-sys/build.rs +++ b/crates/libduckdb-sys/build.rs @@ -120,14 +120,6 @@ mod build_bundled { let mut cfg = cc::Build::new(); - #[cfg(feature = "httpfs")] - { - if let Ok((_, openssl_include_dir)) = super::openssl::get_openssl_v2() { - cfg.include(openssl_include_dir); - } - add_extension(&mut cfg, &manifest, "httpfs", &mut cpp_files, &mut include_dirs); - } - #[cfg(feature = "parquet")] add_extension(&mut cfg, &manifest, "parquet", &mut cpp_files, &mut include_dirs); @@ -146,6 +138,16 @@ mod build_bundled { cfg.include(lib_name); + // Note: dont move this, the link order is important and we need to make + // sure we link openssl after duckdb + #[cfg(feature = "httpfs")] + { + if let Ok((_, openssl_include_dir)) = super::openssl::get_openssl_v2() { + cfg.include(openssl_include_dir); + } + add_extension(&mut cfg, &manifest, "httpfs", &mut cpp_files, &mut include_dirs); + } + cfg.includes(include_dirs.iter().map(|x| format!("{}/{}", lib_name, x))); for f in cpp_files { @@ -163,7 +165,6 @@ mod build_bundled { if win_target() { cfg.define("DUCKDB_BUILD_LIBRARY", None); } - cfg.compile(lib_name); println!("cargo:lib_dir={out_dir}"); } diff --git a/libduckdb-sys/duckdb.tar.gz b/crates/libduckdb-sys/duckdb.tar.gz similarity index 100% rename from libduckdb-sys/duckdb.tar.gz rename to crates/libduckdb-sys/duckdb.tar.gz diff --git a/libduckdb-sys/openssl/cfgs.rs b/crates/libduckdb-sys/openssl/cfgs.rs similarity index 100% rename from libduckdb-sys/openssl/cfgs.rs rename to crates/libduckdb-sys/openssl/cfgs.rs diff --git a/libduckdb-sys/openssl/expando.c b/crates/libduckdb-sys/openssl/expando.c similarity index 100% rename from libduckdb-sys/openssl/expando.c rename to crates/libduckdb-sys/openssl/expando.c diff --git a/libduckdb-sys/openssl/find_normal.rs b/crates/libduckdb-sys/openssl/find_normal.rs similarity index 100% rename from libduckdb-sys/openssl/find_normal.rs rename to crates/libduckdb-sys/openssl/find_normal.rs diff --git a/libduckdb-sys/openssl/find_vendored.rs b/crates/libduckdb-sys/openssl/find_vendored.rs similarity index 100% rename from libduckdb-sys/openssl/find_vendored.rs rename to crates/libduckdb-sys/openssl/find_vendored.rs diff --git a/libduckdb-sys/openssl/mod.rs b/crates/libduckdb-sys/openssl/mod.rs similarity index 100% rename from libduckdb-sys/openssl/mod.rs rename to crates/libduckdb-sys/openssl/mod.rs diff --git a/libduckdb-sys/openssl/run_bindgen.rs b/crates/libduckdb-sys/openssl/run_bindgen.rs similarity index 100% rename from libduckdb-sys/openssl/run_bindgen.rs rename to crates/libduckdb-sys/openssl/run_bindgen.rs diff --git a/libduckdb-sys/src/bindgen_bundled_version.rs b/crates/libduckdb-sys/src/bindgen_bundled_version.rs similarity index 100% rename from libduckdb-sys/src/bindgen_bundled_version.rs rename to crates/libduckdb-sys/src/bindgen_bundled_version.rs diff --git a/libduckdb-sys/src/error.rs b/crates/libduckdb-sys/src/error.rs similarity index 100% rename from libduckdb-sys/src/error.rs rename to crates/libduckdb-sys/src/error.rs diff --git a/libduckdb-sys/src/lib.rs 
b/crates/libduckdb-sys/src/lib.rs similarity index 93% rename from libduckdb-sys/src/lib.rs rename to crates/libduckdb-sys/src/lib.rs index d7547535..ae57cadf 100644 --- a/libduckdb-sys/src/lib.rs +++ b/crates/libduckdb-sys/src/lib.rs @@ -33,18 +33,15 @@ mod tests { ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema}, }; - unsafe fn print_int_result(mut result: duckdb_result) { - for i in 0..duckdb_column_count(&mut result) { - print!( - "{} ", - CStr::from_ptr(duckdb_column_name(&mut result, i)).to_string_lossy() - ); + unsafe fn print_int_result(result: &mut duckdb_result) { + for i in 0..duckdb_column_count(result) { + print!("{} ", CStr::from_ptr(duckdb_column_name(result, i)).to_string_lossy()); } println!(); // print the data of the result - for row_idx in 0..duckdb_row_count(&mut result) { - for col_idx in 0..duckdb_column_count(&mut result) { - let val = duckdb_value_int32(&mut result, col_idx, row_idx); + for row_idx in 0..duckdb_row_count(result) { + for col_idx in 0..duckdb_column_count(result) { + let val = duckdb_value_int32(result, col_idx, row_idx); print!("{val} "); } println!(); @@ -162,7 +159,7 @@ mod tests { } assert_eq!(duckdb_row_count(&mut result), 3); assert_eq!(duckdb_column_count(&mut result), 2); - print_int_result(result); + print_int_result(&mut result); duckdb_destroy_result(&mut result); // test prepare @@ -179,7 +176,7 @@ mod tests { } assert_eq!(duckdb_row_count(&mut result), 2); assert_eq!(duckdb_column_count(&mut result), 2); - print_int_result(result); + print_int_result(&mut result); duckdb_destroy_result(&mut result); // test bind params again @@ -191,7 +188,7 @@ mod tests { } assert_eq!(duckdb_row_count(&mut result), 1); assert_eq!(duckdb_column_count(&mut result), 2); - print_int_result(result); + print_int_result(&mut result); duckdb_destroy_result(&mut result); duckdb_destroy_prepare(&mut stmt); diff --git a/libduckdb-sys/src/raw_statement.rs b/crates/libduckdb-sys/src/raw_statement.rs similarity index 100% rename from libduckdb-sys/src/raw_statement.rs rename to crates/libduckdb-sys/src/raw_statement.rs diff --git a/libduckdb-sys/update_sources.py b/crates/libduckdb-sys/update_sources.py similarity index 100% rename from libduckdb-sys/update_sources.py rename to crates/libduckdb-sys/update_sources.py diff --git a/libduckdb-sys/upgrade.sh b/crates/libduckdb-sys/upgrade.sh similarity index 100% rename from libduckdb-sys/upgrade.sh rename to crates/libduckdb-sys/upgrade.sh diff --git a/libduckdb-sys/wrapper.h b/crates/libduckdb-sys/wrapper.h similarity index 100% rename from libduckdb-sys/wrapper.h rename to crates/libduckdb-sys/wrapper.h diff --git a/duckdb-loadable-macros/Cargo.toml b/duckdb-loadable-macros/Cargo.toml deleted file mode 100644 index 30c40ab2..00000000 --- a/duckdb-loadable-macros/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "duckdb-loadable-macros" -version = "0.1.1" -authors = ["wangfenjin "] -edition = "2021" -license = "MIT" -repository = "https://github.com/wangfenjin/duckdb-rs" -homepage = "https://github.com/wangfenjin/duckdb-rs" -keywords = ["duckdb", "ffi", "database"] -readme = "README.md" -categories = ["external-ffi-bindings", "database"] -description = "Native bindings to the libduckdb library, C API; build loadable extensions" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -proc-macro2 = { version = "1.0.56" } -quote = { version = "1.0.21" } -syn = { version = "2.0.15", features = [ "extra-traits", "full", "fold", "parsing" ] } - -[lib] 
-proc-macro = true diff --git a/duckdb-loadable-macros/LICENSE b/duckdb-loadable-macros/LICENSE deleted file mode 120000 index ea5b6064..00000000 --- a/duckdb-loadable-macros/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/duckdb-loadable-macros/README.md b/duckdb-loadable-macros/README.md deleted file mode 120000 index 32d46ee8..00000000 --- a/duckdb-loadable-macros/README.md +++ /dev/null @@ -1 +0,0 @@ -../README.md \ No newline at end of file diff --git a/libduckdb-sys/Cargo.toml b/libduckdb-sys/Cargo.toml deleted file mode 100644 index 7465f90f..00000000 --- a/libduckdb-sys/Cargo.toml +++ /dev/null @@ -1,42 +0,0 @@ -[package] -name = "libduckdb-sys" -version = "0.10.2" -authors = ["wangfenjin "] -edition = "2021" -build = "build.rs" -license = "MIT" -repository = "https://github.com/wangfenjin/duckdb-rs" -homepage = "https://github.com/wangfenjin/duckdb-rs" -keywords = ["duckdb", "ffi", "database"] -readme = "README.md" -categories = ["external-ffi-bindings", "database"] -description = "Native bindings to the libduckdb library, C API" -exclude = ["duckdb-sources"] - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[features] -default = ["vcpkg", "pkg-config"] -bundled = ["cc"] -buildtime_bindgen = ["bindgen", "pkg-config", "vcpkg"] - -httpfs = ["bundled"] -json = ["bundled"] -parquet = ["bundled"] -extensions-full = ["httpfs", "json", "parquet"] - -[dependencies] - -[build-dependencies] -autocfg = "1.0" -bindgen = { version = "0.69", optional = true, default-features = false, features = ["runtime"] } -flate2 = "1.0" -pkg-config = { version = "0.3.24", optional = true } -cc = { version = "1.0", features = ["parallel"], optional = true } -vcpkg = { version = "0.2", optional = true } -serde = { version = "1.0", features = ["derive"] } -serde_json = { version = "1.0" } -tar = "0.4.38" - -[dev-dependencies] -arrow = { version = "51", default-features = false, features = ["ffi"] } diff --git a/libduckdb-sys/LICENSE b/libduckdb-sys/LICENSE deleted file mode 120000 index ea5b6064..00000000 --- a/libduckdb-sys/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE \ No newline at end of file diff --git a/libduckdb-sys/README.md b/libduckdb-sys/README.md deleted file mode 120000 index 32d46ee8..00000000 --- a/libduckdb-sys/README.md +++ /dev/null @@ -1 +0,0 @@ -../README.md \ No newline at end of file diff --git a/libduckdb-sys/duckdb-sources b/libduckdb-sys/duckdb-sources deleted file mode 160000 index 1601d94f..00000000 --- a/libduckdb-sys/duckdb-sources +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1601d94f94a7e0d2eb805a94803eb1e3afbbe4ed From 74fce0f0a60fb087d1b2e43580ff3fa48bf2024e Mon Sep 17 00:00:00 2001 From: Yoong Hor Meng Date: Wed, 5 Jun 2024 00:46:07 +0800 Subject: [PATCH 05/11] Update README.md (#242) Cargo command to insert duckdb with "bundled" --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 243a6324..6f594ed2 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,13 @@ You can adjust this behavior in a number of ways: as we are still in development, we will update it regularly. After we are more stable, we will use the stable released version from [duckdb](https://github.com/duckdb/duckdb/releases). This is probably the simplest solution to any build problems. You can enable this by adding the following in your `Cargo.toml` file: + + ```bash + cargo add duckdb --features bundled + ``` + + `Cargo.toml` will be updated. 
+ ```toml [dependencies] # Assume that version DuckDB version 0.9.2 is used. From f628e5adf0317515fbfa06c258262613d2fe269a Mon Sep 17 00:00:00 2001 From: Jack Eadie Date: Wed, 5 Jun 2024 04:30:33 +1000 Subject: [PATCH 06/11] Add support for DuckDB arrays when using Arrow's FixedSizeList (#323) * support UTF8[] * add tests * fix test * format * clippy * bump cause github is broken * add support for DuckDB arrays when using Arrow's FixedSizeList * fmt * add ArrayVector * update path in remote test --------- Co-authored-by: Max Gabrielsson --- crates/duckdb/src/extension.rs | 2 +- crates/duckdb/src/vtab/arrow.rs | 81 ++++++++++++++++++-------- crates/duckdb/src/vtab/data_chunk.rs | 7 ++- crates/duckdb/src/vtab/logical_type.rs | 9 +++ crates/duckdb/src/vtab/vector.rs | 43 ++++++++++++++ 5 files changed, 117 insertions(+), 25 deletions(-) diff --git a/crates/duckdb/src/extension.rs b/crates/duckdb/src/extension.rs index 1fa54c03..1a6a9690 100644 --- a/crates/duckdb/src/extension.rs +++ b/crates/duckdb/src/extension.rs @@ -38,7 +38,7 @@ mod test { let db = Connection::open_in_memory()?; assert_eq!( 300f32, - db.query_row::(r#"SELECT SUM(value) FROM read_parquet('https://github.com/wangfenjin/duckdb-rs/raw/main/examples/int32_decimal.parquet');"#, [], |r| r.get(0))? + db.query_row::(r#"SELECT SUM(value) FROM read_parquet('https://github.com/duckdb/duckdb-rs/raw/main/crates/duckdb/examples/int32_decimal.parquet');"#, [], |r| r.get(0))? ); Ok(()) } diff --git a/crates/duckdb/src/vtab/arrow.rs b/crates/duckdb/src/vtab/arrow.rs index b5835cbe..f1b8e9fe 100644 --- a/crates/duckdb/src/vtab/arrow.rs +++ b/crates/duckdb/src/vtab/arrow.rs @@ -1,5 +1,5 @@ use super::{ - vector::{FlatVector, ListVector, Vector}, + vector::{ArrayVector, FlatVector, ListVector, Vector}, BindInfo, DataChunk, Free, FunctionInfo, InitInfo, LogicalType, LogicalTypeId, StructVector, VTab, }; use std::ptr::null_mut; @@ -196,8 +196,11 @@ pub fn to_duckdb_logical_type(data_type: &DataType) -> Result { - fixed_size_list_array_to_vector(as_fixed_size_list_array(col.as_ref()), &mut chunk.list_vector(i))?; + fixed_size_list_array_to_vector(as_fixed_size_list_array(col.as_ref()), &mut chunk.array_vector(i))?; } DataType::Struct(_) => { let struct_array = as_struct_array(col.as_ref()); @@ -455,33 +458,21 @@ fn list_array_to_vector>( fn fixed_size_list_array_to_vector( array: &FixedSizeListArray, - out: &mut ListVector, + out: &mut ArrayVector, ) -> Result<(), Box> { let value_array = array.values(); let mut child = out.child(value_array.len()); match value_array.data_type() { dt if dt.is_primitive() => { primitive_array_to_vector(value_array.as_ref(), &mut child)?; - for i in 0..array.len() { - let offset = array.value_offset(i); - let length = array.value_length(); - out.set_entry(i, offset as usize, length as usize); - } - out.set_len(value_array.len()); } DataType::Utf8 => { string_array_to_vector(as_string_array(value_array.as_ref()), &mut child); } _ => { - return Err("Nested list is not supported yet.".into()); + return Err("Nested array is not supported yet.".into()); } } - for i in 0..array.len() { - let offset = array.value_offset(i); - let length = array.value_length(); - out.set_entry(i, offset as usize, length as usize); - } - out.set_len(value_array.len()); Ok(()) } @@ -511,7 +502,7 @@ fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) -> Result DataType::FixedSizeList(_, _) => { fixed_size_list_array_to_vector( as_fixed_size_list_array(column.as_ref()), - &mut out.list_vector_child(i), + &mut 
out.array_vector_child(i), )?; } DataType::Struct(_) => { @@ -569,10 +560,10 @@ mod test { use crate::{Connection, Result}; use arrow::{ array::{ - Array, ArrayRef, AsArray, Date32Array, Date64Array, Decimal256Array, Float64Array, GenericListArray, - Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, Time32SecondArray, - Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray, + Array, ArrayRef, AsArray, Date32Array, Date64Array, Decimal256Array, FixedSizeListArray, Float64Array, + GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, + Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, }, buffer::{OffsetBuffer, ScalarBuffer}, datatypes::{i256, ArrowPrimitiveType, DataType, Field, Fields, Schema}, @@ -760,6 +751,50 @@ mod test { Ok(()) } + //field: FieldRef, size: i32, values: ArrayRef, nulls: Option + #[test] + fn test_fixed_array_roundtrip() -> Result<(), Box> { + let array = FixedSizeListArray::new( + Arc::new(Field::new("item", DataType::Int32, true)), + 2, + Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4), Some(5)])), + None, + ); + + let expected_output_array = array.clone(); + + let db = Connection::open_in_memory()?; + db.register_table_function::("arrow")?; + + // Roundtrip a record batch from Rust to DuckDB and back to Rust + let schema = Schema::new(vec![Field::new("a", array.data_type().clone(), false)]); + + let rb = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())])?; + let param = arrow_recordbatch_to_query_params(rb); + let mut stmt = db.prepare("select a from arrow(?, ?)")?; + let rb = stmt.query_arrow(param)?.next().expect("no record batch"); + + let output_any_array = rb.column(0); + assert!(output_any_array + .data_type() + .equals_datatype(expected_output_array.data_type())); + + match output_any_array.as_fixed_size_list_opt() { + Some(output_array) => { + assert_eq!(output_array.len(), expected_output_array.len()); + for i in 0..output_array.len() { + assert_eq!(output_array.is_valid(i), expected_output_array.is_valid(i)); + if output_array.is_valid(i) { + assert!(expected_output_array.value(i).eq(&output_array.value(i))); + } + } + } + None => panic!("Expected FixedSizeListArray"), + } + + Ok(()) + } + #[test] fn test_primitive_roundtrip_contains_nulls() -> Result<(), Box> { let mut builder = arrow::array::PrimitiveBuilder::::new(); diff --git a/crates/duckdb/src/vtab/data_chunk.rs b/crates/duckdb/src/vtab/data_chunk.rs index 6e472773..3bc6d874 100644 --- a/crates/duckdb/src/vtab/data_chunk.rs +++ b/crates/duckdb/src/vtab/data_chunk.rs @@ -1,6 +1,6 @@ use super::{ logical_type::LogicalType, - vector::{FlatVector, ListVector, StructVector}, + vector::{ArrayVector, FlatVector, ListVector, StructVector}, }; use crate::ffi::{ duckdb_create_data_chunk, duckdb_data_chunk, duckdb_data_chunk_get_column_count, duckdb_data_chunk_get_size, @@ -35,6 +35,11 @@ impl DataChunk { ListVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) } + /// Get a array vector from the column index. + pub fn array_vector(&self, idx: usize) -> ArrayVector { + ArrayVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) + } + /// Get struct vector at the column index: `idx`. 
pub fn struct_vector(&self, idx: usize) -> StructVector { StructVector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, idx as u64) }) diff --git a/crates/duckdb/src/vtab/logical_type.rs b/crates/duckdb/src/vtab/logical_type.rs index 76a17182..1ee2543a 100644 --- a/crates/duckdb/src/vtab/logical_type.rs +++ b/crates/duckdb/src/vtab/logical_type.rs @@ -182,6 +182,15 @@ impl LogicalType { } } + /// Creates an array type from its child type. + pub fn array(child_type: &LogicalType, array_size: u64) -> Self { + unsafe { + Self { + ptr: duckdb_create_array_type(child_type.ptr, array_size), + } + } + } + /// Creates a decimal type from its `width` and `scale`. pub fn decimal(width: u8, scale: u8) -> Self { unsafe { diff --git a/crates/duckdb/src/vtab/vector.rs b/crates/duckdb/src/vtab/vector.rs index bf61cff4..030cf6ee 100644 --- a/crates/duckdb/src/vtab/vector.rs +++ b/crates/duckdb/src/vtab/vector.rs @@ -1,5 +1,7 @@ use std::{any::Any, ffi::CString, slice}; +use libduckdb_sys::{duckdb_array_type_array_size, duckdb_array_vector_get_child}; + use super::LogicalType; use crate::ffi::{ duckdb_list_entry, duckdb_list_vector_get_child, duckdb_list_vector_get_size, duckdb_list_vector_reserve, @@ -170,6 +172,42 @@ impl ListVector { } } +/// A array vector. (fixed-size list) +pub struct ArrayVector { + /// ArrayVector does not own the vector pointer. + ptr: duckdb_vector, +} + +impl From for ArrayVector { + fn from(ptr: duckdb_vector) -> Self { + Self { ptr } + } +} + +impl ArrayVector { + /// Get the logical type of this ArrayVector. + pub fn logical_type(&self) -> LogicalType { + LogicalType::from(unsafe { duckdb_vector_get_column_type(self.ptr) }) + } + + pub fn get_array_size(&self) -> u64 { + let ty = self.logical_type(); + unsafe { duckdb_array_type_array_size(ty.ptr) as u64 } + } + + /// Returns the child vector. + /// capacity should be a multiple of the array size. + // TODO: not ideal interface. Where should we keep count. + pub fn child(&self, capacity: usize) -> FlatVector { + FlatVector::with_capacity(unsafe { duckdb_array_vector_get_child(self.ptr) }, capacity) + } + + /// Set primitive data to the child node. + pub fn set_child(&self, data: &[T]) { + self.child(data.len()).copy(data); + } +} + /// A struct vector. pub struct StructVector { /// ListVector does not own the vector pointer. @@ -198,6 +236,11 @@ impl StructVector { ListVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) }) } + /// Take the child as [ArrayVector]. + pub fn array_vector_child(&self, idx: usize) -> ArrayVector { + ArrayVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) }) + } + /// Get the logical type of this struct vector. 
pub fn logical_type(&self) -> LogicalType { LogicalType::from(unsafe { duckdb_vector_get_column_type(self.ptr) }) From 4f772b327f707388b70ac280318ab95f60f0403b Mon Sep 17 00:00:00 2001 From: Phillip LeBlanc Date: Wed, 5 Jun 2024 17:02:10 +0900 Subject: [PATCH 07/11] Add support for BinaryArray in arrow-vtab (#324) * Add support for BinaryArray in arrow-vtab * Fix lint --- crates/duckdb/src/vtab/arrow.rs | 54 +++++++++++++++++++++++++++----- crates/duckdb/src/vtab/vector.rs | 20 ++++++++++-- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/crates/duckdb/src/vtab/arrow.rs b/crates/duckdb/src/vtab/arrow.rs index f1b8e9fe..fa92e64c 100644 --- a/crates/duckdb/src/vtab/arrow.rs +++ b/crates/duckdb/src/vtab/arrow.rs @@ -6,9 +6,9 @@ use std::ptr::null_mut; use crate::vtab::vector::Inserter; use arrow::array::{ - as_boolean_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array, as_struct_array, Array, - ArrayData, AsArray, BooleanArray, Decimal128Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait, - PrimitiveArray, StringArray, StructArray, + as_boolean_array, as_generic_binary_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array, + as_struct_array, Array, ArrayData, AsArray, BinaryArray, BooleanArray, Decimal128Array, FixedSizeListArray, + GenericListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, }; use arrow::{ @@ -230,6 +230,9 @@ pub fn record_batch_to_duckdb_data_chunk( DataType::Utf8 => { string_array_to_vector(as_string_array(col.as_ref()), &mut chunk.flat_vector(i)); } + DataType::Binary => { + binary_array_to_vector(as_generic_binary_array(col.as_ref()), &mut chunk.flat_vector(i)); + } DataType::List(_) => { list_array_to_vector(as_list_array(col.as_ref()), &mut chunk.list_vector(i))?; } @@ -430,6 +433,15 @@ fn string_array_to_vector(array: &StringArray, out: &mut FlatVector) { } } +fn binary_array_to_vector(array: &BinaryArray, out: &mut FlatVector) { + assert!(array.len() <= out.capacity()); + + for i in 0..array.len() { + let s = array.value(i); + out.insert(i, s); + } +} + fn list_array_to_vector>( array: &GenericListArray, out: &mut ListVector, @@ -443,6 +455,9 @@ fn list_array_to_vector>( DataType::Utf8 => { string_array_to_vector(as_string_array(value_array.as_ref()), &mut child); } + DataType::Binary => { + binary_array_to_vector(as_generic_binary_array(value_array.as_ref()), &mut child); + } _ => { return Err("Nested list is not supported yet.".into()); } @@ -469,6 +484,9 @@ fn fixed_size_list_array_to_vector( DataType::Utf8 => { string_array_to_vector(as_string_array(value_array.as_ref()), &mut child); } + DataType::Binary => { + binary_array_to_vector(as_generic_binary_array(value_array.as_ref()), &mut child); + } _ => { return Err("Nested array is not supported yet.".into()); } @@ -493,6 +511,9 @@ fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) -> Result DataType::Utf8 => { string_array_to_vector(as_string_array(column.as_ref()), &mut out.child(i)); } + DataType::Binary => { + binary_array_to_vector(as_generic_binary_array(column.as_ref()), &mut out.child(i)); + } DataType::List(_) => { list_array_to_vector(as_list_array(column.as_ref()), &mut out.list_vector_child(i))?; } @@ -560,10 +581,10 @@ mod test { use crate::{Connection, Result}; use arrow::{ array::{ - Array, ArrayRef, AsArray, Date32Array, Date64Array, Decimal256Array, FixedSizeListArray, Float64Array, - GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, - 
Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, + Array, ArrayRef, AsArray, BinaryArray, Date32Array, Date64Array, Decimal256Array, FixedSizeListArray, + Float64Array, GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, + StructArray, Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, }, buffer::{OffsetBuffer, ScalarBuffer}, datatypes::{i256, ArrowPrimitiveType, DataType, Field, Fields, Schema}, @@ -924,4 +945,23 @@ mod test { ) ); } + + #[test] + fn test_arrow_binary() { + let byte_array = BinaryArray::from_iter_values([b"test"].iter()); + let arc: ArrayRef = Arc::new(byte_array); + let batch = RecordBatch::try_from_iter(vec![("x", arc)]).unwrap(); + + let db = Connection::open_in_memory().unwrap(); + db.register_table_function::("arrow").unwrap(); + + let mut stmt = db.prepare("SELECT * FROM arrow(?, ?)").unwrap(); + + let mut arr = stmt.query_arrow(arrow_recordbatch_to_query_params(batch)).unwrap(); + let rb = arr.next().expect("no record batch"); + + let column = rb.column(0).as_any().downcast_ref::().unwrap(); + assert_eq!(column.len(), 1); + assert_eq!(column.value(0), b"test"); + } } diff --git a/crates/duckdb/src/vtab/vector.rs b/crates/duckdb/src/vtab/vector.rs index 030cf6ee..7de18578 100644 --- a/crates/duckdb/src/vtab/vector.rs +++ b/crates/duckdb/src/vtab/vector.rs @@ -7,8 +7,9 @@ use crate::ffi::{ duckdb_list_entry, duckdb_list_vector_get_child, duckdb_list_vector_get_size, duckdb_list_vector_reserve, duckdb_list_vector_set_size, duckdb_struct_type_child_count, duckdb_struct_type_child_name, duckdb_struct_vector_get_child, duckdb_validity_set_row_invalid, duckdb_vector, - duckdb_vector_assign_string_element, duckdb_vector_ensure_validity_writable, duckdb_vector_get_column_type, - duckdb_vector_get_data, duckdb_vector_get_validity, duckdb_vector_size, + duckdb_vector_assign_string_element, duckdb_vector_assign_string_element_len, + duckdb_vector_ensure_validity_writable, duckdb_vector_get_column_type, duckdb_vector_get_data, + duckdb_vector_get_validity, duckdb_vector_size, }; /// Vector trait. @@ -113,6 +114,21 @@ impl Inserter<&str> for FlatVector { } } +impl Inserter<&[u8]> for FlatVector { + fn insert(&self, index: usize, value: &[u8]) { + let value_size = value.len(); + unsafe { + // This function also works for binary data. https://duckdb.org/docs/api/c/api#duckdb_vector_assign_string_element_len + duckdb_vector_assign_string_element_len( + self.ptr, + index as u64, + value.as_ptr() as *const ::std::os::raw::c_char, + value_size as u64, + ); + } + } +} + /// A list vector. pub struct ListVector { /// ListVector does not own the vector pointer. 
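Illustrative usage of the `Binary` support added in the patch above (this sketch is not part of the diff): a record batch with a `BinaryArray` column registered through the `arrow` table function can also be read back as a DuckDB `BLOB` via the plain row API, not only via `query_arrow` as in `test_arrow_binary`. The imports assume that `ArrowVTab` and `arrow_recordbatch_to_query_params` are re-exported under `duckdb::vtab` and that the `arrow` crate is re-exported as `duckdb::arrow`; adjust them to the crate's actual re-exports and enabled features.

```rust
use std::sync::Arc;

use duckdb::{
    arrow::{
        array::{ArrayRef, BinaryArray},
        record_batch::RecordBatch,
    },
    vtab::{arrow_recordbatch_to_query_params, ArrowVTab}, // assumed re-export paths
    Connection, Result,
};

fn main() -> Result<()> {
    let db = Connection::open_in_memory()?;
    db.register_table_function::<ArrowVTab>("arrow")?;

    // Build a one-column record batch of raw bytes, as test_arrow_binary does.
    let bytes: ArrayRef = Arc::new(BinaryArray::from_iter_values([b"test"].iter()));
    let batch = RecordBatch::try_from_iter(vec![("x", bytes)]).unwrap();

    // The BinaryArray flows in through the arrow vtab and comes back as a BLOB,
    // which FromSql maps onto Vec<u8>.
    let blob: Vec<u8> = db.query_row(
        "SELECT x FROM arrow(?, ?)",
        arrow_recordbatch_to_query_params(batch),
        |row| row.get(0),
    )?;
    assert_eq!(blob, b"test".to_vec());
    Ok(())
}
```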
From f5da417f8911e76991d767c7513a43a8cb3cdcfc Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 6 Jun 2024 12:24:01 +0200 Subject: [PATCH 08/11] use openssl from runner (#327) --- .github/workflows/rust.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml index 3928a586..97d4fb36 100644 --- a/.github/workflows/rust.yaml +++ b/.github/workflows/rust.yaml @@ -107,10 +107,6 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v2 - # - run: echo "VCPKG_ROOT=$env:VCPKG_INSTALLATION_ROOT" | Out-File -FilePath $env:GITHUB_ENV -Append - # - run: vcpkg install openssl:x64-windows-static-md - - run: choco install openssl --execution-timeout 5400 - - run: echo 'OPENSSL_DIR=C:\Program Files\OpenSSL' | Out-File -FilePath $env:GITHUB_ENV -Append - uses: actions/cache@v3 with: path: ~/.cargo/registry/index From 3bd503c62cdd59c5ca9be90872fa5f51383820d1 Mon Sep 17 00:00:00 2001 From: Murali S <39963273+muralisoundararajan@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:36:28 +0200 Subject: [PATCH 09/11] Add ability to pass vendored feature to openssl in libduckdb-sys (#321) * Keep other feature configuration from openssl * Fix clippy warnings * Add ability to pass vendored feature to openssl in libduckdb-sys --------- Co-authored-by: Murali --- crates/duckdb/Cargo.toml | 3 +++ crates/libduckdb-sys/Cargo.toml | 5 +++++ crates/libduckdb-sys/openssl/find_normal.rs | 2 +- crates/libduckdb-sys/openssl/find_vendored.rs | 3 +-- crates/libduckdb-sys/openssl/mod.rs | 8 ++++---- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/crates/duckdb/Cargo.toml b/crates/duckdb/Cargo.toml index 5e45b701..6a26438c 100644 --- a/crates/duckdb/Cargo.toml +++ b/crates/duckdb/Cargo.toml @@ -23,6 +23,9 @@ bundled = ["libduckdb-sys/bundled"] httpfs = ["libduckdb-sys/httpfs", "bundled"] json = ["libduckdb-sys/json", "bundled"] parquet = ["libduckdb-sys/parquet", "bundled"] +openssl_vendored = ["libduckdb-sys/openssl_vendored", "bundled"] +unstable_boringssl = ["libduckdb-sys/unstable_boringssl", "bundled"] +openssl_bindgen = ["libduckdb-sys/openssl_bindgen", "bundled"] vtab = [] vtab-loadable = ["vtab", "duckdb-loadable-macros"] vtab-excel = ["vtab", "calamine"] diff --git a/crates/libduckdb-sys/Cargo.toml b/crates/libduckdb-sys/Cargo.toml index 207bdf56..ab4e89ce 100644 --- a/crates/libduckdb-sys/Cargo.toml +++ b/crates/libduckdb-sys/Cargo.toml @@ -25,12 +25,17 @@ json = ["bundled"] parquet = ["bundled"] extensions-full = ["httpfs", "json", "parquet"] +openssl_vendored = ["bundled", "openssl-src"] +openssl_bindgen = ["bundled", "bindgen", "pkg-config", "vcpkg"] +unstable_boringssl = ["bundled"] + [dependencies] [build-dependencies] autocfg = { workspace = true } bindgen = { workspace = true, features = ["runtime"], optional = true } flate2 = { workspace = true } +openssl-src = { version = "300.3.0", optional = true, features = ["legacy"] } pkg-config = { workspace = true, optional = true } cc = { workspace = true, features = ["parallel"], optional = true } vcpkg = { workspace = true, optional = true } diff --git a/crates/libduckdb-sys/openssl/find_normal.rs b/crates/libduckdb-sys/openssl/find_normal.rs index 4655a8af..591857dd 100644 --- a/crates/libduckdb-sys/openssl/find_normal.rs +++ b/crates/libduckdb-sys/openssl/find_normal.rs @@ -141,7 +141,7 @@ openssl-sys = {} openssl-sys crate build failed: no supported version of OpenSSL found. Ways to fix it: -- Use the `vendored` feature of openssl-sys crate to build OpenSSL from source. 
+- Use the `openssl_vendored` feature of libduckdb-sys crate to build OpenSSL from source. - Use Homebrew to install the `openssl` package. ", diff --git a/crates/libduckdb-sys/openssl/find_vendored.rs b/crates/libduckdb-sys/openssl/find_vendored.rs index 2ab44cfa..82348b6e 100644 --- a/crates/libduckdb-sys/openssl/find_vendored.rs +++ b/crates/libduckdb-sys/openssl/find_vendored.rs @@ -1,9 +1,8 @@ -use openssl_src; use std::path::PathBuf; pub fn get_openssl(_target: &str) -> (Vec, PathBuf) { let artifacts = openssl_src::Build::new().build(); - println!("cargo:vendored=1"); + println!("cargo:openssl_vendored=1"); println!("cargo:root={}", artifacts.lib_dir().parent().unwrap().display()); ( diff --git a/crates/libduckdb-sys/openssl/mod.rs b/crates/libduckdb-sys/openssl/mod.rs index a7bf791c..4877258a 100644 --- a/crates/libduckdb-sys/openssl/mod.rs +++ b/crates/libduckdb-sys/openssl/mod.rs @@ -22,7 +22,7 @@ extern crate autocfg; #[cfg(feature = "openssl_bindgen")] extern crate bindgen; extern crate cc; -#[cfg(feature = "vendored")] +#[cfg(feature = "openssl_vendored")] extern crate openssl_src; extern crate pkg_config; #[cfg(target_env = "msvc")] @@ -37,7 +37,7 @@ use std::{ mod cfgs; mod find_normal; -#[cfg(feature = "vendored")] +#[cfg(feature = "openssl_vendored")] mod find_vendored; #[cfg(feature = "openssl_bindgen")] mod run_bindgen; @@ -69,7 +69,7 @@ fn env(name: &str) -> Option { } fn find_openssl(target: &str) -> Result<(Vec, PathBuf), ()> { - #[cfg(feature = "vendored")] + #[cfg(feature = "openssl_vendored")] { // vendor if the feature is present, unless // OPENSSL_NO_VENDOR exists and isn't `0` @@ -183,7 +183,7 @@ fn postprocess(include_dirs: &[PathBuf]) -> Version { } } #[cfg(feature = "openssl_bindgen")] - run_bindgen::run(&include_dirs); + run_bindgen::run(include_dirs); version } From f48a4e3cd4fcc24405ffc0bc280aecd04c08acae Mon Sep 17 00:00:00 2001 From: Rijk van Putten Date: Thu, 6 Jun 2024 13:37:20 +0200 Subject: [PATCH 10/11] Implement appender for date/time types (#313) * Implement appender for date/time types * Remove unused import * Add unit test for date/time append --- crates/duckdb/src/appender/mod.rs | 47 +++++++++++++++++++++++----- crates/duckdb/src/types/value_ref.rs | 12 +++++++ 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/crates/duckdb/src/appender/mod.rs b/crates/duckdb/src/appender/mod.rs index 65b6db0f..488db82f 100644 --- a/crates/duckdb/src/appender/mod.rs +++ b/crates/duckdb/src/appender/mod.rs @@ -3,7 +3,7 @@ use std::{ffi::c_void, fmt, os::raw::c_char}; use crate::{ error::result_from_duckdb_appender, - types::{TimeUnit, ToSql, ToSqlOutput}, + types::{ToSql, ToSqlOutput}, Error, }; @@ -118,15 +118,23 @@ impl Appender<'_> { ffi::duckdb_append_varchar_length(ptr, s.as_ptr() as *const c_char, s.len() as u64) }, ValueRef::Timestamp(u, i) => unsafe { - let micros = match u { - TimeUnit::Second => i * 1_000_000, - TimeUnit::Millisecond => i * 1_000, - TimeUnit::Microsecond => i, - TimeUnit::Nanosecond => i / 1_000, - }; - ffi::duckdb_append_timestamp(ptr, ffi::duckdb_timestamp { micros }) + ffi::duckdb_append_timestamp(ptr, ffi::duckdb_timestamp { micros: u.to_micros(i) }) }, ValueRef::Blob(b) => unsafe { ffi::duckdb_append_blob(ptr, b.as_ptr() as *const c_void, b.len() as u64) }, + ValueRef::Date32(d) => unsafe { ffi::duckdb_append_date(ptr, ffi::duckdb_date { days: d }) }, + ValueRef::Time64(u, v) => unsafe { + ffi::duckdb_append_time(ptr, ffi::duckdb_time { micros: u.to_micros(v) }) + }, + ValueRef::Interval { months, days, nanos } => 
unsafe { + ffi::duckdb_append_interval( + ptr, + ffi::duckdb_interval { + months, + days, + micros: nanos / 1000, + }, + ) + }, _ => unreachable!("not supported"), }; if rc != 0 { @@ -255,6 +263,29 @@ mod test { Ok(()) } + #[test] + #[cfg(feature = "chrono")] + fn test_append_datetime() -> Result<()> { + use crate::params; + use chrono::{NaiveDate, NaiveDateTime}; + + let db = Connection::open_in_memory()?; + db.execute_batch("CREATE TABLE foo(x DATE, y TIMESTAMP)")?; + + let date = NaiveDate::from_ymd_opt(2024, 6, 5).unwrap(); + let timestamp = date.and_hms_opt(18, 26, 53).unwrap(); + { + let mut app = db.appender("foo")?; + app.append_row(params![date, timestamp])?; + } + let (date2, timestamp2) = db.query_row("SELECT x, y FROM foo", [], |row| { + Ok((row.get::<_, NaiveDate>(0)?, row.get::<_, NaiveDateTime>(1)?)) + })?; + assert_eq!(date, date2); + assert_eq!(timestamp, timestamp2); + Ok(()) + } + #[test] fn test_appender_error() -> Result<(), crate::Error> { let conn = Connection::open_in_memory()?; diff --git a/crates/duckdb/src/types/value_ref.rs b/crates/duckdb/src/types/value_ref.rs index db06c462..ed89ac01 100644 --- a/crates/duckdb/src/types/value_ref.rs +++ b/crates/duckdb/src/types/value_ref.rs @@ -23,6 +23,18 @@ pub enum TimeUnit { Nanosecond, } +impl TimeUnit { + /// Convert a number of `TimeUnit` to microseconds. + pub fn to_micros(&self, value: i64) -> i64 { + match self { + TimeUnit::Second => value * 1_000_000, + TimeUnit::Millisecond => value * 1000, + TimeUnit::Microsecond => value, + TimeUnit::Nanosecond => value / 1000, + } + } +} + /// A non-owning [static type value](https://duckdb.org/docs/sql/data_types/overview). Typically the /// memory backing this value is owned by SQLite. /// From 2fea2694c044cfcd975c4359b1bc0a5b71f91157 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 6 Jun 2024 16:02:42 +0200 Subject: [PATCH 11/11] Support decimal128 without casting to double (#328) * support decimal128 without casting to double * fix parquet test * clippy * clippy... again --- crates/duckdb/src/vtab/arrow.rs | 125 +++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 41 deletions(-) diff --git a/crates/duckdb/src/vtab/arrow.rs b/crates/duckdb/src/vtab/arrow.rs index fa92e64c..941c6ea9 100644 --- a/crates/duckdb/src/vtab/arrow.rs +++ b/crates/duckdb/src/vtab/arrow.rs @@ -17,7 +17,7 @@ use arrow::{ record_batch::RecordBatch, }; -use num::cast::AsPrimitive; +use num::{cast::AsPrimitive, ToPrimitive}; /// A pointer to the Arrow record batch for the table function. 
#[repr(C)] @@ -165,7 +165,7 @@ pub fn to_duckdb_type_id(data_type: &DataType) -> Result Decimal, // DataType::Decimal256(_, _) => Decimal, - DataType::Decimal128(_, _) => Double, + DataType::Decimal128(_, _) => Decimal, DataType::Decimal256(_, _) => Double, DataType::Map(_, _) => Map, _ => { @@ -177,35 +177,34 @@ pub fn to_duckdb_type_id(data_type: &DataType) -> Result Result> { - if data_type.is_primitive() - || matches!( - data_type, - DataType::Boolean | DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary - ) - { - Ok(LogicalType::new(to_duckdb_type_id(data_type)?)) - } else if let DataType::Dictionary(_, value_type) = data_type { - to_duckdb_logical_type(value_type) - } else if let DataType::Struct(fields) = data_type { - let mut shape = vec![]; - for field in fields.iter() { - shape.push((field.name().as_str(), to_duckdb_logical_type(field.data_type())?)); - } - Ok(LogicalType::struct_type(shape.as_slice())) - } else if let DataType::List(child) = data_type { - Ok(LogicalType::list(&to_duckdb_logical_type(child.data_type())?)) - } else if let DataType::LargeList(child) = data_type { - Ok(LogicalType::list(&to_duckdb_logical_type(child.data_type())?)) - } else if let DataType::FixedSizeList(child, array_size) = data_type { - Ok(LogicalType::array( + match data_type { + DataType::Dictionary(_, value_type) => to_duckdb_logical_type(value_type), + DataType::Struct(fields) => { + let mut shape = vec![]; + for field in fields.iter() { + shape.push((field.name().as_str(), to_duckdb_logical_type(field.data_type())?)); + } + Ok(LogicalType::struct_type(shape.as_slice())) + } + DataType::List(child) | DataType::LargeList(child) => { + Ok(LogicalType::list(&to_duckdb_logical_type(child.data_type())?)) + } + DataType::FixedSizeList(child, array_size) => Ok(LogicalType::array( &to_duckdb_logical_type(child.data_type())?, *array_size as u64, - )) - } else { - Err( - format!("Unsupported data type: {data_type}, please file an issue https://github.com/wangfenjin/duckdb-rs") - .into(), + )), + DataType::Decimal128(width, scale) if *scale > 0 => { + // DuckDB does not support negative decimal scales + Ok(LogicalType::decimal(*width, (*scale).try_into().unwrap())) + } + DataType::Boolean | DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary => { + Ok(LogicalType::new(to_duckdb_type_id(data_type)?)) + } + dtype if dtype.is_primitive() => Ok(LogicalType::new(to_duckdb_type_id(data_type)?)), + _ => Err(format!( + "Unsupported data type: {data_type}, please file an issue https://github.com/wangfenjin/duckdb-rs" ) + .into()), } } @@ -354,13 +353,11 @@ fn primitive_array_to_vector(array: &dyn Array, out: &mut dyn Vector) -> Result< out.as_mut_any().downcast_mut().unwrap(), ); } - DataType::Decimal128(_, _) => { + DataType::Decimal128(width, _) => { decimal_array_to_vector( - array - .as_any() - .downcast_ref::() - .expect("Unable to downcast to BooleanArray"), + as_primitive_array(array), out.as_mut_any().downcast_mut().unwrap(), + *width, ); } @@ -407,10 +404,43 @@ fn primitive_array_to_vector(array: &dyn Array, out: &mut dyn Vector) -> Result< } /// Convert Arrow [Decimal128Array] to a duckdb vector. 
-fn decimal_array_to_vector(array: &Decimal128Array, out: &mut FlatVector) { - assert!(array.len() <= out.capacity()); - for i in 0..array.len() { - out.as_mut_slice()[i] = array.value_as_string(i).parse::().unwrap(); +fn decimal_array_to_vector(array: &Decimal128Array, out: &mut FlatVector, width: u8) { + match width { + 1..=4 => { + let out_data = out.as_mut_slice(); + for (i, value) in array.values().iter().enumerate() { + out_data[i] = value.to_i16().unwrap(); + } + } + 5..=9 => { + let out_data = out.as_mut_slice(); + for (i, value) in array.values().iter().enumerate() { + out_data[i] = value.to_i32().unwrap(); + } + } + 10..=18 => { + let out_data = out.as_mut_slice(); + for (i, value) in array.values().iter().enumerate() { + out_data[i] = value.to_i64().unwrap(); + } + } + 19..=38 => { + let out_data = out.as_mut_slice(); + for (i, value) in array.values().iter().enumerate() { + out_data[i] = value.to_i128().unwrap(); + } + } + // This should never happen, arrow only supports 1-38 decimal digits + _ => panic!("Invalid decimal width: {}", width), + } + + // Set nulls + if let Some(nulls) = array.nulls() { + for (i, null) in nulls.into_iter().enumerate() { + if !null { + out.set_null(i); + } + } } } @@ -581,8 +611,8 @@ mod test { use crate::{Connection, Result}; use arrow::{ array::{ - Array, ArrayRef, AsArray, BinaryArray, Date32Array, Date64Array, Decimal256Array, FixedSizeListArray, - Float64Array, GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, + Array, ArrayRef, AsArray, BinaryArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array, + FixedSizeListArray, GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, }, @@ -606,9 +636,9 @@ mod test { let mut arr = stmt.query_arrow(param)?; let rb = arr.next().expect("no record batch"); assert_eq!(rb.num_columns(), 1); - let column = rb.column(0).as_any().downcast_ref::().unwrap(); + let column = rb.column(0).as_any().downcast_ref::().unwrap(); assert_eq!(column.len(), 1); - assert_eq!(column.value(0), 300.0); + assert_eq!(column.value(0), i128::from(30000)); Ok(()) } @@ -896,6 +926,19 @@ mod test { Ok(()) } + #[test] + fn test_decimal128_roundtrip() -> Result<(), Box> { + let array: PrimitiveArray = + Decimal128Array::from(vec![i128::from(1), i128::from(2), i128::from(3)]); + check_rust_primitive_array_roundtrip(array.clone(), array)?; + + // With width and scale + let array: PrimitiveArray = + Decimal128Array::from(vec![i128::from(12345)]).with_data_type(DataType::Decimal128(5, 2)); + check_rust_primitive_array_roundtrip(array.clone(), array)?; + Ok(()) + } + #[test] fn test_timestamp_tz_insert() -> Result<(), Box> { // TODO: This test should be reworked once we support TIMESTAMP_TZ properly
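To make the effect of the decimal change visible from SQL (an illustrative sketch, not taken from this diff): a `Decimal128Array` registered through the `arrow` table function should now be seen by DuckDB as a `DECIMAL` column rather than a `DOUBLE` one. The array construction mirrors the width-and-scale case in `test_decimal128_roundtrip`; the `duckdb::vtab` and `duckdb::arrow` import paths are assumed re-exports, and `typeof()` is DuckDB's built-in for inspecting a value's declared type.

```rust
use std::sync::Arc;

use duckdb::{
    arrow::{
        array::{ArrayRef, Decimal128Array},
        datatypes::DataType,
        record_batch::RecordBatch,
    },
    vtab::{arrow_recordbatch_to_query_params, ArrowVTab}, // assumed re-export paths
    Connection, Result,
};

fn main() -> Result<()> {
    let db = Connection::open_in_memory()?;
    db.register_table_function::<ArrowVTab>("arrow")?;

    // 123.45 as an Arrow decimal: precision 5, scale 2, unscaled value 12345.
    let decimals: ArrayRef = Arc::new(
        Decimal128Array::from(vec![12345_i128]).with_data_type(DataType::Decimal128(5, 2)),
    );
    let batch = RecordBatch::try_from_iter(vec![("a", decimals)]).unwrap();

    // With the new mapping the column binds as DECIMAL(5,2) instead of DOUBLE.
    let ty: String = db.query_row(
        "SELECT typeof(a) FROM arrow(?, ?)",
        arrow_recordbatch_to_query_params(batch),
        |row| row.get(0),
    )?;
    assert!(ty.starts_with("DECIMAL"));
    Ok(())
}
```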