Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into fix-more-null-issues-in-vectors
Browse files Browse the repository at this point in the history
y-f-u authored Jul 15, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
2 parents 64e7a99 + 5996c3e commit 9fde2c8
Showing 17 changed files with 111 additions and 3,189 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yaml
Original file line number Diff line number Diff line change
@@ -38,7 +38,7 @@ jobs:
name: Download duckdb
with:
repository: "duckdb/duckdb"
tag: "v0.10.1"
tag: "v1.0.0"
fileName: ${{ matrix.duckdb }}
out-file-path: .

12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -7,21 +7,21 @@ members = [
]

[workspace.package]
version = "0.10.2"
version = "1.0.0"
authors = ["wangfenjin <[email protected]>"]
edition = "2021"
repository = "https://github.com/wangfenjin/duckdb-rs"
homepage = "https://github.com/wangfenjin/duckdb-rs"
repository = "https://github.com/duckdb/duckdb-rs"
homepage = "https://github.com/duckdb/duckdb-rs"
documentation = "http://docs.rs/duckdb/"
readme = "README.md"
keywords = ["duckdb", "database", "ffi"]
license = "MIT"
categories = ["database"]

[workspace.dependencies]
duckdb = { version = "0.10.2", path = "crates/duckdb" }
libduckdb-sys = { version = "0.10.2", path = "crates/libduckdb-sys" }
duckdb-loadable-macros = { version = "0.1.1", path = "crates/duckdb-loadable-macros" }
duckdb = { version = "1.0.0", path = "crates/duckdb" }
libduckdb-sys = { version = "1.0.0", path = "crates/libduckdb-sys" }
duckdb-loadable-macros = { version = "0.1.2", path = "crates/duckdb-loadable-macros" }
autocfg = "1.0"
bindgen = { version = "0.69", default-features = false }
byteorder = "1.3"
2 changes: 1 addition & 1 deletion crates/duckdb-loadable-macros/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "duckdb-loadable-macros"
version = "0.1.1"
version = "0.1.2"
authors.workspace = true
edition.workspace = true
license.workspace = true
8 changes: 2 additions & 6 deletions crates/duckdb/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "duckdb"
version = "0.10.2"
version = "1.0.0"
authors.workspace = true
edition.workspace = true
repository.workspace = true
@@ -20,19 +20,15 @@ name = "duckdb"
[features]
default = []
bundled = ["libduckdb-sys/bundled"]
httpfs = ["libduckdb-sys/httpfs", "bundled"]
json = ["libduckdb-sys/json", "bundled"]
parquet = ["libduckdb-sys/parquet", "bundled"]
openssl_vendored = ["libduckdb-sys/openssl_vendored", "bundled"]
unstable_boringssl = ["libduckdb-sys/unstable_boringssl", "bundled"]
openssl_bindgen = ["libduckdb-sys/openssl_bindgen", "bundled"]
vtab = []
vtab-loadable = ["vtab", "duckdb-loadable-macros"]
vtab-excel = ["vtab", "calamine"]
vtab-arrow = ["vtab", "num"]
appender-arrow = ["vtab-arrow"]
vtab-full = ["vtab-excel", "vtab-arrow", "appender-arrow"]
extensions-full = ["httpfs", "json", "parquet", "vtab-full"]
extensions-full = ["json", "parquet", "vtab-full"]
buildtime_bindgen = ["libduckdb-sys/buildtime_bindgen"]
modern-full = ["chrono", "serde_json", "url", "r2d2", "uuid", "polars"]
polars = ["dep:polars"]
2 changes: 1 addition & 1 deletion crates/duckdb/src/error.rs
Original file line number Diff line number Diff line change
@@ -122,7 +122,7 @@ impl From<::std::ffi::NulError> for Error {
}
}

const UNKNOWN_COLUMN: usize = std::usize::MAX;
const UNKNOWN_COLUMN: usize = usize::MAX;

/// The conversion isn't precise, but it's convenient to have it
/// to allow use of `get_raw(…).as_…()?` in callbacks that take `Error`.
11 changes: 0 additions & 11 deletions crates/duckdb/src/extension.rs
Original file line number Diff line number Diff line change
@@ -31,15 +31,4 @@ mod test {
);
Ok(())
}

// https://duckdb.org/docs/extensions/httpfs
//
// Opens an in-memory connection, runs `read_parquet` against a parquet file
// addressed by an `https://` URL, and asserts that `SUM(value)` comes back
// as `300f32`.
// NOTE(review): presumably requires network access and the httpfs extension
// to be available at test time — confirm before relying on this in CI.
#[test]
fn test_extension_httpfs() -> Result<()> {
let db = Connection::open_in_memory()?;
assert_eq!(
300f32,
db.query_row::<f32, _, _>(r#"SELECT SUM(value) FROM read_parquet('https://github.com/duckdb/duckdb-rs/raw/main/crates/duckdb/examples/int32_decimal.parquet');"#, [], |r| r.get(0))?
);
Ok(())
}
}
2 changes: 1 addition & 1 deletion crates/duckdb/src/statement.rs
Original file line number Diff line number Diff line change
@@ -448,7 +448,7 @@ impl Statement<'_> {
/// Note that if the SQL does not return results, [`Statement::raw_execute`]
/// should be used instead.
#[inline]
pub fn raw_query(&mut self) -> Rows<'_> {
pub fn raw_query(&self) -> Rows<'_> {
Rows::new(self)
}

9 changes: 3 additions & 6 deletions crates/duckdb/src/types/mod.rs
Original file line number Diff line number Diff line change
@@ -261,10 +261,7 @@ impl fmt::Display for Type {
mod test {
use super::Value;
use crate::{params, Connection, Error, Result, Statement};
use std::{
f64::EPSILON,
os::raw::{c_double, c_int},
};
use std::os::raw::{c_double, c_int};

fn checked_memory_handle() -> Result<Connection> {
let db = Connection::open_in_memory()?;
@@ -385,7 +382,7 @@ mod test {
assert_eq!(vec![1, 2], row.get::<_, Vec<u8>>(0)?);
assert_eq!("text", row.get::<_, String>(1)?);
assert_eq!(1, row.get::<_, c_int>(2)?);
assert!((1.5 - row.get::<_, c_double>(3)?).abs() < EPSILON);
assert!((1.5 - row.get::<_, c_double>(3)?).abs() < f64::EPSILON);
assert_eq!(row.get::<_, Option<c_int>>(4)?, None);
assert_eq!(row.get::<_, Option<c_double>>(4)?, None);
assert_eq!(row.get::<_, Option<String>>(4)?, None);
@@ -453,7 +450,7 @@ mod test {
assert_eq!(Value::Text(String::from("text")), row.get::<_, Value>(1)?);
assert_eq!(Value::Int(1), row.get::<_, Value>(2)?);
match row.get::<_, Value>(3)? {
Value::Float(val) => assert!((1.5 - val).abs() < EPSILON as f32),
Value::Float(val) => assert!((1.5 - val).abs() < f32::EPSILON),
x => panic!("Invalid Value {x:?}"),
}
assert_eq!(Value::Null, row.get::<_, Value>(4)?);
77 changes: 71 additions & 6 deletions crates/duckdb/src/vtab/arrow.rs
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@ use crate::vtab::vector::Inserter;
use arrow::array::{
as_boolean_array, as_generic_binary_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array,
as_struct_array, Array, ArrayData, AsArray, BinaryArray, BooleanArray, Decimal128Array, FixedSizeListArray,
GenericListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray,
GenericListArray, GenericStringArray, LargeStringArray, OffsetSizeTrait, PrimitiveArray, StructArray,
};

use arrow::{
@@ -229,6 +229,15 @@ pub fn record_batch_to_duckdb_data_chunk(
DataType::Utf8 => {
string_array_to_vector(as_string_array(col.as_ref()), &mut chunk.flat_vector(i));
}
DataType::LargeUtf8 => {
string_array_to_vector(
col.as_ref()
.as_any()
.downcast_ref::<LargeStringArray>()
.ok_or_else(|| Box::<dyn std::error::Error>::from("Unable to downcast to LargeStringArray"))?,
&mut chunk.flat_vector(i),
);
}
DataType::Binary => {
binary_array_to_vector(as_generic_binary_array(col.as_ref()), &mut chunk.flat_vector(i));
}
@@ -436,7 +445,7 @@ fn boolean_array_to_vector(array: &BooleanArray, out: &mut FlatVector) {
set_nulls_in_flat_vector(array, out);
}

fn string_array_to_vector(array: &StringArray, out: &mut FlatVector) {
fn string_array_to_vector<O: OffsetSizeTrait>(array: &GenericStringArray<O>, out: &mut FlatVector) {
assert!(array.len() <= out.capacity());

// TODO: zero copy assignment
@@ -642,12 +651,12 @@ mod test {
use arrow::{
array::{
Array, ArrayRef, AsArray, BinaryArray, Date32Array, Date64Array, Decimal128Array, Decimal256Array,
FixedSizeListArray, GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray,
StructArray, Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray,
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
FixedSizeListArray, GenericByteArray, GenericListArray, Int32Array, LargeStringArray, ListArray,
OffsetSizeTrait, PrimitiveArray, StringArray, StructArray, Time32SecondArray, Time64MicrosecondArray,
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
},
buffer::{OffsetBuffer, ScalarBuffer},
datatypes::{i256, ArrowPrimitiveType, DataType, Field, Fields, Schema},
datatypes::{i256, ArrowPrimitiveType, ByteArrayType, DataType, Field, Fields, Schema},
record_batch::RecordBatch,
};
use std::{error::Error, sync::Arc};
@@ -852,6 +861,48 @@ mod test {
Ok(())
}

/// Round-trips a byte-like Arrow array through DuckDB and compares the result
/// with an expected array.
///
/// `arry_in` is wrapped in a single-column record batch, handed to the `arrow`
/// table function on a fresh in-memory connection, and read back with
/// `query_arrow`. The returned column must then match `arry_out` in data
/// type, length, per-slot validity, and the bytes of every valid slot.
///
/// `T1` and `T2` are independent so the expected output type may differ from
/// the input type.
///
/// # Errors
/// Propagates any error from opening the connection, registering the table
/// function, building the record batch, or preparing/running the query.
///
/// # Panics
/// Panics if no record batch is returned, if the output column cannot be
/// viewed as a `GenericByteArray<T2>`, or if any comparison fails.
fn check_generic_byte_roundtrip<T1, T2>(
    arry_in: GenericByteArray<T1>,
    arry_out: GenericByteArray<T2>,
) -> Result<(), Box<dyn Error>>
where
    T1: ByteArrayType,
    T2: ByteArrayType,
{
    let db = Connection::open_in_memory()?;
    db.register_table_function::<ArrowVTab>("arrow")?;

    // Roundtrip a record batch from Rust to DuckDB and back to Rust
    let schema = Schema::new(vec![Field::new("a", arry_in.data_type().clone(), false)]);

    // `arry_in` is not needed afterwards, so move it instead of cloning.
    let rb = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(arry_in)])?;
    let param = arrow_recordbatch_to_query_params(rb);
    let mut stmt = db.prepare("select a from arrow(?, ?)")?;
    let rb = stmt.query_arrow(param)?.next().expect("no record batch");

    let output_any_array = rb.column(0);

    assert!(
        output_any_array.data_type().equals_datatype(arry_out.data_type()),
        "{} != {}",
        output_any_array.data_type(),
        arry_out.data_type()
    );

    let output_array = output_any_array
        .as_bytes_opt::<T2>()
        .expect("Expected GenericByteArray");

    assert_eq!(output_array.len(), arry_out.len());

    // Compare element by element. Comparing the raw value buffers as a whole
    // ignores the offsets (so differently split strings could look equal) and
    // also inspects bytes behind null slots, which carry no meaning.
    for i in 0..arry_out.len() {
        assert_eq!(
            output_array.is_valid(i),
            arry_out.is_valid(i),
            "validity differs at index {i}"
        );
        if arry_out.is_valid(i) {
            let actual = AsRef::<[u8]>::as_ref(output_array.value(i));
            let expected = AsRef::<[u8]>::as_ref(arry_out.value(i));
            assert_eq!(actual, expected, "value differs at index {i}");
        }
    }

    Ok(())
}

#[test]
fn test_array_roundtrip() -> Result<(), Box<dyn Error>> {
check_generic_array_roundtrip(ListArray::new(
@@ -944,6 +995,20 @@ mod test {
])),
Some(vec![true, false, true].into()),
))?;
}

/// Checks that UTF-8 string columns survive a trip through DuckDB, for both
/// the 32-bit-offset and the 64-bit-offset Arrow string array types.
#[test]
fn test_utf8_roundtrip() -> Result<(), Box<dyn Error>> {
    // Same three non-null values are used for every input and expectation.
    let words = || vec![Some("foo"), Some("Baz"), Some("bar")];

    // Utf8 in, Utf8 out.
    check_generic_byte_roundtrip(StringArray::from(words()), StringArray::from(words()))?;

    // A [`LargeStringArray`] input is expected back as a [`StringArray`].
    check_generic_byte_roundtrip(LargeStringArray::from(words()), StringArray::from(words()))
}

6 changes: 3 additions & 3 deletions crates/duckdb/src/vtab/function.rs
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@ use std::{
/// An interface to store and retrieve data during the function bind stage
#[derive(Debug)]
pub struct BindInfo {
ptr: *mut c_void,
ptr: duckdb_bind_info,
}

impl BindInfo {
@@ -264,7 +264,7 @@ impl TableFunction {
///
/// # Arguments
/// * `init_func`: The init function
pub fn set_init(&self, init_func: Option<unsafe extern "C" fn(*mut c_void)>) -> &Self {
pub fn set_init(&self, init_func: Option<unsafe extern "C" fn(duckdb_init_info)>) -> &Self {
unsafe {
duckdb_table_function_set_init(self.ptr, init_func);
}
@@ -275,7 +275,7 @@ impl TableFunction {
///
/// # Arguments
/// * `bind_func`: The bind function
pub fn set_bind(&self, bind_func: Option<unsafe extern "C" fn(*mut c_void)>) -> &Self {
pub fn set_bind(&self, bind_func: Option<unsafe extern "C" fn(duckdb_bind_info)>) -> &Self {
unsafe {
duckdb_table_function_set_bind(self.ptr, bind_func);
}
11 changes: 2 additions & 9 deletions crates/libduckdb-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "libduckdb-sys"
version = "0.10.2"
version = "1.0.0"
authors.workspace = true
edition.workspace = true
license.workspace = true
@@ -19,23 +19,16 @@ exclude = ["duckdb-sources"]
default = ["vcpkg", "pkg-config"]
bundled = ["cc"]
buildtime_bindgen = ["bindgen", "pkg-config", "vcpkg"]

httpfs = ["bundled"]
json = ["bundled"]
parquet = ["bundled"]
extensions-full = ["httpfs", "json", "parquet"]

openssl_vendored = ["bundled", "openssl-src"]
openssl_bindgen = ["bundled", "bindgen", "pkg-config", "vcpkg"]
unstable_boringssl = ["bundled"]
extensions-full = ["json", "parquet"]

[dependencies]

[build-dependencies]
autocfg = { workspace = true }
bindgen = { workspace = true, features = ["runtime"], optional = true }
flate2 = { workspace = true }
openssl-src = { version = "300.3.0", optional = true, features = ["legacy"] }
pkg-config = { workspace = true, optional = true }
cc = { workspace = true, features = ["parallel"], optional = true }
vcpkg = { workspace = true, optional = true }
Loading

0 comments on commit 9fde2c8

Please sign in to comment.