From 201a3d0d8982021a44e51bb8e7e4f38922e8e7ef Mon Sep 17 00:00:00 2001 From: Swoorup Joshi Date: Thu, 1 Aug 2024 21:08:23 +1000 Subject: [PATCH] Rename `LogicalType` and `DataChunk` to have `Handle` suffix (#361) * Use manifest root for specifying versions * Added duckdb string type * Add Handle suffix to `DataChunk` and `LogicalType` --- Cargo.toml | 3 +- crates/duckdb/Cargo.toml | 2 +- crates/duckdb/examples/hello-ext/main.rs | 10 ++-- crates/duckdb/src/appender/arrow.rs | 6 +-- crates/duckdb/src/core/data_chunk.rs | 62 ++++++++++++------------ crates/duckdb/src/core/logical_type.rs | 54 ++++++++++----------- crates/duckdb/src/core/mod.rs | 4 +- crates/duckdb/src/core/vector.rs | 23 ++++----- crates/duckdb/src/vtab/arrow.rs | 26 +++++----- crates/duckdb/src/vtab/excel.rs | 20 ++++---- crates/duckdb/src/vtab/function.rs | 8 +-- crates/duckdb/src/vtab/mod.rs | 31 ++++++------ crates/libduckdb-sys/src/lib.rs | 3 ++ crates/libduckdb-sys/src/string.rs | 60 +++++++++++++++++++++++ 14 files changed, 187 insertions(+), 125 deletions(-) create mode 100644 crates/libduckdb-sys/src/string.rs diff --git a/Cargo.toml b/Cargo.toml index 154136d7..65dbf60d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,10 +34,11 @@ doc-comment = "0.3" fallible-iterator = "0.3" fallible-streaming-iterator = "0.1" flate2 = "1.0" -hashlink = "0.8" +hashlink = "0.9" lazy_static = "1.4" memchr = "2.3" num = { version = "0.4", default-features = false } +num-integer = "0.1.46" pkg-config = "0.3.24" polars = "0.35.4" polars-core = "0.35.4" diff --git a/crates/duckdb/Cargo.toml b/crates/duckdb/Cargo.toml index 36b2ce90..d2783176 100644 --- a/crates/duckdb/Cargo.toml +++ b/crates/duckdb/Cargo.toml @@ -56,7 +56,7 @@ calamine = { workspace = true, optional = true } num = { workspace = true, features = ["std"], optional = true } duckdb-loadable-macros = { workspace = true, optional = true } polars = { workspace = true, features = ["dtype-full"], optional = true } -num-integer = {version = "0.1.46"} 
+num-integer = { workspace = true } [dev-dependencies] doc-comment = { workspace = true } diff --git a/crates/duckdb/examples/hello-ext/main.rs b/crates/duckdb/examples/hello-ext/main.rs index 74ef02d0..6f159e9a 100644 --- a/crates/duckdb/examples/hello-ext/main.rs +++ b/crates/duckdb/examples/hello-ext/main.rs @@ -3,7 +3,7 @@ extern crate duckdb_loadable_macros; extern crate libduckdb_sys; use duckdb::{ - core::{DataChunk, Inserter, LogicalType, LogicalTypeId}, + core::{DataChunkHandle, Inserter, LogicalTypeHandle, LogicalTypeId}, vtab::{BindInfo, Free, FunctionInfo, InitInfo, VTab}, Connection, Result, }; @@ -44,7 +44,7 @@ impl VTab for HelloVTab { type BindData = HelloBindData; unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box> { - bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar)); + bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar)); let param = bind.get_parameter(0).to_string(); unsafe { (*data).name = CString::new(param).unwrap().into_raw(); @@ -59,7 +59,7 @@ impl VTab for HelloVTab { Ok(()) } - unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box> { + unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { let init_info = func.get_init_data::(); let bind_info = func.get_bind_data::(); @@ -80,8 +80,8 @@ impl VTab for HelloVTab { Ok(()) } - fn parameters() -> Option> { - Some(vec![LogicalType::new(LogicalTypeId::Varchar)]) + fn parameters() -> Option> { + Some(vec![LogicalTypeHandle::from(LogicalTypeId::Varchar)]) } } diff --git a/crates/duckdb/src/appender/arrow.rs b/crates/duckdb/src/appender/arrow.rs index 187af6f4..942242cc 100644 --- a/crates/duckdb/src/appender/arrow.rs +++ b/crates/duckdb/src/appender/arrow.rs @@ -1,6 +1,6 @@ use super::{ffi, Appender, Result}; use crate::{ - core::{DataChunk, LogicalType}, + core::{DataChunkHandle, LogicalTypeHandle}, error::result_from_duckdb_appender, 
vtab::{record_batch_to_duckdb_data_chunk, to_duckdb_logical_type}, Error, @@ -29,14 +29,14 @@ impl Appender<'_> { #[inline] pub fn append_record_batch(&mut self, record_batch: RecordBatch) -> Result<()> { let schema = record_batch.schema(); - let mut logical_type: Vec = vec![]; + let mut logical_type: Vec = vec![]; for field in schema.fields() { let logical_t = to_duckdb_logical_type(field.data_type()) .map_err(|_op| Error::ArrowTypeToDuckdbType(field.to_string(), field.data_type().clone()))?; logical_type.push(logical_t); } - let mut data_chunk = DataChunk::new(&logical_type); + let mut data_chunk = DataChunkHandle::new(&logical_type); record_batch_to_duckdb_data_chunk(&record_batch, &mut data_chunk).map_err(|_op| Error::AppendError)?; let rc = unsafe { duckdb_append_data_chunk(self.app, data_chunk.get_ptr()) }; diff --git a/crates/duckdb/src/core/data_chunk.rs b/crates/duckdb/src/core/data_chunk.rs index 3bc6d874..7b6d2e2c 100644 --- a/crates/duckdb/src/core/data_chunk.rs +++ b/crates/duckdb/src/core/data_chunk.rs @@ -1,5 +1,5 @@ use super::{ - logical_type::LogicalType, + logical_type::LogicalTypeHandle, vector::{ArrayVector, FlatVector, ListVector, StructVector}, }; use crate::ffi::{ @@ -7,22 +7,35 @@ use crate::ffi::{ duckdb_data_chunk_get_vector, duckdb_data_chunk_set_size, duckdb_destroy_data_chunk, }; -/// DataChunk in DuckDB. -pub struct DataChunk { +/// Handle to the DataChunk in DuckDB. +pub struct DataChunkHandle { /// Pointer to the DataChunk in duckdb C API. ptr: duckdb_data_chunk, - /// Whether this [DataChunk] own the [DataChunk::ptr]. + /// Whether this [DataChunkHandle] own the [DataChunk::ptr]. owned: bool, } -impl DataChunk { - /// Create a new [DataChunk] with the given [LogicalType]s. 
- pub fn new(logical_types: &[LogicalType]) -> Self { +impl Drop for DataChunkHandle { + fn drop(&mut self) { + if self.owned && !self.ptr.is_null() { + unsafe { duckdb_destroy_data_chunk(&mut self.ptr) } + self.ptr = std::ptr::null_mut(); + } + } +} + +impl DataChunkHandle { + pub(crate) unsafe fn new_unowned(ptr: duckdb_data_chunk) -> Self { + Self { ptr, owned: false } + } + + /// Create a new [DataChunkHandle] with the given [LogicalTypeHandle]s. + pub fn new(logical_types: &[LogicalTypeHandle]) -> Self { let num_columns = logical_types.len(); let mut c_types = logical_types.iter().map(|t| t.ptr).collect::>(); let ptr = unsafe { duckdb_create_data_chunk(c_types.as_mut_ptr(), num_columns as u64) }; - DataChunk { ptr, owned: true } + DataChunkHandle { ptr, owned: true } } /// Get the vector at the specific column index: `idx`. @@ -50,49 +63,34 @@ impl DataChunk { unsafe { duckdb_data_chunk_set_size(self.ptr, new_len as u64) }; } - /// Get the length / the number of rows in this [DataChunk]. + /// Get the length / the number of rows in this [DataChunkHandle]. pub fn len(&self) -> usize { unsafe { duckdb_data_chunk_get_size(self.ptr) as usize } } - /// Check whether this [DataChunk] is empty. + /// Check whether this [DataChunkHandle] is empty. pub fn is_empty(&self) -> bool { self.len() == 0 } - /// Get the number of columns in this [DataChunk]. + /// Get the number of columns in this [DataChunkHandle]. pub fn num_columns(&self) -> usize { unsafe { duckdb_data_chunk_get_column_count(self.ptr) as usize } } - /// Get the ptr of duckdb_data_chunk in this [DataChunk]. + /// Get the ptr of duckdb_data_chunk in this [DataChunkHandle]. 
pub fn get_ptr(&self) -> duckdb_data_chunk { self.ptr } } -impl From for DataChunk { - fn from(ptr: duckdb_data_chunk) -> Self { - Self { ptr, owned: false } - } -} - -impl Drop for DataChunk { - fn drop(&mut self) { - if self.owned && !self.ptr.is_null() { - unsafe { duckdb_destroy_data_chunk(&mut self.ptr) } - self.ptr = std::ptr::null_mut(); - } - } -} - #[cfg(test)] mod test { use super::{super::logical_type::LogicalTypeId, *}; #[test] fn test_data_chunk_construction() { - let dc = DataChunk::new(&[LogicalType::new(LogicalTypeId::Integer)]); + let dc = DataChunkHandle::new(&[LogicalTypeHandle::from(LogicalTypeId::Integer)]); assert_eq!(dc.num_columns(), 1); @@ -101,7 +99,7 @@ mod test { #[test] fn test_vector() { - let datachunk = DataChunk::new(&[LogicalType::new(LogicalTypeId::Bigint)]); + let datachunk = DataChunkHandle::new(&[LogicalTypeHandle::from(LogicalTypeId::Bigint)]); let mut vector = datachunk.flat_vector(0); let data = vector.as_mut_slice::(); @@ -110,11 +108,11 @@ mod test { #[test] fn test_logi() { - let key = LogicalType::new(LogicalTypeId::Varchar); + let key = LogicalTypeHandle::from(LogicalTypeId::Varchar); - let value = LogicalType::new(LogicalTypeId::UTinyint); + let value = LogicalTypeHandle::from(LogicalTypeId::UTinyint); - let map = LogicalType::map(&key, &value); + let map = LogicalTypeHandle::map(&key, &value); assert_eq!(map.id(), LogicalTypeId::Map); diff --git a/crates/duckdb/src/core/logical_type.rs b/crates/duckdb/src/core/logical_type.rs index 9ea83681..ede3ba52 100644 --- a/crates/duckdb/src/core/logical_type.rs +++ b/crates/duckdb/src/core/logical_type.rs @@ -110,11 +110,11 @@ impl From for LogicalTypeId { /// DuckDB Logical Type. 
/// -pub struct LogicalType { +pub struct LogicalTypeHandle { pub(crate) ptr: duckdb_logical_type, } -impl Debug for LogicalType { +impl Debug for LogicalTypeHandle { /// Debug implementation for LogicalType fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { let id = self.id(); @@ -134,7 +134,7 @@ impl Debug for LogicalType { } } -impl Drop for LogicalType { +impl Drop for LogicalTypeHandle { /// Drop implementation for LogicalType fn drop(&mut self) { if !self.ptr.is_null() { @@ -147,25 +147,25 @@ impl Drop for LogicalType { } } -impl From for LogicalType { - /// Wrap a DuckDB logical type from C API - fn from(ptr: duckdb_logical_type) -> Self { - Self { ptr } - } -} - -impl LogicalType { - /// Create a new [LogicalType] from [LogicalTypeId] - pub fn new(id: LogicalTypeId) -> Self { +impl From for LogicalTypeHandle { + /// Create a new [LogicalTypeHandle] from [LogicalTypeId] + fn from(id: LogicalTypeId) -> Self { unsafe { Self { ptr: duckdb_create_logical_type(id as u32), } } } +} + +impl LogicalTypeHandle { + /// Create a DuckDB logical type from C API + pub(crate) unsafe fn new(ptr: duckdb_logical_type) -> Self { + Self { ptr } + } /// Creates a map type from its child type. - pub fn map(key: &LogicalType, value: &LogicalType) -> Self { + pub fn map(key: &LogicalTypeHandle, value: &LogicalTypeHandle) -> Self { unsafe { Self { ptr: duckdb_create_map_type(key.ptr, value.ptr), @@ -174,7 +174,7 @@ impl LogicalType { } /// Creates a list type from its child type. - pub fn list(child_type: &LogicalType) -> Self { + pub fn list(child_type: &LogicalTypeHandle) -> Self { unsafe { Self { ptr: duckdb_create_list_type(child_type.ptr), @@ -183,7 +183,7 @@ impl LogicalType { } /// Creates an array type from its child type. 
- pub fn array(child_type: &LogicalType, array_size: u64) -> Self { + pub fn array(child_type: &LogicalTypeHandle, array_size: u64) -> Self { unsafe { Self { ptr: duckdb_create_array_type(child_type.ptr, array_size), @@ -213,7 +213,7 @@ impl LogicalType { } /// Make a `LogicalType` for `struct` - pub fn struct_type(fields: &[(&str, LogicalType)]) -> Self { + pub fn struct_type(fields: &[(&str, LogicalTypeHandle)]) -> Self { let keys: Vec = fields.iter().map(|f| CString::new(f.0).unwrap()).collect(); let values: Vec = fields.iter().map(|it| it.1.ptr).collect(); let name_ptrs = keys.iter().map(|it| it.as_ptr()).collect::>(); @@ -230,7 +230,7 @@ impl LogicalType { } /// Make a `LogicalType` for `union` - pub fn union_type(fields: &[(&str, LogicalType)]) -> Self { + pub fn union_type(fields: &[(&str, LogicalTypeHandle)]) -> Self { let keys: Vec = fields.iter().map(|f| CString::new(f.0).unwrap()).collect(); let values: Vec = fields.iter().map(|it| it.1.ptr).collect(); let name_ptrs = keys.iter().map(|it| it.as_ptr()).collect::>(); @@ -287,18 +287,18 @@ impl LogicalType { _ => panic!("not a struct or union"), } }; - Self::from(c_logical_type) + unsafe { Self::new(c_logical_type) } } } #[cfg(test)] mod test { - use crate::core::{LogicalType, LogicalTypeId}; + use crate::core::{LogicalTypeHandle, LogicalTypeId}; #[test] fn test_struct() { - let fields = &[("hello", LogicalType::new(crate::core::LogicalTypeId::Boolean))]; - let typ = LogicalType::struct_type(fields); + let fields = &[("hello", LogicalTypeHandle::from(crate::core::LogicalTypeId::Boolean))]; + let typ = LogicalTypeHandle::struct_type(fields); assert_eq!(typ.num_children(), 1); assert_eq!(typ.child_name(0), "hello"); @@ -307,7 +307,7 @@ mod test { #[test] fn test_decimal() { - let typ = LogicalType::decimal(10, 2); + let typ = LogicalTypeHandle::decimal(10, 2); assert_eq!(typ.id(), crate::core::LogicalTypeId::Decimal); assert_eq!(typ.decimal_width(), 10); @@ -316,7 +316,7 @@ mod test { #[test] fn 
test_decimal_methods() { - let typ = LogicalType::new(crate::core::LogicalTypeId::Varchar); + let typ = LogicalTypeHandle::from(crate::core::LogicalTypeId::Varchar); assert_eq!(typ.decimal_width(), 0); assert_eq!(typ.decimal_scale(), 0); @@ -325,10 +325,10 @@ mod test { #[test] fn test_union_type() { let fields = &[ - ("hello", LogicalType::new(LogicalTypeId::Boolean)), - ("world", LogicalType::new(LogicalTypeId::Integer)), + ("hello", LogicalTypeHandle::from(LogicalTypeId::Boolean)), + ("world", LogicalTypeHandle::from(LogicalTypeId::Integer)), ]; - let typ = LogicalType::union_type(fields); + let typ = LogicalTypeHandle::union_type(fields); assert_eq!(typ.num_children(), 2); diff --git a/crates/duckdb/src/core/mod.rs b/crates/duckdb/src/core/mod.rs index 1726a35f..21e630fd 100644 --- a/crates/duckdb/src/core/mod.rs +++ b/crates/duckdb/src/core/mod.rs @@ -2,6 +2,6 @@ mod data_chunk; mod logical_type; mod vector; -pub use data_chunk::DataChunk; -pub use logical_type::{LogicalType, LogicalTypeId}; +pub use data_chunk::DataChunkHandle; +pub use logical_type::{LogicalTypeHandle, LogicalTypeId}; pub use vector::*; diff --git a/crates/duckdb/src/core/vector.rs b/crates/duckdb/src/core/vector.rs index 54f68607..befda697 100644 --- a/crates/duckdb/src/core/vector.rs +++ b/crates/duckdb/src/core/vector.rs @@ -1,8 +1,8 @@ use std::{any::Any, ffi::CString, slice}; -use libduckdb_sys::{duckdb_array_type_array_size, duckdb_array_vector_get_child}; +use libduckdb_sys::{duckdb_array_type_array_size, duckdb_array_vector_get_child, DuckDbString}; -use super::LogicalType; +use super::LogicalTypeHandle; use crate::ffi::{ duckdb_list_entry, duckdb_list_vector_get_child, duckdb_list_vector_get_size, duckdb_list_vector_reserve, duckdb_list_vector_set_size, duckdb_struct_type_child_count, duckdb_struct_type_child_name, @@ -71,8 +71,8 @@ impl FlatVector { } /// Returns the logical type of the vector - pub fn logical_type(&self) -> LogicalType { - LogicalType::from(unsafe { 
duckdb_vector_get_column_type(self.ptr) }) + pub fn logical_type(&self) -> LogicalTypeHandle { + unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) } } /// Set row as null @@ -202,8 +202,8 @@ impl From for ArrayVector { impl ArrayVector { /// Get the logical type of this ArrayVector. - pub fn logical_type(&self) -> LogicalType { - LogicalType::from(unsafe { duckdb_vector_get_column_type(self.ptr) }) + pub fn logical_type(&self) -> LogicalTypeHandle { + unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) } } /// Returns the size of the array type. @@ -259,19 +259,16 @@ impl StructVector { } /// Get the logical type of this struct vector. - pub fn logical_type(&self) -> LogicalType { - LogicalType::from(unsafe { duckdb_vector_get_column_type(self.ptr) }) + pub fn logical_type(&self) -> LogicalTypeHandle { + unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) } } /// Get the name of the child by idx. - pub fn child_name(&self, idx: usize) -> String { + pub fn child_name(&self, idx: usize) -> DuckDbString { let logical_type = self.logical_type(); unsafe { let child_name_ptr = duckdb_struct_type_child_name(logical_type.ptr, idx as u64); - let c_str = CString::from_raw(child_name_ptr); - let name = c_str.to_str().unwrap(); - // duckdb_free(child_name_ptr.cast()); - name.to_string() + DuckDbString::from_ptr(child_name_ptr) } } diff --git a/crates/duckdb/src/vtab/arrow.rs b/crates/duckdb/src/vtab/arrow.rs index 8adeec19..0dbbd7f5 100644 --- a/crates/duckdb/src/vtab/arrow.rs +++ b/crates/duckdb/src/vtab/arrow.rs @@ -1,4 +1,4 @@ -use super::{BindInfo, DataChunk, Free, FunctionInfo, InitInfo, LogicalType, LogicalTypeId, VTab}; +use super::{BindInfo, DataChunkHandle, Free, FunctionInfo, InitInfo, LogicalTypeHandle, LogicalTypeId, VTab}; use std::ptr::null_mut; use crate::core::{ArrayVector, FlatVector, Inserter, ListVector, StructVector, Vector}; @@ -99,7 +99,7 @@ impl VTab for ArrowVTab { Ok(()) } - unsafe fn 
func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box> { + unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { let init_info = func.get_init_data::(); let bind_info = func.get_bind_data::(); unsafe { @@ -116,10 +116,10 @@ impl VTab for ArrowVTab { Ok(()) } - fn parameters() -> Option> { + fn parameters() -> Option> { Some(vec![ - LogicalType::new(LogicalTypeId::UBigint), // file path - LogicalType::new(LogicalTypeId::UBigint), // sheet name + LogicalTypeHandle::from(LogicalTypeId::UBigint), // file path + LogicalTypeHandle::from(LogicalTypeId::UBigint), // sheet name ]) } } @@ -173,7 +173,7 @@ pub fn to_duckdb_type_id(data_type: &DataType) -> Result Result> { +pub fn to_duckdb_logical_type(data_type: &DataType) -> Result> { match data_type { DataType::Dictionary(_, value_type) => to_duckdb_logical_type(value_type), DataType::Struct(fields) => { @@ -181,23 +181,23 @@ pub fn to_duckdb_logical_type(data_type: &DataType) -> Result { - Ok(LogicalType::list(&to_duckdb_logical_type(child.data_type())?)) + Ok(LogicalTypeHandle::list(&to_duckdb_logical_type(child.data_type())?)) } - DataType::FixedSizeList(child, array_size) => Ok(LogicalType::array( + DataType::FixedSizeList(child, array_size) => Ok(LogicalTypeHandle::array( &to_duckdb_logical_type(child.data_type())?, *array_size as u64, )), DataType::Decimal128(width, scale) if *scale > 0 => { // DuckDB does not support negative decimal scales - Ok(LogicalType::decimal(*width, (*scale).try_into().unwrap())) + Ok(LogicalTypeHandle::decimal(*width, (*scale).try_into().unwrap())) } DataType::Boolean | DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary => { - Ok(LogicalType::new(to_duckdb_type_id(data_type)?)) + Ok(LogicalTypeHandle::from(to_duckdb_type_id(data_type)?)) } - dtype if dtype.is_primitive() => Ok(LogicalType::new(to_duckdb_type_id(data_type)?)), + dtype if dtype.is_primitive() => 
Ok(LogicalTypeHandle::from(to_duckdb_type_id(data_type)?)), _ => Err(format!( "Unsupported data type: {data_type}, please file an issue https://github.com/wangfenjin/duckdb-rs" ) @@ -213,7 +213,7 @@ pub fn to_duckdb_logical_type(data_type: &DataType) -> Result Result<(), Box> { // Fill the row assert_eq!(batch.num_columns(), chunk.num_columns()); diff --git a/crates/duckdb/src/vtab/excel.rs b/crates/duckdb/src/vtab/excel.rs index 883aa7a9..b9bfad59 100644 --- a/crates/duckdb/src/vtab/excel.rs +++ b/crates/duckdb/src/vtab/excel.rs @@ -1,4 +1,4 @@ -use super::{BindInfo, DataChunk, Free, FunctionInfo, InitInfo, LogicalType, LogicalTypeId, VTab}; +use super::{BindInfo, DataChunkHandle, Free, FunctionInfo, InitInfo, LogicalTypeHandle, LogicalTypeId, VTab}; use crate::core::Inserter; use calamine::{open_workbook_auto, DataType, Range, Reader}; @@ -74,7 +74,7 @@ impl VTab for ExcelVTab { header[idx] .get_string() .unwrap_or_else(|| panic!("idx {} header empty?", idx)), - LogicalType::new(LogicalTypeId::Varchar), + LogicalTypeHandle::from(LogicalTypeId::Varchar), ); } DataType::Float(_) => { @@ -82,7 +82,7 @@ impl VTab for ExcelVTab { header[idx] .get_string() .unwrap_or_else(|| panic!("idx {} header empty?", idx)), - LogicalType::new(LogicalTypeId::Double), + LogicalTypeHandle::from(LogicalTypeId::Double), ); } DataType::Int(_) => { @@ -90,7 +90,7 @@ impl VTab for ExcelVTab { header[idx] .get_string() .unwrap_or_else(|| panic!("idx {} header empty?", idx)), - LogicalType::new(LogicalTypeId::Bigint), + LogicalTypeHandle::from(LogicalTypeId::Bigint), ); } DataType::Bool(_) => { @@ -98,7 +98,7 @@ impl VTab for ExcelVTab { header[idx] .get_string() .unwrap_or_else(|| panic!("idx {} header empty?", idx)), - LogicalType::new(LogicalTypeId::Boolean), + LogicalTypeHandle::from(LogicalTypeId::Boolean), ); } DataType::DateTime(_) => { @@ -106,7 +106,7 @@ impl VTab for ExcelVTab { header[idx] .get_string() .unwrap_or_else(|| panic!("idx {} header empty?", idx)), - 
LogicalType::new(LogicalTypeId::Date), + LogicalTypeHandle::from(LogicalTypeId::Date), ); } _ => { @@ -132,7 +132,7 @@ impl VTab for ExcelVTab { Ok(()) } - unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box> { + unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { let init_info = func.get_init_data::(); let bind_info = func.get_bind_data::(); unsafe { @@ -180,10 +180,10 @@ impl VTab for ExcelVTab { Ok(()) } - fn parameters() -> Option> { + fn parameters() -> Option> { Some(vec![ - LogicalType::new(LogicalTypeId::Varchar), // file path - LogicalType::new(LogicalTypeId::Varchar), // sheet name + LogicalTypeHandle::from(LogicalTypeId::Varchar), // file path + LogicalTypeHandle::from(LogicalTypeId::Varchar), // sheet name ]) } } diff --git a/crates/duckdb/src/vtab/function.rs b/crates/duckdb/src/vtab/function.rs index a1ce75aa..9d14b510 100644 --- a/crates/duckdb/src/vtab/function.rs +++ b/crates/duckdb/src/vtab/function.rs @@ -9,7 +9,7 @@ use super::{ duckdb_table_function_set_init, duckdb_table_function_set_local_init, duckdb_table_function_set_name, duckdb_table_function_supports_projection_pushdown, idx_t, }, - LogicalType, Value, + LogicalTypeHandle, Value, }; use std::{ ffi::{c_void, CString}, @@ -28,7 +28,7 @@ impl BindInfo { /// # Arguments /// * `name`: The name of the column /// * `type`: The logical type of the column - pub fn add_result_column(&self, column_name: &str, column_type: LogicalType) { + pub fn add_result_column(&self, column_name: &str, column_type: LogicalTypeHandle) { let c_str = CString::new(column_name).unwrap(); unsafe { duckdb_bind_add_result_column(self.ptr, c_str.as_ptr() as *const c_char, column_type.ptr); @@ -226,7 +226,7 @@ impl TableFunction { /// /// # Arguments /// * `logical_type`: The type of the parameter to add. 
- pub fn add_parameter(&self, logical_type: &LogicalType) -> &Self { + pub fn add_parameter(&self, logical_type: &LogicalTypeHandle) -> &Self { unsafe { duckdb_table_function_add_parameter(self.ptr, logical_type.ptr); } @@ -238,7 +238,7 @@ impl TableFunction { /// # Arguments /// * `name`: The name of the parameter to add. /// * `logical_type`: The type of the parameter to add. - pub fn add_named_parameter(&self, name: &str, logical_type: &LogicalType) -> &Self { + pub fn add_named_parameter(&self, name: &str, logical_type: &LogicalTypeHandle) -> &Self { unsafe { let string = CString::new(name).unwrap(); duckdb_table_function_add_named_parameter(self.ptr, string.as_ptr(), logical_type.ptr); diff --git a/crates/duckdb/src/vtab/mod.rs b/crates/duckdb/src/vtab/mod.rs index d1e843c2..9249fb1e 100644 --- a/crates/duckdb/src/vtab/mod.rs +++ b/crates/duckdb/src/vtab/mod.rs @@ -20,7 +20,7 @@ mod excel; pub use function::{BindInfo, FunctionInfo, InitInfo, TableFunction}; pub use value::Value; -use crate::core::{DataChunk, LogicalType, LogicalTypeId}; +use crate::core::{DataChunkHandle, LogicalTypeHandle, LogicalTypeId}; use ffi::{duckdb_bind_info, duckdb_data_chunk, duckdb_function_info, duckdb_init_info}; use ffi::duckdb_malloc; @@ -100,7 +100,7 @@ pub trait VTab: Sized { /// - The `init_info` and `bind_info` data pointed to remains valid and is not freed until after this function completes. /// - No other threads are concurrently mutating the data pointed to by `init_info` and `bind_info` without proper synchronization. /// - The `output` parameter is correctly initialized and can safely be written to. 
- unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box>; + unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box>; /// Does the table function support pushdown /// default is false fn supports_pushdown() -> bool { @@ -108,12 +108,12 @@ pub trait VTab: Sized { } /// The parameters of the table function /// default is None - fn parameters() -> Option> { + fn parameters() -> Option> { None } /// The named parameters of the table function /// default is None - fn named_parameters() -> Option> { + fn named_parameters() -> Option> { None } } @@ -123,8 +123,8 @@ where T: VTab, { let info = FunctionInfo::from(info); - let mut output = DataChunk::from(output); - let result = T::func(&info, &mut output); + let mut data_chunk_handle = DataChunkHandle::new_unowned(output); + let result = T::func(&info, &mut data_chunk_handle); if result.is_err() { info.set_error(&result.err().unwrap().to_string()); } @@ -229,7 +229,7 @@ mod test { type BindData = HelloBindData; unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box> { - bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar)); + bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar)); let param = bind.get_parameter(0).to_string(); unsafe { (*data).name = CString::new(param).unwrap().into_raw(); @@ -244,7 +244,7 @@ mod test { Ok(()) } - unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box> { + unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { let init_info = func.get_init_data::(); let bind_info = func.get_bind_data::(); @@ -265,8 +265,8 @@ mod test { Ok(()) } - fn parameters() -> Option> { - Some(vec![LogicalType::new(LogicalTypeId::Varchar)]) + fn parameters() -> Option> { + Some(vec![LogicalTypeHandle::from(LogicalTypeId::Varchar)]) } } @@ -276,7 +276,7 @@ mod test { type BindData = HelloBindData; unsafe fn bind(bind: &BindInfo, data: *mut 
HelloBindData) -> Result<(), Box> { - bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar)); + bind.add_result_column("column0", LogicalTypeHandle::from(LogicalTypeId::Varchar)); let param = bind.get_named_parameter("name").unwrap().to_string(); assert!(bind.get_named_parameter("unknown_name").is_none()); unsafe { @@ -289,12 +289,15 @@ mod test { HelloVTab::init(init_info, data) } - unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box> { + unsafe fn func(func: &FunctionInfo, output: &mut DataChunkHandle) -> Result<(), Box> { HelloVTab::func(func, output) } - fn named_parameters() -> Option> { - Some(vec![("name".to_string(), LogicalType::new(LogicalTypeId::Varchar))]) + fn named_parameters() -> Option> { + Some(vec![( + "name".to_string(), + LogicalTypeHandle::from(LogicalTypeId::Varchar), + )]) } } diff --git a/crates/libduckdb-sys/src/lib.rs b/crates/libduckdb-sys/src/lib.rs index ae57cadf..5f683bd7 100644 --- a/crates/libduckdb-sys/src/lib.rs +++ b/crates/libduckdb-sys/src/lib.rs @@ -11,6 +11,9 @@ mod bindings { #[allow(clippy::all)] pub use bindings::*; +mod string; +pub use string::*; + pub const DuckDBError: duckdb_state = duckdb_state_DuckDBError; pub const DuckDBSuccess: duckdb_state = duckdb_state_DuckDBSuccess; diff --git a/crates/libduckdb-sys/src/string.rs b/crates/libduckdb-sys/src/string.rs new file mode 100644 index 00000000..d2f3515c --- /dev/null +++ b/crates/libduckdb-sys/src/string.rs @@ -0,0 +1,60 @@ +use std::{ + ffi::{c_char, CStr}, + ops::Deref, +}; + +use crate::duckdb_free; + +pub struct DuckDbString { + // Invariant: ptr[0..len+1] is valid C string, i.e. ptr[len] is NUL byte. + ptr: core::ptr::NonNull, + len: usize, +} + +impl DuckDbString { + /// Creates a `DuckDbString` from a raw pointer to a C string. + /// + /// # Safety + /// + /// The caller must ensure that the pointer is valid and points to a null-terminated C string. 
+ /// The memory must remain valid for the lifetime of the returned `DuckDbString`, which takes ownership of it and releases it with `duckdb_free` when dropped. + pub unsafe fn from_ptr(ptr: *const c_char) -> Self { + let len = unsafe { CStr::from_ptr(ptr) }.to_bytes().len(); + unsafe { Self::from_raw_parts(ptr, len) } + } + + /// Creates a `DuckDbString` from raw parts. + /// + /// # Safety + /// + /// The caller must ensure that: + /// - `ptr` is a valid, non-null pointer to a null-terminated C string. + /// - `len` accurately represents the length of the string (excluding the null terminator). + /// - The memory referenced by `ptr` remains valid for the lifetime of the returned `DuckDbString`, which releases it with `duckdb_free` when dropped. + /// - The string data is not mutated for the lifetime of the returned `DuckDbString`. + pub unsafe fn from_raw_parts(ptr: *const c_char, len: usize) -> Self { + let ptr = unsafe { core::ptr::NonNull::new_unchecked(ptr as *mut c_char) }; + Self { ptr, len } + } + + fn to_bytes_with_nul(&self) -> &[u8] { + let ptr = self.ptr.as_ptr() as *const u8; + unsafe { core::slice::from_raw_parts(ptr, self.len + 1) } + } +} + +impl Deref for DuckDbString { + type Target = std::ffi::CStr; + + fn deref(&self) -> &Self::Target { + let bytes = self.to_bytes_with_nul(); + unsafe { CStr::from_bytes_with_nul_unchecked(bytes) } + } +} + +impl Drop for DuckDbString { + fn drop(&mut self) { + let ptr = self.ptr.as_ptr() as *mut core::ffi::c_void; + unsafe { duckdb_free(ptr) }; + } +}