Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the ArrowVTab module public #259

Merged
merged 7 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ memchr = "2.3"
uuid = { version = "1.0", optional = true }
smallvec = "1.6.1"
cast = { version = "0.3", features = ["std"] }
arrow = { version = "49", default-features = false, features = ["prettyprint", "ffi"] }
arrow = { version = "50", default-features = false, features = ["prettyprint", "ffi"] }
rust_decimal = "1.14"
strum = { version = "0.25", features = ["derive"] }
r2d2 = { version = "0.8.9", optional = true }
Expand Down
6 changes: 3 additions & 3 deletions examples/hello-ext/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl VTab for HelloVTab {
type InitData = HelloInitData;
type BindData = HelloBindData;

fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
let param = bind.get_parameter(0).to_string();
unsafe {
Expand All @@ -51,14 +51,14 @@ impl VTab for HelloVTab {
Ok(())
}

fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe {
(*data).done = false;
}
Ok(())
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
let init_info = func.get_init_data::<HelloInitData>();
let bind_info = func.get_bind_data::<HelloBindData>();

Expand Down
15 changes: 9 additions & 6 deletions src/vtab/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ use arrow::{

use num::cast::AsPrimitive;

/// A pointer to the Arrow record batch for the table function.
#[repr(C)]
struct ArrowBindData {
pub struct ArrowBindData {
rb: *mut RecordBatch,
}

Expand All @@ -34,14 +35,16 @@ impl Free for ArrowBindData {
}
}

/// Keeps track of whether the Arrow record batch has been consumed.
#[repr(C)]
struct ArrowInitData {
pub struct ArrowInitData {
done: bool,
}

impl Free for ArrowInitData {}

struct ArrowVTab;
/// The Arrow table function.
pub struct ArrowVTab;

unsafe fn address_to_arrow_schema(address: usize) -> FFI_ArrowSchema {
let ptr = address as *mut FFI_ArrowSchema;
Expand Down Expand Up @@ -70,7 +73,7 @@ impl VTab for ArrowVTab {
type BindData = ArrowBindData;
type InitData = ArrowInitData;

fn bind(bind: &BindInfo, data: *mut ArrowBindData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut ArrowBindData) -> Result<(), Box<dyn std::error::Error>> {
let param_count = bind.get_parameter_count();
assert!(param_count == 2);
let array = bind.get_parameter(0).to_int64();
Expand All @@ -88,14 +91,14 @@ impl VTab for ArrowVTab {
Ok(())
}

fn init(_: &InitInfo, data: *mut ArrowInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn init(_: &InitInfo, data: *mut ArrowInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe {
(*data).done = false;
}
Ok(())
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
let init_info = func.get_init_data::<ArrowInitData>();
let bind_info = func.get_bind_data::<ArrowBindData>();
unsafe {
Expand Down
6 changes: 3 additions & 3 deletions src/vtab/excel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl VTab for ExcelVTab {
type BindData = ExcelBindData;
type InitData = ExcelInitData;

fn bind(bind: &BindInfo, data: *mut ExcelBindData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut ExcelBindData) -> Result<(), Box<dyn std::error::Error>> {
let param_count = bind.get_parameter_count();
assert!(param_count == 2);
let path = bind.get_parameter(0).to_string();
Expand Down Expand Up @@ -125,14 +125,14 @@ impl VTab for ExcelVTab {
Ok(())
}

fn init(_: &InitInfo, data: *mut ExcelInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn init(_: &InitInfo, data: *mut ExcelInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe {
(*data).start = 1;
}
Ok(())
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
let init_info = func.get_init_data::<ExcelInitData>();
let bind_info = func.get_bind_data::<ExcelBindData>();
unsafe {
Expand Down
55 changes: 45 additions & 10 deletions src/vtab/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ mod logical_type;
mod value;
mod vector;

/// The DuckDB Arrow table function interface.
#[cfg(feature = "vtab-arrow")]
mod arrow;
pub mod arrow;
#[cfg(feature = "vtab-arrow")]
pub use self::arrow::{
arrow_arraydata_to_query_params, arrow_ffi_to_query_params, arrow_recordbatch_to_query_params,
Expand Down Expand Up @@ -66,11 +67,45 @@ pub trait VTab: Sized {
type BindData: Sized + Free;

/// Bind data to the table function
fn bind(bind: &BindInfo, data: *mut Self::BindData) -> Result<(), Box<dyn std::error::Error>>;
///
/// # Safety
///
/// This function is unsafe because it dereferences raw pointers (`data`) and manipulates the memory directly.
/// The caller must ensure that:
///
/// - The `data` pointer is valid and points to a properly initialized `BindData` instance.
/// - The memory behind `data` must remain valid for the entire execution of `bind` to avoid dangling pointers,
/// especially since `bind` does not take ownership of `data`.
/// - Concurrent access to `data` (if applicable) must be properly synchronized.
/// - The `bind` object must be valid and correctly initialized.
unsafe fn bind(bind: &BindInfo, data: *mut Self::BindData) -> Result<(), Box<dyn std::error::Error>>;
/// Initialize the table function
fn init(init: &InitInfo, data: *mut Self::InitData) -> Result<(), Box<dyn std::error::Error>>;
///
/// # Safety
///
/// This function is unsafe because it performs raw pointer dereferencing on the `data` argument.
/// The caller is responsible for ensuring that:
///
/// - The `data` pointer is non-null and points to a valid `InitData` instance.
/// - There is no data race when accessing `data`, meaning if `data` is accessed from multiple threads,
/// proper synchronization is required.
/// - The lifetime of `data` extends beyond the scope of this call to avoid use-after-free errors.
unsafe fn init(init: &InitInfo, data: *mut Self::InitData) -> Result<(), Box<dyn std::error::Error>>;
/// The actual function
fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>>;
///
/// # Safety
///
/// This function is unsafe because it:
///
/// - Dereferences the raw pointers obtained through `func` (the `init_info` and `bind_info` data).
///
/// The caller must ensure that:
///
/// - The `func` and `output` references, as well as the internal `init_info` and `bind_info` pointers, are valid and point to the expected types of data structures.
/// - The `init_info` and `bind_info` data pointed to remains valid and is not freed until after this function completes.
/// - No other threads are concurrently mutating the data pointed to by `init_info` and `bind_info` without proper synchronization.
/// - The `output` parameter is correctly initialized and can safely be written to.
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>>;
/// Does the table function support pushdown
/// default is false
fn supports_pushdown() -> bool {
Expand Down Expand Up @@ -197,7 +232,7 @@ mod test {
type InitData = HelloInitData;
type BindData = HelloBindData;

fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
let param = bind.get_parameter(0).to_string();
unsafe {
Expand All @@ -206,14 +241,14 @@ mod test {
Ok(())
}

fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe {
(*data).done = false;
}
Ok(())
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
let init_info = func.get_init_data::<HelloInitData>();
let bind_info = func.get_bind_data::<HelloBindData>();

Expand Down Expand Up @@ -244,7 +279,7 @@ mod test {
type InitData = HelloInitData;
type BindData = HelloBindData;

fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn Error>> {
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
let param = bind.get_named_parameter("name").unwrap().to_string();
assert!(bind.get_named_parameter("unknown_name").is_none());
Expand All @@ -254,11 +289,11 @@ mod test {
Ok(())
}

fn init(init_info: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn Error>> {
unsafe fn init(init_info: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn Error>> {
HelloVTab::init(init_info, data)
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn Error>> {
HelloVTab::func(func, output)
}

Expand Down
Loading