Skip to content

Commit

Permalink
Make the ArrowVTab module public (#259)
Browse files Browse the repository at this point in the history
* Make the ArrowVTab module public

* chore: clippy lint fixes

* Add unsafe to HelloWithNamedVTab

* Update dependencies

---------

Co-authored-by: Mitch <[email protected]>
  • Loading branch information
phillipleblanc and mitchdevenport authored Mar 29, 2024
1 parent 34a6448 commit ff7d11d
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 23 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ memchr = "2.3"
uuid = { version = "1.0", optional = true }
smallvec = "1.6.1"
cast = { version = "0.3", features = ["std"] }
arrow = { version = "49", default-features = false, features = ["prettyprint", "ffi"] }
arrow = { version = "50", default-features = false, features = ["prettyprint", "ffi"] }
rust_decimal = "1.14"
strum = { version = "0.25", features = ["derive"] }
r2d2 = { version = "0.8.9", optional = true }
Expand Down
6 changes: 3 additions & 3 deletions examples/hello-ext/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl VTab for HelloVTab {
type InitData = HelloInitData;
type BindData = HelloBindData;

fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
let param = bind.get_parameter(0).to_string();
unsafe {
Expand All @@ -51,14 +51,14 @@ impl VTab for HelloVTab {
Ok(())
}

fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe {
(*data).done = false;
}
Ok(())
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
let init_info = func.get_init_data::<HelloInitData>();
let bind_info = func.get_bind_data::<HelloBindData>();

Expand Down
15 changes: 9 additions & 6 deletions src/vtab/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ use arrow::{

use num::cast::AsPrimitive;

/// Holds a pointer to the Arrow record batch for the table function.
#[repr(C)]
struct ArrowBindData {
pub struct ArrowBindData {
rb: *mut RecordBatch,
}

Expand All @@ -34,14 +35,16 @@ impl Free for ArrowBindData {
}
}

/// Keeps track of whether the Arrow record batch has been consumed.
#[repr(C)]
struct ArrowInitData {
pub struct ArrowInitData {
done: bool,
}

impl Free for ArrowInitData {}

struct ArrowVTab;
/// The Arrow table function.
pub struct ArrowVTab;

unsafe fn address_to_arrow_schema(address: usize) -> FFI_ArrowSchema {
let ptr = address as *mut FFI_ArrowSchema;
Expand Down Expand Up @@ -70,7 +73,7 @@ impl VTab for ArrowVTab {
type BindData = ArrowBindData;
type InitData = ArrowInitData;

fn bind(bind: &BindInfo, data: *mut ArrowBindData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut ArrowBindData) -> Result<(), Box<dyn std::error::Error>> {
let param_count = bind.get_parameter_count();
assert!(param_count == 2);
let array = bind.get_parameter(0).to_int64();
Expand All @@ -88,14 +91,14 @@ impl VTab for ArrowVTab {
Ok(())
}

fn init(_: &InitInfo, data: *mut ArrowInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn init(_: &InitInfo, data: *mut ArrowInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe {
(*data).done = false;
}
Ok(())
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
let init_info = func.get_init_data::<ArrowInitData>();
let bind_info = func.get_bind_data::<ArrowBindData>();
unsafe {
Expand Down
6 changes: 3 additions & 3 deletions src/vtab/excel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl VTab for ExcelVTab {
type BindData = ExcelBindData;
type InitData = ExcelInitData;

fn bind(bind: &BindInfo, data: *mut ExcelBindData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut ExcelBindData) -> Result<(), Box<dyn std::error::Error>> {
let param_count = bind.get_parameter_count();
assert!(param_count == 2);
let path = bind.get_parameter(0).to_string();
Expand Down Expand Up @@ -125,14 +125,14 @@ impl VTab for ExcelVTab {
Ok(())
}

fn init(_: &InitInfo, data: *mut ExcelInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn init(_: &InitInfo, data: *mut ExcelInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe {
(*data).start = 1;
}
Ok(())
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
let init_info = func.get_init_data::<ExcelInitData>();
let bind_info = func.get_bind_data::<ExcelBindData>();
unsafe {
Expand Down
55 changes: 45 additions & 10 deletions src/vtab/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ mod logical_type;
mod value;
mod vector;

/// The DuckDB Arrow table function interface.
#[cfg(feature = "vtab-arrow")]
mod arrow;
pub mod arrow;
#[cfg(feature = "vtab-arrow")]
pub use self::arrow::{
arrow_arraydata_to_query_params, arrow_ffi_to_query_params, arrow_recordbatch_to_query_params,
Expand Down Expand Up @@ -66,11 +67,45 @@ pub trait VTab: Sized {
type BindData: Sized + Free;

/// Bind data to the table function
fn bind(bind: &BindInfo, data: *mut Self::BindData) -> Result<(), Box<dyn std::error::Error>>;
///
/// # Safety
///
/// This function is unsafe because it dereferences raw pointers (`data`) and manipulates the memory directly.
/// The caller must ensure that:
///
/// - The `data` pointer is valid and points to a properly initialized `BindData` instance.
/// - The memory `data` points to must remain valid for the entire execution of `bind` to avoid dangling pointers,
/// especially since `bind` does not take ownership of `data`.
/// - Concurrent access to `data` (if applicable) must be properly synchronized.
/// - The `bind` object must be valid and correctly initialized.
unsafe fn bind(bind: &BindInfo, data: *mut Self::BindData) -> Result<(), Box<dyn std::error::Error>>;
/// Initialize the table function
fn init(init: &InitInfo, data: *mut Self::InitData) -> Result<(), Box<dyn std::error::Error>>;
///
/// # Safety
///
/// This function is unsafe because it performs raw pointer dereferencing on the `data` argument.
/// The caller is responsible for ensuring that:
///
/// - The `data` pointer is non-null and points to a valid `InitData` instance.
/// - There is no data race when accessing `data`, meaning if `data` is accessed from multiple threads,
/// proper synchronization is required.
/// - The lifetime of `data` extends beyond the scope of this call to avoid use-after-free errors.
unsafe fn init(init: &InitInfo, data: *mut Self::InitData) -> Result<(), Box<dyn std::error::Error>>;
/// The actual function
fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>>;
///
/// # Safety
///
/// This function is unsafe because it:
///
/// - Dereferences raw pointers obtained through `func` (the `init_info` and `bind_info` data).
///
/// The caller must ensure that:
///
/// - `func` and `output`, as well as the `init_info` and `bind_info` pointers reached through `func`, are valid and point to the expected types of data structures.
/// - The `init_info` and `bind_info` data pointed to remains valid and is not freed until after this function completes.
/// - No other threads are concurrently mutating the data pointed to by `init_info` and `bind_info` without proper synchronization.
/// - The `output` parameter is correctly initialized and can safely be written to.
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>>;
/// Does the table function support pushdown
/// default is false
fn supports_pushdown() -> bool {
Expand Down Expand Up @@ -197,7 +232,7 @@ mod test {
type InitData = HelloInitData;
type BindData = HelloBindData;

fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn std::error::Error>> {
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
let param = bind.get_parameter(0).to_string();
unsafe {
Expand All @@ -206,14 +241,14 @@ mod test {
Ok(())
}

fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn init(_: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn std::error::Error>> {
unsafe {
(*data).done = false;
}
Ok(())
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn std::error::Error>> {
let init_info = func.get_init_data::<HelloInitData>();
let bind_info = func.get_bind_data::<HelloBindData>();

Expand Down Expand Up @@ -244,7 +279,7 @@ mod test {
type InitData = HelloInitData;
type BindData = HelloBindData;

fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn Error>> {
unsafe fn bind(bind: &BindInfo, data: *mut HelloBindData) -> Result<(), Box<dyn Error>> {
bind.add_result_column("column0", LogicalType::new(LogicalTypeId::Varchar));
let param = bind.get_named_parameter("name").unwrap().to_string();
assert!(bind.get_named_parameter("unknown_name").is_none());
Expand All @@ -254,11 +289,11 @@ mod test {
Ok(())
}

fn init(init_info: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn Error>> {
unsafe fn init(init_info: &InitInfo, data: *mut HelloInitData) -> Result<(), Box<dyn Error>> {
HelloVTab::init(init_info, data)
}

fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn Error>> {
unsafe fn func(func: &FunctionInfo, output: &mut DataChunk) -> Result<(), Box<dyn Error>> {
HelloVTab::func(func, output)
}

Expand Down

0 comments on commit ff7d11d

Please sign in to comment.