diff --git a/crates/duckdb/src/lib.rs b/crates/duckdb/src/lib.rs index f86ad9c1..d18f6daf 100644 --- a/crates/duckdb/src/lib.rs +++ b/crates/duckdb/src/lib.rs @@ -1003,7 +1003,6 @@ mod test { } mod query_and_then_tests { - use super::*; #[derive(Debug)] diff --git a/crates/duckdb/src/row.rs b/crates/duckdb/src/row.rs index cfdec186..ab6351ca 100644 --- a/crates/duckdb/src/row.rs +++ b/crates/duckdb/src/row.rs @@ -4,7 +4,7 @@ use super::{Error, Result, Statement}; use crate::types::{self, EnumType, FromSql, FromSqlError, ListType, ValueRef}; use arrow::{ - array::{self, Array, ArrayRef, DictionaryArray, ListArray, StructArray}, + array::{self, Array, ArrayRef, DictionaryArray, FixedSizeListArray, ListArray, MapArray, StructArray}, datatypes::*, }; use fallible_iterator::FallibleIterator; @@ -608,7 +608,20 @@ impl<'stmt> Row<'stmt> { row, ) } - _ => unreachable!("invalid value: {} {}", col, column.data_type()), + DataType::Struct(_) => { + let res = column.as_any().downcast_ref::().unwrap(); + ValueRef::Struct(res, row) + } + DataType::Map(..) => { + let arr = column.as_any().downcast_ref::().unwrap(); + ValueRef::Map(arr, row) + } + DataType::FixedSizeList(..) => { + let arr = column.as_any().downcast_ref::().unwrap(); + ValueRef::Array(arr, row) + } + DataType::Union(..) => ValueRef::Union(column, row), + _ => unreachable!("invalid value: {}, {}", col, column.data_type()), } } diff --git a/crates/duckdb/src/test_all_types.rs b/crates/duckdb/src/test_all_types.rs index 1c324751..4088be1b 100644 --- a/crates/duckdb/src/test_all_types.rs +++ b/crates/duckdb/src/test_all_types.rs @@ -2,7 +2,7 @@ use pretty_assertions::assert_eq; use rust_decimal::Decimal; use crate::{ - types::{TimeUnit, Type, Value, ValueRef}, + types::{OrderedMap, TimeUnit, Type, Value, ValueRef}, Connection, }; @@ -20,27 +20,9 @@ fn test_large_arrow_types() -> crate::Result<()> { } fn test_with_database(database: &Connection) -> crate::Result<()> { - let excluded = vec![ - // uhugeint, time_tz, and dec38_10 aren't supported in the duckdb arrow layer - "uhugeint", - "time_tz", - "dec38_10", - // union is currently blocked by https://github.com/duckdb/duckdb/pull/11326 - "union", - // these remaining types are not yet supported by duckdb-rs - "struct", - "struct_of_arrays", - "array_of_structs", - "map", - "fixed_int_array", - "fixed_varchar_array", - "fixed_nested_int_array", - "fixed_nested_varchar_array", - "fixed_struct_array", - "struct_of_fixed_array", - "fixed_array_of_int_list", - "list_of_fixed_int_array", - ]; + // uhugeint, time_tz, and dec38_10 aren't supported in the duckdb arrow layer + // union is currently blocked by https://github.com/duckdb/duckdb/pull/11326 + let excluded = ["uhugeint", "time_tz", "dec38_10", "union"]; let mut binding = database.prepare(&format!( "SELECT * EXCLUDE ({}) FROM test_all_types()", @@ -58,7 +40,7 @@ fn test_with_database(database: &Connection) -> crate::Result<()> { for column in row.stmt.column_names() { let value = row.get_ref_unwrap(row.stmt.column_index(&column)?); if idx != 2 { - assert_ne!(value.data_type(), Type::Null); + assert_ne!(value.data_type(), Type::Null, "column {column} is null: {value:?}"); } test_single(&mut idx, column, value); } @@ -249,6 +231,14 @@ fn test_single(idx: &mut i32, column: String, value: ValueRef) { } _ => assert_eq!(value, ValueRef::Null), }, + "float_array" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => assert_eq!( + value.to_owned(), + Value::List(vec![Value::Float(1.0), Value::Float(2.0)]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, "date_array" => match idx { 0 => assert_eq!(value.to_owned(), Value::List(vec![])), 1 => assert_eq!( @@ -332,6 +322,304 @@ fn test_single(idx: &mut i32, column: String, value: ValueRef) { } _ => assert_eq!(value, ValueRef::Null), }, + "struct" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Null), + ("b".to_string(), Value::Null), + ])) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Int(42)), + ("b".to_string(), Value::Text("🦆🦆🦆🦆🦆🦆".to_string())), + ])) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "struct_of_arrays" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Null), + ("b".to_string(), Value::Null), + ])) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Struct(OrderedMap::from(vec![ + ( + "a".to_string(), + Value::List(vec![ + Value::Int(42), + Value::Int(999), + Value::Null, + Value::Null, + Value::Int(-42) + ]) + ), + ( + "b".to_string(), + Value::List(vec![ + Value::Text("🦆🦆🦆🦆🦆🦆".to_string()), + Value::Text("goose".to_string()), + Value::Null, + Value::Text("".to_string()), + ]), + ) + ])) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "array_of_structs" => match idx { + 0 => assert_eq!(value.to_owned(), Value::List(vec![])), + 1 => assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Null), + ("b".to_string(), Value::Null) + ])), + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Int(42)), + ("b".to_string(), Value::Text("🦆🦆🦆🦆🦆🦆".to_string())) + ])), + Value::Null + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "map" => match idx { + 0 => assert_eq!(value.to_owned(), Value::Map(OrderedMap::from(vec![]))), + 1 => assert_eq!( + value.to_owned(), + Value::Map(OrderedMap::from(vec![ + (Value::Text("key1".to_string()), Value::Text("🦆🦆🦆🦆🦆🦆".to_string())), + (Value::Text("key2".to_string()), Value::Text("goose".to_string())), + ])) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "fixed_int_array" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Array(vec![Value::Null, Value::Int(2), Value::Int(3)]) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Array(vec![Value::Int(4), Value::Int(5), Value::Int(6)]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "fixed_varchar_array" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::Text("a".to_string()), + Value::Null, + Value::Text("c".to_string()) + ]) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::Text("d".to_string()), + Value::Text("e".to_string()), + Value::Text("f".to_string()) + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "fixed_nested_int_array" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::Array(vec![Value::Null, Value::Int(2), Value::Int(3)]), + Value::Null, + Value::Array(vec![Value::Null, Value::Int(2), Value::Int(3)]) + ]) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::Array(vec![Value::Int(4), Value::Int(5), Value::Int(6)]), + Value::Array(vec![Value::Null, Value::Int(2), Value::Int(3)]), + Value::Array(vec![Value::Int(4), Value::Int(5), Value::Int(6)]), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "fixed_nested_varchar_array" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::Array(vec![ + Value::Text("a".to_string()), + Value::Null, + Value::Text("c".to_string()) + ]), + Value::Null, + Value::Array(vec![ + Value::Text("a".to_string()), + Value::Null, + Value::Text("c".to_string()) + ]) + ]) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::Array(vec![ + Value::Text("d".to_string()), + Value::Text("e".to_string()), + Value::Text("f".to_string()) + ]), + Value::Array(vec![ + Value::Text("a".to_string()), + Value::Null, + Value::Text("c".to_string()) + ]), + Value::Array(vec![ + Value::Text("d".to_string()), + Value::Text("e".to_string()), + Value::Text("f".to_string()) + ]), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "fixed_struct_array" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Null), + ("b".to_string(), Value::Null) + ])), + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Int(42)), + ("b".to_string(), Value::Text("🦆🦆🦆🦆🦆🦆".to_string())) + ])), + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Null), + ("b".to_string(), Value::Null) + ])), + ]) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Int(42)), + ("b".to_string(), Value::Text("🦆🦆🦆🦆🦆🦆".to_string())) + ])), + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Null), + ("b".to_string(), Value::Null) + ])), + Value::Struct(OrderedMap::from(vec![ + ("a".to_string(), Value::Int(42)), + ("b".to_string(), Value::Text("🦆🦆🦆🦆🦆🦆".to_string())) + ])), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "struct_of_fixed_array" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Struct(OrderedMap::from(vec![ + ( + "a".to_string(), + Value::Array(vec![Value::Null, Value::Int(2), Value::Int(3)]) + ), + ( + "b".to_string(), + Value::Array(vec![ + Value::Text("a".to_string()), + Value::Null, + Value::Text("c".to_string()) + ]) + ), + ])) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Struct(OrderedMap::from(vec![ + ( + "a".to_string(), + Value::Array(vec![Value::Int(4), Value::Int(5), Value::Int(6)]), + ), + ( + "b".to_string(), + Value::Array(vec![ + Value::Text("d".to_string()), + Value::Text("e".to_string()), + Value::Text("f".to_string()) + ]), + ) + ])) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "fixed_array_of_int_list" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::List(vec![]), + Value::List(vec![ + Value::Int(42), + Value::Int(999), + Value::Null, + Value::Null, + Value::Int(-42), + ]), + Value::List(vec![]), + ]) + ), + 1 => assert_eq!( + value.to_owned(), + Value::Array(vec![ + Value::List(vec![ + Value::Int(42), + Value::Int(999), + Value::Null, + Value::Null, + Value::Int(-42), + ]), + Value::List(vec![]), + Value::List(vec![ + Value::Int(42), + Value::Int(999), + Value::Null, + Value::Null, + Value::Int(-42), + ]), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, + "list_of_fixed_int_array" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::Array(vec![Value::Null, Value::Int(2), Value::Int(3)]), + Value::Array(vec![Value::Int(4), Value::Int(5), Value::Int(6)]), + Value::Array(vec![Value::Null, Value::Int(2), Value::Int(3)]), + ]) + ), + 1 => assert_eq!( + value.to_owned(), + Value::List(vec![ + Value::Array(vec![Value::Int(4), Value::Int(5), Value::Int(6)]), + Value::Array(vec![Value::Null, Value::Int(2), Value::Int(3)]), + Value::Array(vec![Value::Int(4), Value::Int(5), Value::Int(6)]), + ]) + ), + _ => assert_eq!(value, ValueRef::Null), + }, "bit" => match idx { 0 => assert_eq!(value, ValueRef::Blob(&[1, 145, 46, 42, 215]),), 1 => assert_eq!(value, ValueRef::Blob(&[3, 245])), @@ -371,6 +659,14 @@ fn test_single(idx: &mut i32, column: String, value: ValueRef) { 1 => assert_eq!(value.to_owned(), Value::Enum("enum_69999".to_string())), _ => assert_eq!(value, ValueRef::Null), }, + "union" => match idx { + 0 => assert_eq!( + value.to_owned(), + Value::Union(Box::new(Value::Text("Frank".to_owned()))) + ), + 1 => assert_eq!(value.to_owned(), Value::Union(Box::new(Value::SmallInt(5)))), + _ => assert_eq!(value.to_owned(), Value::Union(Box::new(Value::Null))), + }, _ => todo!("{column:?}"), } } diff --git a/crates/duckdb/src/types/mod.rs b/crates/duckdb/src/types/mod.rs index 93222b09..e288cc0a 100644 --- a/crates/duckdb/src/types/mod.rs +++ b/crates/duckdb/src/types/mod.rs @@ -69,6 +69,7 @@ impl ToSql for DateTimeSql { pub use self::{ from_sql::{FromSql, FromSqlError, FromSqlResult}, + ordered_map::OrderedMap, to_sql::{ToSql, ToSqlOutput}, value::Value, value_ref::{EnumType, ListType, TimeUnit, ValueRef}, @@ -88,6 +89,8 @@ mod url; mod value; mod value_ref; +mod ordered_map; + /// Empty struct that can be used to fill in a query parameter as `NULL`. /// /// ## Example @@ -151,6 +154,14 @@ pub enum Type { List(Box), /// ENUM Enum, + /// STRUCT + Struct(Vec<(String, Type)>), + /// MAP + Map(Box, Box), + /// ARRAY + Array(Box, u32), + /// UNION + Union, /// Any Any, } @@ -183,13 +194,30 @@ impl From<&DataType> for Type { // DataType::LargeBinary => Self::LargeBinary, DataType::LargeUtf8 | DataType::Utf8 => Self::Text, DataType::List(inner) => Self::List(Box::new(Type::from(inner.data_type()))), - // DataType::FixedSizeList(field, size) => Self::Array, + DataType::FixedSizeList(field, size) => { + Self::Array(Box::new(Type::from(field.data_type())), (*size).try_into().unwrap()) + } + // DataType::LargeList(_) => Self::LargeList, + DataType::Struct(inner) => Self::Struct( + inner + .iter() + .map(|f| (f.name().to_owned(), Type::from(f.data_type()))) + .collect(), + ), DataType::LargeList(inner) => Self::List(Box::new(Type::from(inner.data_type()))), - // DataType::Struct(inner) => Self::Struct, // DataType::Union(_, _) => Self::Union, DataType::Decimal128(..) => Self::Decimal, DataType::Decimal256(..) => Self::Decimal, - // DataType::Map(field, ..) => Self::Map, + DataType::Map(field, ..) => { + let data_type = field.data_type(); + match data_type { + DataType::Struct(fields) => Self::Map( + Box::new(Type::from(fields[0].data_type())), + Box::new(Type::from(fields[1].data_type())), + ), + _ => unreachable!(), + } + } res => unimplemented!("{}", res), } } @@ -218,8 +246,12 @@ impl fmt::Display for Type { Type::Date32 => f.pad("Date32"), Type::Time64 => f.pad("Time64"), Type::Interval => f.pad("Interval"), + Type::Struct(..) => f.pad("Struct"), Type::List(..) => f.pad("List"), Type::Enum => f.pad("Enum"), + Type::Map(..) => f.pad("Map"), + Type::Array(..) => f.pad("Array"), + Type::Union => f.pad("Union"), Type::Any => f.pad("Any"), } } diff --git a/crates/duckdb/src/types/ordered_map.rs b/crates/duckdb/src/types/ordered_map.rs new file mode 100644 index 00000000..09d9392f --- /dev/null +++ b/crates/duckdb/src/types/ordered_map.rs @@ -0,0 +1,28 @@ +/// An ordered map of key-value pairs. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct OrderedMap(Vec<(K, V)>); + +impl From> for OrderedMap { + fn from(value: Vec<(K, V)>) -> Self { + OrderedMap(value) + } +} + +impl OrderedMap { + /// Returns the value corresponding to the key. + pub fn get(&self, key: &K) -> Option<&V> { + self.0.iter().find(|(k, _)| k == key).map(|(_, v)| v) + } + /// Returns an iterator over the keys in the map. + pub fn keys(&self) -> impl Iterator { + self.0.iter().map(|(k, _)| k) + } + /// Returns an iterator over the values in the map. + pub fn values(&self) -> impl Iterator { + self.0.iter().map(|(_, v)| v) + } + /// Returns an iterator over the key-value pairs in the map. + pub fn iter(&self) -> impl Iterator { + self.0.iter() + } +} diff --git a/crates/duckdb/src/types/value.rs b/crates/duckdb/src/types/value.rs index 78ee8b39..03c7b3dd 100644 --- a/crates/duckdb/src/types/value.rs +++ b/crates/duckdb/src/types/value.rs @@ -1,4 +1,4 @@ -use super::{Null, TimeUnit, Type}; +use super::{Null, OrderedMap, TimeUnit, Type}; use rust_decimal::prelude::*; /// Owning [dynamic type value](http://sqlite.org/datatype3.html). Value's type is typically @@ -59,6 +59,14 @@ pub enum Value { List(Vec), /// The value is an enum Enum(String), + /// The value is a struct + Struct(OrderedMap), + /// The value is an array + Array(Vec), + /// The value is a map + Map(OrderedMap), + /// The value is a union + Union(Box), } impl From for Value { @@ -226,7 +234,7 @@ impl Value { Value::Date32(_) => Type::Date32, Value::Time64(..) => Type::Time64, Value::Interval { .. } => Type::Interval, - Value::List(_) => todo!(), + Value::Union(..) | Value::Struct(..) | Value::List(..) | Value::Array(..) | Value::Map(..) => todo!(), Value::Enum(..) => Type::Enum, } } diff --git a/crates/duckdb/src/types/value_ref.rs b/crates/duckdb/src/types/value_ref.rs index ed89ac01..45e53c9d 100644 --- a/crates/duckdb/src/types/value_ref.rs +++ b/crates/duckdb/src/types/value_ref.rs @@ -1,11 +1,14 @@ use super::{Type, Value}; -use crate::types::{FromSqlError, FromSqlResult}; +use crate::types::{FromSqlError, FromSqlResult, OrderedMap}; use crate::Row; use rust_decimal::prelude::*; use arrow::{ - array::{Array, ArrayRef, DictionaryArray, LargeListArray, ListArray}, + array::{ + Array, ArrayRef, DictionaryArray, FixedSizeListArray, LargeListArray, ListArray, MapArray, StructArray, + UnionArray, + }, datatypes::{UInt16Type, UInt32Type, UInt8Type}, }; @@ -92,6 +95,14 @@ pub enum ValueRef<'a> { List(ListType<'a>, usize), /// The value is an enum Enum(EnumType<'a>, usize), + /// The value is a struct + Struct(&'a StructArray, usize), + /// The value is an array + Array(&'a FixedSizeListArray, usize), + /// The value is a map + Map(&'a MapArray, usize), + /// The value is a union + Union(&'a ArrayRef, usize), } /// Wrapper type for different list sizes @@ -139,11 +150,15 @@ impl ValueRef<'_> { ValueRef::Date32(_) => Type::Date32, ValueRef::Time64(..) => Type::Time64, ValueRef::Interval { .. } => Type::Interval, + ValueRef::Struct(arr, _) => arr.data_type().into(), + ValueRef::Map(arr, _) => arr.data_type().into(), + ValueRef::Array(arr, _) => arr.data_type().into(), ValueRef::List(arr, _) => match arr { ListType::Large(arr) => arr.data_type().into(), ListType::Regular(arr) => arr.data_type().into(), }, ValueRef::Enum(..) => Type::Enum, + ValueRef::Union(arr, _) => arr.data_type().into(), } } @@ -241,6 +256,50 @@ impl From> for Value { panic!("Enum value is not a string") } } + ValueRef::Struct(items, idx) => { + let value: Vec<(String, Value)> = items + .columns() + .iter() + .zip(items.fields().iter().map(|f| f.name().to_owned())) + .map(|(column, name)| -> (String, Value) { + (name, Row::value_ref_internal(idx, 0, column).to_owned()) + }) + .collect(); + Value::Struct(OrderedMap::from(value)) + } + ValueRef::Map(arr, idx) => { + let keys = arr.keys(); + let values = arr.values(); + let offsets = arr.offsets(); + let range = offsets[idx]..offsets[idx + 1]; + Value::Map(OrderedMap::from( + range + .map(|row| { + let row = row.try_into().unwrap(); + let key = Row::value_ref_internal(row, idx, keys).to_owned(); + let value = Row::value_ref_internal(row, idx, values).to_owned(); + (key, value) + }) + .collect::>(), + )) + } + ValueRef::Array(items, idx) => { + let value_length = usize::try_from(items.value_length()).unwrap(); + let range = (idx * value_length)..((idx + 1) * value_length); + Value::Array( + range + .map(|row| Row::value_ref_internal(row, idx, items.values()).to_owned()) + .collect(), + ) + } + ValueRef::Union(column, idx) => { + let column = column.as_any().downcast_ref::().unwrap(); + let type_id = column.type_id(idx); + let value_offset = column.value_offset(idx); + + let tag = Row::value_ref_internal(idx, value_offset, column.child(type_id)); + Value::Union(Box::new(tag.to_owned())) + } } } } @@ -291,8 +350,10 @@ impl<'a> From<&'a Value> for ValueRef<'a> { Value::Date32(d) => ValueRef::Date32(d), Value::Time64(t, d) => ValueRef::Time64(t, d), Value::Interval { months, days, nanos } => ValueRef::Interval { months, days, nanos }, - Value::List(..) => unimplemented!(), Value::Enum(..) => todo!(), + Value::List(..) | Value::Struct(..) | Value::Map(..) | Value::Array(..) | Value::Union(..) => { + unimplemented!() + } } } }