Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Logical signature #13104

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b60ddea
fix sig
jayzhan211 Oct 4, 2024
9ead621
fix
jayzhan211 Oct 4, 2024
86e005f
fix error
jayzhan211 Oct 4, 2024
b1aa23b
fix all signature
jayzhan211 Oct 4, 2024
3b5695a
fix all signature
jayzhan211 Oct 4, 2024
f38ef3b
change default type
jayzhan211 Oct 4, 2024
b002c39
clippy
jayzhan211 Oct 4, 2024
b95c66a
fix docs
jayzhan211 Oct 5, 2024
10f5638
Merge branch 'main' of https://github.com/apache/datafusion into strp…
jayzhan211 Oct 5, 2024
984b5cc
rm deadcode
jayzhan211 Oct 8, 2024
045a202
Merge branch 'main' of https://github.com/apache/datafusion into strp…
jayzhan211 Oct 8, 2024
1b37176
cleanup
jayzhan211 Oct 8, 2024
f10d43d
cleanup
jayzhan211 Oct 8, 2024
6b0f2d8
rm test
jayzhan211 Oct 8, 2024
d89a5dc
upd coercible
jayzhan211 Oct 8, 2024
5521af9
upd lock
jayzhan211 Oct 8, 2024
2035ae1
Merge branch 'main' of https://github.com/apache/datafusion into logi…
jayzhan211 Oct 8, 2024
656864a
split flaot and use lazylock
jayzhan211 Oct 8, 2024
9f65e1a
backup
jayzhan211 Oct 8, 2024
3827974
[logical-types] add NativeType and LogicalType
notfilippo Oct 10, 2024
17a70d8
Add license header
notfilippo Oct 10, 2024
3dba963
Add NativeField and derivates
notfilippo Oct 15, 2024
4217970
Support TypeSignatures
notfilippo Oct 15, 2024
01f0089
Fix doc
notfilippo Oct 15, 2024
5b5f4c1
Add documentation
notfilippo Oct 17, 2024
88e1b3c
Fix doc tests
notfilippo Oct 17, 2024
ab16a2d
Remove dummy test
notfilippo Oct 17, 2024
0b2ed2d
Merge remote-tracking branch 'origin/main' into fr/native-and-logical…
notfilippo Oct 17, 2024
6150ea9
Merge remote-tracking branch 'origin/main' into fr/native-and-logical…
notfilippo Oct 18, 2024
7ed7891
From NativeField to LogicalField
notfilippo Oct 22, 2024
5ca1a62
Merge branch 'main' of https://github.com/apache/datafusion into logi…
jayzhan211 Oct 25, 2024
0e66e21
Merge branch 'fr/native-and-logical-types' into logical-signature
jayzhan211 Oct 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub mod scalar;
pub mod stats;
pub mod test_util;
pub mod tree_node;
pub mod types;
pub mod utils;

/// Reexport arrow crate
Expand Down
114 changes: 114 additions & 0 deletions datafusion/common/src/types/field.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow_schema::{Field, Fields, UnionFields};
use std::hash::{Hash, Hasher};
use std::{ops::Deref, sync::Arc};

use super::{LogicalTypeRef, NativeType};

/// A record of a logical type, its name and its nullability.
///
/// Equality and hashing are implemented by hand and take all three fields
/// into account. Ordering is derived, so fields are compared in declaration
/// order: name, then logical type, then nullability.
#[derive(Debug, Clone, Eq, PartialOrd, Ord)]
pub struct LogicalField {
/// Name of the field.
pub name: String,
/// Logical type of the values stored in the field.
pub logical_type: LogicalTypeRef,
/// Whether the field is nullable.
pub nullable: bool,
}

/// Two fields are equal when their names, logical types and nullability all
/// match.
impl PartialEq for LogicalField {
    fn eq(&self, other: &Self) -> bool {
        if self.name != other.name {
            return false;
        }
        // Compares the pointed-to logical types, not the `Arc` addresses.
        if !self.logical_type.eq(&other.logical_type) {
            return false;
        }
        self.nullable == other.nullable
    }
}

/// Feeds the name, the logical type and the nullability into the hasher, in
/// that order — the same three fields that equality compares.
impl Hash for LogicalField {
    fn hash<H>(&self, state: &mut H)
    where
        H: Hasher,
    {
        // Destructuring makes a newly added field a compile error here until
        // it is handled.
        let Self {
            name,
            logical_type,
            nullable,
        } = self;
        name.hash(state);
        logical_type.hash(state);
        nullable.hash(state);
    }
}

impl From<&Field> for LogicalField {
fn from(value: &Field) -> Self {
Self {
name: value.name().clone(),
logical_type: Arc::new(NativeType::from(value.data_type().clone())),
nullable: value.is_nullable(),
}
}
}

/// A reference counted [`LogicalField`].
///
/// Cloning the reference shares the same underlying field instead of copying
/// it.
pub type LogicalFieldRef = Arc<LogicalField>;

/// A cheaply cloneable, owned collection of [`LogicalFieldRef`].
///
/// The fields are stored in a shared `Arc<[LogicalFieldRef]>`, so cloning the
/// collection does not copy the individual fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct LogicalFields(Arc<[LogicalFieldRef]>);

/// Lets a [`LogicalFields`] be used wherever a slice of [`LogicalFieldRef`] is
/// expected.
impl Deref for LogicalFields {
    type Target = [LogicalFieldRef];

    fn deref(&self) -> &Self::Target {
        // Borrow the slice stored inside the `Arc`.
        &*self.0
    }
}

/// Converts every arrow field of `value` into a [`LogicalField`], preserving
/// the iteration order.
impl From<&Fields> for LogicalFields {
    fn from(value: &Fields) -> Self {
        let logical_fields = value.iter().map(|arrow_field| {
            let logical = LogicalField::from(arrow_field.as_ref());
            Arc::new(logical)
        });
        logical_fields.collect::<Self>()
    }
}

impl FromIterator<LogicalFieldRef> for LogicalFields {
fn from_iter<T: IntoIterator<Item = LogicalFieldRef>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}

/// A cheaply cloneable, owned collection of [`LogicalFieldRef`] and their
/// corresponding type ids.
///
/// Each entry pairs an `i8` type id with its field. The pairs are stored in a
/// shared `Arc<[_]>`, so cloning the collection does not copy the entries.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct LogicalUnionFields(Arc<[(i8, LogicalFieldRef)]>);

/// Lets a [`LogicalUnionFields`] be used wherever a slice of
/// `(type id, field)` pairs is expected.
impl Deref for LogicalUnionFields {
    type Target = [(i8, LogicalFieldRef)];

    fn deref(&self) -> &Self::Target {
        // Borrow the slice stored inside the `Arc`.
        &*self.0
    }
}

/// Converts every `(type id, arrow field)` entry of `value` into a
/// `(type id, logical field)` pair, preserving the iteration order.
impl From<&UnionFields> for LogicalUnionFields {
    fn from(value: &UnionFields) -> Self {
        let logical_entries = value.iter().map(|(type_id, arrow_field)| {
            let logical = Arc::new(LogicalField::from(arrow_field.as_ref()));
            (type_id, logical)
        });
        logical_entries.collect::<Self>()
    }
}

impl FromIterator<(i8, LogicalFieldRef)> for LogicalUnionFields {
fn from_iter<T: IntoIterator<Item = (i8, LogicalFieldRef)>>(iter: T) -> Self {
Self(iter.into_iter().collect())
}
}
118 changes: 118 additions & 0 deletions datafusion/common/src/types/logical.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use core::fmt;
use std::{cmp::Ordering, hash::Hash, sync::Arc};

use super::NativeType;

/// Signature that uniquely identifies a type among other types.
///
/// A signature only borrows its contents for the lifetime `'a`; it does not
/// own the native type, the name or the parameters it refers to.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum TypeSignature<'a> {
/// Represents a built-in native type.
Native(&'a NativeType),
/// Represents an arrow-compatible extension type.
/// (<https://arrow.apache.org/docs/format/Columnar.html#extension-types>)
///
/// The `name` should contain the same value as 'ARROW:extension:name'.
Extension {
/// Name of the extension type.
name: &'a str,
/// Parameters of the extension type; empty when the type takes none.
parameters: &'a [TypeParameter<'a>],
},
}

/// A single parameter of a [`TypeSignature::Extension`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum TypeParameter<'a> {
/// A parameter that is itself a type, described by its signature.
Type(TypeSignature<'a>),
/// A numeric parameter.
Number(i128),
}

/// A reference counted [`LogicalType`].
///
/// The concrete type is erased behind `dyn LogicalType`, so one reference type
/// can point at any implementor of the trait.
pub type LogicalTypeRef = Arc<dyn LogicalType>;

/// Representation of a logical type with its signature and its native backing
/// type.
///
/// The logical type is meant to be used during the DataFusion logical planning
/// phase in order to reason about logical types without worrying about their
/// underlying physical implementation.
///
/// Implementors must be `Send + Sync`.
///
/// ### Extension types
///
/// [`LogicalType`] is a trait in order to allow the possibility of declaring
/// extension types:
///
/// ```
/// use datafusion_common::types::{LogicalType, NativeType, TypeSignature};
///
/// struct JSON {}
///
/// impl LogicalType for JSON {
///     fn native(&self) -> &NativeType {
///         &NativeType::Utf8
///     }
///
///     fn signature(&self) -> TypeSignature<'_> {
///         TypeSignature::Extension {
///             name: "JSON",
///             parameters: &[],
///         }
///     }
/// }
/// ```
pub trait LogicalType: Sync + Send {
/// Returns the native type that backs this logical type.
fn native(&self) -> &NativeType;
/// Returns the signature that identifies this logical type.
fn signature(&self) -> TypeSignature<'_>;
}

/// Formats any logical type as a `LogicalType(..)` tuple holding its signature
/// followed by its native type.
impl fmt::Debug for dyn LogicalType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut tuple = f.debug_tuple("LogicalType");
        tuple.field(&self.signature());
        tuple.field(&self.native());
        tuple.finish()
    }
}

/// Two logical types are equal when both their native types and their
/// signatures are equal.
impl PartialEq for dyn LogicalType {
    fn eq(&self, other: &Self) -> bool {
        let same_native = self.native() == other.native();
        // Short-circuits: signatures are only built when the natives match.
        same_native && self.signature() == other.signature()
    }
}

// `Eq` adds no methods; it declares the `PartialEq` impl for
// `dyn LogicalType` to be a full equivalence relation, which the `Ord` impl
// requires as a supertrait.
impl Eq for dyn LogicalType {}

/// Partial ordering for logical types. It never returns `None`, because it
/// simply wraps the total order given by `Ord::cmp`.
impl PartialOrd for dyn LogicalType {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

/// Orders logical types by signature first, falling back to the native
/// backing type only when the signatures compare equal.
impl Ord for dyn LogicalType {
    fn cmp(&self, other: &Self) -> Ordering {
        self.signature()
            .cmp(&other.signature())
            // `then_with` is lazy: the native types are compared only when the
            // signatures tie, rather than on every call as with `then`.
            .then_with(|| self.native().cmp(other.native()))
    }
}

/// Hashes the signature and then the native type — the same two components
/// that equality compares.
impl Hash for dyn LogicalType {
    fn hash<H>(&self, state: &mut H)
    where
        H: std::hash::Hasher,
    {
        let signature = self.signature();
        signature.hash(state);
        let native = self.native();
        native.hash(state);
    }
}
24 changes: 24 additions & 0 deletions datafusion/common/src/types/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

mod field;
mod logical;
mod native;

pub use field::*;
pub use logical::*;
pub use native::*;
Loading
Loading