From 8065fb2491c99298f8fe1cd58335fc9a5bda6276 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 18 Oct 2024 09:10:33 +0200 Subject: [PATCH] Implement Eq, PartialEq, Hash for PhysicalExpr --- datafusion/core/tests/sql/path_partition.rs | 3 +- .../physical-expr-common/src/physical_expr.rs | 81 ++++++++----------- .../physical-expr-common/src/sort_expr.rs | 4 - .../physical-expr/src/equivalence/class.rs | 7 +- .../physical-expr/src/expressions/binary.rs | 30 ++----- .../physical-expr/src/expressions/case.rs | 40 +-------- .../physical-expr/src/expressions/cast.rs | 30 ++----- .../physical-expr/src/expressions/column.rs | 21 +---- .../physical-expr/src/expressions/in_list.rs | 32 ++++---- .../src/expressions/is_not_null.rs | 22 +---- .../physical-expr/src/expressions/is_null.rs | 26 ++---- .../physical-expr/src/expressions/like.rs | 32 ++------ .../physical-expr/src/expressions/literal.rs | 18 +---- .../physical-expr/src/expressions/negative.rs | 27 ++----- .../physical-expr/src/expressions/no_op.rs | 17 +--- .../physical-expr/src/expressions/not.rs | 23 +----- .../physical-expr/src/expressions/try_cast.rs | 23 +----- .../src/expressions/unknown_column.rs | 14 ++-- datafusion/physical-expr/src/physical_expr.rs | 2 - .../physical-expr/src/scalar_function.rs | 26 +----- datafusion/physical-expr/src/utils/mod.rs | 2 +- .../tests/cases/roundtrip_physical_plan.rs | 22 ++--- 22 files changed, 116 insertions(+), 386 deletions(-) diff --git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index 919054e8330f..975984e5b11f 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -47,7 +47,6 @@ use bytes::Bytes; use chrono::{TimeZone, Utc}; use datafusion_expr::{col, lit, Expr, Operator}; use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal}; -use datafusion_physical_expr::PhysicalExpr; use futures::stream::{self, BoxStream}; use object_store::{ path::Path, GetOptions, GetResult, GetResultPayload, ListResult, ObjectMeta, @@ -97,7 +96,7 @@ async fn parquet_partition_pruning_filter() -> Result<()> { assert!(pred.as_any().is::()); let pred = pred.as_any().downcast_ref::().unwrap(); - assert_eq!(pred, expected.as_any()); + assert_eq!(pred, expected.as_ref()); Ok(()) } diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index cc725cf2cefb..41e46d261508 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -52,7 +52,7 @@ use datafusion_expr_common::sort_properties::ExprProperties; /// [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html /// [`create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html /// [`Column`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/expressions/struct.Column.html -pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq { +pub trait PhysicalExpr: Send + Sync + Display + Debug + DynEq + DynHash { /// Returns the physical expression as [`Any`] so that it can be /// downcast to a specific implementation. fn as_any(&self) -> &dyn Any; @@ -141,38 +141,6 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq { Ok(Some(vec![])) } - /// Update the hash `state` with this expression requirements from - /// [`Hash`]. - /// - /// This method is required to support hashing [`PhysicalExpr`]s. To - /// implement it, typically the type implementing - /// [`PhysicalExpr`] implements [`Hash`] and - /// then the following boiler plate is used: - /// - /// # Example: - /// ``` - /// // User defined expression that derives Hash - /// #[derive(Hash, Debug, PartialEq, Eq)] - /// struct MyExpr { - /// val: u64 - /// } - /// - /// // impl PhysicalExpr { - /// // ... - /// # impl MyExpr { - /// // Boiler plate to call the derived Hash impl - /// fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) { - /// use std::hash::Hash; - /// let mut s = state; - /// self.hash(&mut s); - /// } - /// // } - /// # } - /// ``` - /// Note: [`PhysicalExpr`] is not constrained by [`Hash`] - /// directly because it must remain object safe. - fn dyn_hash(&self, _state: &mut dyn Hasher); - /// Calculates the properties of this [`PhysicalExpr`] based on its /// children's properties (i.e. order and range), recursively aggregating /// the information from its children. In cases where the [`PhysicalExpr`] @@ -183,6 +151,39 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq { } } +pub trait DynEq { + fn dyn_eq(&self, other: &dyn Any) -> bool; +} + +impl DynEq for T { + fn dyn_eq(&self, other: &dyn Any) -> bool { + other + .downcast_ref::() + .map_or(false, |other| other == self) + } +} + +impl PartialEq for dyn PhysicalExpr { + fn eq(&self, other: &Self) -> bool { + self.dyn_eq(other.as_any()) + } +} + +impl Eq for dyn PhysicalExpr {} + +/// Note: [`PhysicalExpr`] is not constrained by [`Hash`] directly because it must remain +/// object safe. +pub trait DynHash { + fn dyn_hash(&self, _state: &mut dyn Hasher); +} + +impl DynHash for T { + fn dyn_hash(&self, mut state: &mut dyn Hasher) { + self.type_id().hash(&mut state); + self.hash(&mut state) + } +} + impl Hash for dyn PhysicalExpr { fn hash(&self, state: &mut H) { self.dyn_hash(state); @@ -210,20 +211,6 @@ pub fn with_new_children_if_necessary( } } -pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { - if any.is::>() { - any.downcast_ref::>() - .unwrap() - .as_any() - } else if any.is::>() { - any.downcast_ref::>() - .unwrap() - .as_any() - } else { - any - } -} - /// Returns [`Display`] able a list of [`PhysicalExpr`] /// /// Example output: `[a + 1, b]` diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index 6c4bf156ce56..c8f741b2522f 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -56,14 +56,10 @@ use datafusion_expr_common::columnar_value::ColumnarValue; /// # fn evaluate(&self, batch: &RecordBatch) -> Result {todo!() } /// # fn children(&self) -> Vec<&Arc> {todo!()} /// # fn with_new_children(self: Arc, children: Vec>) -> Result> {todo!()} -/// # fn dyn_hash(&self, _state: &mut dyn Hasher) {todo!()} /// # } /// # impl Display for MyPhysicalExpr { /// # fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "a") } /// # } -/// # impl PartialEq for MyPhysicalExpr { -/// # fn eq(&self, _other: &dyn Any) -> bool { true } -/// # } /// # fn col(name: &str) -> Arc { Arc::new(MyPhysicalExpr) } /// // Sort by a ASC /// let options = SortOptions::default(); diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index c1851ddb22b5..a2dc25473b6f 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -67,8 +67,7 @@ pub struct ConstExpr { impl PartialEq for ConstExpr { fn eq(&self, other: &Self) -> bool { - self.across_partitions == other.across_partitions - && self.expr.eq(other.expr.as_any()) + self.across_partitions == other.across_partitions && self.expr.eq(&other.expr) } } @@ -121,7 +120,7 @@ impl ConstExpr { /// Returns true if this constant expression is equal to the given expression pub fn eq_expr(&self, other: impl AsRef) -> bool { - self.expr.eq(other.as_ref().as_any()) + self.expr.as_ref() == other.as_ref() } /// Returns a [`Display`]able list of `ConstExpr`. @@ -557,7 +556,7 @@ impl EquivalenceGroup { new_classes.push((source, vec![Arc::clone(target)])); } if let Some((_, values)) = - new_classes.iter_mut().find(|(key, _)| key.eq(source)) + new_classes.iter_mut().find(|(key, _)| *key == source) { if !physical_exprs_contains(values, target) { values.push(Arc::clone(target)); diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 47b04a876b37..7cdac6cbb1f9 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -17,11 +17,10 @@ mod kernels; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::{any::Any, sync::Arc}; use crate::intervals::cp_solver::{propagate_arithmetic, propagate_comparison}; -use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::array::*; @@ -48,11 +47,11 @@ use kernels::{ }; /// Binary expression -#[derive(Debug, Hash, Clone)] -pub struct BinaryExpr { - left: Arc, +#[derive(Debug, Hash, Clone, Eq, PartialEq)] +pub struct BinaryExpr { + left: Arc, op: Operator, - right: Arc, + right: Arc, /// Specifies whether an error is returned on overflow or not fail_on_overflow: bool, } @@ -477,11 +476,6 @@ impl PhysicalExpr for BinaryExpr { } } - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } - /// For each operator, [`BinaryExpr`] has distinct rules. /// TODO: There may be rules specific to some data types and expression ranges. fn get_properties(&self, children: &[ExprProperties]) -> Result { @@ -525,20 +519,6 @@ impl PhysicalExpr for BinaryExpr { } } -impl PartialEq for BinaryExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.left.eq(&x.left) - && self.op == x.op - && self.right.eq(&x.right) - && self.fail_on_overflow.eq(&x.fail_on_overflow) - }) - .unwrap_or(false) - } -} - /// Casts dictionary array to result type for binary numerical operators. Such operators /// between array and scalar produce a dictionary array other than primitive array of the /// same operators between array and array. This leads to inconsistent result types causing diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index ffb431b200f2..61facedc826f 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -16,11 +16,10 @@ // under the License. use std::borrow::Cow; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::{any::Any, sync::Arc}; use crate::expressions::try_cast; -use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::array::*; @@ -37,7 +36,7 @@ use itertools::Itertools; type WhenThen = (Arc, Arc); -#[derive(Debug, Hash)] +#[derive(Debug, Hash, PartialEq, Eq)] enum EvalMethod { /// CASE WHEN condition THEN result /// [WHEN ...] @@ -80,7 +79,7 @@ enum EvalMethod { /// [WHEN ...] /// [ELSE result] /// END -#[derive(Debug, Hash)] +#[derive(Debug, Hash, PartialEq, Eq)] pub struct CaseExpr { /// Optional base expression that can be compared to literal values in the "when" expressions expr: Option>, @@ -506,39 +505,6 @@ impl PhysicalExpr for CaseExpr { )?)) } } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for CaseExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - let expr_eq = match (&self.expr, &x.expr) { - (Some(expr1), Some(expr2)) => expr1.eq(expr2), - (None, None) => true, - _ => false, - }; - let else_expr_eq = match (&self.else_expr, &x.else_expr) { - (Some(expr1), Some(expr2)) => expr1.eq(expr2), - (None, None) => true, - _ => false, - }; - expr_eq - && else_expr_eq - && self.when_then_expr.len() == x.when_then_expr.len() - && self.when_then_expr.iter().zip(x.when_then_expr.iter()).all( - |((when1, then1), (when2, then2))| { - when1.eq(when2) && then1.eq(then2) - }, - ) - }) - .unwrap_or(false) - } } /// Create a CASE expression diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index 5621473c4fdb..6feabb3b9a2c 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -17,10 +17,10 @@ use std::any::Any; use std::fmt; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; -use crate::physical_expr::{down_cast_any_ref, PhysicalExpr}; +use crate::physical_expr::PhysicalExpr; use arrow::compute::{can_cast_types, CastOptions}; use arrow::datatypes::{DataType, DataType::*, Schema}; @@ -42,10 +42,10 @@ const DEFAULT_SAFE_CAST_OPTIONS: CastOptions<'static> = CastOptions { }; /// CAST expression casts an expression to a specific data type and returns a runtime error on invalid cast -#[derive(Debug, Clone)] -pub struct CastExpr { +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct CastExpr { /// The expression to cast - pub expr: Arc, + pub expr: Arc, /// The data type to cast to cast_type: DataType, /// Cast options @@ -160,13 +160,6 @@ impl PhysicalExpr for CastExpr { ])) } - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.expr.hash(&mut s); - self.cast_type.hash(&mut s); - self.cast_options.hash(&mut s); - } - /// A [`CastExpr`] preserves the ordering of its child if the cast is done /// under the same datatype family. fn get_properties(&self, children: &[ExprProperties]) -> Result { @@ -186,19 +179,6 @@ impl PhysicalExpr for CastExpr { } } -impl PartialEq for CastExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.expr.eq(&x.expr) - && self.cast_type == x.cast_type - && self.cast_options == x.cast_options - }) - .unwrap_or(false) - } -} - /// Return a PhysicalExpression representing `expr` casted to /// `cast_type`, if any casting is needed. /// diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index 4aad959584ac..5f6932f6d725 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -18,9 +18,10 @@ //! Physical column reference: [`Column`] use std::any::Any; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; +use crate::physical_expr::PhysicalExpr; use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, @@ -30,8 +31,6 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{internal_err, plan_err, Result}; use datafusion_expr::ColumnarValue; -use crate::physical_expr::{down_cast_any_ref, PhysicalExpr}; - /// Represents the column at a given index in a RecordBatch /// /// This is a physical expression that represents a column at a given index in an @@ -107,7 +106,7 @@ impl std::fmt::Display for Column { impl PhysicalExpr for Column { /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn std::any::Any { + fn as_any(&self) -> &dyn Any { self } @@ -139,20 +138,6 @@ impl PhysicalExpr for Column { ) -> Result> { Ok(self) } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for Column { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self == x) - .unwrap_or(false) - } } impl Column { diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index 0a3e5fcefcf6..bd0b61a56d66 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -22,7 +22,7 @@ use std::fmt::Debug; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use crate::physical_expr::{down_cast_any_ref, physical_exprs_bag_equal}; +use crate::physical_expr::physical_exprs_bag_equal; use crate::PhysicalExpr; use arrow::array::*; @@ -398,26 +398,24 @@ impl PhysicalExpr for InListExpr { self.static_filter.clone(), ))) } +} - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.expr.hash(&mut s); - self.negated.hash(&mut s); - self.list.hash(&mut s); - // Add `self.static_filter` when hash is available +impl PartialEq for InListExpr { + fn eq(&self, other: &Self) -> bool { + self.expr.eq(&other.expr) + && physical_exprs_bag_equal(&self.list, &other.list) + && self.negated == other.negated } } -impl PartialEq for InListExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.expr.eq(&x.expr) - && physical_exprs_bag_equal(&self.list, &x.list) - && self.negated == x.negated - }) - .unwrap_or(false) +impl Eq for InListExpr {} + +impl Hash for InListExpr { + fn hash(&self, state: &mut H) { + self.expr.hash(state); + self.negated.hash(state); + self.list.hash(state); + // Add `self.static_filter` when hash is available } } diff --git a/datafusion/physical-expr/src/expressions/is_not_null.rs b/datafusion/physical-expr/src/expressions/is_not_null.rs index cbab7d0c9d1f..0e1fe80ca22c 100644 --- a/datafusion/physical-expr/src/expressions/is_not_null.rs +++ b/datafusion/physical-expr/src/expressions/is_not_null.rs @@ -17,10 +17,9 @@ //! IS NOT NULL expression -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::{any::Any, sync::Arc}; -use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::{ datatypes::{DataType, Schema}, @@ -31,10 +30,10 @@ use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue; /// IS NOT NULL expression -#[derive(Debug, Hash)] -pub struct IsNotNullExpr { +#[derive(Debug, Hash, Eq, PartialEq)] +pub struct IsNotNullExpr { /// The input expression - arg: Arc, + arg: Arc, } impl IsNotNullExpr { @@ -92,21 +91,8 @@ impl PhysicalExpr for IsNotNullExpr { ) -> Result> { Ok(Arc::new(IsNotNullExpr::new(Arc::clone(&children[0])))) } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } } -impl PartialEq for IsNotNullExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self.arg.eq(&x.arg)) - .unwrap_or(false) - } -} /// Create an IS NOT NULL expression pub fn is_not_null(arg: Arc) -> Result> { Ok(Arc::new(IsNotNullExpr::new(arg))) diff --git a/datafusion/physical-expr/src/expressions/is_null.rs b/datafusion/physical-expr/src/expressions/is_null.rs index 1c8597d3fdea..2a6fa5d6dbb2 100644 --- a/datafusion/physical-expr/src/expressions/is_null.rs +++ b/datafusion/physical-expr/src/expressions/is_null.rs @@ -17,25 +17,23 @@ //! IS NULL expression -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::{any::Any, sync::Arc}; +use crate::PhysicalExpr; use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; - -use crate::physical_expr::down_cast_any_ref; -use crate::PhysicalExpr; use datafusion_common::Result; use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue; /// IS NULL expression -#[derive(Debug, Hash)] -pub struct IsNullExpr { +#[derive(Debug, Hash, Eq, PartialEq)] +pub struct IsNullExpr { /// Input expression - arg: Arc, + arg: Arc, } impl IsNullExpr { @@ -92,20 +90,6 @@ impl PhysicalExpr for IsNullExpr { ) -> Result> { Ok(Arc::new(IsNullExpr::new(Arc::clone(&children[0])))) } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for IsNullExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self.arg.eq(&x.arg)) - .unwrap_or(false) - } } /// Create an IS NULL expression diff --git a/datafusion/physical-expr/src/expressions/like.rs b/datafusion/physical-expr/src/expressions/like.rs index b84ba82b642d..fef86a0a2758 100644 --- a/datafusion/physical-expr/src/expressions/like.rs +++ b/datafusion/physical-expr/src/expressions/like.rs @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::{any::Any, sync::Arc}; -use crate::{physical_expr::down_cast_any_ref, PhysicalExpr}; - +use crate::PhysicalExpr; use arrow::record_batch::RecordBatch; use arrow_schema::{DataType, Schema}; use datafusion_common::{internal_err, Result}; @@ -27,12 +26,12 @@ use datafusion_expr::ColumnarValue; use datafusion_physical_expr_common::datum::apply_cmp; // Like expression -#[derive(Debug, Hash)] -pub struct LikeExpr { +#[derive(Debug, Hash, Eq, PartialEq)] +pub struct LikeExpr { negated: bool, case_insensitive: bool, - expr: Arc, - pattern: Arc, + expr: Arc, + pattern: Arc, } impl LikeExpr { @@ -127,25 +126,6 @@ impl PhysicalExpr for LikeExpr { Arc::clone(&children[1]), ))) } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for LikeExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.negated == x.negated - && self.case_insensitive == x.case_insensitive - && self.expr.eq(&x.expr) - && self.pattern.eq(&x.pattern) - }) - .unwrap_or(false) - } } /// used for optimize Dictionary like diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index ed24e9028153..f0d02eb605b2 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -18,10 +18,10 @@ //! Literal expressions for physical operations use std::any::Any; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; -use crate::physical_expr::{down_cast_any_ref, PhysicalExpr}; +use crate::physical_expr::PhysicalExpr; use arrow::{ datatypes::{DataType, Schema}, @@ -86,11 +86,6 @@ impl PhysicalExpr for Literal { Ok(self) } - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } - fn get_properties(&self, _children: &[ExprProperties]) -> Result { Ok(ExprProperties { sort_properties: SortProperties::Singleton, @@ -99,15 +94,6 @@ impl PhysicalExpr for Literal { } } -impl PartialEq for Literal { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self == x) - .unwrap_or(false) - } -} - /// Create a literal expression pub fn lit(value: T) -> Arc { match value.lit() { diff --git a/datafusion/physical-expr/src/expressions/negative.rs b/datafusion/physical-expr/src/expressions/negative.rs index b5ebc250cb89..ee653e31f2c1 100644 --- a/datafusion/physical-expr/src/expressions/negative.rs +++ b/datafusion/physical-expr/src/expressions/negative.rs @@ -18,10 +18,9 @@ //! Negation (-) expression use std::any::Any; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; -use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::{ @@ -38,10 +37,10 @@ use datafusion_expr::{ }; /// Negative expression -#[derive(Debug, Hash)] -pub struct NegativeExpr { +#[derive(Debug, Hash, Eq, PartialEq)] +pub struct NegativeExpr { /// Input expression - arg: Arc, + arg: Arc, } impl NegativeExpr { @@ -100,11 +99,6 @@ impl PhysicalExpr for NegativeExpr { Ok(Arc::new(NegativeExpr::new(Arc::clone(&children[0])))) } - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } - /// Given the child interval of a NegativeExpr, it calculates the NegativeExpr's interval. /// It replaces the upper and lower bounds after multiplying them with -1. /// Ex: `(a, b]` => `[-b, -a)` @@ -142,15 +136,6 @@ impl PhysicalExpr for NegativeExpr { } } -impl PartialEq for NegativeExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self.arg.eq(&x.arg)) - .unwrap_or(false) - } -} - /// Creates a unary expression NEGATIVE /// /// # Errors @@ -224,7 +209,7 @@ mod tests { #[test] fn test_evaluate_bounds() -> Result<()> { - let negative_expr = NegativeExpr { + let negative_expr = NegativeExpr:: { arg: Arc::new(Column::new("a", 0)), }; let child_interval = Interval::make(Some(-2), Some(1))?; @@ -238,7 +223,7 @@ mod tests { #[test] fn test_propagate_constraints() -> Result<()> { - let negative_expr = NegativeExpr { + let negative_expr = NegativeExpr:: { arg: Arc::new(Column::new("a", 0)), }; let original_child_interval = Interval::make(Some(-2), Some(3))?; diff --git a/datafusion/physical-expr/src/expressions/no_op.rs b/datafusion/physical-expr/src/expressions/no_op.rs index 9148cb7c1c1d..c17b52f5cdff 100644 --- a/datafusion/physical-expr/src/expressions/no_op.rs +++ b/datafusion/physical-expr/src/expressions/no_op.rs @@ -18,7 +18,7 @@ //! NoOp placeholder for physical operations use std::any::Any; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; use arrow::{ @@ -26,7 +26,6 @@ use arrow::{ record_batch::RecordBatch, }; -use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use datafusion_common::{internal_err, Result}; use datafusion_expr::ColumnarValue; @@ -78,18 +77,4 @@ impl PhysicalExpr for NoOp { ) -> Result> { Ok(self) } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for NoOp { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self == x) - .unwrap_or(false) - } } diff --git a/datafusion/physical-expr/src/expressions/not.rs b/datafusion/physical-expr/src/expressions/not.rs index b69954e00bba..0d5f1966af36 100644 --- a/datafusion/physical-expr/src/expressions/not.rs +++ b/datafusion/physical-expr/src/expressions/not.rs @@ -19,10 +19,9 @@ use std::any::Any; use std::fmt; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; -use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; @@ -30,10 +29,10 @@ use datafusion_common::{cast::as_boolean_array, Result, ScalarValue}; use datafusion_expr::ColumnarValue; /// Not expression -#[derive(Debug, Hash)] -pub struct NotExpr { +#[derive(Debug, Hash, Eq, PartialEq)] +pub struct NotExpr { /// Input expression - arg: Arc, + arg: Arc, } impl NotExpr { @@ -99,20 +98,6 @@ impl PhysicalExpr for NotExpr { ) -> Result> { Ok(Arc::new(NotExpr::new(Arc::clone(&children[0])))) } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for NotExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self.arg.eq(&x.arg)) - .unwrap_or(false) - } } /// Creates a unary expression NOT diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs b/datafusion/physical-expr/src/expressions/try_cast.rs index 43b6c993d2b2..03e995948cae 100644 --- a/datafusion/physical-expr/src/expressions/try_cast.rs +++ b/datafusion/physical-expr/src/expressions/try_cast.rs @@ -17,10 +17,9 @@ use std::any::Any; use std::fmt; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; -use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; use arrow::compute; use arrow::compute::{cast_with_options, CastOptions}; @@ -32,10 +31,10 @@ use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::ColumnarValue; /// TRY_CAST expression casts an expression to a specific data type and returns NULL on invalid cast -#[derive(Debug, Hash)] -pub struct TryCastExpr { +#[derive(Debug, Hash, Eq, PartialEq)] +pub struct TryCastExpr { /// The expression to cast - expr: Arc, + expr: Arc, /// The data type to cast to cast_type: DataType, } @@ -110,20 +109,6 @@ impl PhysicalExpr for TryCastExpr { self.cast_type.clone(), ))) } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for TryCastExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self.expr.eq(&x.expr) && self.cast_type == x.cast_type) - .unwrap_or(false) - } } /// Return a PhysicalExpression representing `expr` casted to diff --git a/datafusion/physical-expr/src/expressions/unknown_column.rs b/datafusion/physical-expr/src/expressions/unknown_column.rs index cb7221e7fa15..f64bea842e6b 100644 --- a/datafusion/physical-expr/src/expressions/unknown_column.rs +++ b/datafusion/physical-expr/src/expressions/unknown_column.rs @@ -17,7 +17,6 @@ //! UnKnownColumn expression -use std::any::Any; use std::hash::{Hash, Hasher}; use std::sync::Arc; @@ -30,7 +29,7 @@ use arrow::{ use datafusion_common::{internal_err, Result}; use datafusion_expr::ColumnarValue; -#[derive(Debug, Hash, PartialEq, Eq, Clone)] +#[derive(Debug, Clone, Eq)] pub struct UnKnownColumn { name: String, } @@ -86,15 +85,16 @@ impl PhysicalExpr for UnKnownColumn { ) -> Result> { Ok(self) } +} - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); +impl Hash for UnKnownColumn { + fn hash(&self, state: &mut H) { + self.name.hash(state); } } -impl PartialEq for UnKnownColumn { - fn eq(&self, _other: &dyn Any) -> bool { +impl PartialEq for UnKnownColumn { + fn eq(&self, _other: &Self) -> bool { // UnknownColumn is not a valid expression, so it should not be equal to any other expression. // See https://github.com/apache/datafusion/pull/11536 false diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs index c718e6b054ef..610c3656998b 100644 --- a/datafusion/physical-expr/src/physical_expr.rs +++ b/datafusion/physical-expr/src/physical_expr.rs @@ -20,8 +20,6 @@ use std::sync::Arc; pub(crate) use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use itertools::izip; -pub use datafusion_physical_expr_common::physical_expr::down_cast_any_ref; - /// Shared [`PhysicalExpr`]. pub type PhysicalExprRef = Arc; diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 4d3db96ceb3c..eadd3ba26235 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -31,10 +31,9 @@ use std::any::Any; use std::fmt::{self, Debug, Formatter}; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; -use crate::physical_expr::{down_cast_any_ref, physical_exprs_equal}; use crate::PhysicalExpr; use arrow::datatypes::{DataType, Schema}; @@ -47,6 +46,7 @@ use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf; use datafusion_expr::{expr_vec_fmt, ColumnarValue, Expr, ScalarUDF}; /// Physical expression of a scalar function +#[derive(Eq, PartialEq, Hash)] pub struct ScalarFunctionExpr { fun: Arc, name: String, @@ -197,14 +197,6 @@ impl PhysicalExpr for ScalarFunctionExpr { self.fun.propagate_constraints(interval, children) } - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.name.hash(&mut s); - self.args.hash(&mut s); - self.return_type.hash(&mut s); - // Add `self.fun` when hash is available - } - fn get_properties(&self, children: &[ExprProperties]) -> Result { let sort_properties = self.fun.output_ordering(children)?; let children_range = children @@ -220,20 +212,6 @@ impl PhysicalExpr for ScalarFunctionExpr { } } -impl PartialEq for ScalarFunctionExpr { - /// Comparing name, args and return_type - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && physical_exprs_equal(&self.args, &x.args) - && self.return_type == x.return_type - }) - .unwrap_or(false) - } -} - /// Create a physical expression for the UDF. pub fn create_physical_expr( fun: &ScalarUDF, diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index 4bd022975ac3..52384f5c411c 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -529,7 +529,7 @@ pub(crate) mod tests { ) .unwrap(); - assert_eq!(actual.as_ref(), expected.as_any()); + assert_eq!(actual.as_ref(), expected.as_ref()); } #[test] diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 4a9bf6afb49e..e22eed73b394 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -17,7 +17,6 @@ use std::any::Any; use std::fmt::Display; -use std::hash::Hasher; use std::ops::Deref; use std::sync::Arc; use std::vec; @@ -747,23 +746,16 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() -> Result<()> { output_ordering: vec![], }; - #[derive(Debug, Hash, Clone)] - struct CustomPredicateExpr { - inner: Arc, + #[derive(Debug, Hash, Clone, PartialEq, Eq)] + struct CustomPredicateExpr { + inner: Arc, } impl Display for CustomPredicateExpr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "CustomPredicateExpr") } } - impl PartialEq for CustomPredicateExpr { - fn eq(&self, other: &dyn Any) -> bool { - other - .downcast_ref::() - .map(|x| self.inner.eq(&x.inner)) - .unwrap_or(false) - } - } + impl PhysicalExpr for CustomPredicateExpr { fn as_any(&self) -> &dyn Any { self @@ -791,10 +783,6 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() -> Result<()> { ) -> Result> { todo!() } - - fn dyn_hash(&self, _state: &mut dyn Hasher) { - unreachable!() - } } #[derive(Debug)] @@ -849,7 +837,7 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() -> Result<()> { } } - let custom_predicate_expr = Arc::new(CustomPredicateExpr { + let custom_predicate_expr = Arc::new(CustomPredicateExpr:: { inner: Arc::new(Column::new("col", 1)), }); let exec_plan = ParquetExec::builder(scan_config)