From 62816370822cb5e16a46d5050a41f62016af917f Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Wed, 23 Aug 2023 00:45:47 +0800 Subject: [PATCH] feat: `array-empty` (#7313) * feat: array-empty * add definition in pbjson * add definition * remove useless tests * add the function in proto * add doc * refactor * format * remove useless code * fix format * support NULL * remove redundant code * support NULL * fix clippy --- datafusion/expr/src/built_in_function.rs | 8 ++++- datafusion/expr/src/expr_fn.rs | 6 ++++ .../physical-expr/src/array_expressions.rs | 15 ++++++++ datafusion/physical-expr/src/functions.rs | 3 ++ datafusion/proto/proto/datafusion.proto | 1 + datafusion/proto/src/generated/pbjson.rs | 3 ++ datafusion/proto/src/generated/prost.rs | 3 ++ .../proto/src/logical_plan/from_proto.rs | 9 ++++- datafusion/proto/src/logical_plan/to_proto.rs | 1 + datafusion/sqllogictest/test_files/array.slt | 36 +++++++++++++++++++ .../source/user-guide/sql/scalar_functions.md | 27 ++++++++++++++ 11 files changed, 110 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index e8b4654b97bd..9a4eb74c532b 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -138,6 +138,8 @@ pub enum BuiltinScalarFunction { ArrayDims, /// array_element ArrayElement, + /// array_empty + ArrayEmpty, /// array_length ArrayLength, /// array_ndims @@ -360,6 +362,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Trunc => Volatility::Immutable, BuiltinScalarFunction::ArrayAppend => Volatility::Immutable, BuiltinScalarFunction::ArrayConcat => Volatility::Immutable, + BuiltinScalarFunction::ArrayEmpty => Volatility::Immutable, BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable, BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable, BuiltinScalarFunction::ArrayHas => Volatility::Immutable, @@ -536,7 +539,8 @@ impl BuiltinScalarFunction { } 
BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny - | BuiltinScalarFunction::ArrayHas => Ok(Boolean), + | BuiltinScalarFunction::ArrayHas + | BuiltinScalarFunction::ArrayEmpty => Ok(Boolean), BuiltinScalarFunction::ArrayDims => { Ok(List(Arc::new(Field::new("item", UInt64, true)))) } @@ -829,6 +833,7 @@ impl BuiltinScalarFunction { Signature::variadic_any(self.volatility()) } BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()), + BuiltinScalarFunction::ArrayEmpty => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayElement => Signature::any(2, self.volatility()), BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayHasAll @@ -1319,6 +1324,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] { &["array_concat", "array_cat", "list_concat", "list_cat"] } BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"], + BuiltinScalarFunction::ArrayEmpty => &["empty"], BuiltinScalarFunction::ArrayElement => &[ "array_element", "array_extract", diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 47692dfefb4a..e3fd5ceb206b 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -552,6 +552,12 @@ scalar_expr!( first_array second_array, "returns true, if the element appears in the first array, otherwise false." ); +scalar_expr!( + ArrayEmpty, + array_empty, + array, + "returns true for an empty array or false for a non-empty array." +); scalar_expr!( ArrayHasAll, array_has_all, diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 4ad55f76f8c3..97d7ee4610ce 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -982,6 +982,21 @@ macro_rules! 
general_repeat_list { }}; } +/// Array_empty SQL function: true when a list has no non-null elements, NULL when the list is NULL +pub fn array_empty(args: &[ArrayRef]) -> Result<ArrayRef> { + // A bare NULL argument (e.g. `empty(NULL)`) is passed through so the result stays NULL. + if args[0].as_any().downcast_ref::<NullArray>().is_some() { + return Ok(args[0].clone()); + } + + let array = as_list_array(&args[0])?; + let builder = array + .iter() + .map(|arr| arr.map(|arr| arr.len() == arr.null_count())) + .collect::<BooleanArray>(); + Ok(Arc::new(builder)) +} + /// Array_repeat SQL function pub fn array_repeat(args: &[ArrayRef]) -> Result { let element = &args[0]; diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 82226ecfa1e1..2d6dbfdf52c3 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -425,6 +425,9 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayConcat => { Arc::new(|args| make_scalar_function(array_expressions::array_concat)(args)) } + BuiltinScalarFunction::ArrayEmpty => { + Arc::new(|args| make_scalar_function(array_expressions::array_empty)(args)) + } BuiltinScalarFunction::ArrayHasAll => { Arc::new(|args| make_scalar_function(array_expressions::array_has_all)(args)) } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index e4ef7b1bd448..f31a593ad5fe 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -597,6 +597,7 @@ enum ScalarFunction { Flatten = 112; Isnan = 113; Iszero = 114; + ArrayEmpty = 115; } message ScalarFunctionNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index f1a9e9c7bb74..7d1a18349c14 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -18947,6 +18947,7 @@ impl serde::Serialize for ScalarFunction { Self::Flatten => "Flatten", Self::Isnan => "Isnan", Self::Iszero => "Iszero", + Self::ArrayEmpty => "ArrayEmpty", }; serializer.serialize_str(variant) } @@ -19073,6 +19074,7 @@ 
impl<'de> serde::Deserialize<'de> for ScalarFunction { "Flatten", "Isnan", "Iszero", + "ArrayEmpty", ]; struct GeneratedVisitor; @@ -19230,6 +19232,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Flatten" => Ok(ScalarFunction::Flatten), "Isnan" => Ok(ScalarFunction::Isnan), "Iszero" => Ok(ScalarFunction::Iszero), + "ArrayEmpty" => Ok(ScalarFunction::ArrayEmpty), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 6cf402fe66e9..fc55b7e23af2 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2377,6 +2377,7 @@ pub enum ScalarFunction { Flatten = 112, Isnan = 113, Iszero = 114, + ArrayEmpty = 115, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -2500,6 +2501,7 @@ impl ScalarFunction { ScalarFunction::Flatten => "Flatten", ScalarFunction::Isnan => "Isnan", ScalarFunction::Iszero => "Iszero", + ScalarFunction::ArrayEmpty => "ArrayEmpty", } } /// Creates an enum from field names used in the ProtoBuf definition. 
@@ -2620,6 +2622,7 @@ impl ScalarFunction { "Flatten" => Some(Self::Flatten), "Isnan" => Some(Self::Isnan), "Iszero" => Some(Self::Iszero), + "ArrayEmpty" => Some(Self::ArrayEmpty), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index d3329c696764..c5ab0c25f628 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -34,7 +34,6 @@ use datafusion_common::{ internal_err, Column, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue, }; -use datafusion_expr::expr::{Alias, Placeholder}; use datafusion_expr::{ abs, acos, acosh, array, array_append, array_concat, array_dims, array_element, array_has, array_has_all, array_has_any, array_length, array_ndims, array_position, @@ -59,6 +58,10 @@ use datafusion_expr::{ JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, }; +use datafusion_expr::{ + array_empty, + expr::{Alias, Placeholder}, +}; use std::sync::Arc; #[derive(Debug)] @@ -452,6 +455,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ToTimestamp => Self::ToTimestamp, ScalarFunction::ArrayAppend => Self::ArrayAppend, ScalarFunction::ArrayConcat => Self::ArrayConcat, + ScalarFunction::ArrayEmpty => Self::ArrayEmpty, ScalarFunction::ArrayHasAll => Self::ArrayHasAll, ScalarFunction::ArrayHasAny => Self::ArrayHasAny, ScalarFunction::ArrayHas => Self::ArrayHas, @@ -1355,6 +1359,9 @@ pub fn parse_expr( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, )), + ScalarFunction::ArrayEmpty => { + Ok(array_empty(parse_expr(&args[0], registry)?)) + } ScalarFunction::ArrayNdims => { Ok(array_ndims(parse_expr(&args[0], registry)?)) } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index cb3296438165..82df53af92c3 100644 --- 
a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1451,6 +1451,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ToTimestamp => Self::ToTimestamp, BuiltinScalarFunction::ArrayAppend => Self::ArrayAppend, BuiltinScalarFunction::ArrayConcat => Self::ArrayConcat, + BuiltinScalarFunction::ArrayEmpty => Self::ArrayEmpty, BuiltinScalarFunction::ArrayHasAll => Self::ArrayHasAll, BuiltinScalarFunction::ArrayHasAny => Self::ArrayHasAny, BuiltinScalarFunction::ArrayHas => Self::ArrayHas, diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index eb949c4f8693..bd16072b29d7 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -2363,6 +2363,42 @@ from flatten_table; [1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] [1, 2, 3, 4, 5, 6] [8] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +# empty scalar function #1 +query B +select empty(make_array(1)); +---- +false + +# empty scalar function #2 +query B +select empty(make_array()); +---- +true + +# empty scalar function #3 +query B +select empty(make_array(NULL)); +---- +true + +# empty scalar function #4 +query B +select empty(NULL); +---- +NULL + +# empty scalar function #5 +query B +select empty(column1) from arrays; +---- +false +false +false +false +NULL +false +false + ### Delete tables statement ok diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 6dbe5c05f6d1..c6e2f5ddd828 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1495,6 +1495,7 @@ from_unixtime(expression) - [array_slice](#array_slice) - [array_to_string](#array_to_string) - [cardinality](#cardinality) +- [empty](#empty) - [list_append](#list_append) - [list_cat](#list_cat) - [list_concat](#list_concat) @@ -1693,6 
+1694,8 @@ array_element(array, index) - list_element - list_extract +### `array_empty` + ### `array_extract` _Alias of [array_element](#array_element)._ @@ -2188,6 +2191,30 @@ cardinality(array) +--------------------------------------+ ``` +### `empty` + +Returns true for an empty array or false for a non-empty array. + +``` +empty(array) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +#### Example + +``` +❯ select empty([1]); ++------------------+ +| empty(List([1])) | ++------------------+ +| false | ++------------------+ +``` + ### `list_append` _Alias of [array_append](#array_append)._