From 0e2023d044eef862dec210a30051fd8dd9430f00 Mon Sep 17 00:00:00 2001 From: zhuliquan Date: Thu, 17 Oct 2024 22:09:54 +0800 Subject: [PATCH 01/15] fix: using simple string match replace regex match for contains udf (#12931) * fix: using simple string match replace regex match * doc: update doc of contains * test: add case for contains udf --------- Co-authored-by: zhuliquan --- datafusion/functions/src/string/contains.rs | 55 +++++++++++++-------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs index 0f75731aa1c3..86f1eda03342 100644 --- a/datafusion/functions/src/string/contains.rs +++ b/datafusion/functions/src/string/contains.rs @@ -16,8 +16,8 @@ // under the License. use crate::utils::make_scalar_function; -use arrow::array::{Array, ArrayRef, AsArray, GenericStringArray, StringViewArray}; -use arrow::compute::regexp_is_match; +use arrow::array::{Array, ArrayRef, AsArray}; +use arrow::compute::contains as arrow_contains; use arrow::datatypes::DataType; use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View}; use datafusion_common::exec_err; @@ -102,40 +102,25 @@ fn get_contains_doc() -> &'static Documentation { }) } -/// use regexp_is_match_utf8_scalar to do the calculation for contains +/// use `arrow::compute::contains` to do the calculation for contains pub fn contains(args: &[ArrayRef]) -> Result { match (args[0].data_type(), args[1].data_type()) { (Utf8View, Utf8View) => { let mod_str = args[0].as_string_view(); let match_str = args[1].as_string_view(); - let res = regexp_is_match::< - StringViewArray, - StringViewArray, - GenericStringArray, - >(mod_str, match_str, None)?; - + let res = arrow_contains(mod_str, match_str)?; Ok(Arc::new(res) as ArrayRef) } (Utf8, Utf8) => { let mod_str = args[0].as_string::(); let match_str = args[1].as_string::(); - let res = regexp_is_match::< - GenericStringArray, - GenericStringArray, - GenericStringArray, - >(mod_str, match_str, None)?; - + let res = arrow_contains(mod_str, match_str)?; Ok(Arc::new(res) as ArrayRef) } (LargeUtf8, LargeUtf8) => { let mod_str = args[0].as_string::(); let match_str = args[1].as_string::(); - let res = regexp_is_match::< - GenericStringArray, - GenericStringArray, - GenericStringArray, - >(mod_str, match_str, None)?; - + let res = arrow_contains(mod_str, match_str)?; Ok(Arc::new(res) as ArrayRef) } other => { @@ -143,3 +128,31 @@ pub fn contains(args: &[ArrayRef]) -> Result { } } } + +#[cfg(test)] +mod test { + use super::ContainsFunc; + use arrow::array::{BooleanArray, StringArray}; + use datafusion_common::ScalarValue; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + use std::sync::Arc; + + #[test] + fn test_contains_udf() { + let udf = ContainsFunc::new(); + let array = ColumnarValue::Array(Arc::new(StringArray::from(vec![ + Some("xxx?()"), + Some("yyy?()"), + ]))); + let scalar = ColumnarValue::Scalar(ScalarValue::Utf8(Some("x?(".to_string()))); + let actual = udf.invoke(&[array, scalar]).unwrap(); + let expect = ColumnarValue::Array(Arc::new(BooleanArray::from(vec![ + Some(true), + Some(false), + ]))); + assert_eq!( + *actual.into_array(2).unwrap(), + *expect.into_array(2).unwrap() + ); + } +} From 56946b4d5df89f6ac3f07e06591e909aa2942e4e Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 17 Oct 2024 16:10:14 +0200 Subject: [PATCH 02/15] Increase minimum supported Rust version (MSRV) to 1.79 (#12962) Current goal is to support four last stable versions or versions for 4 months 
whichever is lower. Given 1.78.0 was released on: 2 May, 2024, it does not need to be supported. --- .github/workflows/rust.yml | 4 ++-- Cargo.toml | 2 +- datafusion-cli/Cargo.toml | 2 +- datafusion-cli/Dockerfile | 2 +- datafusion/core/Cargo.toml | 2 +- datafusion/proto-common/Cargo.toml | 2 +- datafusion/proto-common/gen/Cargo.toml | 2 +- datafusion/proto/Cargo.toml | 2 +- datafusion/proto/gen/Cargo.toml | 2 +- datafusion/substrait/Cargo.toml | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4527d047e4c0..39b7b2b17857 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -574,9 +574,9 @@ jobs: # # To reproduce: # 1. Install the version of Rust that is failing. Example: - # rustup install 1.78.0 + # rustup install 1.79.0 # 2. Run the command that failed with that version. Example: - # cargo +1.78.0 check -p datafusion + # cargo +1.79.0 check -p datafusion # # To resolve, either: # 1. Change your code to use older Rust features, diff --git a/Cargo.toml b/Cargo.toml index 448607257ca1..2c142c87c892 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,7 @@ homepage = "https://datafusion.apache.org" license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" -rust-version = "1.78" +rust-version = "1.79" version = "42.0.0" [workspace.dependencies] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index b86dbd2a3802..fe929495aae6 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -26,7 +26,7 @@ license = "Apache-2.0" homepage = "https://datafusion.apache.org" repository = "https://github.com/apache/datafusion" # Specify MSRV here as `cargo msrv` doesn't support workspace version -rust-version = "1.78" +rust-version = "1.79" readme = "README.md" [dependencies] diff --git a/datafusion-cli/Dockerfile b/datafusion-cli/Dockerfile index 7adead64db57..79c24f6baf3e 100644 --- a/datafusion-cli/Dockerfile +++ b/datafusion-cli/Dockerfile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM rust:1.78-bookworm AS builder +FROM rust:1.79-bookworm AS builder COPY . 
/usr/src/datafusion
COPY ./datafusion /usr/src/datafusion/datafusion
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 28d0d136bd05..8c4ad80e2924 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -30,7 +30,7 @@ authors = { workspace = true }
 # Specify MSRV here as `cargo msrv` doesn't support workspace version and fails with
 # "Unable to find key 'package.rust-version' (or 'package.metadata.msrv') in 'arrow-datafusion/Cargo.toml'"
 # https://github.com/foresterre/cargo-msrv/issues/590
-rust-version = "1.78"
+rust-version = "1.79"
 
 [lints]
 workspace = true
diff --git a/datafusion/proto-common/Cargo.toml b/datafusion/proto-common/Cargo.toml
index 5051c8f9322f..6c53e1b1ced0 100644
--- a/datafusion/proto-common/Cargo.toml
+++ b/datafusion/proto-common/Cargo.toml
@@ -26,7 +26,7 @@ homepage = { workspace = true }
 repository = { workspace = true }
 license = { workspace = true }
 authors = { workspace = true }
-rust-version = "1.78"
+rust-version = "1.79"
 
 # Exclude proto files so crates.io consumers don't need protoc
 exclude = ["*.proto"]
diff --git a/datafusion/proto-common/gen/Cargo.toml b/datafusion/proto-common/gen/Cargo.toml
index 0914669f82fa..6e5783f467a7 100644
--- a/datafusion/proto-common/gen/Cargo.toml
+++ b/datafusion/proto-common/gen/Cargo.toml
@@ -20,7 +20,7 @@ name = "gen-common"
 description = "Code generation for proto"
 version = "0.1.0"
 edition = { workspace = true }
-rust-version = "1.78"
+rust-version = "1.79"
 authors = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml
index d65c6ccaa660..3ffe5e3e76e7 100644
--- a/datafusion/proto/Cargo.toml
+++ b/datafusion/proto/Cargo.toml
@@ -27,7 +27,7 @@ repository = { workspace = true }
 license = { workspace = true }
 authors = { workspace = true }
 # Specify MSRV here as `cargo msrv` doesn't support workspace version
-rust-version = "1.78"
+rust-version = "1.79"
 
 # Exclude proto files so crates.io consumers don't need protoc
 exclude = ["*.proto"]
diff --git a/datafusion/proto/gen/Cargo.toml b/datafusion/proto/gen/Cargo.toml
index ea28ac86e8df..aee8fac4a120 100644
--- a/datafusion/proto/gen/Cargo.toml
+++ b/datafusion/proto/gen/Cargo.toml
@@ -20,7 +20,7 @@ name = "gen"
 description = "Code generation for proto"
 version = "0.1.0"
 edition = { workspace = true }
-rust-version = "1.78"
+rust-version = "1.79"
 authors = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml
index 6f8f81401f3b..41755018284e 100644
--- a/datafusion/substrait/Cargo.toml
+++ b/datafusion/substrait/Cargo.toml
@@ -26,7 +26,7 @@ repository = { workspace = true }
 license = { workspace = true }
 authors = { workspace = true }
 # Specify MSRV here as `cargo msrv` doesn't support workspace version
-rust-version = "1.78"
+rust-version = "1.79"
 
 [lints]
 workspace = true

From e63abe78f54cdbbba7ed92d65400525eeae59b71 Mon Sep 17 00:00:00 2001
From: Tornike Gurgenidze
Date: Thu, 17 Oct 2024 21:21:49 +0400
Subject: [PATCH 03/15] feat(substrait): add set operations to consumer,
 update substrait to `0.45.0` (#12863)

* feat(substrait): add set operations to consumer

* add missing intersect all test, change distinct to is_all

* upgrade substrait crate to 0.45
---
 datafusion/substrait/Cargo.toml               |   2 +-
 .../substrait/src/logical_plan/consumer.rs    | 120 +++++++++++--
 .../substrait/src/logical_plan/producer.rs    |  12 +-
.../tests/cases/roundtrip_logical_plan.rs | 66 +++++++ .../intersect_multiset.substrait.json | 166 ++++++++++++++++++ .../intersect_multiset_all.substrait.json | 166 ++++++++++++++++++ .../intersect_primary.substrait.json | 166 ++++++++++++++++++ .../test_plans/minus_primary.substrait.json | 166 ++++++++++++++++++ .../minus_primary_all.substrait.json | 166 ++++++++++++++++++ .../test_plans/union_distinct.substrait.json | 118 +++++++++++++ datafusion/substrait/tests/utils.rs | 1 + 11 files changed, 1136 insertions(+), 13 deletions(-) create mode 100644 datafusion/substrait/tests/testdata/test_plans/intersect_multiset.substrait.json create mode 100644 datafusion/substrait/tests/testdata/test_plans/intersect_multiset_all.substrait.json create mode 100644 datafusion/substrait/tests/testdata/test_plans/intersect_primary.substrait.json create mode 100644 datafusion/substrait/tests/testdata/test_plans/minus_primary.substrait.json create mode 100644 datafusion/substrait/tests/testdata/test_plans/minus_primary_all.substrait.json create mode 100644 datafusion/substrait/tests/testdata/test_plans/union_distinct.substrait.json diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 41755018284e..b0aa6acf3c7c 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -41,7 +41,7 @@ object_store = { workspace = true } pbjson-types = "0.7" # TODO use workspace version prost = "0.13" -substrait = { version = "0.42", features = ["serde"] } +substrait = { version = "0.45", features = ["serde"] } url = { workspace = true } [dev-dependencies] diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index c727f784ee01..4af02858e65a 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -196,6 +196,65 @@ fn split_eq_and_noneq_join_predicate_with_nulls_equality( (accum_join_keys, nulls_equal_nulls, join_filter) } +async fn union_rels( + rels: &[Rel], + ctx: &SessionContext, + extensions: &Extensions, + is_all: bool, +) -> Result { + let mut union_builder = Ok(LogicalPlanBuilder::from( + from_substrait_rel(ctx, &rels[0], extensions).await?, + )); + for input in &rels[1..] { + let rel_plan = from_substrait_rel(ctx, input, extensions).await?; + + union_builder = if is_all { + union_builder?.union(rel_plan) + } else { + union_builder?.union_distinct(rel_plan) + }; + } + union_builder?.build() +} + +async fn intersect_rels( + rels: &[Rel], + ctx: &SessionContext, + extensions: &Extensions, + is_all: bool, +) -> Result { + let mut rel = from_substrait_rel(ctx, &rels[0], extensions).await?; + + for input in &rels[1..] { + rel = LogicalPlanBuilder::intersect( + rel, + from_substrait_rel(ctx, input, extensions).await?, + is_all, + )? + } + + Ok(rel) +} + +async fn except_rels( + rels: &[Rel], + ctx: &SessionContext, + extensions: &Extensions, + is_all: bool, +) -> Result { + let mut rel = from_substrait_rel(ctx, &rels[0], extensions).await?; + + for input in &rels[1..] { + rel = LogicalPlanBuilder::except( + rel, + from_substrait_rel(ctx, input, extensions).await?, + is_all, + )? 
+ } + + Ok(rel) +} + /// Convert Substrait Plan to DataFusion LogicalPlan pub async fn from_substrait_plan( ctx: &SessionContext, @@ -494,6 +553,7 @@ fn make_renamed_schema( } /// Convert Substrait Rel to DataFusion DataFrame +#[allow(deprecated)] #[async_recursion] pub async fn from_substrait_rel( ctx: &SessionContext, @@ -877,27 +937,65 @@ pub async fn from_substrait_rel( Ok(set_op) => match set_op { set_rel::SetOp::UnionAll => { if !set.inputs.is_empty() { - let mut union_builder = Ok(LogicalPlanBuilder::from( - from_substrait_rel(ctx, &set.inputs[0], extensions).await?, - )); - for input in &set.inputs[1..] { - union_builder = union_builder? - .union(from_substrait_rel(ctx, input, extensions).await?); - } - union_builder?.build() + union_rels(&set.inputs, ctx, extensions, true).await + } else { + not_impl_err!("Union relation requires at least one input") + } + } + set_rel::SetOp::UnionDistinct => { + if !set.inputs.is_empty() { + union_rels(&set.inputs, ctx, extensions, false).await } else { not_impl_err!("Union relation requires at least one input") } } set_rel::SetOp::IntersectionPrimary => { - if set.inputs.len() == 2 { + if set.inputs.len() >= 2 { LogicalPlanBuilder::intersect( from_substrait_rel(ctx, &set.inputs[0], extensions).await?, - from_substrait_rel(ctx, &set.inputs[1], extensions).await?, + union_rels(&set.inputs[1..], ctx, extensions, true).await?, false, ) } else { - not_impl_err!("Primary Intersect relation with more than two inputs isn't supported") + not_impl_err!( + "Primary Intersect relation requires at least two inputs" + ) + } + } + set_rel::SetOp::IntersectionMultiset => { + if set.inputs.len() >= 2 { + intersect_rels(&set.inputs, ctx, extensions, false).await + } else { + not_impl_err!( + "Multiset Intersect relation requires at least two inputs" + ) + } + } + set_rel::SetOp::IntersectionMultisetAll => { + if set.inputs.len() >= 2 { + intersect_rels(&set.inputs, ctx, extensions, true).await + } else { + not_impl_err!( + "MultisetAll Intersect relation requires at least two inputs" + ) + } + } + set_rel::SetOp::MinusPrimary => { + if set.inputs.len() >= 2 { + except_rels(&set.inputs, ctx, extensions, false).await + } else { + not_impl_err!( + "Primary Minus relation requires at least two inputs" + ) + } + } + set_rel::SetOp::MinusPrimaryAll => { + if set.inputs.len() >= 2 { + except_rels(&set.inputs, ctx, extensions, true).await + } else { + not_impl_err!( + "PrimaryAll Minus relation requires at least two inputs" + ) } } _ => not_impl_err!("Unsupported set operator: {set_op:?}"), diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 1165ce13d236..0e1375a8e0ea 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -172,6 +172,7 @@ pub fn to_substrait_extended_expr( } /// Convert DataFusion LogicalPlan to Substrait Rel +#[allow(deprecated)] pub fn to_substrait_rel( plan: &LogicalPlan, ctx: &SessionContext, @@ -227,6 +228,7 @@ pub fn to_substrait_rel( advanced_extension: None, read_type: Some(ReadType::VirtualTable(VirtualTable { values: vec![], + expressions: vec![], })), }))), })) @@ -263,7 +265,10 @@ pub fn to_substrait_rel( best_effort_filter: None, projection: None, advanced_extension: None, - read_type: Some(ReadType::VirtualTable(VirtualTable { values })), + read_type: Some(ReadType::VirtualTable(VirtualTable { + values, + expressions: vec![], + })), }))), })) } @@ -359,6 +364,7 @@ pub fn to_substrait_rel( rel_type: 
Some(RelType::Aggregate(Box::new(AggregateRel { common: None, input: Some(input), + grouping_expressions: vec![], groupings, measures, advanced_extension: None, @@ -377,8 +383,10 @@ pub fn to_substrait_rel( rel_type: Some(RelType::Aggregate(Box::new(AggregateRel { common: None, input: Some(input), + grouping_expressions: vec![], groupings: vec![Grouping { grouping_expressions: grouping, + expression_references: vec![], }], measures: vec![], advanced_extension: None, @@ -764,6 +772,7 @@ pub fn operator_to_name(op: Operator) -> &'static str { } } +#[allow(deprecated)] pub fn parse_flat_grouping_exprs( ctx: &SessionContext, exprs: &[Expr], @@ -776,6 +785,7 @@ pub fn parse_flat_grouping_exprs( .collect::>>()?; Ok(Grouping { grouping_expressions, + expression_references: vec![], }) } diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index ae87dad0153e..23ac601a44ec 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -687,6 +687,72 @@ async fn simple_intersect_consume() -> Result<()> { .await } +#[tokio::test] +async fn primary_intersect_consume() -> Result<()> { + let proto_plan = + read_json("tests/testdata/test_plans/intersect_primary.substrait.json"); + + assert_substrait_sql( + proto_plan, + "SELECT a FROM data INTERSECT (SELECT a FROM data2 UNION ALL SELECT a FROM data2)", + ) + .await +} + +#[tokio::test] +async fn multiset_intersect_consume() -> Result<()> { + let proto_plan = + read_json("tests/testdata/test_plans/intersect_multiset.substrait.json"); + + assert_substrait_sql( + proto_plan, + "SELECT a FROM data INTERSECT SELECT a FROM data2 INTERSECT SELECT a FROM data2", + ) + .await +} + +#[tokio::test] +async fn multiset_intersect_all_consume() -> Result<()> { + let proto_plan = + read_json("tests/testdata/test_plans/intersect_multiset_all.substrait.json"); + + assert_substrait_sql( + proto_plan, + "SELECT a FROM data INTERSECT ALL SELECT a FROM data2 INTERSECT ALL SELECT a FROM data2", + ) + .await +} + +#[tokio::test] +async fn primary_except_consume() -> Result<()> { + let proto_plan = read_json("tests/testdata/test_plans/minus_primary.substrait.json"); + + assert_substrait_sql( + proto_plan, + "SELECT a FROM data EXCEPT SELECT a FROM data2 EXCEPT SELECT a FROM data2", + ) + .await +} + +#[tokio::test] +async fn primary_except_all_consume() -> Result<()> { + let proto_plan = + read_json("tests/testdata/test_plans/minus_primary_all.substrait.json"); + + assert_substrait_sql( + proto_plan, + "SELECT a FROM data EXCEPT ALL SELECT a FROM data2 EXCEPT ALL SELECT a FROM data2", + ) + .await +} + +#[tokio::test] +async fn union_distinct_consume() -> Result<()> { + let proto_plan = read_json("tests/testdata/test_plans/union_distinct.substrait.json"); + + assert_substrait_sql(proto_plan, "SELECT a FROM data UNION SELECT a FROM data2").await +} + #[tokio::test] async fn simple_intersect_table_reuse() -> Result<()> { // Substrait does currently NOT maintain the alias of the tables. 
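The six JSON fixtures added below each encode a Substrait `set` relation over the `data`/`data2` tables, one per newly supported operator. The consumer lowers an n-ary set relation into a left-associative chain of DataFusion's binary set operators (see `union_rels`, `intersect_rels`, and `except_rels` above), so a three-input `SET_OP_MINUS_PRIMARY` becomes `(data EXCEPT data2) EXCEPT data2`. A minimal, self-contained sketch of that fold shape, with plain `Vec<i32>` multisets standing in for `Rel` inputs (all names here are invented for illustration, not part of the patch):

// Left-fold a list of inputs into a chain of binary set operations,
// mirroring the shape of `union_rels`/`intersect_rels`/`except_rels`:
// [a, b, c] becomes op(op(a, b), c), i.e. (a EXCEPT b) EXCEPT c.
fn fold_set_op<T>(inputs: Vec<T>, op: impl Fn(T, T) -> T) -> Option<T> {
    let mut iter = inputs.into_iter();
    let first = iter.next()?;
    Some(iter.fold(first, |acc, next| op(acc, next)))
}

fn main() {
    // Model EXCEPT ALL on multisets: each element of the right side
    // removes at most one matching occurrence from the left side.
    let except_all = |mut left: Vec<i32>, right: Vec<i32>| {
        for x in right {
            if let Some(pos) = left.iter().position(|&y| y == x) {
                left.remove(pos);
            }
        }
        left
    };
    // (data EXCEPT ALL data2) EXCEPT ALL data2 over three inputs.
    let result = fold_set_op(vec![vec![1, 1, 2, 3], vec![1], vec![1]], except_all);
    assert_eq!(result, Some(vec![2, 3]));
}

The same left fold drives the multiset intersect cases, while `SET_OP_INTERSECTION_PRIMARY` instead unions inputs two through n and applies a single binary `LogicalPlanBuilder::intersect`, matching the consumer code above.
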
diff --git a/datafusion/substrait/tests/testdata/test_plans/intersect_multiset.substrait.json b/datafusion/substrait/tests/testdata/test_plans/intersect_multiset.substrait.json new file mode 100644 index 000000000000..8ff69bd82c3a --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/intersect_multiset.substrait.json @@ -0,0 +1,166 @@ +{ + "relations": [ + { + "root": { + "input": { + "set": { + "inputs": [ + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + } + ], + "op": "SET_OP_INTERSECTION_MULTISET" + } + }, + "names": [ + "a" + ] + } + } + ], + "version": { + "minorNumber": 54, + "producer": "subframe" + } + } \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/intersect_multiset_all.substrait.json b/datafusion/substrait/tests/testdata/test_plans/intersect_multiset_all.substrait.json new file mode 100644 index 000000000000..56daf6ed46f4 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/intersect_multiset_all.substrait.json @@ -0,0 +1,166 @@ +{ + "relations": [ + { + "root": { + "input": { + "set": { + "inputs": [ + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + 
"directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + } + ], + "op": "SET_OP_INTERSECTION_MULTISET_ALL" + } + }, + "names": [ + "a" + ] + } + } + ], + "version": { + "minorNumber": 54, + "producer": "subframe" + } + } \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/intersect_primary.substrait.json b/datafusion/substrait/tests/testdata/test_plans/intersect_primary.substrait.json new file mode 100644 index 000000000000..229dd7251705 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/intersect_primary.substrait.json @@ -0,0 +1,166 @@ +{ + "relations": [ + { + "root": { + "input": { + "set": { + "inputs": [ + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + } + ], + "op": "SET_OP_INTERSECTION_PRIMARY" + } + }, + "names": [ + "a" + ] + } + } + ], + "version": { + "minorNumber": 54, + "producer": "subframe" + } + } \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/minus_primary.substrait.json b/datafusion/substrait/tests/testdata/test_plans/minus_primary.substrait.json new file mode 100644 index 000000000000..33b0e2ab8c80 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/minus_primary.substrait.json @@ -0,0 +1,166 @@ +{ + "relations": [ + { + "root": { + "input": { + "set": { + "inputs": [ + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": 
{ + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + } + ], + "op": "SET_OP_MINUS_PRIMARY" + } + }, + "names": [ + "a" + ] + } + } + ], + "version": { + "minorNumber": 54, + "producer": "subframe" + } + } \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/minus_primary_all.substrait.json b/datafusion/substrait/tests/testdata/test_plans/minus_primary_all.substrait.json new file mode 100644 index 000000000000..229f78ab5bf6 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/minus_primary_all.substrait.json @@ -0,0 +1,166 @@ +{ + "relations": [ + { + "root": { + "input": { + "set": { + "inputs": [ + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + } + ], + "op": "SET_OP_MINUS_PRIMARY_ALL" + } + }, + 
"names": [ + "a" + ] + } + } + ], + "version": { + "minorNumber": 54, + "producer": "subframe" + } + } \ No newline at end of file diff --git a/datafusion/substrait/tests/testdata/test_plans/union_distinct.substrait.json b/datafusion/substrait/tests/testdata/test_plans/union_distinct.substrait.json new file mode 100644 index 000000000000..e8b02749660d --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/union_distinct.substrait.json @@ -0,0 +1,118 @@ +{ + "relations": [ + { + "root": { + "input": { + "set": { + "inputs": [ + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + { + "project": { + "common": { + "emit": { + "outputMapping": [ + 1 + ] + } + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_NULLABLE" + } + }, + "namedTable": { + "names": [ + "data2" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + } + ], + "op": "SET_OP_UNION_DISTINCT" + } + }, + "names": [ + "a" + ] + } + } + ], + "version": { + "minorNumber": 54, + "producer": "subframe" + } +} \ No newline at end of file diff --git a/datafusion/substrait/tests/utils.rs b/datafusion/substrait/tests/utils.rs index 9f63b74ef0fc..00cbfb0c412c 100644 --- a/datafusion/substrait/tests/utils.rs +++ b/datafusion/substrait/tests/utils.rs @@ -147,6 +147,7 @@ pub mod test { Ok(()) } + #[allow(deprecated)] fn collect_schemas_from_rel(&mut self, rel: &Rel) -> Result<()> { let rel_type = rel .rel_type From 1ba1e539b01bbfc7f9001423cfe1ff0015a99db7 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 18 Oct 2024 01:21:59 +0800 Subject: [PATCH 04/15] Unparse `SubqueryAlias` without projections to SQL (#12896) * change pub function comment to doc * unparse subquery alias without projections * fix tests * rollback the empty line * rollback the empty line * exclude the table_scan with pushdown case * fmt and clippy * simplify the ast to string and remove unused debug code --- datafusion/sql/src/unparser/plan.rs | 64 ++++++----- datafusion/sql/src/unparser/rewrite.rs | 93 ++++++++-------- datafusion/sql/tests/cases/plan_to_sql.rs | 124 +++++++++++++++++----- 3 files changed, 184 insertions(+), 97 deletions(-) diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index d150f0e532c6..9b4818b98cb0 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -15,19 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-use crate::unparser::utils::unproject_agg_exprs; -use datafusion_common::{ - internal_err, not_impl_err, - tree_node::{TransformedResult, TreeNode}, - Column, DataFusionError, Result, TableReference, -}; -use datafusion_expr::{ - expr::Alias, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, - LogicalPlanBuilder, Projection, SortExpr, -}; -use sqlparser::ast::{self, Ident, SetExpr}; -use std::sync::Arc; - use super::{ ast::{ BuilderError, DerivedRelationBuilder, QueryBuilder, RelationBuilder, @@ -44,6 +31,18 @@ use super::{ }, Unparser, }; +use crate::unparser::utils::unproject_agg_exprs; +use datafusion_common::{ + internal_err, not_impl_err, + tree_node::{TransformedResult, TreeNode}, + Column, DataFusionError, Result, TableReference, +}; +use datafusion_expr::{ + expr::Alias, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, + LogicalPlanBuilder, Projection, SortExpr, TableScan, +}; +use sqlparser::ast::{self, Ident, SetExpr}; +use std::sync::Arc; /// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`] /// @@ -249,12 +248,9 @@ impl Unparser<'_> { ) -> Result<()> { match plan { LogicalPlan::TableScan(scan) => { - if scan.projection.is_some() - || !scan.filters.is_empty() - || scan.fetch.is_some() + if let Some(unparsed_table_scan) = + Self::unparse_table_scan_pushdown(plan, None)? { - let unparsed_table_scan = - Self::unparse_table_scan_pushdown(plan, None)?; return self.select_to_sql_recursively( &unparsed_table_scan, query, @@ -498,10 +494,18 @@ impl Unparser<'_> { LogicalPlan::SubqueryAlias(plan_alias) => { let (plan, mut columns) = subquery_alias_inner_query_and_columns(plan_alias); - let plan = Self::unparse_table_scan_pushdown( + let unparsed_table_scan = Self::unparse_table_scan_pushdown( plan, Some(plan_alias.alias.clone()), )?; + // if the child plan is a TableScan with pushdown operations, we don't need to + // create an additional subquery for it + if !select.already_projected() && unparsed_table_scan.is_none() { + select.projection(vec![ast::SelectItem::Wildcard( + ast::WildcardAdditionalOptions::default(), + )]); + } + let plan = unparsed_table_scan.unwrap_or_else(|| plan.clone()); if !columns.is_empty() && !self.dialect.supports_column_alias_in_table_alias() { @@ -582,12 +586,21 @@ impl Unparser<'_> { } } + fn is_scan_with_pushdown(scan: &TableScan) -> bool { + scan.projection.is_some() || !scan.filters.is_empty() || scan.fetch.is_some() + } + + /// Try to unparse a table scan with pushdown operations into a new subquery plan. + /// If the table scan is without any pushdown operations, return None. fn unparse_table_scan_pushdown( plan: &LogicalPlan, alias: Option, - ) -> Result { + ) -> Result> { match plan { LogicalPlan::TableScan(table_scan) => { + if !Self::is_scan_with_pushdown(table_scan) { + return Ok(None); + } let mut filter_alias_rewriter = alias.as_ref().map(|alias_name| TableAliasRewriter { table_schema: table_scan.source.schema(), @@ -648,18 +661,15 @@ impl Unparser<'_> { builder = builder.limit(0, Some(fetch))?; } - builder.build() + Ok(Some(builder.build()?)) } LogicalPlan::SubqueryAlias(subquery_alias) => { - let new_plan = Self::unparse_table_scan_pushdown( + Self::unparse_table_scan_pushdown( &subquery_alias.input, Some(subquery_alias.alias.clone()), - )?; - LogicalPlanBuilder::from(new_plan) - .alias(subquery_alias.alias.clone())? 
- .build() + ) } - _ => Ok(plan.clone()), + _ => Ok(None), } } diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs index 304a02f037e6..3049df9396cb 100644 --- a/datafusion/sql/src/unparser/rewrite.rs +++ b/datafusion/sql/src/unparser/rewrite.rs @@ -101,25 +101,25 @@ fn rewrite_sort_expr_for_union(exprs: Vec) -> Result> { Ok(sort_exprs) } -// Rewrite logic plan for query that order by columns are not in projections -// Plan before rewrite: -// -// Projection: j1.j1_string, j2.j2_string -// Sort: j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST -// Projection: j1.j1_string, j2.j2_string, j1.j1_id, j2.j2_id -// Inner Join: Filter: j1.j1_id = j2.j2_id -// TableScan: j1 -// TableScan: j2 -// -// Plan after rewrite -// -// Sort: j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST -// Projection: j1.j1_string, j2.j2_string -// Inner Join: Filter: j1.j1_id = j2.j2_id -// TableScan: j1 -// TableScan: j2 -// -// This prevents the original plan generate query with derived table but missing alias. +/// Rewrite logic plan for query that order by columns are not in projections +/// Plan before rewrite: +/// +/// Projection: j1.j1_string, j2.j2_string +/// Sort: j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST +/// Projection: j1.j1_string, j2.j2_string, j1.j1_id, j2.j2_id +/// Inner Join: Filter: j1.j1_id = j2.j2_id +/// TableScan: j1 +/// TableScan: j2 +/// +/// Plan after rewrite +/// +/// Sort: j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST +/// Projection: j1.j1_string, j2.j2_string +/// Inner Join: Filter: j1.j1_id = j2.j2_id +/// TableScan: j1 +/// TableScan: j2 +/// +/// This prevents the original plan generate query with derived table but missing alias. pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( p: &Projection, ) -> Option { @@ -191,33 +191,33 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( } } -// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of -// subquery -// - `(SELECT column_a as a from table) AS A` -// - `(SELECT column_a from table) AS A (a)` -// -// A roundtrip example for table alias with columns -// -// query: SELECT id FROM (SELECT j1_id from j1) AS c (id) -// -// LogicPlan: -// Projection: c.id -// SubqueryAlias: c -// Projection: j1.j1_id AS id -// Projection: j1.j1_id -// TableScan: j1 -// -// Before introducing this logic, the unparsed query would be `SELECT c.id FROM (SELECT j1.j1_id AS -// id FROM (SELECT j1.j1_id FROM j1)) AS c`. -// The query is invalid as `j1.j1_id` is not a valid identifier in the derived table -// `(SELECT j1.j1_id FROM j1)` -// -// With this logic, the unparsed query will be: -// `SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)` -// -// Caveat: this won't handle the case like `select * from (select 1, 2) AS a (b, c)` -// as the parser gives a wrong plan which has mismatch `Int(1)` types: Literal and -// Column in the Projections. 
Once the parser side is fixed, this logic should work +/// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of +/// subquery +/// - `(SELECT column_a as a from table) AS A` +/// - `(SELECT column_a from table) AS A (a)` +/// +/// A roundtrip example for table alias with columns +/// +/// query: SELECT id FROM (SELECT j1_id from j1) AS c (id) +/// +/// LogicPlan: +/// Projection: c.id +/// SubqueryAlias: c +/// Projection: j1.j1_id AS id +/// Projection: j1.j1_id +/// TableScan: j1 +/// +/// Before introducing this logic, the unparsed query would be `SELECT c.id FROM (SELECT j1.j1_id AS +/// id FROM (SELECT j1.j1_id FROM j1)) AS c`. +/// The query is invalid as `j1.j1_id` is not a valid identifier in the derived table +/// `(SELECT j1.j1_id FROM j1)` +/// +/// With this logic, the unparsed query will be: +/// `SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)` +/// +/// Caveat: this won't handle the case like `select * from (select 1, 2) AS a (b, c)` +/// as the parser gives a wrong plan which has mismatch `Int(1)` types: Literal and +/// Column in the Projections. Once the parser side is fixed, this logic should work pub(super) fn subquery_alias_inner_query_and_columns( subquery_alias: &datafusion_expr::SubqueryAlias, ) -> (&LogicalPlan, Vec) { @@ -330,6 +330,7 @@ fn find_projection(logical_plan: &LogicalPlan) -> Option<&Projection> { _ => None, } } + /// A `TreeNodeRewriter` implementation that rewrites `Expr::Column` expressions by /// replacing the column's name with an alias if the column exists in the provided schema. /// diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index aff9f99c8cd3..e4e5d6a92964 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -71,7 +71,7 @@ fn roundtrip_expr() { let ast = expr_to_sql(&expr)?; - Ok(format!("{}", ast)) + Ok(ast.to_string()) }; for (table, query, expected) in tests { @@ -192,7 +192,7 @@ fn roundtrip_statement() -> Result<()> { let roundtrip_statement = plan_to_sql(&plan)?; - let actual = format!("{}", &roundtrip_statement); + let actual = &roundtrip_statement.to_string(); println!("roundtrip sql: {actual}"); println!("plan {}", plan.display_indent()); @@ -224,7 +224,7 @@ fn roundtrip_crossjoin() -> Result<()> { let roundtrip_statement = plan_to_sql(&plan)?; - let actual = format!("{}", &roundtrip_statement); + let actual = &roundtrip_statement.to_string(); println!("roundtrip sql: {actual}"); println!("plan {}", plan.display_indent()); @@ -237,7 +237,7 @@ fn roundtrip_crossjoin() -> Result<()> { \n TableScan: j1\ \n TableScan: j2"; - assert_eq!(format!("{plan_roundtrip}"), expected); + assert_eq!(plan_roundtrip.to_string(), expected); Ok(()) } @@ -478,7 +478,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { let unparser = Unparser::new(&*query.unparser_dialect); let roundtrip_statement = unparser.plan_to_sql(&plan)?; - let actual = format!("{}", &roundtrip_statement); + let actual = &roundtrip_statement.to_string(); println!("roundtrip sql: {actual}"); println!("plan {}", plan.display_indent()); @@ -508,7 +508,7 @@ Projection: unnest_placeholder(unnest_table.struct_col).field1, unnest_placehold Projection: unnest_table.struct_col AS unnest_placeholder(unnest_table.struct_col), unnest_table.array_col AS unnest_placeholder(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col TableScan: unnest_table"#.trim_start(); - assert_eq!(format!("{plan}"), expected); + 
assert_eq!(plan.to_string(), expected); Ok(()) } @@ -528,7 +528,7 @@ fn test_table_references_in_plan_to_sql() { .unwrap(); let sql = plan_to_sql(&plan).unwrap(); - assert_eq!(format!("{}", sql), expected_sql) + assert_eq!(sql.to_string(), expected_sql) } test( @@ -558,7 +558,7 @@ fn test_table_scan_with_no_projection_in_plan_to_sql() { .build() .unwrap(); let sql = plan_to_sql(&plan).unwrap(); - assert_eq!(format!("{}", sql), expected_sql) + assert_eq!(sql.to_string(), expected_sql) } test( @@ -667,27 +667,103 @@ where } #[test] -fn test_table_scan_pushdown() -> Result<()> { +fn test_table_scan_alias() -> Result<()> { let schema = Schema::new(vec![ Field::new("id", DataType::Utf8, false), Field::new("age", DataType::Utf8, false), ]); + let plan = table_scan(Some("t1"), &schema, None)? + .project(vec![col("id")])? + .alias("a")? + .build()?; + let sql = plan_to_sql(&plan)?; + assert_eq!(sql.to_string(), "SELECT * FROM (SELECT t1.id FROM t1) AS a"); + + let plan = table_scan(Some("t1"), &schema, None)? + .project(vec![col("id")])? + .alias("a")? + .build()?; + + let sql = plan_to_sql(&plan)?; + assert_eq!(sql.to_string(), "SELECT * FROM (SELECT t1.id FROM t1) AS a"); + + let plan = table_scan(Some("t1"), &schema, None)? + .filter(col("id").gt(lit(5)))? + .project(vec![col("id")])? + .alias("a")? + .build()?; + let sql = plan_to_sql(&plan)?; + assert_eq!( + sql.to_string(), + "SELECT * FROM (SELECT t1.id FROM t1 WHERE (t1.id > 5)) AS a" + ); + + let table_scan_with_two_filter = table_scan_with_filters( + Some("t1"), + &schema, + None, + vec![col("id").gt(lit(1)), col("age").lt(lit(2))], + )? + .project(vec![col("id")])? + .alias("a")? + .build()?; + let table_scan_with_two_filter = plan_to_sql(&table_scan_with_two_filter)?; + assert_eq!( + table_scan_with_two_filter.to_string(), + "SELECT * FROM (SELECT t1.id FROM t1 WHERE ((t1.id > 1) AND (t1.age < 2))) AS a" + ); + + let table_scan_with_fetch = + table_scan_with_filter_and_fetch(Some("t1"), &schema, None, vec![], Some(10))? + .project(vec![col("id")])? + .alias("a")? + .build()?; + let table_scan_with_fetch = plan_to_sql(&table_scan_with_fetch)?; + assert_eq!( + table_scan_with_fetch.to_string(), + "SELECT * FROM (SELECT t1.id FROM (SELECT * FROM t1 LIMIT 10)) AS a" + ); + + let table_scan_with_pushdown_all = table_scan_with_filter_and_fetch( + Some("t1"), + &schema, + Some(vec![0, 1]), + vec![col("id").gt(lit(1))], + Some(10), + )? + .project(vec![col("id")])? + .alias("a")? 
+ .build()?; + let table_scan_with_pushdown_all = plan_to_sql(&table_scan_with_pushdown_all)?; + assert_eq!( + table_scan_with_pushdown_all.to_string(), + "SELECT * FROM (SELECT t1.id FROM (SELECT t1.id, t1.age FROM t1 WHERE (t1.id > 1) LIMIT 10)) AS a" + ); + Ok(()) +} + +#[test] +fn test_table_scan_pushdown() -> Result<()> { + let schema = Schema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("age", DataType::Utf8, false), + ]); let scan_with_projection = table_scan(Some("t1"), &schema, Some(vec![0, 1]))?.build()?; let scan_with_projection = plan_to_sql(&scan_with_projection)?; assert_eq!( - format!("{}", scan_with_projection), + scan_with_projection.to_string(), "SELECT t1.id, t1.age FROM t1" ); let scan_with_projection = table_scan(Some("t1"), &schema, Some(vec![1]))?.build()?; let scan_with_projection = plan_to_sql(&scan_with_projection)?; - assert_eq!(format!("{}", scan_with_projection), "SELECT t1.age FROM t1"); + assert_eq!(scan_with_projection.to_string(), "SELECT t1.age FROM t1"); let scan_with_no_projection = table_scan(Some("t1"), &schema, None)?.build()?; let scan_with_no_projection = plan_to_sql(&scan_with_no_projection)?; - assert_eq!(format!("{}", scan_with_no_projection), "SELECT * FROM t1"); + assert_eq!(scan_with_no_projection.to_string(), "SELECT * FROM t1"); let table_scan_with_projection_alias = table_scan(Some("t1"), &schema, Some(vec![0, 1]))? @@ -696,7 +772,7 @@ fn test_table_scan_pushdown() -> Result<()> { let table_scan_with_projection_alias = plan_to_sql(&table_scan_with_projection_alias)?; assert_eq!( - format!("{}", table_scan_with_projection_alias), + table_scan_with_projection_alias.to_string(), "SELECT ta.id, ta.age FROM t1 AS ta" ); @@ -707,7 +783,7 @@ fn test_table_scan_pushdown() -> Result<()> { let table_scan_with_projection_alias = plan_to_sql(&table_scan_with_projection_alias)?; assert_eq!( - format!("{}", table_scan_with_projection_alias), + table_scan_with_projection_alias.to_string(), "SELECT ta.age FROM t1 AS ta" ); @@ -717,7 +793,7 @@ fn test_table_scan_pushdown() -> Result<()> { let table_scan_with_no_projection_alias = plan_to_sql(&table_scan_with_no_projection_alias)?; assert_eq!( - format!("{}", table_scan_with_no_projection_alias), + table_scan_with_no_projection_alias.to_string(), "SELECT * FROM t1 AS ta" ); @@ -729,7 +805,7 @@ fn test_table_scan_pushdown() -> Result<()> { let query_from_table_scan_with_projection = plan_to_sql(&query_from_table_scan_with_projection)?; assert_eq!( - format!("{}", query_from_table_scan_with_projection), + query_from_table_scan_with_projection.to_string(), "SELECT * FROM (SELECT t1.id, t1.age FROM t1)" ); @@ -742,7 +818,7 @@ fn test_table_scan_pushdown() -> Result<()> { .build()?; let table_scan_with_filter = plan_to_sql(&table_scan_with_filter)?; assert_eq!( - format!("{}", table_scan_with_filter), + table_scan_with_filter.to_string(), "SELECT * FROM t1 WHERE (t1.id > t1.age)" ); @@ -755,7 +831,7 @@ fn test_table_scan_pushdown() -> Result<()> { .build()?; let table_scan_with_two_filter = plan_to_sql(&table_scan_with_two_filter)?; assert_eq!( - format!("{}", table_scan_with_two_filter), + table_scan_with_two_filter.to_string(), "SELECT * FROM t1 WHERE ((t1.id > 1) AND (t1.age < 2))" ); @@ -769,7 +845,7 @@ fn test_table_scan_pushdown() -> Result<()> { .build()?; let table_scan_with_filter_alias = plan_to_sql(&table_scan_with_filter_alias)?; assert_eq!( - format!("{}", table_scan_with_filter_alias), + table_scan_with_filter_alias.to_string(), "SELECT * FROM t1 AS ta WHERE (ta.id > ta.age)" 
);
 
@@ -783,7 +859,7 @@ fn test_table_scan_pushdown() -> Result<()> {
     let table_scan_with_projection_and_filter =
         plan_to_sql(&table_scan_with_projection_and_filter)?;
     assert_eq!(
-        format!("{}", table_scan_with_projection_and_filter),
+        table_scan_with_projection_and_filter.to_string(),
         "SELECT t1.id, t1.age FROM t1 WHERE (t1.id > t1.age)"
     );
 
@@ -797,7 +873,7 @@ fn test_table_scan_pushdown() -> Result<()> {
     let table_scan_with_projection_and_filter =
         plan_to_sql(&table_scan_with_projection_and_filter)?;
     assert_eq!(
-        format!("{}", table_scan_with_projection_and_filter),
+        table_scan_with_projection_and_filter.to_string(),
         "SELECT t1.age FROM t1 WHERE (t1.id > t1.age)"
     );
 
@@ -806,7 +882,7 @@ fn test_table_scan_pushdown() -> Result<()> {
         .build()?;
     let table_scan_with_inline_fetch = plan_to_sql(&table_scan_with_inline_fetch)?;
     assert_eq!(
-        format!("{}", table_scan_with_inline_fetch),
+        table_scan_with_inline_fetch.to_string(),
         "SELECT * FROM t1 LIMIT 10"
     );
 
@@ -821,7 +897,7 @@ fn test_table_scan_pushdown() -> Result<()> {
     let table_scan_with_projection_and_inline_fetch =
        plan_to_sql(&table_scan_with_projection_and_inline_fetch)?;
     assert_eq!(
-        format!("{}", table_scan_with_projection_and_inline_fetch),
+        table_scan_with_projection_and_inline_fetch.to_string(),
        "SELECT t1.id, t1.age FROM t1 LIMIT 10"
     );
 
@@ -835,7 +911,7 @@ fn test_table_scan_pushdown() -> Result<()> {
         .build()?;
     let table_scan_with_all = plan_to_sql(&table_scan_with_all)?;
     assert_eq!(
-        format!("{}", table_scan_with_all),
+        table_scan_with_all.to_string(),
         "SELECT t1.id, t1.age FROM t1 WHERE (t1.id > t1.age) LIMIT 10"
     );
     Ok(())

From b098893a34f83f1a1df290168377d7622938b3f5 Mon Sep 17 00:00:00 2001
From: Emil Ejbyfeldt
Date: Thu, 17 Oct 2024 19:22:18 +0200
Subject: [PATCH 05/15] Fix 2 bugs related to push down partition filters
 (#12902)

* Report errors in partition filters

This patch fixes two bugs. Errors in partition filters were silently
ignored, and partition filters were allowed to be pushed down for
unpartitioned tables even though such filters were never evaluated.

The first bug is fixed by reporting errors for partition filters and by
only evaluating the filters we accepted as partition filters in
`supports_filters_pushdown`.

The second bug is fixed by only allowing partition filters to be pushed
down when the table actually has partition columns.
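
To make both failure modes concrete, these are the regression queries
added by this patch (quoted from the new `arrow_files.slt` and
`errors.slt` cases below); both now report the divide-by-zero error
from the filter instead of silently succeeding:

    -- partition filter errors are reported instead of ignored
    SELECT f0 FROM arrow_partitioned
    WHERE CASE WHEN true THEN 1 / 0 ELSE part END = 1;

    -- filters on an unpartitioned table are no longer claimed as
    -- partition filters, so they are actually evaluated
    SELECT c2 FROM aggregate_test_100
    WHERE CASE WHEN true THEN 1 / 0 ELSE 0 END = 1;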
* Update datafusion/sqllogictest/test_files/errors.slt
---
 datafusion/core/src/dataframe/mod.rs          |  4 +-
 .../core/src/datasource/listing/helpers.rs    | 36 +++++-----
 .../core/src/datasource/listing/table.rs      | 69 ++++++++++---------
 .../sqllogictest/test_files/arrow_files.slt   |  5 ++
 datafusion/sqllogictest/test_files/errors.slt |  4 ++
 5 files changed, 65 insertions(+), 53 deletions(-)

diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs
index 67e2a4780d06..8a0829cd5e4b 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -2987,9 +2987,7 @@ mod tests {
             JoinType::Inner,
             Some(Expr::Literal(ScalarValue::Null)),
         )?;
-        let expected_plan = "CrossJoin:\
-            \n  TableScan: a projection=[c1], full_filters=[Boolean(NULL)]\
-            \n  TableScan: b projection=[c1]";
+        let expected_plan = "EmptyRelation";
         assert_eq!(expected_plan, format!("{}", join.into_optimized_plan()?));
 
         // JOIN ON expression must be boolean type
diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs
index 72d7277d6ae2..47012f777ad1 100644
--- a/datafusion/core/src/datasource/listing/helpers.rs
+++ b/datafusion/core/src/datasource/listing/helpers.rs
@@ -24,6 +24,7 @@ use std::sync::Arc;
 use super::ListingTableUrl;
 use super::PartitionedFile;
 use crate::execution::context::SessionState;
+use datafusion_common::internal_err;
 use datafusion_common::{Result, ScalarValue};
 
 use datafusion_expr::{BinaryExpr, Operator};
@@ -285,25 +286,20 @@ async fn prune_partitions(
     let props = ExecutionProps::new();
 
     // Applies `filter` to `batch` returning `None` on error
-    let do_filter = |filter| -> Option<ArrayRef> {
-        let expr = create_physical_expr(filter, &df_schema, &props).ok()?;
-        expr.evaluate(&batch)
-            .ok()?
-            .into_array(partitions.len())
-            .ok()
+    let do_filter = |filter| -> Result<ArrayRef> {
+        let expr = create_physical_expr(filter, &df_schema, &props)?;
+        expr.evaluate(&batch)?.into_array(partitions.len())
     };
 
-    //.Compute the conjunction of the filters, ignoring errors
+    //.Compute the conjunction of the filters
     let mask = filters
         .iter()
-        .fold(None, |acc, filter| match (acc, do_filter(filter)) {
-            (Some(a), Some(b)) => Some(and(&a, b.as_boolean()).unwrap_or(a)),
-            (None, Some(r)) => Some(r.as_boolean().clone()),
-            (r, None) => r,
-        });
+        .map(|f| do_filter(f).map(|a| a.as_boolean().clone()))
+        .reduce(|a, b| Ok(and(&a?, &b?)?));
 
     let mask = match mask {
-        Some(mask) => mask,
+        Some(Ok(mask)) => mask,
+        Some(Err(err)) => return Err(err),
         None => return Ok(partitions),
     };
@@ -401,8 +397,8 @@ fn evaluate_partition_prefix<'a>(
 
 /// Discover the partitions on the given path and prune out files
 /// that belong to irrelevant partitions using `filters` expressions.
-/// `filters` might contain expressions that can be resolved only at the
-/// file level (e.g. Parquet row group pruning).
+/// `filters` should only contain expressions that can be evaluated
+/// using only the partition columns.
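+///
+/// Illustrative sketch only, mirroring the unit test below in this file;
+/// the filter value and partition column are assumed for the example:
+///
+/// ```ignore
+/// let filter = Expr::eq(col("part1"), lit("p1v2")); // partition cols only
+/// let pruned = pruned_partition_list(
+///     &state,
+///     store.as_ref(),
+///     &ListingTableUrl::parse("file:///tablepath/")?,
+///     &[filter],
+///     ".parquet",
+///     &[(String::from("part1"), DataType::Utf8)],
+/// )
+/// .await?;
+/// ```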
 pub async fn pruned_partition_list<'a>(
     ctx: &'a SessionState,
     store: &'a dyn ObjectStore,
@@ -413,6 +409,12 @@ pub async fn pruned_partition_list<'a>(
 ) -> Result<BoxStream<'a, Result<PartitionedFile>>> {
     // if no partition col => simply list all the files
     if partition_cols.is_empty() {
+        if !filters.is_empty() {
+            return internal_err!(
+                "Got partition filters for unpartitioned table {}",
+                table_path
+            );
+        }
         return Ok(Box::pin(
             table_path
                 .list_all_files(ctx, store, file_extension)
@@ -631,13 +633,11 @@ mod tests {
         ]);
         let filter1 = Expr::eq(col("part1"), lit("p1v2"));
         let filter2 = Expr::eq(col("part2"), lit("p2v1"));
-        // filter3 cannot be resolved at partition pruning
-        let filter3 = Expr::eq(col("part2"), col("other"));
         let pruned = pruned_partition_list(
             &state,
             store.as_ref(),
             &ListingTableUrl::parse("file:///tablepath/").unwrap(),
-            &[filter1, filter2, filter3],
+            &[filter1, filter2],
             ".parquet",
             &[
                 (String::from("part1"), DataType::Utf8),
diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs
index a9c6aec17537..1e9f06c20b47 100644
--- a/datafusion/core/src/datasource/listing/table.rs
+++ b/datafusion/core/src/datasource/listing/table.rs
@@ -782,6 +782,16 @@ impl ListingTable {
     }
 }
 
+// Expressions can be used for partition pruning if they can be evaluated using
+// only the partition columns, and there are partition columns.
+fn can_be_evaluted_for_partition_pruning(
+    partition_column_names: &[&str],
+    expr: &Expr,
+) -> bool {
+    !partition_column_names.is_empty()
+        && expr_applicable_for_cols(partition_column_names, expr)
+}
+
 #[async_trait]
 impl TableProvider for ListingTable {
     fn as_any(&self) -> &dyn Any {
@@ -807,10 +817,28 @@ impl TableProvider for ListingTable {
         filters: &[Expr],
         limit: Option<usize>,
     ) -> Result<Arc<dyn ExecutionPlan>> {
+        // extract types of partition columns
+        let table_partition_cols = self
+            .options
+            .table_partition_cols
+            .iter()
+            .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone()))
+            .collect::<Result<Vec<_>>>()?;
+
+        let table_partition_col_names = table_partition_cols
+            .iter()
+            .map(|field| field.name().as_str())
+            .collect::<Vec<_>>();
+        // If the filters can be resolved using only partition cols, there is no need to
+        // push them down to the TableScan; otherwise, `unhandled` pruning predicates will be generated
+        let (partition_filters, filters): (Vec<_>, Vec<_>) =
+            filters.iter().cloned().partition(|filter| {
+                can_be_evaluted_for_partition_pruning(&table_partition_col_names, filter)
+            });
         // TODO (https://github.com/apache/datafusion/issues/11600) remove downcast_ref from here?
let session_state = state.as_any().downcast_ref::().unwrap(); let (mut partitioned_file_lists, statistics) = self - .list_files_for_scan(session_state, filters, limit) + .list_files_for_scan(session_state, &partition_filters, limit) .await?; // if no files need to be read, return an `EmptyExec` @@ -846,28 +874,6 @@ impl TableProvider for ListingTable { None => {} // no ordering required }; - // extract types of partition columns - let table_partition_cols = self - .options - .table_partition_cols - .iter() - .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) - .collect::>>()?; - - // If the filters can be resolved using only partition cols, there is no need to - // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated - let table_partition_col_names = table_partition_cols - .iter() - .map(|field| field.name().as_str()) - .collect::>(); - let filters = filters - .iter() - .filter(|filter| { - !expr_applicable_for_cols(&table_partition_col_names, filter) - }) - .cloned() - .collect::>(); - let filters = conjunction(filters.to_vec()) .map(|expr| -> Result<_> { // NOTE: Use the table schema (NOT file schema) here because `expr` may contain references to partition columns. @@ -908,18 +914,17 @@ impl TableProvider for ListingTable { &self, filters: &[&Expr], ) -> Result> { + let partition_column_names = self + .options + .table_partition_cols + .iter() + .map(|col| col.0.as_str()) + .collect::>(); filters .iter() .map(|filter| { - if expr_applicable_for_cols( - &self - .options - .table_partition_cols - .iter() - .map(|col| col.0.as_str()) - .collect::>(), - filter, - ) { + if can_be_evaluted_for_partition_pruning(&partition_column_names, filter) + { // if filter can be handled by partition pruning, it is exact return Ok(TableProviderFilterPushDown::Exact); } diff --git a/datafusion/sqllogictest/test_files/arrow_files.slt b/datafusion/sqllogictest/test_files/arrow_files.slt index e66ba7477fc4..e73acc384cb3 100644 --- a/datafusion/sqllogictest/test_files/arrow_files.slt +++ b/datafusion/sqllogictest/test_files/arrow_files.slt @@ -118,3 +118,8 @@ EXPLAIN SELECT f0 FROM arrow_partitioned WHERE part = 456 ---- logical_plan TableScan: arrow_partitioned projection=[f0], full_filters=[arrow_partitioned.part = Int32(456)] physical_plan ArrowExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_table_arrow/part=456/data.arrow]]}, projection=[f0] + + +# Errors in partition filters should be reported +query error Divide by zero error +SELECT f0 FROM arrow_partitioned WHERE CASE WHEN true THEN 1 / 0 ELSE part END = 1; diff --git a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index ce0947525344..da46a7e5e679 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -133,3 +133,7 @@ create table foo as values (1), ('foo'); query error No function matches select 1 group by substr(''); + +# Error in filter should be reported +query error Divide by zero +SELECT c2 from aggregate_test_100 where CASE WHEN true THEN 1 / 0 ELSE 0 END = 1; From 54bd26ed12f854b87d20f0d70ac64c02fcd5150f Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 17 Oct 2024 19:22:40 +0200 Subject: [PATCH 06/15] Move TableConstraint to Constraints conversion (#12953) Reduce datafusion-common dependency on sqlparser --- .../common/src/functional_dependencies.rs | 73 +----------------- datafusion/sql/src/statement.rs | 77 ++++++++++++++++++- 2 files changed, 74 
From 54bd26ed12f854b87d20f0d70ac64c02fcd5150f Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 17 Oct 2024 19:22:40 +0200 Subject: [PATCH 06/15] Move TableConstraint to Constraints conversion (#12953) Reduce datafusion-common dependency on sqlparser --- .../common/src/functional_dependencies.rs | 73 +----------------- datafusion/sql/src/statement.rs | 77 ++++++++++++++++++- 2 files changed, 74 insertions(+), 76 deletions(-) diff --git a/datafusion/common/src/functional_dependencies.rs b/datafusion/common/src/functional_dependencies.rs index 90f4e6e7e3d1..ed9a68c19536 100644 --- a/datafusion/common/src/functional_dependencies.rs +++ b/datafusion/common/src/functional_dependencies.rs @@ -23,11 +23,8 @@ use std::fmt::{Display, Formatter}; use std::ops::Deref; use std::vec::IntoIter; -use crate::error::_plan_err; use crate::utils::{merge_and_order_indices, set_difference}; -use crate::{DFSchema, DFSchemaRef, DataFusionError, JoinType, Result}; - -use sqlparser::ast::TableConstraint; +use crate::{DFSchema, JoinType}; /// This object defines a constraint on a table. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] @@ -60,74 +57,6 @@ impl Constraints { Self { inner: constraints } } - /// Convert each `TableConstraint` to corresponding `Constraint` - pub fn new_from_table_constraints( - constraints: &[TableConstraint], - df_schema: &DFSchemaRef, - ) -> Result<Self> { - let constraints = constraints - .iter() - .map(|c: &TableConstraint| match c { - TableConstraint::Unique { name, columns, .. } => { - let field_names = df_schema.field_names(); - // Get unique constraint indices in the schema: - let indices = columns - .iter() - .map(|u| { - let idx = field_names - .iter() - .position(|item| *item == u.value) - .ok_or_else(|| { - let name = name - .as_ref() - .map(|name| format!("with name '{name}' ")) - .unwrap_or("".to_string()); - DataFusionError::Execution( - format!("Column for unique constraint {}not found in schema: {}", name,u.value) - ) - })?; - Ok(idx) - }) - .collect::<Result<Vec<_>>>()?; - Ok(Constraint::Unique(indices)) - } - TableConstraint::PrimaryKey { columns, .. } => { - let field_names = df_schema.field_names(); - // Get primary key indices in the schema: - let indices = columns - .iter() - .map(|pk| { - let idx = field_names - .iter() - .position(|item| *item == pk.value) - .ok_or_else(|| { - DataFusionError::Execution(format!( - "Column for primary key not found in schema: {}", - pk.value - )) - })?; - Ok(idx) - }) - .collect::<Result<Vec<_>>>()?; - Ok(Constraint::PrimaryKey(indices)) - } - TableConstraint::ForeignKey { .. } => { - _plan_err!("Foreign key constraints are not currently supported") - } - TableConstraint::Check { .. } => { - _plan_err!("Check constraints are not currently supported") - } - TableConstraint::Index { .. } => { - _plan_err!("Indexes are not currently supported") - } - TableConstraint::FulltextOrSpatial { ..
} => { - _plan_err!("Indexes are not currently supported") - } - }) - .collect::<Result<Vec<_>>>()?; - Ok(Constraints::new_unverified(constraints)) - } - /// Check whether constraints is empty pub fn is_empty(&self) -> bool { self.inner.is_empty() diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index edb4316db1e0..4109f1371187 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -30,10 +30,11 @@ use crate::planner::{ use crate::utils::normalize_ident; use arrow_schema::{DataType, Fields}; +use datafusion_common::error::_plan_err; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ exec_err, not_impl_err, plan_datafusion_err, plan_err, schema_err, - unqualified_field_not_found, Column, Constraints, DFSchema, DFSchemaRef, + unqualified_field_not_found, Column, Constraint, Constraints, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, SchemaError, SchemaReference, TableReference, ToDFSchema, }; @@ -427,7 +428,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan }; - let constraints = Constraints::new_from_table_constraints( + let constraints = Self::new_constraint_from_table_constraints( &all_constraints, plan.schema(), )?; @@ -452,7 +453,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema, }; let plan = LogicalPlan::EmptyRelation(plan); - let constraints = Constraints::new_from_table_constraints( + let constraints = Self::new_constraint_from_table_constraints( &all_constraints, plan.schema(), )?; @@ -1242,7 +1243,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let name = self.object_name_to_table_reference(name)?; let constraints = - Constraints::new_from_table_constraints(&all_constraints, &df_schema)?; + Self::new_constraint_from_table_constraints(&all_constraints, &df_schema)?; Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable( PlanCreateExternalTable { schema: df_schema, @@ -1262,6 +1263,74 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ))) } + /// Convert each `TableConstraint` to corresponding `Constraint` + fn new_constraint_from_table_constraints( + constraints: &[TableConstraint], + df_schema: &DFSchemaRef, + ) -> Result<Constraints> { + let constraints = constraints + .iter() + .map(|c: &TableConstraint| match c { + TableConstraint::Unique { name, columns, .. } => { + let field_names = df_schema.field_names(); + // Get unique constraint indices in the schema: + let indices = columns + .iter() + .map(|u| { + let idx = field_names + .iter() + .position(|item| *item == u.value) + .ok_or_else(|| { + let name = name + .as_ref() + .map(|name| format!("with name '{name}' ")) + .unwrap_or("".to_string()); + DataFusionError::Execution( + format!("Column for unique constraint {}not found in schema: {}", name,u.value) + ) + })?; + Ok(idx) + }) + .collect::<Result<Vec<_>>>()?; + Ok(Constraint::Unique(indices)) + } + TableConstraint::PrimaryKey { columns, .. } => { + let field_names = df_schema.field_names(); + // Get primary key indices in the schema: + let indices = columns + .iter() + .map(|pk| { + let idx = field_names + .iter() + .position(|item| *item == pk.value) + .ok_or_else(|| { + DataFusionError::Execution(format!( + "Column for primary key not found in schema: {}", + pk.value + )) + })?; + Ok(idx) + }) + .collect::<Result<Vec<_>>>()?; + Ok(Constraint::PrimaryKey(indices)) + } + TableConstraint::ForeignKey { .. } => { + _plan_err!("Foreign key constraints are not currently supported") + } + TableConstraint::Check { ..
} => { + _plan_err!("Check constraints are not currently supported") + } + TableConstraint::Index { .. } => { + _plan_err!("Indexes are not currently supported") + } + TableConstraint::FulltextOrSpatial { .. } => { + _plan_err!("Indexes are not currently supported") + } + }) + .collect::<Result<Vec<_>>>()?; + Ok(Constraints::new_unverified(constraints)) + } + fn parse_options_map( &self, options: Vec<(String, Value)>, From ccfe020a9a98203d7d37d1431e351be8d4418f63 Mon Sep 17 00:00:00 2001 From: Jonathan Chen <86070045+jonathanc-n@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:23:04 -0400 Subject: [PATCH 07/15] Added current_timestamp alias (#12958) * Add current_timestamp * ft fix? * fmt fix --- datafusion/functions/src/datetime/now.rs | 6 ++++++ datafusion/sqllogictest/test_files/timestamps.slt | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index 74eb5aea4255..690008d97212 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -28,6 +28,7 @@ use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility} #[derive(Debug)] pub struct NowFunc { signature: Signature, + aliases: Vec<String>, } impl Default for NowFunc { @@ -40,6 +41,7 @@ impl NowFunc { pub fn new() -> Self { Self { signature: Signature::uniform(0, vec![], Volatility::Stable), + aliases: vec!["current_timestamp".to_string()], } } } @@ -85,6 +87,10 @@ impl ScalarUDFImpl for NowFunc { ))) } + fn aliases(&self) -> &[String] { + &self.aliases + } + fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool { false } diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index a680e0db522d..d866ec8c94dd 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -84,6 +84,11 @@ select case when current_time() = (now()::bigint % 86400000000000)::time then 'O ---- OK +query B +select now() = current_timestamp; +---- +true + ########## ## Timestamp Handling Tests ##########
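An alias here is just a second name that resolves to the same function entry at registration time. A toy sketch of that lookup idea, with a hypothetical registry map rather than DataFusion's actual registry types:

```rust
// Toy sketch: register a UDF under its primary name and all of its aliases.
use std::collections::HashMap;

struct Udf {
    name: &'static str,
    aliases: Vec<&'static str>,
}

fn register(registry: &mut HashMap<String, usize>, udfs: &[Udf]) {
    for (idx, udf) in udfs.iter().enumerate() {
        registry.insert(udf.name.to_string(), idx);
        for alias in &udf.aliases {
            // Every alias points at the same function entry.
            registry.insert(alias.to_string(), idx);
        }
    }
}

fn main() {
    let udfs = vec![Udf { name: "now", aliases: vec!["current_timestamp"] }];
    let mut registry = HashMap::new();
    register(&mut registry, &udfs);
    // Both spellings resolve to the same function, which is what the
    // `select now() = current_timestamp` test above exercises.
    assert_eq!(registry.get("now"), registry.get("current_timestamp"));
}
```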
From ad273cab8bf300a704baf005df072bb980645e51 Mon Sep 17 00:00:00 2001 From: Sergei Grebnov Date: Thu, 17 Oct 2024 10:23:29 -0700 Subject: [PATCH 08/15] Improve unparsing for `ORDER BY`, `UNION`, Windows functions with Aggregation (#12946) * Improve unparsing for ORDER BY with Aggregation functions (#38) * Improve UNION unparsing (#39) * Scalar functions in ORDER BY unparsing support (#41) * Improve unparsing for complex Window functions with Aggregation (#42) * WindowFunction order_by should respect `supports_nulls_first_in_sort` dialect setting (#43) * Fix plan_to_sql * Improve --- datafusion/sql/src/unparser/expr.rs | 10 +--- datafusion/sql/src/unparser/plan.rs | 42 +++++++++----- datafusion/sql/src/unparser/utils.rs | 69 ++++++++++++++++++----- datafusion/sql/tests/cases/plan_to_sql.rs | 63 ++++++++++++++++++++- 4 files changed, 148 insertions(+), 36 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 1be5aa68bfba..8864c97bb1ff 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -76,11 +76,6 @@ pub fn expr_to_sql(expr: &Expr) -> Result<ast::Expr> { unparser.expr_to_sql(expr) } -pub fn sort_to_sql(sort: &Sort) -> Result<ast::OrderByExpr> { - let unparser = Unparser::default(); - unparser.sort_to_sql(sort) -} - const LOWEST: &BinaryOperator = &BinaryOperator::Or; // Closest precedence we have to IS operator is BitwiseAnd (any other) in PG docs (https://www.postgresql.org/docs/7.2/sql-precedence.html) @@ -229,9 +224,10 @@ impl Unparser<'_> { ast::WindowFrameUnits::Groups } }; - let order_by: Vec<ast::OrderByExpr> = order_by + + let order_by = order_by .iter() - .map(sort_to_sql) + .map(|sort_expr| self.sort_to_sql(sort_expr)) .collect::<Result<Vec<_>>>()?; let start_bound = self.convert_bound(&window_frame.start_bound)?; diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 9b4818b98cb0..c22400f1faa1 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -27,7 +27,7 @@ use super::{ }, utils::{ find_agg_node_within_select, find_window_nodes_within_select, - unproject_window_exprs, + unproject_sort_expr, unproject_window_exprs, }, Unparser, }; @@ -352,19 +352,30 @@ impl Unparser<'_> { if select.already_projected() { return self.derive(plan, relation); } - if let Some(query_ref) = query { - if let Some(fetch) = sort.fetch { - query_ref.limit(Some(ast::Expr::Value(ast::Value::Number( - fetch.to_string(), - false, - )))); - } - query_ref.order_by(self.sorts_to_sql(sort.expr.clone())?); - } else { + let Some(query_ref) = query else { return internal_err!( "Sort operator only valid in a statement context." ); - } + }; + + if let Some(fetch) = sort.fetch { + query_ref.limit(Some(ast::Expr::Value(ast::Value::Number( + fetch.to_string(), + false, + )))); + }; + + let agg = find_agg_node_within_select(plan, select.already_projected()); + // unproject sort expressions + let sort_exprs: Vec<SortExpr> = sort + .expr + .iter() + .map(|sort_expr| { + unproject_sort_expr(sort_expr, agg, sort.input.as_ref()) + }) + .collect::<Result<Vec<_>>>()?; + + query_ref.order_by(self.sorts_to_sql(&sort_exprs)?); self.select_to_sql_recursively( sort.input.as_ref(), @@ -402,7 +413,7 @@ .collect::<Result<Vec<_>>>()?; if let Some(sort_expr) = &on.sort_expr { if let Some(query_ref) = query { - query_ref.order_by(self.sorts_to_sql(sort_expr.clone())?); + query_ref.order_by(self.sorts_to_sql(sort_expr)?); } else { return internal_err!( "Sort operator only valid in a statement context." ); } } @@ -546,6 +557,11 @@ ); } + // Covers cases where the UNION is a subquery and the projection is at the top level + if select.already_projected() { + return self.derive(plan, relation); + } + let input_exprs: Vec<SetExpr> = union .inputs .iter() .collect::<Result<Vec<_>>>()?; @@ -691,7 +707,7 @@ impl Unparser<'_> { } } - fn sorts_to_sql(&self, sort_exprs: Vec<SortExpr>) -> Result<Vec<ast::OrderByExpr>> { + fn sorts_to_sql(&self, sort_exprs: &[SortExpr]) -> Result<Vec<ast::OrderByExpr>> { sort_exprs .iter() .map(|sort_expr| self.sort_to_sql(sort_expr))
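The `unproject_sort_expr` machinery introduced below deals with ORDER BY keys that arrive as bare columns named after an aggregate (e.g. a column literally called `sum(id)`) and must be mapped back to the aggregate expression before unparsing. A self-contained sketch of that name-based mapping, with a toy expression type (the real code walks the `Aggregate` node's schema instead):

```rust
// Toy sketch of ORDER BY "unprojection": map a column named like an
// aggregate back to the aggregate expression itself.
use std::collections::HashMap;

#[derive(Clone, Debug, PartialEq)]
enum Expr {
    Column(String),
    AggregateCall { func: String, arg: String },
}

/// Mimics how an aggregate expression is displayed in a schema, e.g. "sum(id)".
fn schema_name(expr: &Expr) -> String {
    match expr {
        Expr::Column(c) => c.clone(),
        Expr::AggregateCall { func, arg } => format!("{func}({arg})"),
    }
}

fn unproject(sort_key: &Expr, agg_exprs: &[Expr]) -> Expr {
    let by_name: HashMap<String, &Expr> =
        agg_exprs.iter().map(|e| (schema_name(e), e)).collect();
    match sort_key {
        // A bare column whose name matches an aggregate is replaced by it;
        // anything else passes through unchanged.
        Expr::Column(name) => by_name
            .get(name)
            .map(|e| (*e).clone())
            .unwrap_or_else(|| sort_key.clone()),
        other => other.clone(),
    }
}

fn main() {
    let aggs = vec![Expr::AggregateCall { func: "sum".into(), arg: "id".into() }];
    let key = Expr::Column("sum(id)".into());
    // ORDER BY sum(id) round-trips to the aggregate call, not a bare column.
    assert_eq!(unproject(&key, &aggs), aggs[0].clone());
}
```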
diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index e8c4eca569b1..5e3a3aa600b6 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -20,10 +20,11 @@ use std::cmp::Ordering; use datafusion_common::{ internal_err, tree_node::{Transformed, TreeNode}, - Column, DataFusionError, Result, ScalarValue, + Column, Result, ScalarValue, }; use datafusion_expr::{ - utils::grouping_set_to_exprlist, Aggregate, Expr, LogicalPlan, Window, + utils::grouping_set_to_exprlist, Aggregate, Expr, LogicalPlan, Projection, SortExpr, + Window, }; use sqlparser::ast; @@ -118,21 +119,11 @@ pub(crate) fn unproject_agg_exprs( if let Expr::Column(c) = sub_expr { if let Some(unprojected_expr) = find_agg_expr(agg, &c)? { Ok(Transformed::yes(unprojected_expr.clone())) - } else if let Some(mut unprojected_expr) = + } else if let Some(unprojected_expr) = windows.and_then(|w| find_window_expr(w, &c.name).cloned()) { - if let Expr::WindowFunction(func) = &mut unprojected_expr { - // Window function can contain an aggregation column, e.g., 'avg(sum(ss_sales_price)) over ...' that needs to be unprojected - func.args.iter_mut().try_for_each(|arg| { - if let Expr::Column(c) = arg { - if let Some(expr) = find_agg_expr(agg, c)? { - *arg = expr.clone(); - } - } - Ok::<(), DataFusionError>(()) - })?; - } - Ok(Transformed::yes(unprojected_expr)) + // Window functions can contain aggregation columns, e.g., 'avg(sum(ss_sales_price)) over ...', that need to be unprojected + return Ok(Transformed::yes(unproject_agg_exprs(&unprojected_expr, agg, None)?)); } else { internal_err!( "Tried to unproject agg expr for column '{}' that was not found in the provided Aggregate!", &c.name @@ -200,6 +191,54 @@ fn find_window_expr<'a>( .find(|expr| expr.schema_name().to_string() == column_name) } +/// Transforms a Column expression into the actual expression from aggregation or projection if found. +/// This is required because if an ORDER BY expression is present in an Aggregate or Select, it is replaced +/// with a Column expression (e.g., "sum(catalog_returns.cr_net_loss)"). We need to transform it back to +/// the actual expression, such as sum("catalog_returns"."cr_net_loss"). +pub(crate) fn unproject_sort_expr( + sort_expr: &SortExpr, + agg: Option<&Aggregate>, + input: &LogicalPlan, +) -> Result<SortExpr> { + let mut sort_expr = sort_expr.clone(); + + // Remove alias if present, because ORDER BY cannot use aliases + if let Expr::Alias(alias) = &sort_expr.expr { + sort_expr.expr = *alias.expr.clone(); + } + + let Expr::Column(ref col_ref) = sort_expr.expr else { + return Ok(sort_expr); + }; + + if col_ref.relation.is_some() { + return Ok(sort_expr); + }; + + // In case of aggregation there could be columns containing aggregation functions we need to unproject + if let Some(agg) = agg { + if agg.schema.is_column_from_schema(col_ref) { + let new_expr = unproject_agg_exprs(&sort_expr.expr, agg, None)?; + sort_expr.expr = new_expr; + return Ok(sort_expr); + } + } + + // If SELECT and ORDER BY contain the same expression with a scalar function, the ORDER BY expression will + // be replaced by a Column expression (e.g., "substr(customer.c_last_name, Int64(0), Int64(5))"), and we need + // to transform it back to the actual expression. + if let LogicalPlan::Projection(Projection { expr, schema, .. }) = input { + if let Ok(idx) = schema.index_of_column(col_ref) { + if let Some(Expr::ScalarFunction(scalar_fn)) = expr.get(idx) { + sort_expr.expr = Expr::ScalarFunction(scalar_fn.clone()); + } + } + return Ok(sort_expr); + } + + Ok(sort_expr) +} + /// Converts a date_part function to SQL, tailoring it to the supported date field extraction style.
pub(crate) fn date_part_to_sql( unparser: &Unparser, diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index e4e5d6a92964..74abdf075f23 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -22,6 +22,9 @@ use arrow_schema::*; use datafusion_common::{DFSchema, Result, TableReference}; use datafusion_expr::test::function_stub::{count_udaf, max_udaf, min_udaf, sum_udaf}; use datafusion_expr::{col, lit, table_scan, wildcard, LogicalPlanBuilder}; +use datafusion_functions::unicode; +use datafusion_functions_aggregate::grouping::grouping_udaf; +use datafusion_functions_window::rank::rank_udwf; use datafusion_sql::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_sql::unparser::dialect::{ DefaultDialect as UnparserDefaultDialect, Dialect as UnparserDialect, @@ -139,6 +142,13 @@ fn roundtrip_statement() -> Result<()> { SELECT j2_string as string FROM j2 ORDER BY string DESC LIMIT 10"#, + r#"SELECT col1, id FROM ( + SELECT j1_string AS col1, j1_id AS id FROM j1 + UNION ALL + SELECT j2_string AS col1, j2_id AS id FROM j2 + UNION ALL + SELECT j3_string AS col1, j3_id AS id FROM j3 + ) AS subquery GROUP BY col1, id ORDER BY col1 ASC, id ASC"#, "SELECT id, count(*) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), last_name, sum(id) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), first_name from person", @@ -657,7 +667,12 @@ where .unwrap(); let context = MockContextProvider { - state: MockSessionState::default(), + state: MockSessionState::default() + .with_aggregate_function(sum_udaf()) + .with_aggregate_function(max_udaf()) + .with_aggregate_function(grouping_udaf()) + .with_window_function(rank_udwf()) + .with_scalar_function(Arc::new(unicode::substr().as_ref().clone())), }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); @@ -969,3 +984,49 @@ fn test_with_offset0() { fn test_with_offset95() { sql_round_trip(MySqlDialect {}, "select 1 offset 95", "SELECT 1 OFFSET 95"); } + +#[test] +fn test_order_by_to_sql() { + // order by aggregation function + sql_round_trip( + GenericDialect {}, + r#"SELECT id, first_name, SUM(id) FROM person GROUP BY id, first_name ORDER BY SUM(id) ASC, first_name DESC, id, first_name LIMIT 10"#, + r#"SELECT person.id, person.first_name, sum(person.id) FROM person GROUP BY person.id, person.first_name ORDER BY sum(person.id) ASC NULLS LAST, person.first_name DESC NULLS FIRST, person.id ASC NULLS LAST, person.first_name ASC NULLS LAST LIMIT 10"#, + ); + + // order by aggregation function alias + sql_round_trip( + GenericDialect {}, + r#"SELECT id, first_name, SUM(id) as total_sum FROM person GROUP BY id, first_name ORDER BY total_sum ASC, first_name DESC, id, first_name LIMIT 10"#, + r#"SELECT person.id, person.first_name, sum(person.id) AS total_sum FROM person GROUP BY person.id, person.first_name ORDER BY total_sum ASC NULLS LAST, person.first_name DESC NULLS FIRST, person.id ASC NULLS LAST, person.first_name ASC NULLS LAST LIMIT 10"#, + ); + + // order by scalar function from projection + sql_round_trip( + GenericDialect {}, + r#"SELECT id, first_name, substr(first_name,0,5) FROM person ORDER BY id, substr(first_name,0,5)"#, + r#"SELECT person.id, person.first_name, substr(person.first_name, 0, 5) FROM person ORDER BY person.id ASC NULLS LAST, substr(person.first_name, 0, 5) ASC NULLS LAST"#, + ); +} + +#[test] +fn 
test_aggregation_to_sql() { sql_round_trip( GenericDialect {}, r#"SELECT id, first_name, SUM(id) AS total_sum, SUM(id) OVER (PARTITION BY first_name ROWS BETWEEN 5 PRECEDING AND 2 FOLLOWING) AS moving_sum, MAX(SUM(id)) OVER (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS max_total, rank() OVER (PARTITION BY grouping(id) + grouping(age), CASE WHEN grouping(age) = 0 THEN id END ORDER BY sum(id) DESC) AS rank_within_parent_1, rank() OVER (PARTITION BY grouping(age) + grouping(id), CASE WHEN (CAST(grouping(age) AS BIGINT) = 0) THEN id END ORDER BY sum(id) DESC) AS rank_within_parent_2 FROM person GROUP BY id, first_name;"#, r#"SELECT person.id, person.first_name, +sum(person.id) AS total_sum, sum(person.id) OVER (PARTITION BY person.first_name ROWS BETWEEN '5' PRECEDING AND '2' FOLLOWING) AS moving_sum, +max(sum(person.id)) OVER (PARTITION BY person.first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS max_total, +rank() OVER (PARTITION BY (grouping(person.id) + grouping(person.age)), CASE WHEN (grouping(person.age) = 0) THEN person.id END ORDER BY sum(person.id) DESC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rank_within_parent_1, +rank() OVER (PARTITION BY (grouping(person.age) + grouping(person.id)), CASE WHEN (CAST(grouping(person.age) AS BIGINT) = 0) THEN person.id END ORDER BY sum(person.id) DESC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rank_within_parent_2 +FROM person +GROUP BY person.id, person.first_name"#.replace("\n", " ").as_str(), ); } From 0ed369e925ae8856e36b166bfcea8601019c6967 Mon Sep 17 00:00:00 2001 From: Georgi Krastev Date: Thu, 17 Oct 2024 20:24:07 +0300 Subject: [PATCH 09/15] Handle one-element array return value in ScalarFunctionExpr (#12965) This was done in #12922 only for math functions. We now generalize this fallback to all scalar UDFs. --- datafusion/expr-common/src/columnar_value.rs | 11 ----------- datafusion/functions/src/macros.rs | 12 ++++++------ .../physical-expr/src/scalar_function.rs | 18 +++++++++++++++--- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/datafusion/expr-common/src/columnar_value.rs b/datafusion/expr-common/src/columnar_value.rs index 1ee90eb4b4a8..57056d0806a7 100644 --- a/datafusion/expr-common/src/columnar_value.rs +++ b/datafusion/expr-common/src/columnar_value.rs @@ -217,17 +217,6 @@ impl ColumnarValue { } } } - - /// Converts an [`ArrayRef`] to a [`ColumnarValue`] based on the supplied arguments. - /// This is useful for scalar UDF implementations to fulfil their contract: - /// if all arguments are scalar values, the result should also be a scalar value. - pub fn from_args_and_result(args: &[Self], result: ArrayRef) -> Result<Self> { - if result.len() == 1 && args.iter().all(|arg| matches!(arg, Self::Scalar(_))) { - Ok(Self::Scalar(ScalarValue::try_from_array(&result, 0)?)) - } else { - Ok(Self::Array(result)) - } - } } #[cfg(test)] diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index 85ffaa868f24..744a0189125c 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -228,8 +228,8 @@ macro_rules!
make_math_unary_udf { $EVALUATE_BOUNDS(inputs) } - fn invoke(&self, col_args: &[ColumnarValue]) -> Result<ColumnarValue> { - let args = ColumnarValue::values_to_arrays(col_args)?; + fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { + let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { DataType::Float64 => { Arc::new(make_function_scalar_inputs_return_type!( @@ -257,7 +257,7 @@ macro_rules! make_math_unary_udf { } }; - ColumnarValue::from_args_and_result(col_args, arr) + Ok(ColumnarValue::Array(arr)) } fn documentation(&self) -> Option<&Documentation> { @@ -344,8 +344,8 @@ macro_rules! make_math_binary_udf { $OUTPUT_ORDERING(input) } - fn invoke(&self, col_args: &[ColumnarValue]) -> Result<ColumnarValue> { - let args = ColumnarValue::values_to_arrays(col_args)?; + fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { + let args = ColumnarValue::values_to_arrays(args)?; let arr: ArrayRef = match args[0].data_type() { DataType::Float64 => Arc::new(make_function_inputs2!( &args[0], @@ -372,7 +372,7 @@ } }; - ColumnarValue::from_args_and_result(col_args, arr) + Ok(ColumnarValue::Array(arr)) } fn documentation(&self) -> Option<&Documentation> { diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 130c335d1c95..4d3db96ceb3c 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -39,7 +39,8 @@ use crate::PhysicalExpr; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_common::{internal_err, DFSchema, Result}; +use arrow_array::Array; +use datafusion_common::{internal_err, DFSchema, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::ExprProperties; use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf; @@ -147,8 +148,19 @@ impl PhysicalExpr for ScalarFunctionExpr { if let ColumnarValue::Array(array) = &output { if array.len() != batch.num_rows() { - return internal_err!("UDF returned a different number of rows than expected. Expected: {}, Got: {}", - batch.num_rows(), array.len()); + // If the arguments are a non-empty slice of scalar values, we can assume that + // returning a one-element array is equivalent to returning a scalar. + let preserve_scalar = array.len() == 1 + && !inputs.is_empty() + && inputs + .iter() + .all(|arg| matches!(arg, ColumnarValue::Scalar(_))); + return if preserve_scalar { + ScalarValue::try_from_array(array, 0).map(ColumnarValue::Scalar) + } else { + internal_err!("UDF returned a different number of rows than expected.
Expected: {}, Got: {}", + batch.num_rows(), array.len()) + }; } } Ok(output) From f718fe2270cb4bf9e3a933b2351d28c62216060c Mon Sep 17 00:00:00 2001 From: Jagdish Parihar Date: Thu, 17 Oct 2024 22:54:28 +0530 Subject: [PATCH 10/15] Migrate datetime documentation to code (#12966) * added code docs for the datetime functions * removed old docs for time and date functions * fixed description for to_unixtime() * removed todo comments * fix merge --------- Co-authored-by: Andrew Lamb --- .../functions/src/datetime/current_date.rs | 27 +- .../functions/src/datetime/current_time.rs | 30 +- datafusion/functions/src/datetime/date_bin.rs | 43 +- .../functions/src/datetime/date_part.rs | 45 +- .../functions/src/datetime/date_trunc.rs | 39 +- .../functions/src/datetime/from_unixtime.rs | 29 +- .../functions/src/datetime/make_date.rs | 48 +- datafusion/functions/src/datetime/now.rs | 29 +- datafusion/functions/src/datetime/to_char.rs | 41 +- .../functions/src/datetime/to_local_time.rs | 73 ++- .../functions/src/datetime/to_timestamp.rs | 214 ++++++- .../functions/src/datetime/to_unixtime.rs | 50 +- .../source/user-guide/sql/scalar_functions.md | 605 ++---------------- .../user-guide/sql/scalar_functions_new.md | 489 ++++++++++++++ 14 files changed, 1177 insertions(+), 585 deletions(-) diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 8b180ff41b91..24046611a71f 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -22,8 +22,12 @@ use arrow::datatypes::DataType::Date32; use chrono::{Datelike, NaiveDate}; use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; -use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, +}; +use std::sync::OnceLock; #[derive(Debug)] pub struct CurrentDateFunc { @@ -95,4 +99,25 @@ impl ScalarUDFImpl for CurrentDateFunc { ScalarValue::Date32(days), ))) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_current_date_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_current_date_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description(r#" +Returns the current UTC date. + +The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes. +"#) + .with_syntax_example("current_date()") + .build() + .unwrap() + }) } diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 803759d4e904..4122b54b07e8 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -15,15 +15,18 @@ // specific language governing permissions and limitations // under the License. 
From f718fe2270cb4bf9e3a933b2351d28c62216060c Mon Sep 17 00:00:00 2001 From: Jagdish Parihar Date: Thu, 17 Oct 2024 22:54:28 +0530 Subject: [PATCH 10/15] Migrate datetime documentation to code (#12966) * added code docs for the datetime functions * removed old docs for time and date functions * fixed description for to_unixtime() * removed todo comments * fix merge --------- Co-authored-by: Andrew Lamb --- .../functions/src/datetime/current_date.rs | 27 +- .../functions/src/datetime/current_time.rs | 30 +- datafusion/functions/src/datetime/date_bin.rs | 43 +- .../functions/src/datetime/date_part.rs | 45 +- .../functions/src/datetime/date_trunc.rs | 39 +- .../functions/src/datetime/from_unixtime.rs | 29 +- .../functions/src/datetime/make_date.rs | 48 +- datafusion/functions/src/datetime/now.rs | 29 +- datafusion/functions/src/datetime/to_char.rs | 41 +- .../functions/src/datetime/to_local_time.rs | 73 ++- .../functions/src/datetime/to_timestamp.rs | 214 ++++++- .../functions/src/datetime/to_unixtime.rs | 50 +- .../source/user-guide/sql/scalar_functions.md | 605 ++---------------- .../user-guide/sql/scalar_functions_new.md | 489 ++++++++++++++ 14 files changed, 1177 insertions(+), 585 deletions(-) diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 8b180ff41b91..24046611a71f 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -22,8 +22,12 @@ use arrow::datatypes::DataType::Date32; use chrono::{Datelike, NaiveDate}; use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; -use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, +}; +use std::sync::OnceLock; #[derive(Debug)] pub struct CurrentDateFunc { @@ -95,4 +99,25 @@ impl ScalarUDFImpl for CurrentDateFunc { ScalarValue::Date32(days), ))) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_current_date_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_current_date_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description(r#" +Returns the current UTC date. + +The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes. +"#) + .with_syntax_example("current_date()") + .build() + .unwrap() + }) } diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 803759d4e904..4122b54b07e8 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -15,15 +15,18 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; - use arrow::datatypes::DataType; use arrow::datatypes::DataType::Time64; use arrow::datatypes::TimeUnit::Nanosecond; +use std::any::Any; +use std::sync::OnceLock; use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; -use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, +}; #[derive(Debug)] pub struct CurrentTimeFunc { @@ -84,4 +87,25 @@ impl ScalarUDFImpl for CurrentTimeFunc { ScalarValue::Time64Nanosecond(nano), ))) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_current_time_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_current_time_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description(r#" +Returns the current UTC time. + +The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes. +"#) + .with_syntax_example("current_time()") + .build() + .unwrap() + }) } diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 997f1a36ad04..e335c4e097f7 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use arrow::array::temporal_conversions::NANOSECONDS; use arrow::array::types::{ @@ -35,10 +35,11 @@ use datafusion_common::{exec_err, not_impl_err, plan_err, Result, ScalarValue}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, }; use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; #[derive(Debug)] pub struct DateBinFunc { @@ -163,6 +164,44 @@ impl ScalarUDFImpl for DateBinFunc { Ok(SortProperties::Unordered) } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_date_bin_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_date_bin_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description(r#" +Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window. + +For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`. +"#) + .with_syntax_example("date_bin(interval, expression, origin-timestamp)") + .with_argument("interval", "Bin interval.") + .with_argument("expression", "Time expression to operate on. Can be a constant, column, or function.") + .with_argument("origin-timestamp", "Optional. Starting point used to determine bin boundaries.
If not specified, defaults to 1970-01-01T00:00:00Z (the UNIX epoch in UTC). + +The following intervals are supported: + +- nanoseconds +- microseconds +- milliseconds +- seconds +- minutes +- hours +- days +- weeks +- months +- years +- century +") .build() .unwrap() })
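The binning this documentation describes is plain interval arithmetic. A worked sketch in integer seconds (not the arrow-typed implementation) showing how `2023-01-01T18:18:18Z` lands on the `18:15:00` bin start for a 15 minute stride:

```rust
// Sketch of date_bin's arithmetic using seconds instead of arrow timestamps.
fn date_bin_seconds(stride: i64, ts: i64, origin: i64) -> i64 {
    // Truncate toward negative infinity so timestamps before the origin
    // still land on a bin start rather than a bin end.
    let delta = ts - origin;
    let bins = delta.div_euclid(stride);
    origin + bins * stride
}

fn main() {
    let stride = 15 * 60; // 15 minutes
    // 18:18:18 expressed as seconds past a midnight origin:
    let ts = 18 * 3600 + 18 * 60 + 18;
    let binned = date_bin_seconds(stride, ts, 0);
    assert_eq!(binned, 18 * 3600 + 15 * 60); // 18:15:00, the bin start
}
```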
} enum Interval { diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index b6a9a1c7e9db..3fefa5051376 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::str::FromStr; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use arrow::array::{Array, ArrayRef, Float64Array}; use arrow::compute::kernels::cast_utils::IntervalUnit; @@ -37,9 +37,10 @@ use datafusion_common::cast::{ as_timestamp_nanosecond_array, as_timestamp_second_array, }; use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, }; #[derive(Debug)] @@ -217,6 +218,46 @@ impl ScalarUDFImpl for DatePartFunc { fn aliases(&self) -> &[String] { &self.aliases } + fn documentation(&self) -> Option<&Documentation> { + Some(get_date_part_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_date_part_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Returns the specified part of the date as an integer.") + .with_syntax_example("date_part(part, expression)") + .with_argument( + "part", + r#"Part of the date to return. The following date parts are supported: + + - year + - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) + - month + - week (week of the year) + - day (day of the month) + - hour + - minute + - second + - millisecond + - microsecond + - nanosecond + - dow (day of the week) + - doy (day of the year) + - epoch (seconds since Unix epoch) +"#, + ) + .with_argument( + "expression", + "Time expression to operate on. Can be a constant, column, or function.", ) .build() .unwrap() }) } /// Invoke [`date_part`] and cast the result to Float64 diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index f4786b16685f..4808f020e0ca 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -18,7 +18,7 @@ use std::any::Any; use std::ops::{Add, Sub}; use std::str::FromStr; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use arrow::array::temporal_conversions::{ as_datetime_with_timezone, timestamp_ns_to_datetime, @@ -36,12 +36,13 @@ use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, }; use chrono::{ DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike, }; use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; #[derive(Debug)] pub struct DateTruncFunc { @@ -241,6 +242,40 @@ impl ScalarUDFImpl for DateTruncFunc { Ok(SortProperties::Unordered) } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_date_trunc_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_date_trunc_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Truncates a timestamp value to a specified precision.") + .with_syntax_example("date_trunc(precision, expression)") + .with_argument( + "precision", + r#"Time precision to truncate to. The following precisions are supported: + + - year / YEAR + - quarter / QUARTER + - month / MONTH + - week / WEEK + - day / DAY + - hour / HOUR + - minute / MINUTE + - second / SECOND +"#, + ) + .with_argument( + "expression", + "Time expression to operate on. Can be a constant, column, or function.", + ) + .build() + .unwrap() + }) } fn _date_trunc_coarse(granularity: &str, value: Option<i64>) -> Result<Option<i64>> diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index d36ebe735ee7..84aa9feec654 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -15,14 +15,17 @@ // specific language governing permissions and limitations // under the License.
-use std::any::Any; - use arrow::datatypes::DataType; use arrow::datatypes::DataType::{Int64, Timestamp}; use arrow::datatypes::TimeUnit::Second; +use std::any::Any; +use std::sync::OnceLock; use datafusion_common::{exec_err, Result}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; #[derive(Debug)] pub struct FromUnixtimeFunc { @@ -78,4 +81,24 @@ impl ScalarUDFImpl for FromUnixtimeFunc { } } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_from_unixtime_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_from_unixtime_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp.") + .with_syntax_example("from_unixtime(expression)") + .with_argument( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ) + .build() + .unwrap() + }) } diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs index ded7b454f9eb..78bd7c63a412 100644 --- a/datafusion/functions/src/datetime/make_date.rs +++ b/datafusion/functions/src/datetime/make_date.rs @@ -16,7 +16,7 @@ // under the License. use std::any::Any; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use arrow::array::builder::PrimitiveBuilder; use arrow::array::cast::AsArray; @@ -27,7 +27,10 @@ use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8, Utf use chrono::prelude::*; use datafusion_common::{exec_err, Result, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; #[derive(Debug)] pub struct MakeDateFunc { @@ -148,6 +151,47 @@ impl ScalarUDFImpl for MakeDateFunc { Ok(value) } + fn documentation(&self) -> Option<&Documentation> { + Some(get_make_date_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_make_date_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Make a date from year/month/day component parts.") + .with_syntax_example("make_date(year, month, day)") + .with_argument( + "year", + " Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.", ) + .with_argument( + "month", + "Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.", + ) + .with_argument("day", "Day to use when making the date.
Can be a constant, column or function, and any combination of arithmetic operators.") + .with_sql_example(r#"```sql +> select make_date(2023, 1, 31); ++-------------------------------------------+ +| make_date(Int64(2023),Int64(1),Int64(31)) | ++-------------------------------------------+ +| 2023-01-31 | ++-------------------------------------------+ +> select make_date('2023', '01', '31'); ++-----------------------------------------------+ +| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) | ++-----------------------------------------------+ +| 2023-01-31 | ++-----------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs) +"#) + .build() + .unwrap() + }) } /// Converts the year/month/day fields to an `i32` representing the days from diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index 690008d97212..c13bbfb18105 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -15,15 +15,18 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; - use arrow::datatypes::DataType; use arrow::datatypes::DataType::Timestamp; use arrow::datatypes::TimeUnit::Nanosecond; +use std::any::Any; +use std::sync::OnceLock; use datafusion_common::{internal_err, ExprSchema, Result, ScalarValue}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; -use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, +}; #[derive(Debug)] pub struct NowFunc { @@ -86,6 +89,9 @@ impl ScalarUDFImpl for NowFunc { ScalarValue::TimestampNanosecond(now_ts, Some("+00:00".into())), ))) } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_unixtime_doc()) + } fn aliases(&self) -> &[String] { &self.aliases @@ -95,3 +101,20 @@ impl ScalarUDFImpl for NowFunc { false } } + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_to_unixtime_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description(r#" +Returns the current UTC timestamp. + +The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes. +"#) + .with_syntax_example("now()") + .build() + .unwrap() + }) +} diff --git a/datafusion/functions/src/datetime/to_char.rs b/datafusion/functions/src/datetime/to_char.rs index f2e5af978ca0..430dcedd92cf 100644 --- a/datafusion/functions/src/datetime/to_char.rs +++ b/datafusion/functions/src/datetime/to_char.rs @@ -16,7 +16,7 @@ // under the License. 
use std::any::Any; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use arrow::array::cast::AsArray; use arrow::array::{new_null_array, Array, ArrayRef, StringArray}; @@ -29,9 +29,10 @@ use arrow::error::ArrowError; use arrow::util::display::{ArrayFormatter, DurationFormat, FormatOptions}; use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ - ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, }; #[derive(Debug)] @@ -137,6 +138,42 @@ impl ScalarUDFImpl for ToCharFunc { fn aliases(&self) -> &[String] { &self.aliases } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_char_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_to_char_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported.") + .with_syntax_example("to_char(expression, format)") + .with_argument( + "expression", + " Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration." + ) + .with_argument( + "format", + "A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression.", + ) + .with_sql_example(r#"```sql +> select to_char('2023-03-01'::date, '%d-%m-%Y'); ++----------------------------------------------+ +| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) | ++----------------------------------------------+ +| 01-03-2023 | ++----------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs) +"#) + .build() + .unwrap() + }) } fn _build_format_options<'a>(
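The `%d-%m-%Y` pattern in the `to_char` example above is ordinary Chrono strftime formatting, which these functions already depend on. A quick standalone check of the same pattern, assuming the chrono crate is available:

```rust
// Verify the `%d-%m-%Y` Chrono pattern used in the to_char doc example.
use chrono::NaiveDate;

fn main() {
    let d = NaiveDate::from_ymd_opt(2023, 3, 1).expect("valid date");
    // Matches the SQL example's output: 01-03-2023
    assert_eq!(d.format("%d-%m-%Y").to_string(), "01-03-2023");
}
```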
diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 0e33da14547e..7646137ce656 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::ops::Add; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use arrow::array::timezone::Tz; use arrow::array::{Array, ArrayRef, PrimitiveBuilder}; @@ -31,7 +31,10 @@ use arrow::datatypes::{ use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; use datafusion_common::cast::as_primitive_array; use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; /// A UDF function that converts a timezone-aware timestamp to local time (with no offset or /// timezone information). In other words, this function strips off the timezone from the timestamp, @@ -351,6 +354,72 @@ impl ScalarUDFImpl for ToLocalTimeFunc { _ => plan_err!("The to_local_time function can only accept Timestamp as the arg got {first_arg}"), } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_local_time_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_to_local_time_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_DATETIME)
            .with_description("Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or timezone information). This function handles daylight saving time changes.")
            .with_syntax_example("to_local_time(expression)")
            .with_argument(
                "expression",
                "Time expression to operate on. Can be a constant, column, or function."
            )
            .with_sql_example(r#"```sql
> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp); +---------------------------------------------+ | to_local_time(Utf8("2024-04-01T00:00:20Z")) | +---------------------------------------------+ | 2024-04-01T00:00:20 | +---------------------------------------------+ > SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels'); +---------------------------------------------+ | to_local_time(Utf8("2024-04-01T00:00:20Z")) | +---------------------------------------------+ | 2024-04-01T00:00:20 | +---------------------------------------------+ > SELECT time, arrow_typeof(time) as type, to_local_time(time) as to_local_time, arrow_typeof(to_local_time(time)) as to_local_time_type FROM ( SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time ); +---------------------------+------------------------------------------------+---------------------+-----------------------------+ | time | type | to_local_time | to_local_time_type | +---------------------------+------------------------------------------------+---------------------+-----------------------------+ | 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) | +---------------------------+------------------------------------------------+---------------------+-----------------------------+ # combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather # than UTC boundaries > SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin; +---------------------+ | date_bin | +---------------------+ | 2024-04-01T00:00:00 | +---------------------+ > SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone; +---------------------------+ | date_bin_with_timezone | +---------------------------+ | 2024-04-01T00:00:00+02:00 | +---------------------------+ ```"#) .build() .unwrap() }) } #[cfg(test)] diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index b17c9a005d1f..9479e25fe61f 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -16,7 +16,7 @@ // under the License.
use std::any::Any; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use arrow::datatypes::DataType::*; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{ TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; -use datafusion_common::{exec_err, Result, ScalarType}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; - use crate::datetime::common::*; +use datafusion_common::{exec_err, Result, ScalarType}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; #[derive(Debug)] pub struct ToTimestampFunc { @@ -182,6 +184,50 @@ impl ScalarUDFImpl for ToTimestampFunc { } } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_timestamp_doc()) + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +fn get_to_timestamp_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description(r#" +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. + +Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. +"#) + .with_syntax_example("to_timestamp(expression[, ..., format_n])") + .with_argument( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ) + .with_argument( + "format_n", + "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned.
If none of the formats successfully parse the expression, an error will be returned.", ) .with_sql_example(r#"```sql +> select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------+ +| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------+ +> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------+ +| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++--------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#) .build() .unwrap() }) } impl ScalarUDFImpl for ToTimestampSecondsFunc { @@ -230,6 +276,46 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc { } } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_timestamp_seconds_doc()) + } +} + +static TO_TIMESTAMP_SECONDS_DOC: OnceLock<Documentation> = OnceLock::new(); + +fn get_to_timestamp_seconds_doc() -> &'static Documentation { + TO_TIMESTAMP_SECONDS_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.") + .with_syntax_example("to_timestamp_seconds(expression[, ..., format_n])") + .with_argument( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ) + .with_argument( + "format_n", + "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned.
If none of the formats successfully parse the expression, an error will be returned.", ) .with_sql_example(r#"```sql +> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); ++-------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-------------------------------------------------------------------+ +| 2023-01-31T14:26:56 | ++-------------------------------------------------------------------+ +> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++----------------------------------------------------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++----------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00 | ++----------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#) .build() .unwrap() }) } impl ScalarUDFImpl for ToTimestampMillisFunc { @@ -280,6 +366,46 @@ impl ScalarUDFImpl for ToTimestampMillisFunc { } } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_timestamp_millis_doc()) + } +} + +static TO_TIMESTAMP_MILLIS_DOC: OnceLock<Documentation> = OnceLock::new(); + +fn get_to_timestamp_millis_doc() -> &'static Documentation { + crate::datetime::to_timestamp::TO_TIMESTAMP_MILLIS_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.") + .with_syntax_example("to_timestamp_millis(expression[, ..., format_n])") + .with_argument( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ) + .with_argument( + "format_n", + "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned.
If none of the formats successfully parse the expression an error will be returned.", + ) + .with_sql_example(r#"```sql +> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123 | ++------------------------------------------------------------------+ +> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123 | ++---------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#) + .build() + .unwrap() + }) } impl ScalarUDFImpl for ToTimestampMicrosFunc { @@ -330,6 +456,46 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc { } } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_timestamp_micros_doc()) + } +} + +static TO_TIMESTAMP_MICROS_DOC: OnceLock = OnceLock::new(); + +fn get_to_timestamp_micros_doc() -> &'static Documentation { + TO_TIMESTAMP_MICROS_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.") + .with_syntax_example("to_timestamp_micros(expression[, ..., format_n])") + .with_argument( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ) + .with_argument( + "format_n", + "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned.", + ) + .with_sql_example(r#"```sql +> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456 | ++------------------------------------------------------------------+ +> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456 | ++---------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#) + .build() + .unwrap() + }) } impl ScalarUDFImpl for ToTimestampNanosFunc { @@ -380,6 +546,46 @@ impl ScalarUDFImpl for ToTimestampNanosFunc { } } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_timestamp_nanos_doc()) + } +} + +static TO_TIMESTAMP_NANOS_DOC: OnceLock = OnceLock::new(); + +fn get_to_timestamp_nanos_doc() -> &'static Documentation { + TO_TIMESTAMP_NANOS_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.") + .with_syntax_example("to_timestamp_nanos(expression[, ..., format_n])") + .with_argument( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ) + .with_argument( + "format_n", + "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned.", + ) + .with_sql_example(r#"```sql +> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------------+ +> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++---------------------------------------------------------------------------------------------------------------+ +``` +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) +"#) + .build() + .unwrap() + }) } /// Returns the return type for the to_timestamp_* function, preserving diff --git a/datafusion/functions/src/datetime/to_unixtime.rs b/datafusion/functions/src/datetime/to_unixtime.rs index 396dadccb4b3..10f0f87a4ab1 100644 --- a/datafusion/functions/src/datetime/to_unixtime.rs +++ b/datafusion/functions/src/datetime/to_unixtime.rs @@ -15,15 +15,17 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; - use arrow::datatypes::{DataType, TimeUnit}; +use std::any::Any; +use std::sync::OnceLock; +use super::to_timestamp::ToTimestampSecondsFunc; use crate::datetime::common::*; use datafusion_common::{exec_err, Result}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; - -use super::to_timestamp::ToTimestampSecondsFunc; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; #[derive(Debug)] pub struct ToUnixtimeFunc { @@ -86,4 +88,42 @@ impl ScalarUDFImpl for ToUnixtimeFunc { } } } + fn documentation(&self) -> Option<&Documentation> { + Some(get_to_unixtime_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_to_unixtime_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_DATETIME) + .with_description("Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.") + .with_syntax_example("to_unixtime(expression[, ..., format_n])") + .with_argument( + "expression", + "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators." + ).with_argument( + "format_n", + "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. 
If none of the formats successfully parse the expression an error will be returned.") + .with_sql_example(r#" +```sql +> select to_unixtime('2020-09-08T12:00:00+00:00'); ++------------------------------------------------+ +| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) | ++------------------------------------------------+ +| 1599566400 | ++------------------------------------------------+ +> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); ++-----------------------------------------------------------------------------------------------------------------------------+ +| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) | ++-----------------------------------------------------------------------------------------------------------------------------+ +| 1673638290 | ++-----------------------------------------------------------------------------------------------------------------------------+ +``` +"#) + .build() + .unwrap() + }) } diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index d1f816898d93..547ea108080e 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -51,175 +51,7 @@ position(substr in origstr) ## Time and Date Functions -- [now](#now) -- [current_date](#current_date) -- [current_time](#current_time) -- [date_bin](#date_bin) -- [date_trunc](#date_trunc) -- [datetrunc](#datetrunc) -- [date_part](#date_part) -- [datepart](#datepart) - [extract](#extract) -- [today](#today) -- [make_date](#make_date) -- [to_char](#to_char) -- [to_local_time](#to_local_time) -- [to_timestamp](#to_timestamp) -- [to_timestamp_millis](#to_timestamp_millis) -- [to_timestamp_micros](#to_timestamp_micros) -- [to_timestamp_seconds](#to_timestamp_seconds) -- [to_timestamp_nanos](#to_timestamp_nanos) -- [from_unixtime](#from_unixtime) -- [to_unixtime](#to_unixtime) - -### `now` - -Returns the current UTC timestamp. - -The `now()` return value is determined at query time and will return the same timestamp, -no matter when in the query plan the function executes. - -``` -now() -``` - -### `current_date` - -Returns the current UTC date. - -The `current_date()` return value is determined at query time and will return the same date, -no matter when in the query plan the function executes. - -``` -current_date() -``` - -#### Aliases - -- today - -### `today` - -_Alias of [current_date](#current_date)._ - -### `current_time` - -Returns the current UTC time. - -The `current_time()` return value is determined at query time and will return the same time, -no matter when in the query plan the function executes. - -``` -current_time() -``` - -### `date_bin` - -Calculates time intervals and returns the start of the interval nearest to the specified timestamp. -Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" -and applying an aggregate or selector function to each window. - -For example, if you "bin" or "window" data into 15 minute intervals, an input -timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 -minute bin it is in: `2023-01-01T18:15:00Z`. - -``` -date_bin(interval, expression, origin-timestamp) -``` - -#### Arguments - -- **interval**: Bin interval. -- **expression**: Time expression to operate on. - Can be a constant, column, or function. -- **origin-timestamp**: Optional. 
Starting point used to determine bin boundaries. If not specified - defaults `1970-01-01T00:00:00Z` (the UNIX epoch in UTC). - -The following intervals are supported: - -- nanoseconds -- microseconds -- milliseconds -- seconds -- minutes -- hours -- days -- weeks -- months -- years -- century - -### `date_trunc` - -Truncates a timestamp value to a specified precision. - -``` -date_trunc(precision, expression) -``` - -#### Arguments - -- **precision**: Time precision to truncate to. - The following precisions are supported: - - - year / YEAR - - quarter / QUARTER - - month / MONTH - - week / WEEK - - day / DAY - - hour / HOUR - - minute / MINUTE - - second / SECOND - -- **expression**: Time expression to operate on. - Can be a constant, column, or function. - -#### Aliases - -- datetrunc - -### `datetrunc` - -_Alias of [date_trunc](#date_trunc)._ - -### `date_part` - -Returns the specified part of the date as an integer. - -``` -date_part(part, expression) -``` - -#### Arguments - -- **part**: Part of the date to return. - The following date parts are supported: - - - year - - quarter _(emits value in inclusive range [1, 4] based on which quartile of the year the date is in)_ - - month - - week _(week of the year)_ - - day _(day of the month)_ - - hour - - minute - - second - - millisecond - - microsecond - - nanosecond - - dow _(day of the week)_ - - doy _(day of the year)_ - - epoch _(seconds since Unix epoch)_ - -- **expression**: Time expression to operate on. - Can be a constant, column, or function. - -#### Aliases - -- datepart - -### `datepart` - -_Alias of [date_part](#date_part)._ ### `extract` @@ -238,394 +70,10 @@ date_part('day', '2024-04-13'::date) See [date_part](#date_part). -### `make_date` - -Make a date from year/month/day component parts. - -``` -make_date(year, month, day) -``` - -#### Arguments - -- **year**: Year to use when making the date. - Can be a constant, column or function, and any combination of arithmetic operators. -- **month**: Month to use when making the date. - Can be a constant, column or function, and any combination of arithmetic operators. -- **day**: Day to use when making the date. - Can be a constant, column or function, and any combination of arithmetic operators. - -#### Example - -``` -> select make_date(2023, 1, 31); -+-------------------------------------------+ -| make_date(Int64(2023),Int64(1),Int64(31)) | -+-------------------------------------------+ -| 2023-01-31 | -+-------------------------------------------+ -> select make_date('2023', '01', '31'); -+-----------------------------------------------+ -| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) | -+-----------------------------------------------+ -| 2023-01-31 | -+-----------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs) - -### `to_char` - -Returns a string representation of a date, time, timestamp or duration based -on a [Chrono format]. Unlike the PostgreSQL equivalent of this function -numerical formatting is not supported. - -``` -to_char(expression, format) -``` - -#### Arguments - -- **expression**: Expression to operate on. - Can be a constant, column, or function that results in a - date, time, timestamp or duration. -- **format**: A [Chrono format] string to use to convert the expression. 
- -#### Example - -``` -> select to_char('2023-03-01'::date, '%d-%m-%Y'); -+----------------------------------------------+ -| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) | -+----------------------------------------------+ -| 01-03-2023 | -+----------------------------------------------+ -``` - -Additional examples can be found [here] - -[here]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs - -#### Aliases - -- date_format - -### `to_local_time` - -Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or -timezone information). This function handles daylight saving time changes. - -``` -to_local_time(expression) -``` - -#### Arguments - -- **expression**: Time expression to operate on. Can be a constant, column, or function. - -#### Example - -``` -> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp); -+---------------------------------------------+ -| to_local_time(Utf8("2024-04-01T00:00:20Z")) | -+---------------------------------------------+ -| 2024-04-01T00:00:20 | -+---------------------------------------------+ - -> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels'); -+---------------------------------------------+ -| to_local_time(Utf8("2024-04-01T00:00:20Z")) | -+---------------------------------------------+ -| 2024-04-01T00:00:20 | -+---------------------------------------------+ - -> SELECT - time, - arrow_typeof(time) as type, - to_local_time(time) as to_local_time, - arrow_typeof(to_local_time(time)) as to_local_time_type -FROM ( - SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time -); -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ -| time | type | to_local_time | to_local_time_type | -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ -| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) | -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ - -# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather -# than UTC boundaries - -> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin; -+---------------------+ -| date_bin | -+---------------------+ -| 2024-04-01T00:00:00 | -+---------------------+ - -> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone; -+---------------------------+ -| date_bin_with_timezone | -+---------------------------+ -| 2024-04-01T00:00:00+02:00 | -+---------------------------+ -``` - -### `to_timestamp` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). -Supports strings, integer, unsigned integer, and double types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. -Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). -Returns the corresponding timestamp. - -Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. 
-Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` -for the input outside of supported bounds. - -``` -to_timestamp(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order - they appear with the first successful one being returned. If none of the formats successfully parse the expression - an error will be returned. - -[chrono format]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html - -#### Example - -``` -> select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); -+-----------------------------------------------------------+ -| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-----------------------------------------------------------+ -| 2023-01-31T14:26:56.123456789 | -+-----------------------------------------------------------+ -> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+--------------------------------------------------------------------------------------------------------+ -| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+--------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456789 | -+--------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_timestamp_millis` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). -Supports strings, integer, and unsigned integer types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. -Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). -Returns the corresponding timestamp. - -``` -to_timestamp_millis(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order - they appear with the first successful one being returned. If none of the formats successfully parse the expression - an error will be returned. 
- -#### Example - -``` -> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); -+------------------------------------------------------------------+ -| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+------------------------------------------------------------------+ -| 2023-01-31T14:26:56.123 | -+------------------------------------------------------------------+ -> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+---------------------------------------------------------------------------------------------------------------+ -| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+---------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123 | -+---------------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_timestamp_micros` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). -Supports strings, integer, and unsigned integer types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. -Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) -Returns the corresponding timestamp. - -``` -to_timestamp_micros(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order - they appear with the first successful one being returned. If none of the formats successfully parse the expression - an error will be returned. - -#### Example - -``` -> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); -+------------------------------------------------------------------+ -| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+------------------------------------------------------------------+ -| 2023-01-31T14:26:56.123456 | -+------------------------------------------------------------------+ -> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+---------------------------------------------------------------------------------------------------------------+ -| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+---------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456 | -+---------------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_timestamp_nanos` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). -Supports strings, integer, and unsigned integer types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. -Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). 
-Returns the corresponding timestamp. - -``` -to_timestamp_nanos(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order - they appear with the first successful one being returned. If none of the formats successfully parse the expression - an error will be returned. - -#### Example - -``` -> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); -+-----------------------------------------------------------------+ -| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-----------------------------------------------------------------+ -| 2023-01-31T14:26:56.123456789 | -+-----------------------------------------------------------------+ -> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+--------------------------------------------------------------------------------------------------------------+ -| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+--------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456789 | -+---------------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_timestamp_seconds` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). -Supports strings, integer, and unsigned integer types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format]s are provided. -Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). -Returns the corresponding timestamp. - -``` -to_timestamp_seconds(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order - they appear with the first successful one being returned. If none of the formats successfully parse the expression - an error will be returned. 
- -#### Example - -``` -> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); -+-------------------------------------------------------------------+ -| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-------------------------------------------------------------------+ -| 2023-01-31T14:26:56 | -+-------------------------------------------------------------------+ -> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+----------------------------------------------------------------------------------------------------------------+ -| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+----------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00 | -+----------------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `from_unixtime` - -Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). -Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) -return the corresponding timestamp. - -``` -from_unixtime(expression) -``` - -#### Arguments - -- **expression**: Expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. - -### `to_unixtime` - -Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). -Supports strings, dates, timestamps and double types as input. -Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. - -``` -to_unixtime(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format] strings to use to parse the expression. Formats will be tried in the order - they appear with the first successful one being returned. If none of the formats successfully parse the expression - an error will be returned. - -#### Example - -``` -> select to_unixtime('2020-09-08T12:00:00+00:00'); -+------------------------------------------------+ -| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) | -+------------------------------------------------+ -| 1599566400 | -+------------------------------------------------+ -> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); -+-----------------------------------------------------------------------------------------------------------------------------+ -| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) | -+-----------------------------------------------------------------------------------------------------------------------------+ -| 1673638290 | -+-----------------------------------------------------------------------------------------------------------------------------+ -``` - ## Array Functions - [unnest](#unnest) +- [range](#range) ### `unnest` @@ -669,11 +117,60 @@ Transforms an array into rows. +-----------------------------------+ ``` +### `range` + +Returns an Arrow array between start and stop with step. 
`SELECT range(2, 10, 3) -> [2, 5, 8]` or
+`SELECT range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH);`
+
+The range start..end contains all values with start <= x < end. It is empty if start >= end.
+
+The step cannot be 0, since a zero step would never advance the range.
+
+Note that when the required range is a number, it accepts (stop), (start, stop), and (start, stop, step) as parameters,
+but when the required range is a date or timestamp, all 3 parameters must be non-NULL.
+For example, the following are all allowed for number ranges:
+
+```
+SELECT range(3);
+SELECT range(1,5);
+SELECT range(1,5,1);
+```
+
+but for date and timestamp ranges, only the fully specified form is allowed:
+
+```
+SELECT range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH);
+SELECT range(TIMESTAMP '1992-09-01', TIMESTAMP '1993-03-01', INTERVAL '1' MONTH);
+```
+
+while the following are not allowed:
+
+```
+SELECT range(DATE '1992-09-01', DATE '1993-03-01', NULL);
+SELECT range(NULL, DATE '1993-03-01', INTERVAL '1' MONTH);
+SELECT range(DATE '1992-09-01', NULL, INTERVAL '1' MONTH);
+```
+
+#### Arguments
+
+- **start**: Start of the range. Ints, timestamps, dates or string types that can be coerced to Date32 are supported.
+- **end**: End of the range (not included). Type must be the same as start.
+- **step**: Increase by step (cannot be 0). Steps less than a day are supported only for timestamp ranges.
+
+#### Aliases
+
+- generate_series
+
 ## Struct Functions
 
 - [unnest](#unnest-struct)
 
-For more struct functions see the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html)
+For more struct functions see the new documentation
+[`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html)
 
 ### `unnest (struct)`

diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md
index 0a073db543b0..8f1e30f1fa53 100644
--- a/docs/source/user-guide/sql/scalar_functions_new.md
+++ b/docs/source/user-guide/sql/scalar_functions_new.md
@@ -1800,7 +1800,239 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo
 
 ## Time and Date Functions
 
+- [current_date](#current_date)
+- [current_time](#current_time)
+- [date_bin](#date_bin)
+- [date_format](#date_format)
+- [date_part](#date_part)
+- [date_trunc](#date_trunc)
+- [datepart](#datepart)
+- [datetrunc](#datetrunc)
+- [from_unixtime](#from_unixtime)
+- [make_date](#make_date)
+- [now](#now)
+- [to_char](#to_char)
 - [to_date](#to_date)
+- [to_local_time](#to_local_time)
+- [to_timestamp](#to_timestamp)
+- [to_timestamp_micros](#to_timestamp_micros)
+- [to_timestamp_millis](#to_timestamp_millis)
+- [to_timestamp_nanos](#to_timestamp_nanos)
+- [to_timestamp_seconds](#to_timestamp_seconds)
+- [to_unixtime](#to_unixtime)
+- [today](#today)
+
+### `current_date`
+
+Returns the current UTC date.
+
+The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
+
+```
+current_date()
+```
+
+#### Aliases
+
+- today
+
+### `current_time`
+
+Returns the current UTC time.
+
+The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
+
+```
+current_time()
+```
+
+### `date_bin`
+
+Calculates time intervals and returns the start of the interval nearest to the specified timestamp. 
Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.
+
+For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
+
+```
+date_bin(interval, expression, origin-timestamp)
+```
+
+#### Arguments
+
+- **interval**: Bin interval.
+- **expression**: Time expression to operate on. Can be a constant, column, or function.
+- **origin-timestamp**: Optional. Starting point used to determine bin boundaries. If not specified, defaults to `1970-01-01T00:00:00Z` (the UNIX epoch in UTC).
+
+The following intervals are supported:
+
+- nanoseconds
+- microseconds
+- milliseconds
+- seconds
+- minutes
+- hours
+- days
+- weeks
+- months
+- years
+- century
+
+### `date_format`
+
+_Alias of [to_char](#to_char)._
+
+### `date_part`
+
+Returns the specified part of the date as an integer.
+
+```
+date_part(part, expression)
+```
+
+#### Arguments
+
+- **part**: Part of the date to return. The following date parts are supported:
+
+  - year
+  - quarter (emits value in inclusive range [1, 4] based on which quarter of the year the date is in)
+  - month
+  - week (week of the year)
+  - day (day of the month)
+  - hour
+  - minute
+  - second
+  - millisecond
+  - microsecond
+  - nanosecond
+  - dow (day of the week)
+  - doy (day of the year)
+  - epoch (seconds since Unix epoch)
+
+- **expression**: Time expression to operate on. Can be a constant, column, or function.
+
+#### Aliases
+
+- datepart
+
+### `date_trunc`
+
+Truncates a timestamp value to a specified precision.
+
+```
+date_trunc(precision, expression)
+```
+
+#### Arguments
+
+- **precision**: Time precision to truncate to. The following precisions are supported:
+
+  - year / YEAR
+  - quarter / QUARTER
+  - month / MONTH
+  - week / WEEK
+  - day / DAY
+  - hour / HOUR
+  - minute / MINUTE
+  - second / SECOND
+
+- **expression**: Time expression to operate on. Can be a constant, column, or function.
+
+#### Aliases
+
+- datetrunc
+
+### `datepart`
+
+_Alias of [date_part](#date_part)._
+
+### `datetrunc`
+
+_Alias of [date_trunc](#date_trunc)._
+
+### `from_unixtime`
+
+Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) and return the corresponding timestamp.
+
+```
+from_unixtime(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+
+### `make_date`
+
+Make a date from year/month/day component parts.
+
+```
+make_date(year, month, day)
+```
+
+#### Arguments
+
+- **year**: Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.
+- **month**: Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.
+- **day**: Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.
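+
+Since each component accepts arithmetic expressions, component math can be written inline; a minimal illustrative sketch (the arithmetic literal is our own, not part of the generated examples below):
+
+```sql
+-- 28 + 1 evaluates to 29; 2024 is a leap year, so this is a valid date
+SELECT make_date(2024, 2, 28 + 1);
+```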
+
+#### Example
+
+```sql
+> select make_date(2023, 1, 31);
++-------------------------------------------+
+| make_date(Int64(2023),Int64(1),Int64(31)) |
++-------------------------------------------+
+| 2023-01-31 |
++-------------------------------------------+
+> select make_date('2023', '01', '31');
++-----------------------------------------------+
+| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) |
++-----------------------------------------------+
+| 2023-01-31 |
++-----------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs)
+
+### `now`
+
+Returns the current UTC timestamp.
+
+The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes.
+
+```
+now()
+```
+
+### `to_char`
+
+Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported.
+
+```
+to_char(expression, format)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration.
+- **format**: A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression.
+
+#### Example
+
+```sql
+> select to_char('2023-03-01'::date, '%d-%m-%Y');
++----------------------------------------------+
+| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) |
++----------------------------------------------+
+| 01-03-2023 |
++----------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs)
+
+#### Aliases
+
+- date_format
 
 ### `to_date`
 
@@ -1842,6 +2074,263 @@ to_date('2017-05-31', '%Y-%m-%d')
 
 Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)
 
+### `to_local_time`
+
+Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or timezone information). This function handles daylight saving time changes.
+
+```
+to_local_time(expression)
+```
+
+#### Arguments
+
+- **expression**: Time expression to operate on. Can be a constant, column, or function.
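+
+A typical end-to-end use is grouping rows on local-time day boundaries rather than UTC ones; a sketch under assumed inputs (the `events` table and its timezone-aware `ts` column are hypothetical), with the building blocks worked through step by step in the examples below:
+
+```sql
+-- count events per local calendar day
+SELECT date_bin(interval '1 day', to_local_time(ts)) AS local_day, count(*)
+FROM events
+GROUP BY 1;
+```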
+
+#### Example
+
+```sql
+> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp);
++---------------------------------------------+
+| to_local_time(Utf8("2024-04-01T00:00:20Z")) |
++---------------------------------------------+
+| 2024-04-01T00:00:20 |
++---------------------------------------------+
+
+> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels');
++---------------------------------------------+
+| to_local_time(Utf8("2024-04-01T00:00:20Z")) |
++---------------------------------------------+
+| 2024-04-01T00:00:20 |
++---------------------------------------------+
+
+> SELECT
+  time,
+  arrow_typeof(time) as type,
+  to_local_time(time) as to_local_time,
+  arrow_typeof(to_local_time(time)) as to_local_time_type
+FROM (
+  SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time
+);
++---------------------------+------------------------------------------------+---------------------+-----------------------------+
+| time | type | to_local_time | to_local_time_type |
++---------------------------+------------------------------------------------+---------------------+-----------------------------+
+| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) |
++---------------------------+------------------------------------------------+---------------------+-----------------------------+
+
+# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather
+# than UTC boundaries
+
+> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin;
++---------------------+
+| date_bin |
++---------------------+
+| 2024-04-01T00:00:00 |
++---------------------+
+
+> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone;
++---------------------------+
+| date_bin_with_timezone |
++---------------------------+
+| 2024-04-01T00:00:00+02:00 |
++---------------------------+
+```
+
+### `to_timestamp`
+
+Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
+
+Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for input outside of the supported bounds.
+
+```
+to_timestamp(expression[, ..., format_n])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.
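+
+Because formats are tried left to right, an input that fails the first format simply falls through to the next; a minimal illustrative sketch (the space-separated format string is an assumption for demonstration, not part of the canonical examples below):
+
+```sql
+-- '%+' (RFC3339) fails on the space-separated input, then '%Y-%m-%d %H:%M:%S' succeeds
+SELECT to_timestamp('2023-07-20 05:44:00', '%+', '%Y-%m-%d %H:%M:%S');
+```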
+
+#### Example
+
+```sql
+> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
++-----------------------------------------------------------+
+| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
++-----------------------------------------------------------+
+| 2023-01-31T14:26:56.123456789 |
++-----------------------------------------------------------+
+> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
++--------------------------------------------------------------------------------------------------------+
+| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
++--------------------------------------------------------------------------------------------------------+
+| 2023-05-17T03:59:00.123456789 |
++--------------------------------------------------------------------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
+
+### `to_timestamp_micros`
+
+Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
+
+```
+to_timestamp_micros(expression[, ..., format_n])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.
+
+#### Example
+
+```sql
+> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
++------------------------------------------------------------------+
+| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
++------------------------------------------------------------------+
+| 2023-01-31T14:26:56.123456 |
++------------------------------------------------------------------+
+> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
++---------------------------------------------------------------------------------------------------------------+
+| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
++---------------------------------------------------------------------------------------------------------------+
+| 2023-05-17T03:59:00.123456 |
++---------------------------------------------------------------------------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
+
+### `to_timestamp_millis`
+
+Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. 
Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
+
+```
+to_timestamp_millis(expression[, ..., format_n])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.
+
+#### Example
+
+```sql
+> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
++------------------------------------------------------------------+
+| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
++------------------------------------------------------------------+
+| 2023-01-31T14:26:56.123 |
++------------------------------------------------------------------+
+> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
++---------------------------------------------------------------------------------------------------------------+
+| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
++---------------------------------------------------------------------------------------------------------------+
+| 2023-05-17T03:59:00.123 |
++---------------------------------------------------------------------------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
+
+### `to_timestamp_nanos`
+
+Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
+
+```
+to_timestamp_nanos(expression[, ..., format_n])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.
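+
+For integer input the value is taken directly as nanoseconds since the epoch, so no format strings apply; a minimal illustrative sketch (the literal is `1599566400` seconds, the value used in the `to_unixtime` example elsewhere in these docs, scaled to nanoseconds):
+
+```sql
+-- 1599566400 s since the epoch, expressed in nanoseconds
+SELECT to_timestamp_nanos(1599566400000000000);
+```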
+
+#### Example
+
+```sql
+> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
++-----------------------------------------------------------------+
+| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
++-----------------------------------------------------------------+
+| 2023-01-31T14:26:56.123456789 |
++-----------------------------------------------------------------+
+> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
++--------------------------------------------------------------------------------------------------------------+
+| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
++--------------------------------------------------------------------------------------------------------------+
+| 2023-05-17T03:59:00.123456789 |
++--------------------------------------------------------------------------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
+
+### `to_timestamp_seconds`
+
+Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
+
+```
+to_timestamp_seconds(expression[, ..., format_n])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
+- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.
+
+#### Example
+
+```sql
+> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
++-------------------------------------------------------------------+
+| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
++-------------------------------------------------------------------+
+| 2023-01-31T14:26:56 |
++-------------------------------------------------------------------+
+> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
++----------------------------------------------------------------------------------------------------------------+
+| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
++----------------------------------------------------------------------------------------------------------------+
+| 2023-05-17T03:59:00 |
++----------------------------------------------------------------------------------------------------------------+
+```
+
+Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
+
+### `to_unixtime`
+
+Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. 
'2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. + +``` +to_unixtime(expression[, ..., format_n]) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. + +#### Example + +```sql +> select to_unixtime('2020-09-08T12:00:00+00:00'); ++------------------------------------------------+ +| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) | ++------------------------------------------------+ +| 1599566400 | ++------------------------------------------------+ +> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); ++-----------------------------------------------------------------------------------------------------------------------------+ +| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) | ++-----------------------------------------------------------------------------------------------------------------------------+ +| 1673638290 | ++-----------------------------------------------------------------------------------------------------------------------------+ +``` + +### `today` + +_Alias of [current_date](#current_date)._ + ## Array Functions - [array_any_value](#array_any_value) From 700b07fd64b96e3f66ef01dce13dcef7c8588437 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Oct 2024 20:12:37 -0400 Subject: [PATCH 11/15] Fix CI / regenerate functions (#12991) --- docs/source/user-guide/sql/scalar_functions_new.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md index 8f1e30f1fa53..ffc2b680b5c5 100644 --- a/docs/source/user-guide/sql/scalar_functions_new.md +++ b/docs/source/user-guide/sql/scalar_functions_new.md @@ -1802,6 +1802,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo - [current_date](#current_date) - [current_time](#current_time) +- [current_timestamp](#current_timestamp) - [date_bin](#date_bin) - [date_format](#date_format) - [date_part](#date_part) @@ -1846,6 +1847,10 @@ The `current_time()` return value is determined at query time and will return th current_time() ``` +### `current_timestamp` + +_Alias of [now](#now)._ + ### `date_bin` Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window. @@ -2003,6 +2008,10 @@ The `now()` return value is determined at query time and will return the same ti now() ``` +#### Aliases + +- current_timestamp + ### `to_char` Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported. 
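
The alias wiring regenerated above can be exercised directly; a minimal illustrative check (both names resolve to the same function, whose value is fixed for the duration of a query):

```sql
SELECT now() = current_timestamp();
```
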
From efe5708978a480d11d5406a7d7df76d73e15c5d7 Mon Sep 17 00:00:00 2001
From: jcsherin 
Date: Fri, 18 Oct 2024 16:56:41 +0530
Subject: [PATCH 12/15] Convert `BuiltInWindowFunction::{Lead, Lag}` to a user
 defined window function (#12857)

* Move `lead-lag` to `functions-window` package
* Builds with warnings
* Adds `PartitionEvaluatorArgs`
* Extracts `shift_offset` from input expressions
* Computes shift offset
* Get default value from input expression
* Implements `partition_evaluator`
* Fixes compiler warnings
* Comments out failing tests
* Fixes `cargo test` errors and warnings
* Minor: taplo formatting
* Delete code
* Define `lead`, `lag` user-defined window functions
* Fixes `cargo build` errors
* Export udwf and expression public APIs
* Mark result field as nullable
* Delete `return_type` tests for `lead` and `lag`
* Disables test: window function case insensitive
* Fixes: lowercase name in logical plan
* Reverts to old methods for computing `shift_offset`, `default_value`
* Implements expression reversal
* Fixes: lowercase name in logical plans
* Fixes: doc test compilation errors
Fixes: doc test build errors
* Temporarily quiet clippy errors
* Fixes proto definition
* Minor: fixes formatting
* Fixes: doc tests
* Uses macro for defining `lag_udwf()` and `lead_udwf()`
* Fixes: window fuzz test cases
* Copies doc comments verbatim from `BuiltInWindowFunction` enum
* Deletes from window function case insensitive test
* Deletes `BuiltInWindowFunction` expression APIs
* Delete from `create_built_in_window_expr`
* Deletes proto serialization
* Delete from `BuiltInWindowFunction` enum
* Deletes test for finding built-in window function
* Fixes build errors + deletes redundant code
* Deletes more code
* Delete unnecessary structs
* Refactors shift offset computation
* Passes range unit test
* Fixes: clippy::get-first error
* Rewrite unit tests for WindowUDF
* Fixes: unit test for lag with default value
* Consistent input expressions and data types in unit tests
* Minor: fixes formatting
* Restore original helper method for unit tests
* Revert "Refactors shift offset computation"
This reverts commit 000ceb76409e66230f9c5017a30fa3c9bb1e6575.
* Moves helper functions into `functions-window-common` package
* Uses common helper functions in `{lead, lag}`
* Minor: formatting
* Revert "Moves helper functions into `functions-window-common` package"
This reverts commit ab8a83c9c11ca3a245278f6f300438feaacb0978.
* Moves common functions to utils
* Minor: formatting fixes
* Update lowercase names in explain output
* Adds doc for `lead()` and `lag()` expression functions
* Add doc for `WindowShiftKind::shift_offset`
* Remove `arrow` dev dependency
* Minor: formatting
* Update inner doc comment
* Serialize 1 or more window function arguments
* Adds logical plan roundtrip test cases
* Refactor: readability of unit tests
* Minor: rename variable bindings
* Minor: copy edit
* Revert "Remove `arrow` dev dependency"
This reverts commit 3eb09856c8ec4ddce20472deee2df590c2fd3f35.
* Move null argument handling helper to utils
* Disable failing sqllogic tests for handling NULL input
* Revert "Disable failing sqllogic tests for handling NULL input"
This reverts commit 270a2030637012d549c001e973a0a1bb6b3d4dd0.
* Fixes: incorrect NULL handling in `lead`/`lag` window function
* Adds more test cases

---------

Co-authored-by: Andrew Lamb 
---
 datafusion-cli/Cargo.lock                     |   1 +
 .../core/tests/fuzz_cases/window_fuzz.rs      |  13 +-
 .../expr/src/built_in_window_function.rs      |  32 +-
 datafusion/expr/src/expr.rs                   |  38 --
 datafusion/expr/src/udwf.rs                   |  23 +
 datafusion/expr/src/window_function.rs        |  34 --
 .../functions-window-common/src/expr.rs       |  64 +++
 datafusion/functions-window-common/src/lib.rs |   1 +
 datafusion/functions-window/Cargo.toml        |   1 +
 .../src}/lead_lag.rs                          | 392 ++++++++++++------
 datafusion/functions-window/src/lib.rs        |   8 +
 datafusion/functions-window/src/utils.rs      |  53 +++
 .../physical-expr/src/expressions/mod.rs      |   1 -
 datafusion/physical-expr/src/window/mod.rs    |   1 -
 datafusion/physical-plan/src/windows/mod.rs   |  88 +---
 datafusion/proto/proto/datafusion.proto       |   6 +-
 datafusion/proto/src/generated/pbjson.rs      |  30 +-
 datafusion/proto/src/generated/prost.rs       |  14 +-
 .../proto/src/logical_plan/from_proto.rs      |  17 +-
 datafusion/proto/src/logical_plan/to_proto.rs |  14 +-
 .../proto/src/physical_plan/to_proto.rs       |  20 -
 .../tests/cases/roundtrip_logical_plan.rs     |  12 +-
 datafusion/sqllogictest/test_files/union.slt  |   8 +-
 datafusion/sqllogictest/test_files/window.slt |  56 ++-
 24 files changed, 520 insertions(+), 407 deletions(-)
 create mode 100644 datafusion/functions-window-common/src/expr.rs
 rename datafusion/{physical-expr/src/window => functions-window/src}/lead_lag.rs (59%)
 create mode 100644 datafusion/functions-window/src/utils.rs

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index aa64e14fca8e..dfd07a7658ff 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -1445,6 +1445,7 @@ dependencies = [
 "datafusion-common",
 "datafusion-expr",
 "datafusion-functions-window-common",
+ "datafusion-physical-expr",
 "datafusion-physical-expr-common",
 "log",
 "paste",
diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs
index 4a33334770a0..d649919f1b6a 100644
--- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs
@@ -45,6 +45,7 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr};
 use test_utils::add_empty_batches;
 
 use datafusion::functions_window::row_number::row_number_udwf;
+use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf};
 use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf};
 use hashbrown::HashMap;
 use rand::distributions::Alphanumeric;
@@ -197,7 +198,7 @@ async fn bounded_window_causal_non_causal() -> Result<()> {
         // )
         (
             // Window function
-            WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Lag),
+            WindowFunctionDefinition::WindowUDF(lag_udwf()),
             // its name
             "LAG",
             // no argument
@@ -211,7 +212,7 @@ async fn bounded_window_causal_non_causal() -> Result<()> {
         // )
         (
             // Window function
-            WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Lead),
+            WindowFunctionDefinition::WindowUDF(lead_udwf()),
             // its name
             "LEAD",
             // no argument
@@ -393,9 +394,7 @@ fn get_random_function(
     window_fn_map.insert(
         "lead",
         (
-            WindowFunctionDefinition::BuiltInWindowFunction(
-                BuiltInWindowFunction::Lead,
-            ),
+            WindowFunctionDefinition::WindowUDF(lead_udwf()),
             vec![
                 arg.clone(),
                 lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))),
@@ -406,9 +405,7 @@ fn get_random_function(
     window_fn_map.insert(
         "lag",
         (
-            WindowFunctionDefinition::BuiltInWindowFunction(
-                BuiltInWindowFunction::Lag,
-            ),
+ 
WindowFunctionDefinition::WindowUDF(lag_udwf()), vec![ arg.clone(), lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), diff --git a/datafusion/expr/src/built_in_window_function.rs b/datafusion/expr/src/built_in_window_function.rs index 6a30080fb38b..2c70a07a4e15 100644 --- a/datafusion/expr/src/built_in_window_function.rs +++ b/datafusion/expr/src/built_in_window_function.rs @@ -22,7 +22,7 @@ use std::str::FromStr; use crate::type_coercion::functions::data_types; use crate::utils; -use crate::{Signature, TypeSignature, Volatility}; +use crate::{Signature, Volatility}; use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result}; use arrow::datatypes::DataType; @@ -44,17 +44,7 @@ pub enum BuiltInWindowFunction { CumeDist, /// Integer ranging from 1 to the argument value, dividing the partition as equally as possible Ntile, - /// Returns value evaluated at the row that is offset rows before the current row within the partition; - /// If there is no such row, instead return default (which must be of the same type as value). - /// Both offset and default are evaluated with respect to the current row. - /// If omitted, offset defaults to 1 and default to null - Lag, - /// Returns value evaluated at the row that is offset rows after the current row within the partition; - /// If there is no such row, instead return default (which must be of the same type as value). - /// Both offset and default are evaluated with respect to the current row. - /// If omitted, offset defaults to 1 and default to null - Lead, - /// Returns value evaluated at the row that is the first row of the window frame + /// returns value evaluated at the row that is the first row of the window frame FirstValue, /// Returns value evaluated at the row that is the last row of the window frame LastValue, @@ -68,8 +58,6 @@ impl BuiltInWindowFunction { match self { CumeDist => "CUME_DIST", Ntile => "NTILE", - Lag => "LAG", - Lead => "LEAD", FirstValue => "first_value", LastValue => "last_value", NthValue => "NTH_VALUE", @@ -83,8 +71,6 @@ impl FromStr for BuiltInWindowFunction { Ok(match name.to_uppercase().as_str() { "CUME_DIST" => BuiltInWindowFunction::CumeDist, "NTILE" => BuiltInWindowFunction::Ntile, - "LAG" => BuiltInWindowFunction::Lag, - "LEAD" => BuiltInWindowFunction::Lead, "FIRST_VALUE" => BuiltInWindowFunction::FirstValue, "LAST_VALUE" => BuiltInWindowFunction::LastValue, "NTH_VALUE" => BuiltInWindowFunction::NthValue, @@ -117,9 +103,7 @@ impl BuiltInWindowFunction { match self { BuiltInWindowFunction::Ntile => Ok(DataType::UInt64), BuiltInWindowFunction::CumeDist => Ok(DataType::Float64), - BuiltInWindowFunction::Lag - | BuiltInWindowFunction::Lead - | BuiltInWindowFunction::FirstValue + BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue | BuiltInWindowFunction::NthValue => Ok(input_expr_types[0].clone()), } @@ -130,16 +114,6 @@ impl BuiltInWindowFunction { // Note: The physical expression must accept the type returned by this function or the execution panics. 
match self {
         BuiltInWindowFunction::CumeDist => Signature::any(0, Volatility::Immutable),
-        BuiltInWindowFunction::Lag | BuiltInWindowFunction::Lead => {
-            Signature::one_of(
-                vec![
-                    TypeSignature::Any(1),
-                    TypeSignature::Any(2),
-                    TypeSignature::Any(3),
-                ],
-                Volatility::Immutable,
-            )
-        }
         BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue => {
             Signature::any(1, Volatility::Immutable)
         }
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 3e692189e488..f3f71a87278b 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -2560,30 +2560,6 @@ mod test {
         Ok(())
     }
 
-    #[test]
-    fn test_lead_return_type() -> Result<()> {
-        let fun = find_df_window_func("lead").unwrap();
-        let observed = fun.return_type(&[DataType::Utf8], &[true], "")?;
-        assert_eq!(DataType::Utf8, observed);
-
-        let observed = fun.return_type(&[DataType::Float64], &[true], "")?;
-        assert_eq!(DataType::Float64, observed);
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_lag_return_type() -> Result<()> {
-        let fun = find_df_window_func("lag").unwrap();
-        let observed = fun.return_type(&[DataType::Utf8], &[true], "")?;
-        assert_eq!(DataType::Utf8, observed);
-
-        let observed = fun.return_type(&[DataType::Float64], &[true], "")?;
-        assert_eq!(DataType::Float64, observed);
-
-        Ok(())
-    }
-
     #[test]
     fn test_nth_value_return_type() -> Result<()> {
         let fun = find_df_window_func("nth_value").unwrap();
@@ -2621,8 +2597,6 @@ mod test {
         let names = vec![
             "cume_dist",
             "ntile",
-            "lag",
-            "lead",
             "first_value",
             "last_value",
             "nth_value",
@@ -2660,18 +2634,6 @@ mod test {
                 built_in_window_function::BuiltInWindowFunction::LastValue
             ))
         );
-        assert_eq!(
-            find_df_window_func("LAG"),
-            Some(WindowFunctionDefinition::BuiltInWindowFunction(
-                built_in_window_function::BuiltInWindowFunction::Lag
-            ))
-        );
-        assert_eq!(
-            find_df_window_func("LEAD"),
-            Some(WindowFunctionDefinition::BuiltInWindowFunction(
-                built_in_window_function::BuiltInWindowFunction::Lead
-            ))
-        );
         assert_eq!(find_df_window_func("not_exist"), None)
     }
diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs
index 6d8f2be97e02..6ab94c1e841a 100644
--- a/datafusion/expr/src/udwf.rs
+++ b/datafusion/expr/src/udwf.rs
@@ -34,8 +34,10 @@ use crate::{
     Signature,
 };
 use datafusion_common::{not_impl_err, Result};
+use datafusion_functions_window_common::expr::ExpressionArgs;
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
 
 /// Logical representation of a user-defined window function (UDWF)
 /// A UDWF is different from a UDF in that it is stateful across batches.
@@ -149,6 +151,12 @@ impl WindowUDF {
         self.inner.simplify()
     }
 
+    /// Expressions that are passed to the [`PartitionEvaluator`].
+    ///
+    /// See [`WindowUDFImpl::expressions`] for more details.
+    pub fn expressions(&self, expr_args: ExpressionArgs) -> Vec<Arc<dyn PhysicalExpr>> {
+        self.inner.expressions(expr_args)
+    }
     /// Return a `PartitionEvaluator` for evaluating this window function
     pub fn partition_evaluator_factory(
         &self,
@@ -302,6 +310,14 @@ pub trait WindowUDFImpl: Debug + Send + Sync {
     /// types are accepted and the function's Volatility.
     fn signature(&self) -> &Signature;
 
+    /// Returns the expressions that are passed to the [`PartitionEvaluator`]. 
+    fn expressions(&self, expr_args: ExpressionArgs) -> Vec<Arc<dyn PhysicalExpr>> {
+        expr_args
+            .input_exprs()
+            .first()
+            .map_or(vec![], |expr| vec![Arc::clone(expr)])
+    }
+
     /// Invoke the function, returning the [`PartitionEvaluator`] instance
     fn partition_evaluator(
         &self,
@@ -480,6 +496,13 @@ impl WindowUDFImpl for AliasedWindowUDFImpl {
         self.inner.signature()
     }
 
+    fn expressions(&self, expr_args: ExpressionArgs) -> Vec<Arc<dyn PhysicalExpr>> {
+        expr_args
+            .input_exprs()
+            .first()
+            .map_or(vec![], |expr| vec![Arc::clone(expr)])
+    }
+
     fn partition_evaluator(
         &self,
         partition_evaluator_args: PartitionEvaluatorArgs,
diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs
index 7ac6fb7d167c..3e1870c59c15 100644
--- a/datafusion/expr/src/window_function.rs
+++ b/datafusion/expr/src/window_function.rs
@@ -15,8 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use datafusion_common::ScalarValue;
-
 use crate::{expr::WindowFunction, BuiltInWindowFunction, Expr, Literal};
 
 /// Create an expression to represent the `cume_dist` window function
@@ -29,38 +27,6 @@ pub fn ntile(arg: Expr) -> Expr {
     Expr::WindowFunction(WindowFunction::new(BuiltInWindowFunction::Ntile, vec![arg]))
 }
 
-/// Create an expression to represent the `lag` window function
-pub fn lag(
-    arg: Expr,
-    shift_offset: Option<i64>,
-    default_value: Option<ScalarValue>,
-) -> Expr {
-    let shift_offset_lit = shift_offset
-        .map(|v| v.lit())
-        .unwrap_or(ScalarValue::Null.lit());
-    let default_lit = default_value.unwrap_or(ScalarValue::Null).lit();
-    Expr::WindowFunction(WindowFunction::new(
-        BuiltInWindowFunction::Lag,
-        vec![arg, shift_offset_lit, default_lit],
-    ))
-}
-
-/// Create an expression to represent the `lead` window function
-pub fn lead(
-    arg: Expr,
-    shift_offset: Option<i64>,
-    default_value: Option<ScalarValue>,
-) -> Expr {
-    let shift_offset_lit = shift_offset
-        .map(|v| v.lit())
-        .unwrap_or(ScalarValue::Null.lit());
-    let default_lit = default_value.unwrap_or(ScalarValue::Null).lit();
-    Expr::WindowFunction(WindowFunction::new(
-        BuiltInWindowFunction::Lead,
-        vec![arg, shift_offset_lit, default_lit],
-    ))
-}
-
 /// Create an expression to represent the `nth_value` window function
 pub fn nth_value(arg: Expr, n: i64) -> Expr {
     Expr::WindowFunction(WindowFunction::new(
diff --git a/datafusion/functions-window-common/src/expr.rs b/datafusion/functions-window-common/src/expr.rs
new file mode 100644
index 000000000000..1d99fe7acf15
--- /dev/null
+++ b/datafusion/functions-window-common/src/expr.rs
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License. 
+
+use datafusion_common::arrow::datatypes::DataType;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+use std::sync::Arc;
+
+/// Arguments passed to user-defined window function
+#[derive(Debug, Default)]
+pub struct ExpressionArgs<'a> {
+    /// The expressions passed as arguments to the user-defined window
+    /// function.
+    input_exprs: &'a [Arc<dyn PhysicalExpr>],
+    /// The corresponding data types of expressions passed as arguments
+    /// to the user-defined window function.
+    input_types: &'a [DataType],
+}
+
+impl<'a> ExpressionArgs<'a> {
+    /// Create an instance of [`ExpressionArgs`].
+    ///
+    /// # Arguments
+    ///
+    /// * `input_exprs` - The expressions passed as arguments
+    /// to the user-defined window function.
+    /// * `input_types` - The data types corresponding to the
+    /// arguments to the user-defined window function.
+    ///
+    pub fn new(
+        input_exprs: &'a [Arc<dyn PhysicalExpr>],
+        input_types: &'a [DataType],
+    ) -> Self {
+        Self {
+            input_exprs,
+            input_types,
+        }
+    }
+
+    /// Returns the expressions passed as arguments to the user-defined
+    /// window function.
+    pub fn input_exprs(&self) -> &'a [Arc<dyn PhysicalExpr>] {
+        self.input_exprs
+    }
+
+    /// Returns the [`DataType`]s corresponding to the input expressions
+    /// to the user-defined window function.
+    pub fn input_types(&self) -> &'a [DataType] {
+        self.input_types
+    }
+}
diff --git a/datafusion/functions-window-common/src/lib.rs b/datafusion/functions-window-common/src/lib.rs
index 53f9eb1c9ac6..da8d096da562 100644
--- a/datafusion/functions-window-common/src/lib.rs
+++ b/datafusion/functions-window-common/src/lib.rs
@@ -18,5 +18,6 @@
 //! Common user-defined window functionality for [DataFusion]
 //!
 //! [DataFusion]: 
+pub mod expr;
 pub mod field;
 pub mod partition;
diff --git a/datafusion/functions-window/Cargo.toml b/datafusion/functions-window/Cargo.toml
index 952e5720c77c..262c21fcec65 100644
--- a/datafusion/functions-window/Cargo.toml
+++ b/datafusion/functions-window/Cargo.toml
@@ -41,6 +41,7 @@ path = "src/lib.rs"
 datafusion-common = { workspace = true }
 datafusion-expr = { workspace = true }
 datafusion-functions-window-common = { workspace = true }
+datafusion-physical-expr = { workspace = true }
 datafusion-physical-expr-common = { workspace = true }
 log = { workspace = true }
 paste = "1.0.15"
diff --git a/datafusion/physical-expr/src/window/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs
similarity index 59%
rename from datafusion/physical-expr/src/window/lead_lag.rs
rename to datafusion/functions-window/src/lead_lag.rs
index 1656b7c3033a..f81521099751 100644
--- a/datafusion/physical-expr/src/window/lead_lag.rs
+++ b/datafusion/functions-window/src/lead_lag.rs
@@ -15,125 +15,275 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Defines physical expression for `lead` and `lag` that can evaluated
-//! at runtime during query execution
-use crate::window::BuiltInWindowFunctionExpr;
-use crate::PhysicalExpr;
-use arrow::array::ArrayRef;
-use arrow::datatypes::{DataType, Field};
-use arrow_array::Array;
+//! 
`lead` and `lag` window function implementations
+
+use crate::utils::{get_scalar_value_from_args, get_signed_integer};
+use datafusion_common::arrow::array::ArrayRef;
+use datafusion_common::arrow::datatypes::DataType;
+use datafusion_common::arrow::datatypes::Field;
 use datafusion_common::{arrow_datafusion_err, DataFusionError, Result, ScalarValue};
-use datafusion_expr::PartitionEvaluator;
+use datafusion_expr::{
+    Literal, PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, Volatility,
+    WindowUDFImpl,
+};
+use datafusion_functions_window_common::expr::ExpressionArgs;
+use datafusion_functions_window_common::field::WindowUDFFieldArgs;
+use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
 use std::any::Any;
 use std::cmp::min;
 use std::collections::VecDeque;
 use std::ops::{Neg, Range};
 use std::sync::Arc;
 
-/// window shift expression
+get_or_init_udwf!(
+    Lag,
+    lag,
+    "Returns the row value that precedes the current row by a specified \
+    offset within partition. If no such row exists, then returns the \
+    default value.",
+    WindowShift::lag
+);
+get_or_init_udwf!(
+    Lead,
+    lead,
+    "Returns the value from a row that follows the current row by a \
+    specified offset within the partition. If no such row exists, then \
+    returns the default value.",
+    WindowShift::lead
+);
+
+/// Create an expression to represent the `lag` window function
+///
+/// returns value evaluated at the row that is offset rows before the current row within the partition;
+/// if there is no such row, instead return default (which must be of the same type as value).
+/// Both offset and default are evaluated with respect to the current row.
+/// If omitted, offset defaults to 1 and default to null
+pub fn lag(
+    arg: datafusion_expr::Expr,
+    shift_offset: Option<i64>,
+    default_value: Option<ScalarValue>,
+) -> datafusion_expr::Expr {
+    let shift_offset_lit = shift_offset
+        .map(|v| v.lit())
+        .unwrap_or(ScalarValue::Null.lit());
+    let default_lit = default_value.unwrap_or(ScalarValue::Null).lit();
+
+    lag_udwf().call(vec![arg, shift_offset_lit, default_lit])
+}
+
+/// Create an expression to represent the `lead` window function
+///
+/// returns value evaluated at the row that is offset rows after the current row within the partition;
+/// if there is no such row, instead return default (which must be of the same type as value).
+/// Both offset and default are evaluated with respect to the current row.
+/// If omitted, offset defaults to 1 and default to null
+pub fn lead(
+    arg: datafusion_expr::Expr,
+    shift_offset: Option<i64>,
+    default_value: Option<ScalarValue>,
+) -> datafusion_expr::Expr {
+    let shift_offset_lit = shift_offset
+        .map(|v| v.lit())
+        .unwrap_or(ScalarValue::Null.lit());
+    let default_lit = default_value.unwrap_or(ScalarValue::Null).lit();
+
+    lead_udwf().call(vec![arg, shift_offset_lit, default_lit])
+}
+
 #[derive(Debug)]
-pub struct WindowShift {
-    name: String,
-    /// Output data type
-    data_type: DataType,
-    shift_offset: i64,
-    expr: Arc<dyn PhysicalExpr>,
-    default_value: ScalarValue,
-    ignore_nulls: bool,
+enum WindowShiftKind {
+    Lag,
+    Lead,
 }
 
-impl WindowShift {
-    /// Get shift_offset of window shift expression
-    pub fn get_shift_offset(&self) -> i64 {
-        self.shift_offset
+impl WindowShiftKind {
+    fn name(&self) -> &'static str {
+        match self {
+            WindowShiftKind::Lag => "lag",
+            WindowShiftKind::Lead => "lead",
+        }
     }
 
-    /// Get the default_value for window shift expression. 
-    pub fn get_default_value(&self) -> ScalarValue {
-        self.default_value.clone()
+    /// In [`WindowShiftEvaluator`] a positive offset is used to signal
+    /// computation of `lag()`. So here we negate the input offset
+    /// value when computing `lead()`.
+    fn shift_offset(&self, value: Option<i64>) -> i64 {
+        match self {
+            WindowShiftKind::Lag => value.unwrap_or(1),
+            WindowShiftKind::Lead => value.map(|v| v.neg()).unwrap_or(-1),
+        }
     }
 }
 
-/// lead() window function
-pub fn lead(
-    name: String,
-    data_type: DataType,
-    expr: Arc<dyn PhysicalExpr>,
-    shift_offset: Option<i64>,
-    default_value: ScalarValue,
-    ignore_nulls: bool,
-) -> WindowShift {
-    WindowShift {
-        name,
-        data_type,
-        shift_offset: shift_offset.map(|v| v.neg()).unwrap_or(-1),
-        expr,
-        default_value,
-        ignore_nulls,
-    }
+/// window shift expression
+#[derive(Debug)]
+pub struct WindowShift {
+    signature: Signature,
+    kind: WindowShiftKind,
 }
 
-/// lag() window function
-pub fn lag(
-    name: String,
-    data_type: DataType,
-    expr: Arc<dyn PhysicalExpr>,
-    shift_offset: Option<i64>,
-    default_value: ScalarValue,
-    ignore_nulls: bool,
-) -> WindowShift {
-    WindowShift {
-        name,
-        data_type,
-        shift_offset: shift_offset.unwrap_or(1),
-        expr,
-        default_value,
-        ignore_nulls,
+impl WindowShift {
+    fn new(kind: WindowShiftKind) -> Self {
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    TypeSignature::Any(1),
+                    TypeSignature::Any(2),
+                    TypeSignature::Any(3),
+                ],
+                Volatility::Immutable,
+            ),
+            kind,
+        }
+    }
+
+    pub fn lag() -> Self {
+        Self::new(WindowShiftKind::Lag)
+    }
+
+    pub fn lead() -> Self {
+        Self::new(WindowShiftKind::Lead)
     }
 }
 
-impl BuiltInWindowFunctionExpr for WindowShift {
-    /// Return a reference to Any that can be used for downcasting
+impl WindowUDFImpl for WindowShift {
     fn as_any(&self) -> &dyn Any {
         self
     }
 
-    fn field(&self) -> Result<Field> {
-        let nullable = true;
-        Ok(Field::new(&self.name, self.data_type.clone(), nullable))
+    fn name(&self) -> &str {
+        self.kind.name()
     }
 
-    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        vec![Arc::clone(&self.expr)]
+    fn signature(&self) -> &Signature {
+        &self.signature
     }
 
-    fn name(&self) -> &str {
-        &self.name
+    /// Handles the case where `NULL` expression is passed as an
+    /// argument to `lead`/`lag`. The type is refined depending
+    /// on the default value argument.
+    ///
+    /// For more details see: 
+    fn expressions(&self, expr_args: ExpressionArgs) -> Vec<Arc<dyn PhysicalExpr>> {
+        parse_expr(expr_args.input_exprs(), expr_args.input_types())
+            .into_iter()
+            .collect::<Vec<_>>()
+    }
 
-    fn create_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> {
+    fn partition_evaluator(
+        &self,
+        partition_evaluator_args: PartitionEvaluatorArgs,
+    ) -> Result<Box<dyn PartitionEvaluator>> {
+        let shift_offset =
+            get_scalar_value_from_args(partition_evaluator_args.input_exprs(), 1)? 
+                .map(get_signed_integer)
+                .map_or(Ok(None), |v| v.map(Some))
+                .map(|n| self.kind.shift_offset(n))
+                .map(|offset| {
+                    if partition_evaluator_args.is_reversed() {
+                        -offset
+                    } else {
+                        offset
+                    }
+                })?;
+        let default_value = parse_default_value(
+            partition_evaluator_args.input_exprs(),
+            partition_evaluator_args.input_types(),
+        )?;
+
         Ok(Box::new(WindowShiftEvaluator {
-            shift_offset: self.shift_offset,
-            default_value: self.default_value.clone(),
-            ignore_nulls: self.ignore_nulls,
+            shift_offset,
+            default_value,
+            ignore_nulls: partition_evaluator_args.ignore_nulls(),
             non_null_offsets: VecDeque::new(),
         }))
     }
 
-    fn reverse_expr(&self) -> Option<Arc<dyn BuiltInWindowFunctionExpr>> {
-        Some(Arc::new(Self {
-            name: self.name.clone(),
-            data_type: self.data_type.clone(),
-            shift_offset: -self.shift_offset,
-            expr: Arc::clone(&self.expr),
-            default_value: self.default_value.clone(),
-            ignore_nulls: self.ignore_nulls,
-        }))
+    fn field(&self, field_args: WindowUDFFieldArgs) -> Result<Field> {
+        let return_type = parse_expr_type(field_args.input_types())?;
+
+        Ok(Field::new(field_args.name(), return_type, true))
     }
+
+    fn reverse_expr(&self) -> ReversedUDWF {
+        match self.kind {
+            WindowShiftKind::Lag => ReversedUDWF::Reversed(lag_udwf()),
+            WindowShiftKind::Lead => ReversedUDWF::Reversed(lead_udwf()),
+        }
+    }
+}
+
+/// When `lead`/`lag` is evaluated on a `NULL` expression we attempt to
+/// refine it by matching it with the type of the default value.
+///
+/// For e.g. in `lead(NULL, 1, false)` the generic `ScalarValue::Null`
+/// is refined into `ScalarValue::Boolean(None)`. Only the type is
+/// refined, the expression value remains `NULL`.
+///
+/// When the window function is evaluated with `NULL` expression
+/// this guarantees that the type matches with that of the default
+/// value.
+///
+/// For more details see: 
+fn parse_expr(
+    input_exprs: &[Arc<dyn PhysicalExpr>],
+    input_types: &[DataType],
+) -> Result<Arc<dyn PhysicalExpr>> {
+    assert!(!input_exprs.is_empty());
+    assert!(!input_types.is_empty());
+
+    let expr = Arc::clone(input_exprs.first().unwrap());
+    let expr_type = input_types.first().unwrap();
+
+    // Handles the most common case where NULL is unexpected
+    if !expr_type.is_null() {
+        return Ok(expr);
+    }
+
+    let default_value = get_scalar_value_from_args(input_exprs, 2)?;
+    default_value.map_or(Ok(expr), |value| {
+        ScalarValue::try_from(&value.data_type()).map(|v| {
+            Arc::new(datafusion_physical_expr::expressions::Literal::new(v))
+                as Arc<dyn PhysicalExpr>
+        })
+    })
+}
+
+/// Returns the data type of the default value (if provided) when the
+/// expression is `NULL`.
+///
+/// Otherwise, returns the expression type unchanged.
+fn parse_expr_type(input_types: &[DataType]) -> Result<DataType> {
+    assert!(!input_types.is_empty());
+    let expr_type = input_types.first().unwrap_or(&DataType::Null);
+
+    // Handles the most common case where NULL is unexpected
+    if !expr_type.is_null() {
+        return Ok(expr_type.clone());
+    }
+
+    let default_value_type = input_types.get(2).unwrap_or(&DataType::Null);
+    Ok(default_value_type.clone())
+}
+
+/// Handles type coercion and null value refinement for default value
+/// argument depending on the data type of the input expression. 
+fn parse_default_value(
+    input_exprs: &[Arc<dyn PhysicalExpr>],
+    input_types: &[DataType],
+) -> Result<ScalarValue> {
+    let expr_type = parse_expr_type(input_types)?;
+    let unparsed = get_scalar_value_from_args(input_exprs, 2)?;
+
+    unparsed
+        .filter(|v| !v.data_type().is_null())
+        .map(|v| v.cast_to(&expr_type))
+        .unwrap_or(ScalarValue::try_from(expr_type))
}
 
 #[derive(Debug)]
-pub(crate) struct WindowShiftEvaluator {
+struct WindowShiftEvaluator {
     shift_offset: i64,
     default_value: ScalarValue,
     ignore_nulls: bool,
@@ -205,7 +355,7 @@ fn shift_with_default_value(
     offset: i64,
     default_value: &ScalarValue,
 ) -> Result<ArrayRef> {
-    use arrow::compute::concat;
+    use datafusion_common::arrow::compute::concat;
 
     let value_len = array.len() as i64;
     if offset == 0 {
@@ -402,19 +552,22 @@ impl PartitionEvaluator for WindowShiftEvaluator {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::expressions::Column;
-    use arrow::{array::*, datatypes::*};
+    use arrow::array::*;
     use datafusion_common::cast::as_int32_array;
-
-    fn test_i32_result(expr: WindowShift, expected: Int32Array) -> Result<()> {
+    use datafusion_physical_expr::expressions::{Column, Literal};
+    use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+
+    fn test_i32_result(
+        expr: WindowShift,
+        partition_evaluator_args: PartitionEvaluatorArgs,
+        expected: Int32Array,
+    ) -> Result<()> {
         let arr: ArrayRef = Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8]));
         let values = vec![arr];
-        let schema = Schema::new(vec![Field::new("arr", DataType::Int32, false)]);
-        let batch = RecordBatch::try_new(Arc::new(schema), values.clone())?;
-        let values = expr.evaluate_args(&batch)?;
+        let num_rows = values.len();
         let result = expr
-            .create_evaluator()?
-            .evaluate_all(&values, batch.num_rows())?;
+            .partition_evaluator(partition_evaluator_args)? 
+            .evaluate_all(&values, num_rows)?;
         let result = as_int32_array(&result)?;
         assert_eq!(expected, *result);
         Ok(())
@@ -466,16 +619,12 @@
     }
 
     #[test]
-    fn lead_lag_window_shift() -> Result<()> {
+    fn test_lead_window_shift() -> Result<()> {
+        let expr = Arc::new(Column::new("c3", 0)) as Arc<dyn PhysicalExpr>;
+
         test_i32_result(
-            lead(
-                "lead".to_owned(),
-                DataType::Int32,
-                Arc::new(Column::new("c3", 0)),
-                None,
-                ScalarValue::Null.cast_to(&DataType::Int32)?,
-                false,
-            ),
+            WindowShift::lead(),
+            PartitionEvaluatorArgs::new(&[expr], &[DataType::Int32], false, false),
             [
                 Some(-2),
                 Some(3),
                 Some(-4),
                 Some(5),
                 Some(-6),
                 Some(7),
                 Some(8),
                 None,
             ]
             .iter()
             .collect::<Int32Array>(),
-        )?;
+        )
+    }
+
+    #[test]
+    fn test_lag_window_shift() -> Result<()> {
+        let expr = Arc::new(Column::new("c3", 0)) as Arc<dyn PhysicalExpr>;
 
         test_i32_result(
-            lag(
-                "lead".to_owned(),
-                DataType::Int32,
-                Arc::new(Column::new("c3", 0)),
-                None,
-                ScalarValue::Null.cast_to(&DataType::Int32)?,
-                false,
-            ),
+            WindowShift::lag(),
+            PartitionEvaluatorArgs::new(&[expr], &[DataType::Int32], false, false),
             [
                 None,
                 Some(1),
                 Some(-2),
                 Some(3),
                 Some(-4),
                 Some(5),
                 Some(-6),
                 Some(7),
             ]
             .iter()
             .collect::<Int32Array>(),
-        )?;
+        )
+    }
+
+    #[test]
+    fn test_lag_with_default() -> Result<()> {
+        let expr = Arc::new(Column::new("c3", 0)) as Arc<dyn PhysicalExpr>;
+        let shift_offset =
+            Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc<dyn PhysicalExpr>;
+        let default_value = Arc::new(Literal::new(ScalarValue::Int32(Some(100))))
+            as Arc<dyn PhysicalExpr>;
+
+        let input_exprs = &[expr, shift_offset, default_value];
+        let input_types: &[DataType] =
+            &[DataType::Int32, DataType::Int32, DataType::Int32];
 
         test_i32_result(
-            lag(
-                "lead".to_owned(),
-                DataType::Int32,
-                Arc::new(Column::new("c3", 0)),
-                None,
-                ScalarValue::Int32(Some(100)),
-                false,
-            ),
+            WindowShift::lag(),
+            PartitionEvaluatorArgs::new(input_exprs, input_types, false, false),
             [
                 Some(100),
                 Some(1),
                 Some(-2),
                 Some(3),
                 Some(-4),
                 Some(5),
                 Some(-6),
                 Some(7),
             ]
             .iter()
             .collect::<Int32Array>(),
-        )?;
-        Ok(())
+        )
     }
 }
diff --git a/datafusion/functions-window/src/lib.rs b/datafusion/functions-window/src/lib.rs
index ef624e13e61c..5a2aafa2892e 100644
--- a/datafusion/functions-window/src/lib.rs
+++ b/datafusion/functions-window/src/lib.rs
@@ -31,11 +31,17 @@ use datafusion_expr::WindowUDF;
 
 #[macro_use]
 pub mod macros;
+
+pub mod lead_lag;
+
 pub mod rank;
 pub mod row_number;
+mod utils;
 
 /// Fluent-style API for creating `Expr`s
 pub mod expr_fn {
+    pub use super::lead_lag::lag;
+    pub use super::lead_lag::lead;
     pub use super::rank::{dense_rank, percent_rank, rank};
     pub use super::row_number::row_number;
 }
@@ -44,6 +50,8 @@ pub mod expr_fn {
 pub fn all_default_window_functions() -> Vec<Arc<WindowUDF>> {
     vec![
         row_number::row_number_udwf(),
+        lead_lag::lead_udwf(),
+        lead_lag::lag_udwf(),
         rank::rank_udwf(),
         rank::dense_rank_udwf(),
         rank::percent_rank_udwf(),
diff --git a/datafusion/functions-window/src/utils.rs b/datafusion/functions-window/src/utils.rs
new file mode 100644
index 000000000000..69f68aa78f2c
--- /dev/null
+++ b/datafusion/functions-window/src/utils.rs
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::arrow::datatypes::DataType;
+use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue};
+use datafusion_physical_expr::expressions::Literal;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+use std::sync::Arc;
+
+pub(crate) fn get_signed_integer(value: ScalarValue) -> Result<i64> {
+    if value.is_null() {
+        return Ok(0);
+    }
+
+    if !value.data_type().is_integer() {
+        return exec_err!("Expected an integer value");
+    }
+
+    value.cast_to(&DataType::Int64)?.try_into()
+}
+
+pub(crate) fn get_scalar_value_from_args(
+    args: &[Arc<dyn PhysicalExpr>],
+    index: usize,
+) -> Result<Option<ScalarValue>> {
+    Ok(if let Some(field) = args.get(index) {
+        let tmp = field
+            .as_any()
+            .downcast_ref::<Literal>()
+            .ok_or_else(|| DataFusionError::NotImplemented(
+                format!("There is only support Literal types for field at idx: {index} in Window Function"),
+            ))?
+            .value()
+            .clone();
+        Some(tmp)
+    } else {
+        None
+    })
+}
diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs
index e07e11e43199..54b8aafdb4da 100644
--- a/datafusion/physical-expr/src/expressions/mod.rs
+++ b/datafusion/physical-expr/src/expressions/mod.rs
@@ -36,7 +36,6 @@ mod unknown_column;
 /// Module with some convenient methods used in expression building
 pub use crate::aggregate::stats::StatsType;
 pub use crate::window::cume_dist::{cume_dist, CumeDist};
-pub use crate::window::lead_lag::{lag, lead, WindowShift};
 pub use crate::window::nth_value::NthValue;
 pub use crate::window::ntile::Ntile;
 pub use crate::PhysicalSortExpr;
diff --git a/datafusion/physical-expr/src/window/mod.rs b/datafusion/physical-expr/src/window/mod.rs
index 938bdac50f97..c0fe3c2933a7 100644
--- a/datafusion/physical-expr/src/window/mod.rs
+++ b/datafusion/physical-expr/src/window/mod.rs
@@ -19,7 +19,6 @@
 mod aggregate;
 mod built_in;
 mod built_in_window_function_expr;
 pub(crate) mod cume_dist;
-pub(crate) mod lead_lag;
 pub(crate) mod nth_value;
 pub(crate) mod ntile;
 mod sliding_aggregate;
diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs
index e6a773f6b1ea..adf61f27bc6f 100644
--- a/datafusion/physical-plan/src/windows/mod.rs
+++ b/datafusion/physical-plan/src/windows/mod.rs
@@ -21,7 +21,7 @@ use std::borrow::Borrow;
 use std::sync::Arc;
 
 use crate::{
-    expressions::{cume_dist, lag, lead, Literal, NthValue, Ntile, PhysicalSortExpr},
+    expressions::{cume_dist, Literal, NthValue, Ntile, PhysicalSortExpr},
     ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PhysicalExpr,
 };
 
@@ -48,6 +48,7 @@ mod utils;
 mod window_agg_exec;
 
 pub use bounded_window_agg_exec::BoundedWindowAggExec;
+use datafusion_functions_window_common::expr::ExpressionArgs;
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
 use datafusion_physical_expr::expressions::Column;
@@ -206,52 +207,6 @@ fn get_unsigned_integer(value: ScalarValue) -> Result<u64> {
     value.cast_to(&DataType::UInt64)?.try_into()
 }
 
-fn get_casted_value(
-    default_value: Option<ScalarValue>,
-    dtype: &DataType,
-) -> Result<ScalarValue> {
-    match 
default_value {
-        Some(v) if !v.data_type().is_null() => v.cast_to(dtype),
-        // If None or Null datatype
-        _ => ScalarValue::try_from(dtype),
-    }
-}
-
-/// Rewrites the NULL expression (1st argument) with an expression
-/// which is the same data type as the default value (3rd argument).
-/// Also rewrites the return type with the same data type as the
-/// default value.
-///
-/// If a default value is not provided, or it is NULL the original
-/// expression (1st argument) and return type is returned without
-/// any modifications.
-fn rewrite_null_expr_and_data_type(
-    args: &[Arc<dyn PhysicalExpr>],
-    expr_type: &DataType,
-) -> Result<(Arc<dyn PhysicalExpr>, DataType)> {
-    assert!(!args.is_empty());
-    let expr = Arc::clone(&args[0]);
-
-    // The input expression and the return is type is unchanged
-    // when the input expression is not NULL.
-    if !expr_type.is_null() {
-        return Ok((expr, expr_type.clone()));
-    }
-
-    get_scalar_value_from_args(args, 2)?
-        .and_then(|value| {
-            ScalarValue::try_from(value.data_type().clone())
-                .map(|sv| {
-                    Ok((
-                        Arc::new(Literal::new(sv)) as Arc<dyn PhysicalExpr>,
-                        value.data_type().clone(),
-                    ))
-                })
-                .ok()
-        })
-        .unwrap_or(Ok((expr, expr_type.clone())))
-}
-
 fn create_built_in_window_expr(
     fun: &BuiltInWindowFunction,
     args: &[Arc<dyn PhysicalExpr>],
@@ -286,42 +241,6 @@
             Arc::new(Ntile::new(name, n as u64, out_data_type))
         }
     }
-        BuiltInWindowFunction::Lag => {
-            // rewrite NULL expression and the return datatype
-            let (arg, out_data_type) =
-                rewrite_null_expr_and_data_type(args, out_data_type)?;
-            let shift_offset = get_scalar_value_from_args(args, 1)?
-                .map(get_signed_integer)
-                .map_or(Ok(None), |v| v.map(Some))?;
-            let default_value =
-                get_casted_value(get_scalar_value_from_args(args, 2)?, &out_data_type)?;
-            Arc::new(lag(
-                name,
-                default_value.data_type().clone(),
-                arg,
-                shift_offset,
-                default_value,
-                ignore_nulls,
-            ))
-        }
-        BuiltInWindowFunction::Lead => {
-            // rewrite NULL expression and the return datatype
-            let (arg, out_data_type) =
-                rewrite_null_expr_and_data_type(args, out_data_type)?;
-            let shift_offset = get_scalar_value_from_args(args, 1)?
-            .map(get_signed_integer)
-                .map_or(Ok(None), |v| v.map(Some))?;
-            let default_value =
-                get_casted_value(get_scalar_value_from_args(args, 2)?, &out_data_type)?;
-            Arc::new(lead(
-                name,
-                default_value.data_type().clone(),
-                arg,
-                shift_offset,
-                default_value,
-                ignore_nulls,
-            ))
-        }
         BuiltInWindowFunction::NthValue => {
             let arg = Arc::clone(&args[0]);
             let n = get_signed_integer(
@@ -415,7 +334,8 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr {
     }
 
     fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        self.args.clone()
+        self.fun
+            .expressions(ExpressionArgs::new(&self.args, &self.input_types))
     }
 
     fn create_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> {
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 5256f7473c95..9964ab498fb1 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -515,8 +515,8 @@ enum BuiltInWindowFunction {
   // PERCENT_RANK = 3;
   CUME_DIST = 4;
   NTILE = 5;
-  LAG = 6;
-  LEAD = 7;
+  // LAG = 6;
+  // LEAD = 7;
   FIRST_VALUE = 8;
   LAST_VALUE = 9;
   NTH_VALUE = 10;
@@ -528,7 +528,7 @@ message WindowExprNode {
     string udaf = 3;
     string udwf = 9;
   }
-  LogicalExprNode expr = 4;
+  repeated LogicalExprNode exprs = 4;
  repeated LogicalExprNode partition_by = 5;
  repeated SortExprNode order_by = 6;
  // repeated LogicalExprNode filter = 7;
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index e876008e853f..4417d1149681 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -1664,8 +1664,6 @@ impl serde::Serialize for BuiltInWindowFunction {
             Self::Unspecified => "UNSPECIFIED",
             Self::CumeDist => "CUME_DIST",
             Self::Ntile => "NTILE",
-            Self::Lag => "LAG",
-            Self::Lead => "LEAD",
             Self::FirstValue => "FIRST_VALUE",
             Self::LastValue => "LAST_VALUE",
             Self::NthValue => "NTH_VALUE",
@@ -1683,8 +1681,6 @@ impl<'de> serde::Deserialize<'de> for BuiltInWindowFunction {
             "UNSPECIFIED",
             "CUME_DIST",
             "NTILE",
-            "LAG",
-            "LEAD",
             "FIRST_VALUE",
             "LAST_VALUE",
             "NTH_VALUE",
@@ -1731,8 +1727,6 @@ impl<'de> serde::Deserialize<'de> for BuiltInWindowFunction {
                 "UNSPECIFIED" => Ok(BuiltInWindowFunction::Unspecified),
                 "CUME_DIST" => Ok(BuiltInWindowFunction::CumeDist),
                 "NTILE" => Ok(BuiltInWindowFunction::Ntile),
-                "LAG" => Ok(BuiltInWindowFunction::Lag),
-                "LEAD" => Ok(BuiltInWindowFunction::Lead),
                 "FIRST_VALUE" => Ok(BuiltInWindowFunction::FirstValue),
                 "LAST_VALUE" => Ok(BuiltInWindowFunction::LastValue),
                 "NTH_VALUE" => Ok(BuiltInWindowFunction::NthValue),
@@ -21157,7 +21151,7 @@ impl serde::Serialize for WindowExprNode {
     {
         use serde::ser::SerializeStruct;
         let mut len = 0;
-        if self.expr.is_some() {
+        if !self.exprs.is_empty() {
             len += 1;
         }
         if !self.partition_by.is_empty() {
             len += 1;
         }
         if !self.order_by.is_empty() {
             len += 1;
         }
         if self.window_frame.is_some() {
             len += 1;
         }
@@ -21176,8 +21170,8 @@ impl serde::Serialize for WindowExprNode {
             len += 1;
         }
         let mut struct_ser = serializer.serialize_struct("datafusion.WindowExprNode", len)?;
-        if let Some(v) = self.expr.as_ref() {
-            struct_ser.serialize_field("expr", v)?;
+        if !self.exprs.is_empty() {
+            struct_ser.serialize_field("exprs", &self.exprs)?;
         }
         if !self.partition_by.is_empty() {
             struct_ser.serialize_field("partitionBy", &self.partition_by)?;
@@ -21218,7 +21212,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode {
             D: serde::Deserializer<'de>,
         {
             const FIELDS: &[&str] = &[
-                "expr",
+                "exprs",
                 "partition_by",
                 "partitionBy",
                 "order_by",
                 "orderBy",
                 "window_frame",
                 "windowFrame",
@@ -21235,7 +21229,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode {
 
             #[allow(clippy::enum_variant_names)]
             enum GeneratedField {
-                Expr,
+                Exprs,
                 PartitionBy, 
OrderBy,
                 WindowFrame,
@@ -21264,7 +21258,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode {
                     E: serde::de::Error,
                 {
                     match value {
-                        "expr" => Ok(GeneratedField::Expr),
+                        "exprs" => Ok(GeneratedField::Exprs),
                         "partitionBy" | "partition_by" => Ok(GeneratedField::PartitionBy),
                         "orderBy" | "order_by" => Ok(GeneratedField::OrderBy),
                         "windowFrame" | "window_frame" => Ok(GeneratedField::WindowFrame),
@@ -21291,7 +21285,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode {
             where
                 V: serde::de::MapAccess<'de>,
             {
-                let mut expr__ = None;
+                let mut exprs__ = None;
                 let mut partition_by__ = None;
                 let mut order_by__ = None;
                 let mut window_frame__ = None;
@@ -21299,11 +21293,11 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode {
                 let mut window_function__ = None;
                 while let Some(k) = map_.next_key()? {
                     match k {
-                        GeneratedField::Expr => {
-                            if expr__.is_some() {
-                                return Err(serde::de::Error::duplicate_field("expr"));
+                        GeneratedField::Exprs => {
+                            if exprs__.is_some() {
+                                return Err(serde::de::Error::duplicate_field("exprs"));
                             }
-                            expr__ = map_.next_value()?;
+                            exprs__ = Some(map_.next_value()?);
                         }
                         GeneratedField::PartitionBy => {
                             if partition_by__.is_some() {
@@ -21352,7 +21346,7 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode {
                     }
                 }
                 Ok(WindowExprNode {
-                    expr: expr__,
+                    exprs: exprs__.unwrap_or_default(),
                     partition_by: partition_by__.unwrap_or_default(),
                     order_by: order_by__.unwrap_or_default(),
                     window_frame: window_frame__,
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index 2aa14f7e80b0..d3fe031a48c9 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -538,7 +538,7 @@ pub mod logical_expr_node {
         TryCast(::prost::alloc::boxed::Box<super::TryCastNode>),
         /// window expressions
         #[prost(message, tag = "18")]
-        WindowExpr(::prost::alloc::boxed::Box<super::WindowExprNode>),
+        WindowExpr(super::WindowExprNode),
         /// AggregateUDF expressions
         #[prost(message, tag = "19")]
         AggregateUdfExpr(::prost::alloc::boxed::Box<super::AggregateUdfExprNode>),
@@ -735,8 +735,8 @@ pub struct ScalarUdfExprNode {
 }
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct WindowExprNode {
-    #[prost(message, optional, boxed, tag = "4")]
-    pub expr: ::core::option::Option<::prost::alloc::boxed::Box<LogicalExprNode>>,
+    #[prost(message, repeated, tag = "4")]
+    pub exprs: ::prost::alloc::vec::Vec<LogicalExprNode>,
     #[prost(message, repeated, tag = "5")]
     pub partition_by: ::prost::alloc::vec::Vec<LogicalExprNode>,
     #[prost(message, repeated, tag = "6")]
@@ -1828,8 +1828,8 @@ pub enum BuiltInWindowFunction {
     /// PERCENT_RANK = 3;
     CumeDist = 4,
     Ntile = 5,
-    Lag = 6,
-    Lead = 7,
+    /// LAG = 6;
+    /// LEAD = 7;
     FirstValue = 8,
     LastValue = 9,
     NthValue = 10,
@@ -1844,8 +1844,6 @@ impl BuiltInWindowFunction {
             Self::Unspecified => "UNSPECIFIED",
             Self::CumeDist => "CUME_DIST",
             Self::Ntile => "NTILE",
-            Self::Lag => "LAG",
-            Self::Lead => "LEAD",
             Self::FirstValue => "FIRST_VALUE",
             Self::LastValue => "LAST_VALUE",
             Self::NthValue => "NTH_VALUE",
@@ -1857,8 +1855,6 @@
             "UNSPECIFIED" => Some(Self::Unspecified),
             "CUME_DIST" => Some(Self::CumeDist),
             "NTILE" => Some(Self::Ntile),
-            "LAG" => Some(Self::Lag),
-            "LEAD" => Some(Self::Lead),
             "FIRST_VALUE" => Some(Self::FirstValue),
             "LAST_VALUE" => Some(Self::LastValue),
             "NTH_VALUE" => Some(Self::NthValue),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index 32e1b68203ce..20d007048a00 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -142,8 +142,6 @@ 
impl From<protobuf::BuiltInWindowFunction> for BuiltInWindowFunction {
     fn from(built_in_function: protobuf::BuiltInWindowFunction) -> Self {
         match built_in_function {
             protobuf::BuiltInWindowFunction::Unspecified => todo!(),
-            protobuf::BuiltInWindowFunction::Lag => Self::Lag,
-            protobuf::BuiltInWindowFunction::Lead => Self::Lead,
             protobuf::BuiltInWindowFunction::FirstValue => Self::FirstValue,
             protobuf::BuiltInWindowFunction::CumeDist => Self::CumeDist,
             protobuf::BuiltInWindowFunction::Ntile => Self::Ntile,
@@ -286,10 +284,7 @@ pub fn parse_expr(
                     .map_err(|_| Error::unknown("BuiltInWindowFunction", *i))?
                     .into();
 
-                let args =
-                    parse_optional_expr(expr.expr.as_deref(), registry, codec)?
-                        .map(|e| vec![e])
-                        .unwrap_or_else(Vec::new);
+                let args = parse_exprs(&expr.exprs, registry, codec)?;
 
                 Expr::WindowFunction(WindowFunction::new(
                     expr::WindowFunctionDefinition::BuiltInWindowFunction(
@@ -309,10 +304,7 @@ pub fn parse_expr(
                     None => registry.udaf(udaf_name)?,
                 };
 
-                let args =
-                    parse_optional_expr(expr.expr.as_deref(), registry, codec)?
-                        .map(|e| vec![e])
-                        .unwrap_or_else(Vec::new);
+                let args = parse_exprs(&expr.exprs, registry, codec)?;
                 Expr::WindowFunction(WindowFunction::new(
                     expr::WindowFunctionDefinition::AggregateUDF(udaf_function),
                     args,
@@ -329,10 +321,7 @@ pub fn parse_expr(
                     None => registry.udwf(udwf_name)?,
                 };
 
-                let args =
-                    parse_optional_expr(expr.expr.as_deref(), registry, codec)?
-                        .map(|e| vec![e])
-                        .unwrap_or_else(Vec::new);
+                let args = parse_exprs(&expr.exprs, registry, codec)?;
                 Expr::WindowFunction(WindowFunction::new(
                     expr::WindowFunctionDefinition::WindowUDF(udwf_function),
                     args,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index 07823b422f71..15fec3a8b2a8 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -119,8 +119,6 @@
             BuiltInWindowFunction::NthValue => Self::NthValue,
             BuiltInWindowFunction::Ntile => Self::Ntile,
             BuiltInWindowFunction::CumeDist => Self::CumeDist,
-            BuiltInWindowFunction::Lag => Self::Lag,
-            BuiltInWindowFunction::Lead => Self::Lead,
         }
     }
 }
@@ -333,25 +331,19 @@ pub fn serialize_expr(
                     )
                 }
             };
-        let arg_expr: Option<Box<protobuf::LogicalExprNode>> = if !args.is_empty() {
-            let arg = &args[0];
-            Some(Box::new(serialize_expr(arg, codec)?))
-        } else {
-            None
-        };
         let partition_by = serialize_exprs(partition_by, codec)?;
         let order_by = serialize_sorts(order_by, codec)?;
 
         let window_frame: Option<protobuf::WindowFrame> = Some(window_frame.try_into()?);
-        let window_expr = Box::new(protobuf::WindowExprNode {
-            expr: arg_expr,
+        let window_expr = protobuf::WindowExprNode {
+            exprs: serialize_exprs(args, codec)?,
             window_function: Some(window_function),
             partition_by,
             order_by,
             window_frame,
             fun_definition,
-        });
+        };
         protobuf::LogicalExprNode {
             expr_type: Some(ExprType::WindowExpr(window_expr)),
         }
diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs
index 85d4fe8a16d0..6072baca688c 100644
--- a/datafusion/proto/src/physical_plan/to_proto.rs
+++ b/datafusion/proto/src/physical_plan/to_proto.rs
@@ -25,7 +25,6 @@ use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr};
 use datafusion::physical_plan::expressions::{
     BinaryExpr, CaseExpr, CastExpr, Column, CumeDist, InListExpr, IsNotNullExpr,
     IsNullExpr, Literal, NegativeExpr, NotExpr, NthValue, Ntile, TryCastExpr,
-    WindowShift,
 };
 use datafusion::physical_plan::udaf::AggregateFunctionExpr;
 use 
datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindowExpr};
@@ -119,25 +118,6 @@ pub fn serialize_physical_window_expr(
             )))),
         );
         protobuf::BuiltInWindowFunction::Ntile
-    } else if let Some(window_shift_expr) =
-        built_in_fn_expr.downcast_ref::<WindowShift>()
-    {
-        args.insert(
-            1,
-            Arc::new(Literal::new(datafusion_common::ScalarValue::Int64(Some(
-                window_shift_expr.get_shift_offset(),
-            )))),
-        );
-        args.insert(
-            2,
-            Arc::new(Literal::new(window_shift_expr.get_default_value())),
-        );
-
-        if window_shift_expr.get_shift_offset() >= 0 {
-            protobuf::BuiltInWindowFunction::Lag
-        } else {
-            protobuf::BuiltInWindowFunction::Lead
-        }
     } else if let Some(nth_value_expr) = built_in_fn_expr.downcast_ref::<NthValue>() {
         match nth_value_expr.get_kind() {
             NthValueKind::First => protobuf::BuiltInWindowFunction::FirstValue,
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index ffa8fc1eefe9..c017395d979f 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -47,8 +47,10 @@ use datafusion::functions_aggregate::expr_fn::{
 };
 use datafusion::functions_aggregate::min_max::max_udaf;
 use datafusion::functions_nested::map::map;
-use datafusion::functions_window::rank::{dense_rank, percent_rank, rank, rank_udwf};
-use datafusion::functions_window::row_number::row_number;
+use datafusion::functions_window::expr_fn::{
+    dense_rank, lag, lead, percent_rank, rank, row_number,
+};
+use datafusion::functions_window::rank::rank_udwf;
 use datafusion::prelude::*;
 use datafusion::test_util::{TestTableFactory, TestTableProvider};
 use datafusion_common::config::TableOptions;
@@ -942,6 +944,12 @@ async fn roundtrip_expr_api() -> Result<()> {
         rank(),
         dense_rank(),
         percent_rank(),
+        lead(col("b"), None, None),
+        lead(col("b"), Some(2), None),
+        lead(col("b"), Some(2), Some(ScalarValue::from(100))),
+        lag(col("b"), None, None),
+        lag(col("b"), Some(2), None),
+        lag(col("b"), Some(2), Some(ScalarValue::from(100))),
         nth_value(col("b"), 1, vec![]),
         nth_value(
             col("b"),
diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt
index a3d0ff4383ae..fb7afdda2ea8 100644
--- a/datafusion/sqllogictest/test_files/union.slt
+++ b/datafusion/sqllogictest/test_files/union.slt
@@ -503,9 +503,9 @@ logical_plan
 12)----Projection: Int64(1) AS cnt
 13)------Limit: skip=0, fetch=3
 14)--------EmptyRelation
-15)----Projection: LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cnt
+15)----Projection: lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cnt
 16)------Limit: skip=0, fetch=3
-17)--------WindowAggr: windowExpr=[[LEAD(b.c1, Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+17)--------WindowAggr: windowExpr=[[lead(b.c1, Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
 18)----------SubqueryAlias: b
 19)------------Projection: Int64(1) AS c1
 20)--------------EmptyRelation
@@ -528,8 +528,8 @@ physical_plan
 16)------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c13], has_header=true
 17)------ProjectionExec: expr=[1 as cnt]
 18)--------PlaceholderRowExec
-19)------ProjectionExec: expr=[LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as cnt]
-20)--------BoundedWindowAggExec: wdw=[LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING: Ok(Field { name: "LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +19)------ProjectionExec: expr=[lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as cnt] +20)--------BoundedWindowAggExec: wdw=[lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 21)----------ProjectionExec: expr=[1 as c1] 22)------------PlaceholderRowExec diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 1b612f921262..b3f2786d3dba 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1376,16 +1376,16 @@ EXPLAIN SELECT LIMIT 5 ---- logical_plan -01)Projection: aggregate_test_100.c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS fv2, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lag1, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lead1, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2 +01)Projection: aggregate_test_100.c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS fv2, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lag1, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lead1, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2 02)--Limit: skip=0, fetch=5 -03)----WindowAggr: windowExpr=[[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, LAG(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, LEAD(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY 
[aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -04)------WindowAggr: windowExpr=[[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, LAG(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +03)----WindowAggr: windowExpr=[[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, lag(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, lead(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------WindowAggr: windowExpr=[[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, lag(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] 05)--------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)ProjectionExec: expr=[c9@0 as c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as fv2, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as lag1, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as lag2, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as lead1, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as lead2] +01)ProjectionExec: expr=[c9@0 as c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as fv2, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as lag1, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as lag2, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as lead1, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as lead2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS 
BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, 
lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2636,15 +2636,15 @@ EXPLAIN SELECT ---- logical_plan 01)Sort: annotated_data_finite.ts DESC NULLS FIRST, fetch=5 -02)--Projection: annotated_data_finite.ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 
FOLLOWING AS lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rn2, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rank1, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rank2, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS dense_rank1, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS leadr2 -03)----WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] -04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +02)--Projection: annotated_data_finite.ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC 
NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rn2, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rank1, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rank2, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS dense_rank1, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS dense_rank2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lag1, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lead1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lagr1, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lagr2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS leadr1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS leadr2 +03)----WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts 
ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lag(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lag(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lead(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lag(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lag(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lead(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] 05)--------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)SortExec: TopK(fetch=5), expr=[ts@0 DESC], preserve_partitioning=[false] -02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, row_number() ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] -03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, 
start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: 
Ok(Field { name: "rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { 
name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }], mode=[Sorted] +02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, 
lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] +03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), 
is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "dense_rank() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }], mode=[Sorted] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true query IIIIIIIIIIIIIIIIIIIIIIIII @@ -4971,6 +4971,26 @@ SELECT LAG(NULL, 1, false) OVER () FROM t1; ---- false +query B +SELECT LEAD(NULL, 0, true) OVER () FROM t1; +---- +NULL + +query B +SELECT LAG(NULL, 0, true) OVER () FROM t1; +---- +NULL + +query B +SELECT LEAD(NULL, 1, true) OVER () FROM t1; +---- +true + +query B +SELECT LAG(NULL, 1, true) OVER () FROM t1; +---- +true + statement ok insert into t1 values (2); @@ -4986,6 +5006,18 @@ SELECT LAG(NULL, 1, false) OVER () FROM t1; false NULL +query B +SELECT LEAD(NULL, 1, true) OVER () FROM t1; +---- +NULL +true + +query B +SELECT LAG(NULL, 1, true) OVER () FROM t1; +---- +true +NULL + statement ok DROP TABLE t1; From 24148bd65fdf61fba340b69dc87a7920850cb19f Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Fri, 18 Oct 2024 13:28:03 +0200 Subject: [PATCH 13/15] Add links to new_constraint_from_table_constraints doc (#12995) --- datafusion/sql/src/statement.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 4109f1371187..60e3413b836f 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1263,7 +1263,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ))) } - /// Convert each `TableConstraint` to corresponding `Constraint` + /// Convert each [TableConstraint] to corresponding [Constraint] fn new_constraint_from_table_constraints( constraints: 
&[TableConstraint], df_schema: &DFSchemaRef, From 87e931c976a7aa24cecaa9bf3658b42bba12a51e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alihan=20=C3=87elikcan?= Date: Fri, 18 Oct 2024 14:34:42 +0300 Subject: [PATCH 14/15] Split output batches of joins that do not respect batch size (#12969) * Add BatchSplitter to joins that do not respect batch size * Group relevant imports * Update configs.md * Update SQL logic tests for config * Review * Use PrimitiveBuilder for PrimitiveArray concatenation * Fix into_builder() bug * Apply suggestions from code review Co-authored-by: Andrew Lamb * Update config docs * Format * Update config SQL Logic Test --------- Co-authored-by: Mehmet Ozan Kabak Co-authored-by: Andrew Lamb --- datafusion/common/src/config.rs | 26 +- datafusion/execution/src/config.rs | 14 + .../physical-plan/src/joins/cross_join.rs | 84 +++-- .../physical-plan/src/joins/hash_join.rs | 2 +- .../src/joins/nested_loop_join.rs | 356 ++++++++++++------ .../src/joins/stream_join_utils.rs | 83 ++-- .../src/joins/symmetric_hash_join.rs | 252 +++++++------ datafusion/physical-plan/src/joins/utils.rs | 220 +++++++++-- .../test_files/information_schema.slt | 2 + docs/source/user-guide/configs.md | 1 + 10 files changed, 709 insertions(+), 331 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 1e1c5d5424b0..47ffe0b1c66b 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -338,6 +338,12 @@ config_namespace! { /// if the source of statistics is accurate. /// We plan to make this the default in the future. pub use_row_number_estimates_to_optimize_partitioning: bool, default = false + + /// Should DataFusion enforce batch size in joins or not. By default, + /// DataFusion will not enforce batch size in joins. Enforcing batch size + /// in joins can reduce memory usage when joining large + /// tables with a highly-selective join filter, but is also slightly slower. 
+ pub enforce_batch_size_in_joins: bool, default = false } } @@ -1222,16 +1228,18 @@ impl ConfigField for TableOptions { fn set(&mut self, key: &str, value: &str) -> Result<()> { // Extensions are handled in the public `ConfigOptions::set` let (key, rem) = key.split_once('.').unwrap_or((key, "")); - let Some(format) = &self.current_format else { - return _config_err!("Specify a format for TableOptions"); - }; match key { - "format" => match format { - #[cfg(feature = "parquet")] - ConfigFileType::PARQUET => self.parquet.set(rem, value), - ConfigFileType::CSV => self.csv.set(rem, value), - ConfigFileType::JSON => self.json.set(rem, value), - }, + "format" => { + let Some(format) = &self.current_format else { + return _config_err!("Specify a format for TableOptions"); + }; + match format { + #[cfg(feature = "parquet")] + ConfigFileType::PARQUET => self.parquet.set(rem, value), + ConfigFileType::CSV => self.csv.set(rem, value), + ConfigFileType::JSON => self.json.set(rem, value), + } + } _ => _config_err!("Config value \"{key}\" not found on TableOptions"), } } diff --git a/datafusion/execution/src/config.rs b/datafusion/execution/src/config.rs index cede75d21ca4..53646dc5b468 100644 --- a/datafusion/execution/src/config.rs +++ b/datafusion/execution/src/config.rs @@ -432,6 +432,20 @@ impl SessionConfig { self } + /// Enables or disables the enforcement of batch size in joins + pub fn with_enforce_batch_size_in_joins( + mut self, + enforce_batch_size_in_joins: bool, + ) -> Self { + self.options.execution.enforce_batch_size_in_joins = enforce_batch_size_in_joins; + self + } + + /// Returns true if the joins will be enforced to output batches of the configured size + pub fn enforce_batch_size_in_joins(&self) -> bool { + self.options.execution.enforce_batch_size_in_joins + } + /// Convert configuration options to name-value pairs with values /// converted to strings. /// diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index a70645f3d6c0..8f2bef56da76 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -19,7 +19,8 @@ //! 
and producing batches in parallel for the right partitions use super::utils::{ - adjust_right_output_partitioning, BuildProbeJoinMetrics, OnceAsync, OnceFut, + adjust_right_output_partitioning, BatchSplitter, BatchTransformer, + BuildProbeJoinMetrics, NoopBatchTransformer, OnceAsync, OnceFut, StatefulStreamResult, }; use crate::coalesce_partitions::CoalescePartitionsExec; @@ -86,6 +87,7 @@ impl CrossJoinExec { let schema = Arc::new(Schema::new(all_columns).with_metadata(metadata)); let cache = Self::compute_properties(&left, &right, Arc::clone(&schema)); + CrossJoinExec { left, right, @@ -246,6 +248,10 @@ impl ExecutionPlan for CrossJoinExec { let reservation = MemoryConsumer::new("CrossJoinExec").register(context.memory_pool()); + let batch_size = context.session_config().batch_size(); + let enforce_batch_size_in_joins = + context.session_config().enforce_batch_size_in_joins(); + let left_fut = self.left_fut.once(|| { load_left_input( Arc::clone(&self.left), @@ -255,15 +261,29 @@ impl ExecutionPlan for CrossJoinExec { ) }); - Ok(Box::pin(CrossJoinStream { - schema: Arc::clone(&self.schema), - left_fut, - right: stream, - left_index: 0, - join_metrics, - state: CrossJoinStreamState::WaitBuildSide, - left_data: RecordBatch::new_empty(self.left().schema()), - })) + if enforce_batch_size_in_joins { + Ok(Box::pin(CrossJoinStream { + schema: Arc::clone(&self.schema), + left_fut, + right: stream, + left_index: 0, + join_metrics, + state: CrossJoinStreamState::WaitBuildSide, + left_data: RecordBatch::new_empty(self.left().schema()), + batch_transformer: BatchSplitter::new(batch_size), + })) + } else { + Ok(Box::pin(CrossJoinStream { + schema: Arc::clone(&self.schema), + left_fut, + right: stream, + left_index: 0, + join_metrics, + state: CrossJoinStreamState::WaitBuildSide, + left_data: RecordBatch::new_empty(self.left().schema()), + batch_transformer: NoopBatchTransformer::new(), + })) + } } fn statistics(&self) -> Result { @@ -319,7 +339,7 @@ fn stats_cartesian_product( } /// A stream that issues [RecordBatch]es as they arrive from the right of the join. 
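A note on the shape of `execute` above: the two `Box::pin(...)` arms repeat the same struct literal because `CrossJoinStream<BatchSplitter>` and `CrossJoinStream<NoopBatchTransformer>` are distinct monomorphized types, so the runtime flag must pick a concrete type at construction time. A condensed, purely hypothetical illustration of that pattern:

```rust
trait Transformer {
    fn transform(&mut self, rows: usize) -> usize;
}

struct Noop;
impl Transformer for Noop {
    fn transform(&mut self, rows: usize) -> usize {
        rows
    }
}

struct Splitter {
    max: usize,
}
impl Transformer for Splitter {
    fn transform(&mut self, rows: usize) -> usize {
        rows.min(self.max)
    }
}

// Generic over T: the no-op path compiles to a direct call with no
// per-batch branching, which is why the flag is resolved up front.
struct JoinStream<T: Transformer> {
    transformer: T,
}

trait AnyStream {
    fn emit(&mut self, rows: usize) -> usize;
}
impl<T: Transformer> AnyStream for JoinStream<T> {
    fn emit(&mut self, rows: usize) -> usize {
        self.transformer.transform(rows)
    }
}

fn make_stream(enforce: bool, batch_size: usize) -> Box<dyn AnyStream> {
    // Mirrors the two construction arms: the generic parameter differs,
    // so each arm spells out its own struct literal.
    if enforce {
        Box::new(JoinStream { transformer: Splitter { max: batch_size } })
    } else {
        Box::new(JoinStream { transformer: Noop })
    }
}

fn main() {
    assert_eq!(make_stream(true, 8).emit(100), 8);
    assert_eq!(make_stream(false, 8).emit(100), 100);
}
```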
-struct CrossJoinStream { +struct CrossJoinStream { /// Input schema schema: Arc, /// Future for data from left side @@ -334,9 +354,11 @@ struct CrossJoinStream { state: CrossJoinStreamState, /// Left data left_data: RecordBatch, + /// Batch transformer + batch_transformer: T, } -impl RecordBatchStream for CrossJoinStream { +impl RecordBatchStream for CrossJoinStream { fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } @@ -390,7 +412,7 @@ fn build_batch( } #[async_trait] -impl Stream for CrossJoinStream { +impl Stream for CrossJoinStream { type Item = Result; fn poll_next( @@ -401,7 +423,7 @@ impl Stream for CrossJoinStream { } } -impl CrossJoinStream { +impl CrossJoinStream { /// Separate implementation function that unpins the [`CrossJoinStream`] so /// that partial borrows work correctly fn poll_next_impl( @@ -470,21 +492,33 @@ impl CrossJoinStream { fn build_batches(&mut self) -> Result>> { let right_batch = self.state.try_as_record_batch()?; if self.left_index < self.left_data.num_rows() { - let join_timer = self.join_metrics.join_time.timer(); - let result = - build_batch(self.left_index, right_batch, &self.left_data, &self.schema); - join_timer.done(); - - if let Ok(ref batch) = result { - self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(batch.num_rows()); + match self.batch_transformer.next() { + None => { + let join_timer = self.join_metrics.join_time.timer(); + let result = build_batch( + self.left_index, + right_batch, + &self.left_data, + &self.schema, + ); + join_timer.done(); + + self.batch_transformer.set_batch(result?); + } + Some((batch, last)) => { + if last { + self.left_index += 1; + } + + self.join_metrics.output_batches.add(1); + self.join_metrics.output_rows.add(batch.num_rows()); + return Ok(StatefulStreamResult::Ready(Some(batch))); + } } - self.left_index += 1; - result.map(|r| StatefulStreamResult::Ready(Some(r))) } else { self.state = CrossJoinStreamState::FetchProbeBatch; - Ok(StatefulStreamResult::Continue) } + Ok(StatefulStreamResult::Continue) } } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 74a45a7e4761..3b730c01291c 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -1438,7 +1438,7 @@ impl HashJoinStream { index_alignment_range_start..index_alignment_range_end, self.join_type, self.right_side_ordered, - ); + )?; let result = build_batch_from_indices( &self.schema, diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 6068e7526316..358ff02473a6 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -25,7 +25,10 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::task::Poll; -use super::utils::{asymmetric_join_output_partitioning, need_produce_result_in_final}; +use super::utils::{ + asymmetric_join_output_partitioning, need_produce_result_in_final, BatchSplitter, + BatchTransformer, NoopBatchTransformer, StatefulStreamResult, +}; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::joins::utils::{ adjust_indices_by_join_type, apply_join_filter_to_indices, build_batch_from_indices, @@ -35,8 +38,8 @@ use crate::joins::utils::{ }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ - execution_mode_from_children, DisplayAs, DisplayFormatType, Distribution, - ExecutionMode, 
ExecutionPlan, ExecutionPlanProperties, PlanProperties, + execution_mode_from_children, handle_state, DisplayAs, DisplayFormatType, + Distribution, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, }; @@ -45,7 +48,9 @@ use arrow::compute::concat_batches; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow::util::bit_util; -use datafusion_common::{exec_datafusion_err, JoinSide, Result, Statistics}; +use datafusion_common::{ + exec_datafusion_err, internal_err, JoinSide, Result, Statistics, +}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_expr::JoinType; @@ -230,10 +235,11 @@ impl NestedLoopJoinExec { asymmetric_join_output_partitioning(left, right, &join_type); // Determine execution mode: - let mut mode = execution_mode_from_children([left, right]); - if mode.is_unbounded() { - mode = ExecutionMode::PipelineBreaking; - } + let mode = if left.execution_mode().is_unbounded() { + ExecutionMode::PipelineBreaking + } else { + execution_mode_from_children([left, right]) + }; PlanProperties::new(eq_properties, output_partitioning, mode) } @@ -345,6 +351,10 @@ impl ExecutionPlan for NestedLoopJoinExec { ) }); + let batch_size = context.session_config().batch_size(); + let enforce_batch_size_in_joins = + context.session_config().enforce_batch_size_in_joins(); + let outer_table = self.right.execute(partition, context)?; let indices_cache = (UInt64Array::new_null(0), UInt32Array::new_null(0)); @@ -352,18 +362,38 @@ impl ExecutionPlan for NestedLoopJoinExec { // Right side has an order and it is maintained during operation. let right_side_ordered = self.maintains_input_order()[1] && self.right.output_ordering().is_some(); - Ok(Box::pin(NestedLoopJoinStream { - schema: Arc::clone(&self.schema), - filter: self.filter.clone(), - join_type: self.join_type, - outer_table, - inner_table, - is_exhausted: false, - column_indices: self.column_indices.clone(), - join_metrics, - indices_cache, - right_side_ordered, - })) + + if enforce_batch_size_in_joins { + Ok(Box::pin(NestedLoopJoinStream { + schema: Arc::clone(&self.schema), + filter: self.filter.clone(), + join_type: self.join_type, + outer_table, + inner_table, + column_indices: self.column_indices.clone(), + join_metrics, + indices_cache, + right_side_ordered, + state: NestedLoopJoinStreamState::WaitBuildSide, + batch_transformer: BatchSplitter::new(batch_size), + left_data: None, + })) + } else { + Ok(Box::pin(NestedLoopJoinStream { + schema: Arc::clone(&self.schema), + filter: self.filter.clone(), + join_type: self.join_type, + outer_table, + inner_table, + column_indices: self.column_indices.clone(), + join_metrics, + indices_cache, + right_side_ordered, + state: NestedLoopJoinStreamState::WaitBuildSide, + batch_transformer: NoopBatchTransformer::new(), + left_data: None, + })) + } } fn metrics(&self) -> Option { @@ -442,8 +472,37 @@ async fn collect_left_input( )) } +/// This enumeration represents various states of the nested loop join algorithm. 
+#[derive(Debug, Clone)]
+enum NestedLoopJoinStreamState {
+    /// The initial state, indicating that build-side data has not been collected yet
+    WaitBuildSide,
+    /// Indicates that build-side has been collected, and stream is ready for
+    /// fetching probe-side
+    FetchProbeBatch,
+    /// Indicates that a non-empty batch has been fetched from probe-side, and
+    /// is ready to be processed
+    ProcessProbeBatch(RecordBatch),
+    /// Indicates that probe-side has been fully processed
+    ExhaustedProbeSide,
+    /// Indicates that NestedLoopJoinStream execution is completed
+    Completed,
+}
+
+impl NestedLoopJoinStreamState {
+    /// Tries to extract the probe batch from the `ProcessProbeBatch` state of
+    /// the `NestedLoopJoinStreamState` enum. Returns an error if the state is
+    /// not `ProcessProbeBatch`.
+    fn try_as_process_probe_batch(&mut self) -> Result<&RecordBatch> {
+        match self {
+            NestedLoopJoinStreamState::ProcessProbeBatch(state) => Ok(state),
+            _ => internal_err!("Expected join stream in ProcessProbeBatch state"),
+        }
+    }
+}
+
 /// A stream that issues [RecordBatch]es as they arrive from the right of the join.
-struct NestedLoopJoinStream {
+struct NestedLoopJoinStream<T: BatchTransformer> {
     /// Input schema
     schema: Arc<Schema>,
     /// join filter
     filter: Option<JoinFilter>,
     /// type of the join
     join_type: JoinType,
     /// the outer table data of the nested loop join
     outer_table: SendableRecordBatchStream,
     /// the inner table data of the nested loop join
     inner_table: OnceFut<JoinLeftData>,
-    /// There is nothing to process anymore and left side is processed in case of full join
-    is_exhausted: bool,
     /// Information of index and left / right placement of columns
     column_indices: Vec<ColumnIndex>,
     // TODO: support null aware equal
     // null_equals_null: bool
     /// Join execution metrics
     join_metrics: BuildProbeJoinMetrics,
     /// Cache for join indices calculations
     indices_cache: (UInt64Array, UInt32Array),
     /// Whether the right side is ordered
     right_side_ordered: bool,
+    /// Current state of the stream
+    state: NestedLoopJoinStreamState,
+    /// Transforms the output batch before returning.
+ batch_transformer: T, + /// Result of the left data future + left_data: Option>, } /// Creates a Cartesian product of two input batches, preserving the order of the right batch, @@ -544,107 +607,164 @@ fn build_join_indices( } } -impl NestedLoopJoinStream { +impl NestedLoopJoinStream { fn poll_next_impl( &mut self, cx: &mut std::task::Context<'_>, ) -> Poll>> { - // all left row + loop { + return match self.state { + NestedLoopJoinStreamState::WaitBuildSide => { + handle_state!(ready!(self.collect_build_side(cx))) + } + NestedLoopJoinStreamState::FetchProbeBatch => { + handle_state!(ready!(self.fetch_probe_batch(cx))) + } + NestedLoopJoinStreamState::ProcessProbeBatch(_) => { + handle_state!(self.process_probe_batch()) + } + NestedLoopJoinStreamState::ExhaustedProbeSide => { + handle_state!(self.process_unmatched_build_batch()) + } + NestedLoopJoinStreamState::Completed => Poll::Ready(None), + }; + } + } + + fn collect_build_side( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> Poll>>> { let build_timer = self.join_metrics.build_time.timer(); - let left_data = match ready!(self.inner_table.get_shared(cx)) { - Ok(data) => data, - Err(e) => return Poll::Ready(Some(Err(e))), - }; + // build hash table from left (build) side, if not yet done + self.left_data = Some(ready!(self.inner_table.get_shared(cx))?); build_timer.done(); - // Get or initialize visited_left_side bitmap if required by join type + self.state = NestedLoopJoinStreamState::FetchProbeBatch; + + Poll::Ready(Ok(StatefulStreamResult::Continue)) + } + + /// Fetches next batch from probe-side + /// + /// If a non-empty batch has been fetched, updates state to + /// `ProcessProbeBatchState`, otherwise updates state to `ExhaustedProbeSide`. + fn fetch_probe_batch( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> Poll>>> { + match ready!(self.outer_table.poll_next_unpin(cx)) { + None => { + self.state = NestedLoopJoinStreamState::ExhaustedProbeSide; + } + Some(Ok(right_batch)) => { + self.state = NestedLoopJoinStreamState::ProcessProbeBatch(right_batch); + } + Some(Err(err)) => return Poll::Ready(Err(err)), + }; + + Poll::Ready(Ok(StatefulStreamResult::Continue)) + } + + /// Joins current probe batch with build-side data and produces batch with + /// matched output, updates state to `FetchProbeBatch`. 
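The driver loop above leans on `StatefulStreamResult` plus the crate's `handle_state!` macro: `Continue` means run the loop again, `Ready` means surface the value to the caller. A synchronous, executor-free model of the same state machine shape (all names hypothetical):

```rust
#[derive(Clone, Copy)]
enum State {
    WaitBuildSide,
    ProcessProbeBatch(u32),
    Completed,
}

enum StepResult {
    Ready(Option<u32>),
    Continue,
}

struct Machine {
    state: State,
}

impl Machine {
    // Each handler either produces output or just advances the state.
    fn step(&mut self) -> StepResult {
        match self.state {
            State::WaitBuildSide => {
                // "Build side collected": pretend there are 3 probe batches.
                self.state = State::ProcessProbeBatch(3);
                StepResult::Continue
            }
            State::ProcessProbeBatch(0) => {
                self.state = State::Completed;
                StepResult::Continue
            }
            State::ProcessProbeBatch(n) => {
                self.state = State::ProcessProbeBatch(n - 1);
                StepResult::Ready(Some(n))
            }
            State::Completed => StepResult::Ready(None),
        }
    }

    // Plays the role of `poll_next_impl` with `handle_state!` inlined.
    fn next(&mut self) -> Option<u32> {
        loop {
            match self.step() {
                StepResult::Ready(out) => return out,
                StepResult::Continue => continue,
            }
        }
    }
}

fn main() {
    let mut m = Machine { state: State::WaitBuildSide };
    assert_eq!(m.next(), Some(3));
    assert_eq!(m.next(), Some(2));
    assert_eq!(m.next(), Some(1));
    assert_eq!(m.next(), None);
}
```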
+ fn process_probe_batch( + &mut self, + ) -> Result>> { + let Some(left_data) = self.left_data.clone() else { + return internal_err!( + "Expected left_data to be Some in ProcessProbeBatch state" + ); + }; let visited_left_side = left_data.bitmap(); + let batch = self.state.try_as_process_probe_batch()?; + + match self.batch_transformer.next() { + None => { + // Setting up timer & updating input metrics + self.join_metrics.input_batches.add(1); + self.join_metrics.input_rows.add(batch.num_rows()); + let timer = self.join_metrics.join_time.timer(); + + let result = join_left_and_right_batch( + left_data.batch(), + batch, + self.join_type, + self.filter.as_ref(), + &self.column_indices, + &self.schema, + visited_left_side, + &mut self.indices_cache, + self.right_side_ordered, + ); + timer.done(); + + self.batch_transformer.set_batch(result?); + Ok(StatefulStreamResult::Continue) + } + Some((batch, last)) => { + if last { + self.state = NestedLoopJoinStreamState::FetchProbeBatch; + } - // Check is_exhausted before polling the outer_table, such that when the outer table - // does not support `FusedStream`, Self will not poll it again - if self.is_exhausted { - return Poll::Ready(None); + self.join_metrics.output_batches.add(1); + self.join_metrics.output_rows.add(batch.num_rows()); + Ok(StatefulStreamResult::Ready(Some(batch))) + } } + } - self.outer_table - .poll_next_unpin(cx) - .map(|maybe_batch| match maybe_batch { - Some(Ok(right_batch)) => { - // Setting up timer & updating input metrics - self.join_metrics.input_batches.add(1); - self.join_metrics.input_rows.add(right_batch.num_rows()); - let timer = self.join_metrics.join_time.timer(); - - let result = join_left_and_right_batch( - left_data.batch(), - &right_batch, - self.join_type, - self.filter.as_ref(), - &self.column_indices, - &self.schema, - visited_left_side, - &mut self.indices_cache, - self.right_side_ordered, - ); - - // Recording time & updating output metrics - if let Ok(batch) = &result { - timer.done(); - self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(batch.num_rows()); - } - - Some(result) - } - Some(err) => Some(err), - None => { - if need_produce_result_in_final(self.join_type) { - // At this stage `visited_left_side` won't be updated, so it's - // safe to report about probe completion. 
- // - // Setting `is_exhausted` / returning None will prevent from - // multiple calls of `report_probe_completed()` - if !left_data.report_probe_completed() { - self.is_exhausted = true; - return None; - }; - - // Only setting up timer, input is exhausted - let timer = self.join_metrics.join_time.timer(); - // use the global left bitmap to produce the left indices and right indices - let (left_side, right_side) = - get_final_indices_from_shared_bitmap( - visited_left_side, - self.join_type, - ); - let empty_right_batch = - RecordBatch::new_empty(self.outer_table.schema()); - // use the left and right indices to produce the batch result - let result = build_batch_from_indices( - &self.schema, - left_data.batch(), - &empty_right_batch, - &left_side, - &right_side, - &self.column_indices, - JoinSide::Left, - ); - self.is_exhausted = true; - - // Recording time & updating output metrics - if let Ok(batch) = &result { - timer.done(); - self.join_metrics.output_batches.add(1); - self.join_metrics.output_rows.add(batch.num_rows()); - } - - Some(result) - } else { - // end of the join loop - None - } - } - }) + /// Processes unmatched build-side rows for certain join types and produces + /// output batch, updates state to `Completed`. + fn process_unmatched_build_batch( + &mut self, + ) -> Result>> { + let Some(left_data) = self.left_data.clone() else { + return internal_err!( + "Expected left_data to be Some in ExhaustedProbeSide state" + ); + }; + let visited_left_side = left_data.bitmap(); + if need_produce_result_in_final(self.join_type) { + // At this stage `visited_left_side` won't be updated, so it's + // safe to report about probe completion. + // + // Setting `is_exhausted` / returning None will prevent from + // multiple calls of `report_probe_completed()` + if !left_data.report_probe_completed() { + self.state = NestedLoopJoinStreamState::Completed; + return Ok(StatefulStreamResult::Ready(None)); + }; + + // Only setting up timer, input is exhausted + let timer = self.join_metrics.join_time.timer(); + // use the global left bitmap to produce the left indices and right indices + let (left_side, right_side) = + get_final_indices_from_shared_bitmap(visited_left_side, self.join_type); + let empty_right_batch = RecordBatch::new_empty(self.outer_table.schema()); + // use the left and right indices to produce the batch result + let result = build_batch_from_indices( + &self.schema, + left_data.batch(), + &empty_right_batch, + &left_side, + &right_side, + &self.column_indices, + JoinSide::Left, + ); + self.state = NestedLoopJoinStreamState::Completed; + + // Recording time + if result.is_ok() { + timer.done(); + } + + Ok(StatefulStreamResult::Ready(Some(result?))) + } else { + // end of the join loop + self.state = NestedLoopJoinStreamState::Completed; + Ok(StatefulStreamResult::Ready(None)) + } } } @@ -684,7 +804,7 @@ fn join_left_and_right_batch( 0..right_batch.num_rows(), join_type, right_side_ordered, - ); + )?; build_batch_from_indices( schema, @@ -705,7 +825,7 @@ fn get_final_indices_from_shared_bitmap( get_final_indices_from_bit_map(&bitmap, join_type) } -impl Stream for NestedLoopJoinStream { +impl Stream for NestedLoopJoinStream { type Item = Result; fn poll_next( @@ -716,14 +836,14 @@ impl Stream for NestedLoopJoinStream { } } -impl RecordBatchStream for NestedLoopJoinStream { +impl RecordBatchStream for NestedLoopJoinStream { fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } } #[cfg(test)] -mod tests { +pub(crate) mod tests { use super::*; use crate::{ common, 
expressions::Column, memory::MemoryExec, repartition::RepartitionExec, @@ -850,7 +970,7 @@ mod tests { JoinFilter::new(filter_expression, column_indices, intermediate_schema) } - async fn multi_partitioned_join_collect( + pub(crate) async fn multi_partitioned_join_collect( left: Arc, right: Arc, join_type: &JoinType, diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index ba9384aef1a6..bddd152341da 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -31,8 +31,7 @@ use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder}; use arrow_schema::{Schema, SchemaRef}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{ - arrow_datafusion_err, plan_datafusion_err, DataFusionError, JoinSide, Result, - ScalarValue, + arrow_datafusion_err, DataFusionError, JoinSide, Result, ScalarValue, }; use datafusion_expr::interval_arithmetic::Interval; use datafusion_physical_expr::expressions::Column; @@ -369,34 +368,40 @@ impl SortedFilterExpr { filter_expr: Arc, filter_schema: &Schema, ) -> Result { - let dt = &filter_expr.data_type(filter_schema)?; + let dt = filter_expr.data_type(filter_schema)?; Ok(Self { origin_sorted_expr, filter_expr, - interval: Interval::make_unbounded(dt)?, + interval: Interval::make_unbounded(&dt)?, node_index: 0, }) } + /// Get origin expr information pub fn origin_sorted_expr(&self) -> &PhysicalSortExpr { &self.origin_sorted_expr } + /// Get filter expr information pub fn filter_expr(&self) -> &Arc { &self.filter_expr } + /// Get interval information pub fn interval(&self) -> &Interval { &self.interval } + /// Sets interval pub fn set_interval(&mut self, interval: Interval) { self.interval = interval; } + /// Node index in ExprIntervalGraph pub fn node_index(&self) -> usize { self.node_index } + /// Node index setter in ExprIntervalGraph pub fn set_node_index(&mut self, node_index: usize) { self.node_index = node_index; @@ -409,41 +414,45 @@ impl SortedFilterExpr { /// on the first or the last value of the expression in `build_input_buffer` /// and `probe_batch`. /// -/// # Arguments +/// # Parameters /// /// * `build_input_buffer` - The [RecordBatch] on the build side of the join. /// * `build_sorted_filter_expr` - Build side [SortedFilterExpr] to update. /// * `probe_batch` - The `RecordBatch` on the probe side of the join. /// * `probe_sorted_filter_expr` - Probe side `SortedFilterExpr` to update. /// -/// ### Note -/// ```text +/// ## Note /// -/// Interval arithmetic is used to calculate viable join ranges for build-side -/// pruning. This is done by first creating an interval for join filter values in -/// the build side of the join, which spans [-∞, FV] or [FV, ∞] depending on the -/// ordering (descending/ascending) of the filter expression. Here, FV denotes the -/// first value on the build side. This range is then compared with the probe side -/// interval, which either spans [-∞, LV] or [LV, ∞] depending on the ordering -/// (ascending/descending) of the probe side. Here, LV denotes the last value on -/// the probe side. +/// Utilizing interval arithmetic, this function computes feasible join intervals +/// on the pruning side by evaluating the prospective value ranges that might +/// emerge in subsequent data batches from the enforcer side. 
This is done by +/// first creating an interval for join filter values in the pruning side of the +/// join, which spans `[-∞, FV]` or `[FV, ∞]` depending on the ordering (descending/ +/// ascending) of the filter expression. Here, `FV` denotes the first value on the +/// pruning side. This range is then compared with the enforcer side interval, +/// which either spans `[-∞, LV]` or `[LV, ∞]` depending on the ordering (ascending/ +/// descending) of the probe side. Here, `LV` denotes the last value on the enforcer +/// side. /// /// As a concrete example, consider the following query: /// +/// ```text /// SELECT * FROM left_table, right_table /// WHERE /// left_key = right_key AND /// a > b - 3 AND /// a < b + 10 +/// ``` /// -/// where columns "a" and "b" come from tables "left_table" and "right_table", +/// where columns `a` and `b` come from tables `left_table` and `right_table`, /// respectively. When a new `RecordBatch` arrives at the right side, the -/// condition a > b - 3 will possibly indicate a prunable range for the left +/// condition `a > b - 3` will possibly indicate a prunable range for the left /// side. Conversely, when a new `RecordBatch` arrives at the left side, the -/// condition a < b + 10 will possibly indicate prunability for the right side. -/// Let’s inspect what happens when a new RecordBatch` arrives at the right +/// condition `a < b + 10` will possibly indicate prunability for the right side. +/// Let’s inspect what happens when a new `RecordBatch` arrives at the right /// side (i.e. when the left side is the build side): /// +/// ```text /// Build Probe /// +-------+ +-------+ /// | a | z | | b | y | @@ -456,13 +465,13 @@ impl SortedFilterExpr { /// |+--|--+| |+--|--+| /// | 7 | 1 | | 6 | 3 | /// +-------+ +-------+ +/// ``` /// /// In this case, the interval representing viable (i.e. joinable) values for -/// column "a" is [1, ∞], and the interval representing possible future values -/// for column "b" is [6, ∞]. With these intervals at hand, we next calculate +/// column `a` is `[1, ∞]`, and the interval representing possible future values +/// for column `b` is `[6, ∞]`. With these intervals at hand, we next calculate /// intervals for the whole filter expression and propagate join constraint by /// traversing the expression graph. -/// ``` pub fn calculate_filter_expr_intervals( build_input_buffer: &RecordBatch, build_sorted_filter_expr: &mut SortedFilterExpr, @@ -710,13 +719,21 @@ fn update_sorted_exprs_with_node_indices( } } -/// Prepares and sorts expressions based on a given filter, left and right execution plans, and sort expressions. +/// Prepares and sorts expressions based on a given filter, left and right schemas, +/// and sort expressions. /// -/// # Arguments +/// This function prepares sorted filter expressions for both the left and right +/// sides of a join operation. It first builds the filter order for each side +/// based on the provided `ExecutionPlan`. If both sides have valid sorted filter +/// expressions, the function then constructs an expression interval graph and +/// updates the sorted expressions with node indices. The final sorted filter +/// expressions for both sides are then returned. +/// +/// # Parameters /// /// * `filter` - The join filter to base the sorting on. -/// * `left` - The left execution plan. -/// * `right` - The right execution plan. +/// * `left` - The `ExecutionPlan` for the left side of the join. +/// * `right` - The `ExecutionPlan` for the right side of the join. 
/// * `left_sort_exprs` - The expressions to sort on the left side. /// * `right_sort_exprs` - The expressions to sort on the right side. /// @@ -730,9 +747,11 @@ pub fn prepare_sorted_exprs( left_sort_exprs: &[PhysicalSortExpr], right_sort_exprs: &[PhysicalSortExpr], ) -> Result<(SortedFilterExpr, SortedFilterExpr, ExprIntervalGraph)> { - // Build the filter order for the left side - let err = || plan_datafusion_err!("Filter does not include the child order"); + let err = || { + datafusion_common::plan_datafusion_err!("Filter does not include the child order") + }; + // Build the filter order for the left side: let left_temp_sorted_filter_expr = build_filter_input_order( JoinSide::Left, filter, @@ -741,7 +760,7 @@ pub fn prepare_sorted_exprs( )? .ok_or_else(err)?; - // Build the filter order for the right side + // Build the filter order for the right side: let right_temp_sorted_filter_expr = build_filter_input_order( JoinSide::Right, filter, @@ -952,15 +971,15 @@ pub mod tests { let filter_expr = complicated_filter(&intermediate_schema)?; let column_indices = vec![ ColumnIndex { - index: 0, + index: left_schema.index_of("la1")?, side: JoinSide::Left, }, ColumnIndex { - index: 4, + index: left_schema.index_of("la2")?, side: JoinSide::Left, }, ColumnIndex { - index: 0, + index: right_schema.index_of("ra1")?, side: JoinSide::Right, }, ]; diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index ac718a95e9f4..70ada3892aea 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -32,7 +32,6 @@ use std::task::{Context, Poll}; use std::vec; use crate::common::SharedMemoryReservation; -use crate::handle_state; use crate::joins::hash_join::{equal_rows_arr, update_hash}; use crate::joins::stream_join_utils::{ calculate_filter_expr_intervals, combine_two_batches, @@ -42,8 +41,9 @@ use crate::joins::stream_join_utils::{ }; use crate::joins::utils::{ apply_join_filter_to_indices, build_batch_from_indices, build_join_schema, - check_join_is_valid, symmetric_join_output_partitioning, ColumnIndex, JoinFilter, - JoinHashMapType, JoinOn, JoinOnRef, StatefulStreamResult, + check_join_is_valid, symmetric_join_output_partitioning, BatchSplitter, + BatchTransformer, ColumnIndex, JoinFilter, JoinHashMapType, JoinOn, JoinOnRef, + NoopBatchTransformer, StatefulStreamResult, }; use crate::{ execution_mode_from_children, @@ -465,23 +465,27 @@ impl ExecutionPlan for SymmetricHashJoinExec { consider using RepartitionExec" ); } - // If `filter_state` and `filter` are both present, then calculate sorted filter expressions - // for both sides, and build an expression graph. - let (left_sorted_filter_expr, right_sorted_filter_expr, graph) = - match (&self.left_sort_exprs, &self.right_sort_exprs, &self.filter) { - (Some(left_sort_exprs), Some(right_sort_exprs), Some(filter)) => { - let (left, right, graph) = prepare_sorted_exprs( - filter, - &self.left, - &self.right, - left_sort_exprs, - right_sort_exprs, - )?; - (Some(left), Some(right), Some(graph)) - } - // If `filter_state` or `filter` is not present, then return None for all three values: - _ => (None, None, None), - }; + // If `filter_state` and `filter` are both present, then calculate sorted + // filter expressions for both sides, and build an expression graph. 
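To make the pruning arithmetic described in `calculate_filter_expr_intervals` concrete, here is a worked miniature of the `a > b - 3` example from the doc comment, with a plain sorted slice standing in for `Interval`s (the real code propagates bounds through an `ExprIntervalGraph` instead):

```rust
// With filter `a > b - 3` and an ascending probe side, every future probe
// value satisfies b' >= last_b, so a build row can only ever match if
// a > last_b - 3. Rows at or below that bound are prunable.
fn prunable_prefix_len(build_a_sorted: &[i32], last_b: i32) -> usize {
    let lower_bound = last_b - 3;
    build_a_sorted.iter().take_while(|&&a| a <= lower_bound).count()
}

fn main() {
    // Mirrors the doc comment: viable a-interval [1, inf), future b-interval
    // [6, inf). Future b >= 6 implies joinable a > 3, so 1 and 3 are prunable.
    assert_eq!(prunable_prefix_len(&[1, 3, 5, 7], 6), 2);
}
```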
+ let (left_sorted_filter_expr, right_sorted_filter_expr, graph) = match ( + self.left_sort_exprs(), + self.right_sort_exprs(), + &self.filter, + ) { + (Some(left_sort_exprs), Some(right_sort_exprs), Some(filter)) => { + let (left, right, graph) = prepare_sorted_exprs( + filter, + &self.left, + &self.right, + left_sort_exprs, + right_sort_exprs, + )?; + (Some(left), Some(right), Some(graph)) + } + // If `filter_state` or `filter` is not present, then return None + // for all three values: + _ => (None, None, None), + }; let (on_left, on_right) = self.on.iter().cloned().unzip(); @@ -494,6 +498,10 @@ impl ExecutionPlan for SymmetricHashJoinExec { let right_stream = self.right.execute(partition, Arc::clone(&context))?; + let batch_size = context.session_config().batch_size(); + let enforce_batch_size_in_joins = + context.session_config().enforce_batch_size_in_joins(); + let reservation = Arc::new(Mutex::new( MemoryConsumer::new(format!("SymmetricHashJoinStream[{partition}]")) .register(context.memory_pool()), @@ -502,29 +510,52 @@ impl ExecutionPlan for SymmetricHashJoinExec { reservation.lock().try_grow(g.size())?; } - Ok(Box::pin(SymmetricHashJoinStream { - left_stream, - right_stream, - schema: self.schema(), - filter: self.filter.clone(), - join_type: self.join_type, - random_state: self.random_state.clone(), - left: left_side_joiner, - right: right_side_joiner, - column_indices: self.column_indices.clone(), - metrics: StreamJoinMetrics::new(partition, &self.metrics), - graph, - left_sorted_filter_expr, - right_sorted_filter_expr, - null_equals_null: self.null_equals_null, - state: SHJStreamState::PullRight, - reservation, - })) + if enforce_batch_size_in_joins { + Ok(Box::pin(SymmetricHashJoinStream { + left_stream, + right_stream, + schema: self.schema(), + filter: self.filter.clone(), + join_type: self.join_type, + random_state: self.random_state.clone(), + left: left_side_joiner, + right: right_side_joiner, + column_indices: self.column_indices.clone(), + metrics: StreamJoinMetrics::new(partition, &self.metrics), + graph, + left_sorted_filter_expr, + right_sorted_filter_expr, + null_equals_null: self.null_equals_null, + state: SHJStreamState::PullRight, + reservation, + batch_transformer: BatchSplitter::new(batch_size), + })) + } else { + Ok(Box::pin(SymmetricHashJoinStream { + left_stream, + right_stream, + schema: self.schema(), + filter: self.filter.clone(), + join_type: self.join_type, + random_state: self.random_state.clone(), + left: left_side_joiner, + right: right_side_joiner, + column_indices: self.column_indices.clone(), + metrics: StreamJoinMetrics::new(partition, &self.metrics), + graph, + left_sorted_filter_expr, + right_sorted_filter_expr, + null_equals_null: self.null_equals_null, + state: SHJStreamState::PullRight, + reservation, + batch_transformer: NoopBatchTransformer::new(), + })) + } } } /// A stream that issues [RecordBatch]es as they arrive from the right of the join. -struct SymmetricHashJoinStream { +struct SymmetricHashJoinStream { /// Input streams left_stream: SendableRecordBatchStream, right_stream: SendableRecordBatchStream, @@ -556,15 +587,19 @@ struct SymmetricHashJoinStream { reservation: SharedMemoryReservation, /// State machine for input execution state: SHJStreamState, + /// Transforms the output batch before returning. 
+ batch_transformer: T, } -impl RecordBatchStream for SymmetricHashJoinStream { +impl RecordBatchStream + for SymmetricHashJoinStream +{ fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } } -impl Stream for SymmetricHashJoinStream { +impl Stream for SymmetricHashJoinStream { type Item = Result; fn poll_next( @@ -1140,7 +1175,7 @@ impl OneSideHashJoiner { /// - Transition to `BothExhausted { final_result: true }`: /// - Occurs in `prepare_for_final_results_after_exhaustion` when both streams are /// exhausted, indicating completion of processing and availability of final results. -impl SymmetricHashJoinStream { +impl SymmetricHashJoinStream { /// Implements the main polling logic for the join stream. /// /// This method continuously checks the state of the join stream and @@ -1159,26 +1194,45 @@ impl SymmetricHashJoinStream { cx: &mut Context<'_>, ) -> Poll>> { loop { - return match self.state() { - SHJStreamState::PullRight => { - handle_state!(ready!(self.fetch_next_from_right_stream(cx))) - } - SHJStreamState::PullLeft => { - handle_state!(ready!(self.fetch_next_from_left_stream(cx))) + match self.batch_transformer.next() { + None => { + let result = match self.state() { + SHJStreamState::PullRight => { + ready!(self.fetch_next_from_right_stream(cx)) + } + SHJStreamState::PullLeft => { + ready!(self.fetch_next_from_left_stream(cx)) + } + SHJStreamState::RightExhausted => { + ready!(self.handle_right_stream_end(cx)) + } + SHJStreamState::LeftExhausted => { + ready!(self.handle_left_stream_end(cx)) + } + SHJStreamState::BothExhausted { + final_result: false, + } => self.prepare_for_final_results_after_exhaustion(), + SHJStreamState::BothExhausted { final_result: true } => { + return Poll::Ready(None); + } + }; + + match result? { + StatefulStreamResult::Ready(None) => { + return Poll::Ready(None); + } + StatefulStreamResult::Ready(Some(batch)) => { + self.batch_transformer.set_batch(batch); + } + _ => {} + } } - SHJStreamState::RightExhausted => { - handle_state!(ready!(self.handle_right_stream_end(cx))) - } - SHJStreamState::LeftExhausted => { - handle_state!(ready!(self.handle_left_stream_end(cx))) - } - SHJStreamState::BothExhausted { - final_result: false, - } => { - handle_state!(self.prepare_for_final_results_after_exhaustion()) + Some((batch, _)) => { + self.metrics.output_batches.add(1); + self.metrics.output_rows.add(batch.num_rows()); + return Poll::Ready(Some(Ok(batch))); } - SHJStreamState::BothExhausted { final_result: true } => Poll::Ready(None), - }; + } } } /// Asynchronously pulls the next batch from the right stream. @@ -1384,11 +1438,8 @@ impl SymmetricHashJoinStream { // Combine the left and right results: let result = combine_two_batches(&self.schema, left_result, right_result)?; - // Update the metrics and return the result: - if let Some(batch) = &result { - // Update the metrics: - self.metrics.output_batches.add(1); - self.metrics.output_rows.add(batch.num_rows()); + // Return the result: + if result.is_some() { return Ok(StatefulStreamResult::Ready(result)); } Ok(StatefulStreamResult::Continue) @@ -1523,11 +1574,6 @@ impl SymmetricHashJoinStream { let capacity = self.size(); self.metrics.stream_memory_usage.set(capacity); self.reservation.lock().try_resize(capacity)?; - // Update the metrics if we have a batch; otherwise, continue the loop. 
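The rewritten `poll_next_impl` above interleaves two sources of output: pending slices held by the transformer and fresh batches from the state machine. A compact synchronous model of that control flow (hypothetical types, with `Vec<i32>` standing in for `RecordBatch`):

```rust
struct SliceQueue {
    pending: Vec<i32>,
    max: usize,
}

impl SliceQueue {
    fn set_batch(&mut self, batch: Vec<i32>) {
        self.pending = batch;
    }

    fn next(&mut self) -> Option<Vec<i32>> {
        if self.pending.is_empty() {
            return None;
        }
        let take = self.pending.len().min(self.max);
        Some(self.pending.drain(..take).collect())
    }
}

fn poll(
    queue: &mut SliceQueue,
    mut run_state_machine: impl FnMut() -> Option<Vec<i32>>,
) -> Option<Vec<i32>> {
    loop {
        match queue.next() {
            // Nothing buffered: advance the join until it yields a batch.
            None => match run_state_machine() {
                Some(batch) => queue.set_batch(batch), // drained next iteration
                None => return None,                   // join exhausted
            },
            // Emit one bounded slice per call; output metrics are counted
            // here, which is why the per-batch metric updates moved out of
            // the individual handlers in this patch.
            Some(slice) => return Some(slice),
        }
    }
}

fn main() {
    let mut queue = SliceQueue { pending: vec![], max: 2 };
    let mut batches = vec![vec![1, 2, 3]].into_iter();
    let mut sizes = vec![];
    while let Some(slice) = poll(&mut queue, || batches.next()) {
        sizes.push(slice.len());
    }
    assert_eq!(sizes, vec![2, 1]);
}
```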
- if let Some(batch) = &result { - self.metrics.output_batches.add(1); - self.metrics.output_rows.add(batch.num_rows()); - } Ok(result) } } @@ -1716,15 +1762,15 @@ mod tests { let filter_expr = complicated_filter(&intermediate_schema)?; let column_indices = vec![ ColumnIndex { - index: 0, + index: left_schema.index_of("la1")?, side: JoinSide::Left, }, ColumnIndex { - index: 4, + index: left_schema.index_of("la2")?, side: JoinSide::Left, }, ColumnIndex { - index: 0, + index: right_schema.index_of("ra1")?, side: JoinSide::Right, }, ]; @@ -1771,10 +1817,7 @@ mod tests { vec![right_sorted], )?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("left", DataType::Int32, true), @@ -1825,10 +1868,7 @@ mod tests { let (left, right) = create_memory_table(left_partition, right_partition, vec![], vec![])?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("left", DataType::Int32, true), @@ -1877,10 +1917,7 @@ mod tests { let (left, right) = create_memory_table(left_partition, right_partition, vec![], vec![])?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; experiment(left, right, None, join_type, on, task_ctx).await?; Ok(()) } @@ -1926,10 +1963,7 @@ mod tests { vec![right_sorted], )?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("left", DataType::Int32, true), @@ -1987,10 +2021,7 @@ mod tests { vec![right_sorted], )?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("left", DataType::Int32, true), @@ -2048,10 +2079,7 @@ mod tests { vec![right_sorted], )?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("left", DataType::Int32, true), @@ -2111,10 +2139,7 @@ mod tests { vec![right_sorted], )?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("left", DataType::Int32, true), @@ -2170,10 +2195,7 @@ mod tests { vec![right_sorted], )?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) 
as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("0", DataType::Int32, true), @@ -2237,10 +2259,7 @@ mod tests { vec![right_sorted], )?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("0", DataType::Int32, true), @@ -2296,10 +2315,7 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let left_sorted = vec![PhysicalSortExpr { expr: col("lt1", left_schema)?, options: SortOptions { @@ -2380,10 +2396,7 @@ mod tests { let left_schema = &left_partition[0].schema(); let right_schema = &right_partition[0].schema(); - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let left_sorted = vec![PhysicalSortExpr { expr: col("li1", left_schema)?, options: SortOptions { @@ -2473,10 +2486,7 @@ mod tests { vec![right_sorted], )?; - let on = vec![( - Arc::new(Column::new_with_schema("lc1", left_schema)?) as _, - Arc::new(Column::new_with_schema("rc1", right_schema)?) as _, - )]; + let on = vec![(col("lc1", left_schema)?, col("rc1", right_schema)?)]; let intermediate_schema = Schema::new(vec![ Field::new("left", DataType::Float64, true), diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 89f3feaf07be..c520e4271416 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -546,15 +546,16 @@ pub struct ColumnIndex { pub side: JoinSide, } -/// Filter applied before join output +/// Filter applied before join output. Fields are crate-public to allow +/// downstream implementations to experiment with custom joins. 
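Construction still goes through `JoinFilter::new`, mirroring the tests touched by this patch; a self-contained sketch for a `left < right` filter over one `Int32` column from each side (crate paths assumed for this version):

```rust
use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema};
use datafusion_common::JoinSide;
use datafusion_expr::Operator;
use datafusion_physical_expr::expressions::{BinaryExpr, Column};
use datafusion_physical_plan::joins::utils::{ColumnIndex, JoinFilter};

fn main() {
    // Intermediate schema seen by the filter: one column per side.
    let intermediate_schema = Schema::new(vec![
        Field::new("left", DataType::Int32, true),
        Field::new("right", DataType::Int32, true),
    ]);
    // Which original columns feed the intermediate batch, and from which side.
    let column_indices = vec![
        ColumnIndex { index: 0, side: JoinSide::Left },
        ColumnIndex { index: 0, side: JoinSide::Right },
    ];
    // left < right, expressed over the intermediate schema.
    let expression = Arc::new(BinaryExpr::new(
        Arc::new(Column::new("left", 0)),
        Operator::Lt,
        Arc::new(Column::new("right", 1)),
    ));
    let _filter = JoinFilter::new(expression, column_indices, intermediate_schema);
}
```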
#[derive(Debug, Clone)] pub struct JoinFilter { /// Filter expression - expression: Arc, + pub(crate) expression: Arc, /// Column indices required to construct intermediate batch for filtering - column_indices: Vec, + pub(crate) column_indices: Vec, /// Physical schema of intermediate batch - schema: Schema, + pub(crate) schema: Schema, } impl JoinFilter { @@ -1280,15 +1281,15 @@ pub(crate) fn adjust_indices_by_join_type( adjust_range: Range, join_type: JoinType, preserve_order_for_right: bool, -) -> (UInt64Array, UInt32Array) { +) -> Result<(UInt64Array, UInt32Array)> { match join_type { JoinType::Inner => { // matched - (left_indices, right_indices) + Ok((left_indices, right_indices)) } JoinType::Left => { // matched - (left_indices, right_indices) + Ok((left_indices, right_indices)) // unmatched left row will be produced in the end of loop, and it has been set in the left visited bitmap } JoinType::Right => { @@ -1307,22 +1308,22 @@ pub(crate) fn adjust_indices_by_join_type( // need to remove the duplicated record in the right side let right_indices = get_semi_indices(adjust_range, &right_indices); // the left_indices will not be used later for the `right semi` join - (left_indices, right_indices) + Ok((left_indices, right_indices)) } JoinType::RightAnti => { // need to remove the duplicated record in the right side // get the anti index for the right side let right_indices = get_anti_indices(adjust_range, &right_indices); // the left_indices will not be used later for the `right anti` join - (left_indices, right_indices) + Ok((left_indices, right_indices)) } JoinType::LeftSemi | JoinType::LeftAnti => { // matched or unmatched left row will be produced in the end of loop // When visit the right batch, we can output the matched left row and don't need to wait the end of loop - ( + Ok(( UInt64Array::from_iter_values(vec![]), UInt32Array::from_iter_values(vec![]), - ) + )) } } } @@ -1347,27 +1348,64 @@ pub(crate) fn append_right_indices( right_indices: UInt32Array, adjust_range: Range, preserve_order_for_right: bool, -) -> (UInt64Array, UInt32Array) { +) -> Result<(UInt64Array, UInt32Array)> { if preserve_order_for_right { - append_probe_indices_in_order(left_indices, right_indices, adjust_range) + Ok(append_probe_indices_in_order( + left_indices, + right_indices, + adjust_range, + )) } else { let right_unmatched_indices = get_anti_indices(adjust_range, &right_indices); if right_unmatched_indices.is_empty() { - (left_indices, right_indices) + Ok((left_indices, right_indices)) } else { - let unmatched_size = right_unmatched_indices.len(); + // `into_builder()` can fail here when there is nothing to be filtered and + // left_indices or right_indices has the same reference to the cached indices. + // In that case, we use a slower alternative. 
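The fallback described above exists because `PrimitiveArray::into_builder` only succeeds when the array is the sole owner of its buffers; the cached indices can hold a second reference, in which case the array comes back via `Err` and must be copied. A standalone illustration of both paths:

```rust
use arrow_array::builder::Int32Builder;
use arrow_array::{Array, Int32Array};

fn main() {
    // Sole owner: the buffer can be reused in place.
    let owned = Int32Array::from(vec![1, 2, 3]);
    let mut builder = owned.into_builder().expect("unique buffer is reusable");
    builder.append_value(4);
    assert_eq!(builder.finish().len(), 4);

    // Shared buffer (e.g. the cached indices case): `into_builder` hands the
    // array back via `Err`, and we fall back to copying into a fresh builder.
    let shared = Int32Array::from(vec![1, 2, 3]);
    let _other_ref = shared.clone(); // keeps a second reference to the buffer
    let mut builder = match shared.into_builder() {
        Ok(b) => b,
        Err(arr) => {
            let mut b = Int32Builder::with_capacity(arr.len() + 1);
            b.append_slice(arr.values());
            b
        }
    };
    builder.append_value(4);
    assert_eq!(builder.finish().len(), 4);
}
```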
+ // the new left indices: left_indices + null array + let mut new_left_indices_builder = + left_indices.into_builder().unwrap_or_else(|left_indices| { + let mut builder = UInt64Builder::with_capacity( + left_indices.len() + right_unmatched_indices.len(), + ); + debug_assert_eq!( + left_indices.null_count(), + 0, + "expected left indices to have no nulls" + ); + builder.append_slice(left_indices.values()); + builder + }); + new_left_indices_builder.append_nulls(right_unmatched_indices.len()); + let new_left_indices = UInt64Array::from(new_left_indices_builder.finish()); + // the new right indices: right_indices + right_unmatched_indices - let new_left_indices = left_indices - .iter() - .chain(std::iter::repeat(None).take(unmatched_size)) - .collect(); - let new_right_indices = right_indices - .iter() - .chain(right_unmatched_indices.iter()) - .collect(); - (new_left_indices, new_right_indices) + let mut new_right_indices_builder = right_indices + .into_builder() + .unwrap_or_else(|right_indices| { + let mut builder = UInt32Builder::with_capacity( + right_indices.len() + right_unmatched_indices.len(), + ); + debug_assert_eq!( + right_indices.null_count(), + 0, + "expected right indices to have no nulls" + ); + builder.append_slice(right_indices.values()); + builder + }); + debug_assert_eq!( + right_unmatched_indices.null_count(), + 0, + "expected right unmatched indices to have no nulls" + ); + new_right_indices_builder.append_slice(right_unmatched_indices.values()); + let new_right_indices = UInt32Array::from(new_right_indices_builder.finish()); + + Ok((new_left_indices, new_right_indices)) } } } @@ -1635,6 +1673,91 @@ pub(crate) fn asymmetric_join_output_partitioning( } } +/// Trait for incrementally generating Join output. +/// +/// This trait is used to limit some join outputs +/// so it does not produce single large batches +pub(crate) trait BatchTransformer: Debug + Clone { + /// Sets the next `RecordBatch` to be processed. + fn set_batch(&mut self, batch: RecordBatch); + + /// Retrieves the next `RecordBatch` from the transformer. + /// Returns `None` if all batches have been produced. + /// The boolean flag indicates whether the batch is the last one. + fn next(&mut self) -> Option<(RecordBatch, bool)>; +} + +#[derive(Debug, Clone)] +/// A batch transformer that does nothing. +pub(crate) struct NoopBatchTransformer { + /// RecordBatch to be processed + batch: Option, +} + +impl NoopBatchTransformer { + pub fn new() -> Self { + Self { batch: None } + } +} + +impl BatchTransformer for NoopBatchTransformer { + fn set_batch(&mut self, batch: RecordBatch) { + self.batch = Some(batch); + } + + fn next(&mut self) -> Option<(RecordBatch, bool)> { + self.batch.take().map(|batch| (batch, true)) + } +} + +#[derive(Debug, Clone)] +/// Splits large batches into smaller batches with a maximum number of rows. +pub(crate) struct BatchSplitter { + /// RecordBatch to be split + batch: Option, + /// Maximum number of rows in a split batch + batch_size: usize, + /// Current row index + row_index: usize, +} + +impl BatchSplitter { + /// Creates a new `BatchSplitter` with the specified batch size. 
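Usage follows the `BatchTransformer` contract documented above: one `set_batch`, then repeated `next` calls until the `last` flag is set, after which `next` returns `None`. Since the types are `pub(crate)`, this sketch lives inside the crate and borrows the `create_test_batch` helper from the tests further down:

```rust
fn split_sizes(num_rows: usize, batch_size: usize) -> Vec<(usize, bool)> {
    let mut splitter = BatchSplitter::new(batch_size);
    splitter.set_batch(create_test_batch(num_rows));

    let mut sizes = vec![];
    while let Some((slice, last)) = splitter.next() {
        // Each slice is produced zero-copy via `RecordBatch::slice`.
        sizes.push((slice.num_rows(), last));
    }
    sizes
}

// split_sizes(10, 4) yields [(4, false), (4, false), (2, true)].
```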
+ pub(crate) fn new(batch_size: usize) -> Self { + Self { + batch: None, + batch_size, + row_index: 0, + } + } +} + +impl BatchTransformer for BatchSplitter { + fn set_batch(&mut self, batch: RecordBatch) { + self.batch = Some(batch); + self.row_index = 0; + } + + fn next(&mut self) -> Option<(RecordBatch, bool)> { + let Some(batch) = &self.batch else { + return None; + }; + + let remaining_rows = batch.num_rows() - self.row_index; + let rows_to_slice = remaining_rows.min(self.batch_size); + let sliced_batch = batch.slice(self.row_index, rows_to_slice); + self.row_index += rows_to_slice; + + let mut last = false; + if self.row_index >= batch.num_rows() { + self.batch = None; + last = true; + } + + Some((sliced_batch, last)) + } +} + #[cfg(test)] mod tests { use std::pin::Pin; @@ -1643,11 +1766,13 @@ mod tests { use arrow::datatypes::{DataType, Fields}; use arrow::error::{ArrowError, Result as ArrowResult}; + use arrow_array::Int32Array; use arrow_schema::SortOptions; - use datafusion_common::stats::Precision::{Absent, Exact, Inexact}; use datafusion_common::{arrow_datafusion_err, arrow_err, ScalarValue}; + use rstest::rstest; + fn check( left: &[Column], right: &[Column], @@ -2554,4 +2679,49 @@ mod tests { Ok(()) } + + fn create_test_batch(num_rows: usize) -> RecordBatch { + let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])); + let data = Arc::new(Int32Array::from_iter_values(0..num_rows as i32)); + RecordBatch::try_new(schema, vec![data]).unwrap() + } + + fn assert_split_batches( + batches: Vec<(RecordBatch, bool)>, + batch_size: usize, + num_rows: usize, + ) { + let mut row_count = 0; + for (batch, last) in batches.into_iter() { + assert_eq!(batch.num_rows(), (num_rows - row_count).min(batch_size)); + let column = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + assert_eq!(column.value(i), i as i32 + row_count as i32); + } + row_count += batch.num_rows(); + assert_eq!(last, row_count == num_rows); + } + } + + #[rstest] + #[test] + fn test_batch_splitter( + #[values(1, 3, 11)] batch_size: usize, + #[values(1, 6, 50)] num_rows: usize, + ) { + let mut splitter = BatchSplitter::new(batch_size); + splitter.set_batch(create_test_batch(num_rows)); + + let mut batches = Vec::with_capacity(num_rows.div_ceil(batch_size)); + while let Some(batch) = splitter.next() { + batches.push(batch); + } + + assert!(splitter.next().is_none()); + assert_split_batches(batches, batch_size, num_rows); + } } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 7acdf25b6596..57bf029a63c1 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -173,6 +173,7 @@ datafusion.execution.batch_size 8192 datafusion.execution.coalesce_batches true datafusion.execution.collect_statistics false datafusion.execution.enable_recursive_ctes true +datafusion.execution.enforce_batch_size_in_joins false datafusion.execution.keep_partition_by_columns false datafusion.execution.listing_table_ignore_subdirectory true datafusion.execution.max_buffered_batches_per_output_file 2 @@ -263,6 +264,7 @@ datafusion.execution.batch_size 8192 Default batch size while creating new batch datafusion.execution.coalesce_batches true When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. 
This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting datafusion.execution.collect_statistics false Should DataFusion collect statistics after listing files datafusion.execution.enable_recursive_ctes true Should DataFusion support recursive CTEs +datafusion.execution.enforce_batch_size_in_joins false Should DataFusion enforce batch size in joins or not. By default, DataFusion will not enforce batch size in joins. Enforcing batch size in joins can reduce memory usage when joining large tables with a highly-selective join filter, but is also slightly slower. datafusion.execution.keep_partition_by_columns false Should DataFusion keep the columns used for partition_by in the output RecordBatches datafusion.execution.listing_table_ignore_subdirectory true Should sub directories be ignored when scanning directories for data files. Defaults to true (ignores subdirectories), consistent with Hive. Note that this setting does not affect reading partitioned tables (e.g. `/table/year=2021/month=01/data.parquet`). datafusion.execution.max_buffered_batches_per_output_file 2 This is the maximum number of RecordBatches buffered for each output file being worked. Higher values can potentially give faster write performance at the cost of higher peak memory consumption diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index f34d148f092f..c61a7b673334 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -91,6 +91,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.skip_partial_aggregation_probe_ratio_threshold | 0.8 | Aggregation ratio (number of distinct groups / number of input rows) threshold for skipping partial aggregation. If the value is greater then partial aggregation will skip aggregation for further input | | datafusion.execution.skip_partial_aggregation_probe_rows_threshold | 100000 | Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode | | datafusion.execution.use_row_number_estimates_to_optimize_partitioning | false | Should DataFusion use row number estimates at the input to decide whether increasing parallelism is beneficial or not. By default, only exact row numbers (not estimates) are used for this decision. Setting this flag to `true` will likely produce better plans. if the source of statistics is accurate. We plan to make this the default in the future. | +| datafusion.execution.enforce_batch_size_in_joins | false | Should DataFusion enforce batch size in joins or not. By default, DataFusion will not enforce batch size in joins. Enforcing batch size in joins can reduce memory usage when joining large tables with a highly-selective join filter, but is also slightly slower. | | datafusion.optimizer.enable_distinct_aggregation_soft_limit | true | When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. 
| | datafusion.optimizer.enable_round_robin_repartition | true | When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores | | datafusion.optimizer.enable_topk_aggregation | true | When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible | From e9435a920ed84a1956b23e7ab6d13fe833cce3eb Mon Sep 17 00:00:00 2001 From: yi wang <48236141+my-vegetable-has-exploded@users.noreply.github.com> Date: Sat, 19 Oct 2024 00:52:23 +0800 Subject: [PATCH 15/15] =?UTF-8?q?Fix=EF=BC=9Afix=20HashJoin=20projection?= =?UTF-8?q?=20swap=20(#12967)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * swap_hash_join works with joins with projections * use non swapped hash join's projection * clean up * fix hashjoin projection swap. * assert hashjoinexec. * Update datafusion/core/src/physical_optimizer/join_selection.rs Co-authored-by: Eduard Karacharov * fix clippy. --------- Co-authored-by: Onur Satici Co-authored-by: Eduard Karacharov --- .../src/physical_optimizer/join_selection.rs | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 499fb9cbbcf0..dfaa7dbb8910 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -183,13 +183,15 @@ pub fn swap_hash_join( partition_mode, hash_join.null_equals_null(), )?; + // In case of anti / semi joins or if there is embedded projection in HashJoinExec, output column order is preserved, no need to add projection again if matches!( hash_join.join_type(), JoinType::LeftSemi | JoinType::RightSemi | JoinType::LeftAnti | JoinType::RightAnti - ) { + ) || hash_join.projection.is_some() + { Ok(Arc::new(new_join)) } else { // TODO avoid adding ProjectionExec again and again, only adding Final Projection @@ -1287,6 +1289,33 @@ mod tests_statistical { ); } + #[tokio::test] + async fn test_hash_join_swap_on_joins_with_projections() -> Result<()> { + let (big, small) = create_big_and_small(); + let join = Arc::new(HashJoinExec::try_new( + Arc::clone(&big), + Arc::clone(&small), + vec![( + Arc::new(Column::new_with_schema("big_col", &big.schema())?), + Arc::new(Column::new_with_schema("small_col", &small.schema())?), + )], + None, + &JoinType::Inner, + Some(vec![1]), + PartitionMode::Partitioned, + false, + )?); + let swapped = swap_hash_join(&join.clone(), PartitionMode::Partitioned) + .expect("swap_hash_join must support joins with projections"); + let swapped_join = swapped.as_any().downcast_ref::().expect( + "ProjectionExec won't be added above if HashJoinExec contains embedded projection", + ); + assert_eq!(swapped_join.projection, Some(vec![0_usize])); + assert_eq!(swapped.schema().fields.len(), 1); + assert_eq!(swapped.schema().fields[0].name(), "small_col"); + Ok(()) + } + #[tokio::test] async fn test_swap_reverting_projection() { let left_schema = Schema::new(vec![