From e9b6e3fab21def91326e5f73e5844529a2453c3d Mon Sep 17 00:00:00 2001 From: Matt Green Date: Mon, 23 Sep 2024 09:56:38 -0700 Subject: [PATCH] add Python udf example (#42) * include datafusion-python as sub module * copy datafusion-python code in * find/replace datafusion._internal -> denormalized._internal.datafusion * update imports to make things work * add example udf * fix cargo * rename example file * update cargo lock --- Cargo.lock | 34 +- py-denormalized/pyproject.toml | 1 + .../python/denormalized/context.py | 3 +- .../denormalized/datafusion/__init__.py | 112 + .../python/denormalized/datafusion/catalog.py | 76 + .../python/denormalized/datafusion/common.py | 62 + .../python/denormalized/datafusion/context.py | 1029 +++++++ .../denormalized/datafusion/dataframe.py | 572 ++++ .../python/denormalized/datafusion/expr.py | 718 +++++ .../denormalized/datafusion/functions.py | 2659 +++++++++++++++++ .../denormalized/datafusion/input/__init__.py | 27 + .../denormalized/datafusion/input/base.py | 48 + .../denormalized/datafusion/input/location.py | 89 + .../denormalized/datafusion/object_store.py | 35 + .../python/denormalized/datafusion/py.typed | 16 + .../denormalized/datafusion/record_batch.py | 76 + .../python/denormalized/datafusion/udf.py | 248 ++ .../python/denormalized/datastream.py | 37 +- py-denormalized/python/denormalized/utils.py | 13 + .../python/examples/stream_aggregate.py | 23 +- .../python/examples/udf_example.py | 60 + py-denormalized/src/datastream.rs | 5 + py-denormalized/src/lib.rs | 20 +- 23 files changed, 5906 insertions(+), 57 deletions(-) create mode 100644 py-denormalized/python/denormalized/datafusion/__init__.py create mode 100644 py-denormalized/python/denormalized/datafusion/catalog.py create mode 100644 py-denormalized/python/denormalized/datafusion/common.py create mode 100644 py-denormalized/python/denormalized/datafusion/context.py create mode 100644 py-denormalized/python/denormalized/datafusion/dataframe.py create mode 100644 py-denormalized/python/denormalized/datafusion/expr.py create mode 100644 py-denormalized/python/denormalized/datafusion/functions.py create mode 100644 py-denormalized/python/denormalized/datafusion/input/__init__.py create mode 100644 py-denormalized/python/denormalized/datafusion/input/base.py create mode 100644 py-denormalized/python/denormalized/datafusion/input/location.py create mode 100644 py-denormalized/python/denormalized/datafusion/object_store.py create mode 100644 py-denormalized/python/denormalized/datafusion/py.typed create mode 100644 py-denormalized/python/denormalized/datafusion/record_batch.py create mode 100644 py-denormalized/python/denormalized/datafusion/udf.py create mode 100644 py-denormalized/python/denormalized/utils.py create mode 100644 py-denormalized/python/examples/udf_example.py diff --git a/Cargo.lock b/Cargo.lock index c76efd5..c482824 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1236,7 +1236,7 @@ dependencies = [ [[package]] name = "datafusion-python" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/datafusion-python?branch=denormalized-tweaks#b6d50fff2e0a8b4a5c01b20877c8b1d120a257e1" +source = "git+https://github.com/probably-nothing-labs/datafusion-python?branch=denormalized-tweaks#91642ad59f0c98ee1450dc80883b64b5ef1edfbb" dependencies = [ "arrow", "async-trait", @@ -2524,9 +2524,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "portable-atomic" @@ -2564,9 +2564,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2ecbe40f08db5c006b5764a2645f7f3f141ce756412ac9e1dd6087e6d32995" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" dependencies = [ "bytes", "prost-derive", @@ -2574,9 +2574,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", "itertools 0.13.0", @@ -2587,9 +2587,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60caa6738c7369b940c3d49246a8d1749323674c65cb13010134f5c9bad5b519" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" dependencies = [ "prost", ] @@ -3190,18 +3190,18 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "snafu" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" dependencies = [ "snafu-derive", ] [[package]] name = "snafu-derive" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -3357,18 +3357,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.63" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.63" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", diff --git a/py-denormalized/pyproject.toml b/py-denormalized/pyproject.toml index a630d1c..4eb7bb8 100644 --- a/py-denormalized/pyproject.toml +++ b/py-denormalized/pyproject.toml @@ -30,6 +30,7 @@ dev-dependencies = ["pip>=24.2", "ipython>=8.26.0", "pytest>=8.3.2"] # Enable docstring linting using the google style guide [tool.ruff.lint] select = ["E4", "E7", "E9", "F", "D", "W"] +ignore = ["D103"] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/py-denormalized/python/denormalized/context.py b/py-denormalized/python/denormalized/context.py index ce444b5..90ca5d2 100644 --- a/py-denormalized/python/denormalized/context.py +++ b/py-denormalized/python/denormalized/context.py @@ -1,6 
+1,5 @@ from denormalized._internal import PyContext - -from denormalized.datastream import DataStream as DataStream +from .datastream import DataStream class Context: """Context.""" diff --git a/py-denormalized/python/denormalized/datafusion/__init__.py b/py-denormalized/python/denormalized/datafusion/__init__.py new file mode 100644 index 0000000..7419ad7 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/__init__.py @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""DataFusion python package. + +This is a Python library that binds to Apache Arrow in-memory query engine DataFusion. +See https://datafusion.apache.org/python for more information. +""" + +try: + import importlib.metadata as importlib_metadata +except ImportError: + import importlib_metadata + +from .context import ( + SessionContext, + SessionConfig, + RuntimeConfig, + SQLOptions, +) + +from .catalog import Catalog, Database, Table + +# The following imports are okay to remain as opaque to the user. +from denormalized._internal import Config, LogicalPlan, ExecutionPlan, runtime + +from .record_batch import RecordBatchStream, RecordBatch + +from .udf import ScalarUDF, AggregateUDF, Accumulator + +from .common import ( + DFSchema, +) + +from .dataframe import DataFrame + +from .expr import ( + Expr, + WindowFrame, +) + +from . import functions, object_store + +__all__ = [ + "Accumulator", + "Config", + "DataFrame", + "SessionContext", + "SessionConfig", + "SQLOptions", + "RuntimeConfig", + "Expr", + "ScalarUDF", + "WindowFrame", + "column", + "col", + "literal", + "lit", + "DFSchema", + "runtime", + "Catalog", + "Database", + "Table", + "AggregateUDF", + "LogicalPlan", + "ExecutionPlan", + "RecordBatch", + "RecordBatchStream", + "common", + "expr", + "functions", + "object_store", +] + + +def column(value: str): + """Create a column expression.""" + return Expr.column(value) + + +def col(value: str): + """Create a column expression.""" + return Expr.column(value) + + +def literal(value): + """Create a literal expression.""" + return Expr.literal(value) + + +def lit(value): + """Create a literal expression.""" + return Expr.literal(value) + + +udf = ScalarUDF.udf + +udaf = AggregateUDF.udaf diff --git a/py-denormalized/python/denormalized/datafusion/catalog.py b/py-denormalized/python/denormalized/datafusion/catalog.py new file mode 100644 index 0000000..d8c9092 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/catalog.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Data catalog providers.""" + +from __future__ import annotations + +import denormalized._internal as df_internal + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import pyarrow + + +class Catalog: + """DataFusion data catalog.""" + + def __init__(self, catalog: df_internal.Catalog) -> None: + """This constructor is not typically called by the end user.""" + self.catalog = catalog + + def names(self) -> list[str]: + """Returns the list of databases in this catalog.""" + return self.catalog.names() + + def database(self, name: str = "public") -> Database: + """Returns the database with the given ``name`` from this catalog.""" + return Database(self.catalog.database(name)) + + +class Database: + """DataFusion Database.""" + + def __init__(self, db: df_internal.Database) -> None: + """This constructor is not typically called by the end user.""" + self.db = db + + def names(self) -> set[str]: + """Returns the list of all tables in this database.""" + return self.db.names() + + def table(self, name: str) -> Table: + """Return the table with the given ``name`` from this database.""" + return Table(self.db.table(name)) + + +class Table: + """DataFusion table.""" + + def __init__(self, table: df_internal.Table) -> None: + """This constructor is not typically called by the end user.""" + self.table = table + + def schema(self) -> pyarrow.Schema: + """Returns the schema associated with this table.""" + return self.table.schema() + + @property + def kind(self) -> str: + """Returns the kind of table.""" + return self.table.kind() diff --git a/py-denormalized/python/denormalized/datafusion/common.py b/py-denormalized/python/denormalized/datafusion/common.py new file mode 100644 index 0000000..73ed7c4 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/common.py @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
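The new package ``__init__`` above re-exports the datafusion-python surface (``SessionContext``, ``col``, ``lit``, ``udf``, ``udaf``) from the vendored ``denormalized.datafusion`` module, so downstream code only has to swap the import path. A minimal sketch of the re-exported helpers in use; the table name, column names and values are placeholders::

    from denormalized.datafusion import SessionContext, col, lit

    ctx = SessionContext()
    # Register a tiny in-memory table; from_pydict infers the schema from the lists.
    df = ctx.from_pydict({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]}, name="t")
    # col/lit build expressions exactly as in upstream datafusion-python.
    df = df.select(col("a"), (col("b") * lit(2.0)).alias("b_doubled"))
    df.show()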
+"""Common data types used throughout the DataFusion project.""" + +from denormalized._internal import common as common_internal +from enum import Enum + +# TODO these should all have proper wrapper classes + +DFSchema = common_internal.DFSchema +DataType = common_internal.DataType +DataTypeMap = common_internal.DataTypeMap +PythonType = common_internal.PythonType +RexType = common_internal.RexType +SqlFunction = common_internal.SqlFunction +SqlSchema = common_internal.SqlSchema +SqlStatistics = common_internal.SqlStatistics +SqlTable = common_internal.SqlTable +SqlType = common_internal.SqlType +SqlView = common_internal.SqlView + +__all__ = [ + "DFSchema", + "DataType", + "DataTypeMap", + "RexType", + "PythonType", + "SqlType", + "NullTreatment", + "SqlTable", + "SqlSchema", + "SqlView", + "SqlStatistics", + "SqlFunction", +] + + +class NullTreatment(Enum): + """Describe how null values are to be treated by functions. + + This is used primarily by aggregate and window functions. It can be set on + these functions using the builder approach described in + ref:`_window_functions` and ref:`_aggregation` in the online documentation. + + """ + + RESPECT_NULLS = common_internal.NullTreatment.RESPECT_NULLS + IGNORE_NULLS = common_internal.NullTreatment.IGNORE_NULLS diff --git a/py-denormalized/python/denormalized/datafusion/context.py b/py-denormalized/python/denormalized/datafusion/context.py new file mode 100644 index 0000000..19c0760 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/context.py @@ -0,0 +1,1029 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Session Context and it's associated configuration.""" + +from __future__ import annotations + +from denormalized._internal import SessionConfig as SessionConfigInternal +from denormalized._internal import RuntimeConfig as RuntimeConfigInternal +from denormalized._internal import SQLOptions as SQLOptionsInternal +from denormalized._internal import SessionContext as SessionContextInternal +from denormalized._internal import LogicalPlan, ExecutionPlan + +from denormalized._internal import AggregateUDF +from denormalized.datafusion.catalog import Catalog, Table +from denormalized.datafusion.dataframe import DataFrame +from denormalized.datafusion.expr import Expr, SortExpr, sort_list_to_raw_sort_list +from denormalized.datafusion.record_batch import RecordBatchStream +from denormalized.datafusion.udf import ScalarUDF + +from typing import Any, TYPE_CHECKING +from typing_extensions import deprecated + +if TYPE_CHECKING: + import pyarrow + import pandas + import polars + import pathlib + + +class SessionConfig: + """Session configuration options.""" + + def __init__(self, config_options: dict[str, str] | None = None) -> None: + """Create a new :py:class:`SessionConfig` with the given configuration options. + + Args: + config_options: Configuration options. + """ + self.config_internal = SessionConfigInternal(config_options) + + def with_create_default_catalog_and_schema( + self, enabled: bool = True + ) -> SessionConfig: + """Control if the default catalog and schema will be automatically created. + + Args: + enabled: Whether the default catalog and schema will be + automatically created. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = ( + self.config_internal.with_create_default_catalog_and_schema(enabled) + ) + return self + + def with_default_catalog_and_schema( + self, catalog: str, schema: str + ) -> SessionConfig: + """Select a name for the default catalog and schema. + + Args: + catalog: Catalog name. + schema: Schema name. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_default_catalog_and_schema( + catalog, schema + ) + return self + + def with_information_schema(self, enabled: bool = True) -> SessionConfig: + """Enable or disable the inclusion of ``information_schema`` virtual tables. + + Args: + enabled: Whether to include ``information_schema`` virtual tables. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_information_schema(enabled) + return self + + def with_batch_size(self, batch_size: int) -> SessionConfig: + """Customize batch size. + + Args: + batch_size: Batch size. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_batch_size(batch_size) + return self + + def with_target_partitions(self, target_partitions: int) -> SessionConfig: + """Customize the number of target partitions for query execution. + + Increasing partitions can increase concurrency. + + Args: + target_partitions: Number of target partitions. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_target_partitions( + target_partitions + ) + return self + + def with_repartition_aggregations(self, enabled: bool = True) -> SessionConfig: + """Enable or disable the use of repartitioning for aggregations. 
+ + Enabling this improves parallelism. + + Args: + enabled: Whether to use repartitioning for aggregations. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_repartition_aggregations( + enabled + ) + return self + + def with_repartition_joins(self, enabled: bool = True) -> SessionConfig: + """Enable or disable the use of repartitioning for joins to improve parallelism. + + Args: + enabled: Whether to use repartitioning for joins. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_repartition_joins(enabled) + return self + + def with_repartition_windows(self, enabled: bool = True) -> SessionConfig: + """Enable or disable the use of repartitioning for window functions. + + This may improve parallelism. + + Args: + enabled: Whether to use repartitioning for window functions. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_repartition_windows(enabled) + return self + + def with_repartition_sorts(self, enabled: bool = True) -> SessionConfig: + """Enable or disable the use of repartitioning for window functions. + + This may improve parallelism. + + Args: + enabled: Whether to use repartitioning for window functions. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_repartition_sorts(enabled) + return self + + def with_repartition_file_scans(self, enabled: bool = True) -> SessionConfig: + """Enable or disable the use of repartitioning for file scans. + + Args: + enabled: Whether to use repartitioning for file scans. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_repartition_file_scans(enabled) + return self + + def with_repartition_file_min_size(self, size: int) -> SessionConfig: + """Set minimum file range size for repartitioning scans. + + Args: + size: Minimum file range size. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_repartition_file_min_size(size) + return self + + def with_parquet_pruning(self, enabled: bool = True) -> SessionConfig: + """Enable or disable the use of pruning predicate for parquet readers. + + Pruning predicates will enable the reader to skip row groups. + + Args: + enabled: Whether to use pruning predicate for parquet readers. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_parquet_pruning(enabled) + return self + + def set(self, key: str, value: str) -> SessionConfig: + """Set a configuration option. + + Args: + key: Option key. + value: Option value. + + Returns: + A new :py:class:`SessionConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.set(key, value) + return self + + +class RuntimeConfig: + """Runtime configuration options.""" + + def __init__(self) -> None: + """Create a new :py:class:`RuntimeConfig` with default values.""" + self.config_internal = RuntimeConfigInternal() + + def with_disk_manager_disabled(self) -> RuntimeConfig: + """Disable the disk manager, attempts to create temporary files will error. + + Returns: + A new :py:class:`RuntimeConfig` object with the updated setting. 
+ """ + self.config_internal = self.config_internal.with_disk_manager_disabled() + return self + + def with_disk_manager_os(self) -> RuntimeConfig: + """Use the operating system's temporary directory for disk manager. + + Returns: + A new :py:class:`RuntimeConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_disk_manager_os() + return self + + def with_disk_manager_specified(self, *paths: str | pathlib.Path) -> RuntimeConfig: + """Use the specified paths for the disk manager's temporary files. + + Args: + paths: Paths to use for the disk manager's temporary files. + + Returns: + A new :py:class:`RuntimeConfig` object with the updated setting. + """ + paths = [str(p) for p in paths] + self.config_internal = self.config_internal.with_disk_manager_specified(paths) + return self + + def with_unbounded_memory_pool(self) -> RuntimeConfig: + """Use an unbounded memory pool. + + Returns: + A new :py:class:`RuntimeConfig` object with the updated setting. + """ + self.config_internal = self.config_internal.with_unbounded_memory_pool() + return self + + def with_fair_spill_pool(self, size: int) -> RuntimeConfig: + """Use a fair spill pool with the specified size. + + This pool works best when you know beforehand the query has multiple spillable + operators that will likely all need to spill. Sometimes it will cause spills + even when there was sufficient memory (reserved for other operators) to avoid + doing so:: + + ┌───────────────────────z──────────────────────z───────────────┐ + │ z z │ + │ z z │ + │ Spillable z Unspillable z Free │ + │ Memory z Memory z Memory │ + │ z z │ + │ z z │ + └───────────────────────z──────────────────────z───────────────┘ + + Args: + size: Size of the memory pool in bytes. + + Returns: + A new :py:class:`RuntimeConfig` object with the updated setting. + + Examples usage:: + + config = RuntimeConfig().with_fair_spill_pool(1024) + """ + self.config_internal = self.config_internal.with_fair_spill_pool(size) + return self + + def with_greedy_memory_pool(self, size: int) -> RuntimeConfig: + """Use a greedy memory pool with the specified size. + + This pool works well for queries that do not need to spill or have a single + spillable operator. See :py:func:`with_fair_spill_pool` if there are + multiple spillable operators that all will spill. + + Args: + size: Size of the memory pool in bytes. + + Returns: + A new :py:class:`RuntimeConfig` object with the updated setting. + + Example usage:: + + config = RuntimeConfig().with_greedy_memory_pool(1024) + """ + self.config_internal = self.config_internal.with_greedy_memory_pool(size) + return self + + def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeConfig: + """Use the specified path to create any needed temporary files. + + Args: + path: Path to use for temporary files. + + Returns: + A new :py:class:`RuntimeConfig` object with the updated setting. + + Example usage:: + + config = RuntimeConfig().with_temp_file_path("/tmp") + """ + self.config_internal = self.config_internal.with_temp_file_path(str(path)) + return self + + +class SQLOptions: + """Options to be used when performing SQL queries.""" + + def __init__(self) -> None: + """Create a new :py:class:`SQLOptions` with default values. 
+ + The default values are: + - DDL commands are allowed + - DML commands are allowed + - Statements are allowed + """ + self.options_internal = SQLOptionsInternal() + + def with_allow_ddl(self, allow: bool = True) -> SQLOptions: + """Should DDL (Data Definition Language) commands be run? + + Examples of DDL commands include ``CREATE TABLE`` and ``DROP TABLE``. + + Args: + allow: Allow DDL commands to be run. + + Returns: + A new :py:class:`SQLOptions` object with the updated setting. + + Example usage:: + + options = SQLOptions().with_allow_ddl(True) + """ + self.options_internal = self.options_internal.with_allow_ddl(allow) + return self + + def with_allow_dml(self, allow: bool = True) -> SQLOptions: + """Should DML (Data Manipulation Language) commands be run? + + Examples of DML commands include ``INSERT INTO`` and ``DELETE``. + + Args: + allow: Allow DML commands to be run. + + Returns: + A new :py:class:`SQLOptions` object with the updated setting. + + Example usage:: + + options = SQLOptions().with_allow_dml(True) + """ + self.options_internal = self.options_internal.with_allow_dml(allow) + return self + + def with_allow_statements(self, allow: bool = True) -> SQLOptions: + """Should statements such as ``SET VARIABLE`` and ``BEGIN TRANSACTION`` be run? + + Args: + allow: Allow statements to be run. + + Returns: + A new :py:class:SQLOptions` object with the updated setting. + + Example usage:: + + options = SQLOptions().with_allow_statements(True) + """ + self.options_internal = self.options_internal.with_allow_statements(allow) + return self + + +class SessionContext: + """This is the main interface for executing queries and creating DataFrames. + + See :ref:`user_guide_concepts` in the online documentation for more information. + """ + + def __init__( + self, config: SessionConfig | None = None, runtime: RuntimeConfig | None = None + ) -> None: + """Main interface for executing queries with DataFusion. + + Maintains the state of the connection between a user and an instance + of the connection between a user and an instance of the DataFusion + engine. + + Args: + config: Session configuration options. + runtime: Runtime configuration options. + + Example usage: + + The following example demonstrates how to use the context to execute + a query against a CSV data source using the :py:class:`DataFrame` API:: + + from datafusion import SessionContext + + ctx = SessionContext() + df = ctx.read_csv("data.csv") + """ + config = config.config_internal if config is not None else None + runtime = runtime.config_internal if runtime is not None else None + + self.ctx = SessionContextInternal(config, runtime) + + def register_object_store(self, schema: str, store: Any, host: str | None) -> None: + """Add a new object store into the session. + + Args: + schema: The data source schema. + store: The :py:class:`~datafusion.object_store.ObjectStore` to register. + host: URL for the host. + """ + self.ctx.register_object_store(schema, store, host) + + def register_listing_table( + self, + name: str, + path: str | pathlib.Path, + table_partition_cols: list[tuple[str, str]] | None = None, + file_extension: str = ".parquet", + schema: pyarrow.Schema | None = None, + file_sort_order: list[list[Expr | SortExpr]] | None = None, + ) -> None: + """Register multiple files as a single table. + + Registers a :py:class:`~datafusion.catalog.Table` that can assemble multiple + files from locations in an :py:class:`~datafusion.object_store.ObjectStore` + instance. + + Args: + name: Name of the resultant table. 
+ path: Path to the file to register. + table_partition_cols: Partition columns. + file_extension: File extension of the provided table. + schema: The data source schema. + file_sort_order: Sort order for the file. + """ + if table_partition_cols is None: + table_partition_cols = [] + file_sort_order_raw = ( + [sort_list_to_raw_sort_list(f) for f in file_sort_order] + if file_sort_order is not None + else None + ) + self.ctx.register_listing_table( + name, + str(path), + table_partition_cols, + file_extension, + schema, + file_sort_order_raw, + ) + + def sql(self, query: str, options: SQLOptions | None = None) -> DataFrame: + """Create a :py:class:`~datafusion.DataFrame` from SQL query text. + + Note: This API implements DDL statements such as ``CREATE TABLE`` and + ``CREATE VIEW`` and DML statements such as ``INSERT INTO`` with in-memory + default implementation.See + :py:func:`~datafusion.context.SessionContext.sql_with_options`. + + Args: + query: SQL query text. + options: If provided, the query will be validated against these options. + + Returns: + DataFrame representation of the SQL query. + """ + if options is None: + return DataFrame(self.ctx.sql(query)) + return DataFrame(self.ctx.sql_with_options(query, options.options_internal)) + + def sql_with_options(self, query: str, options: SQLOptions) -> DataFrame: + """Create a :py:class:`~datafusion.dataframe.DataFrame` from SQL query text. + + This function will first validate that the query is allowed by the + provided options. + + Args: + query: SQL query text. + options: SQL options. + + Returns: + DataFrame representation of the SQL query. + """ + return self.sql(query, options) + + def create_dataframe( + self, + partitions: list[list[pyarrow.RecordBatch]], + name: str | None = None, + schema: pyarrow.Schema | None = None, + ) -> DataFrame: + """Create and return a dataframe using the provided partitions. + + Args: + partitions: :py:class:`pyarrow.RecordBatch` partitions to register. + name: Resultant dataframe name. + schema: Schema for the partitions. + + Returns: + DataFrame representation of the SQL query. + """ + return DataFrame(self.ctx.create_dataframe(partitions, name, schema)) + + def create_dataframe_from_logical_plan(self, plan: LogicalPlan) -> DataFrame: + """Create a :py:class:`~datafusion.dataframe.DataFrame` from an existing plan. + + Args: + plan: Logical plan. + + Returns: + DataFrame representation of the logical plan. + """ + return DataFrame(self.ctx.create_dataframe_from_logical_plan(plan)) + + def from_pylist( + self, data: list[dict[str, Any]], name: str | None = None + ) -> DataFrame: + """Create a :py:class:`~datafusion.dataframe.DataFrame` from a list. + + Args: + data: List of dictionaries. + name: Name of the DataFrame. + + Returns: + DataFrame representation of the list of dictionaries. + """ + return DataFrame(self.ctx.from_pylist(data, name)) + + def from_pydict( + self, data: dict[str, list[Any]], name: str | None = None + ) -> DataFrame: + """Create a :py:class:`~datafusion.dataframe.DataFrame` from a dictionary. + + Args: + data: Dictionary of lists. + name: Name of the DataFrame. + + Returns: + DataFrame representation of the dictionary of lists. + """ + return DataFrame(self.ctx.from_pydict(data, name)) + + def from_arrow(self, data: Any, name: str | None = None) -> DataFrame: + """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow source. + + The Arrow data source can be any object that implements either + ``__arrow_c_stream__`` or ``__arrow_c_array__``. 
For the latter, it must return + a struct array. Common examples of sources from pyarrow include + + Args: + data: Arrow data source. + name: Name of the DataFrame. + + Returns: + DataFrame representation of the Arrow table. + """ + return DataFrame(self.ctx.from_arrow(data, name)) + + @deprecated("Use ``from_arrow`` instead.") + def from_arrow_table( + self, data: pyarrow.Table, name: str | None = None + ) -> DataFrame: + """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow table. + + This is an alias for :py:func:`from_arrow`. + """ + return self.from_arrow(data, name) + + def from_pandas(self, data: pandas.DataFrame, name: str | None = None) -> DataFrame: + """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Pandas DataFrame. + + Args: + data: Pandas DataFrame. + name: Name of the DataFrame. + + Returns: + DataFrame representation of the Pandas DataFrame. + """ + return DataFrame(self.ctx.from_pandas(data, name)) + + def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFrame: + """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Polars DataFrame. + + Args: + data: Polars DataFrame. + name: Name of the DataFrame. + + Returns: + DataFrame representation of the Polars DataFrame. + """ + return DataFrame(self.ctx.from_polars(data, name)) + + def register_table(self, name: str, table: Table) -> None: + """Register a :py:class: `~datafusion.catalog.Table` as a table. + + The registered table can be referenced from SQL statement executed against. + + Args: + name: Name of the resultant table. + table: DataFusion table to add to the session context. + """ + self.ctx.register_table(name, table) + + def deregister_table(self, name: str) -> None: + """Remove a table from the session.""" + self.ctx.deregister_table(name) + + def register_record_batches( + self, name: str, partitions: list[list[pyarrow.RecordBatch]] + ) -> None: + """Register record batches as a table. + + This function will convert the provided partitions into a table and + register it into the session using the given name. + + Args: + name: Name of the resultant table. + partitions: Record batches to register as a table. + """ + self.ctx.register_record_batches(name, partitions) + + def register_parquet( + self, + name: str, + path: str | pathlib.Path, + table_partition_cols: list[tuple[str, str]] | None = None, + parquet_pruning: bool = True, + file_extension: str = ".parquet", + skip_metadata: bool = True, + schema: pyarrow.Schema | None = None, + file_sort_order: list[list[Expr]] | None = None, + ) -> None: + """Register a Parquet file as a table. + + The registered table can be referenced from SQL statement executed + against this context. + + Args: + name: Name of the table to register. + path: Path to the Parquet file. + table_partition_cols: Partition columns. + parquet_pruning: Whether the parquet reader should use the + predicate to prune row groups. + file_extension: File extension; only files with this extension are + selected for data input. + skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + schema: The data source schema. + file_sort_order: Sort order for the file. 
+ """ + if table_partition_cols is None: + table_partition_cols = [] + self.ctx.register_parquet( + name, + str(path), + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + schema, + file_sort_order, + ) + + def register_csv( + self, + name: str, + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + has_header: bool = True, + delimiter: str = ",", + schema_infer_max_records: int = 1000, + file_extension: str = ".csv", + file_compression_type: str | None = None, + ) -> None: + """Register a CSV file as a table. + + The registered table can be referenced from SQL statement executed against. + + Args: + name: Name of the table to register. + path: Path to the CSV file. + schema: An optional schema representing the CSV file. If None, the + CSV reader will try to infer it based on data in file. + has_header: Whether the CSV file have a header. If schema inference + is run on a file with no headers, default column names are + created. + delimiter: An optional column delimiter. + schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + file_compression_type: File compression type. + """ + self.ctx.register_csv( + name, + str(path), + schema, + has_header, + delimiter, + schema_infer_max_records, + file_extension, + file_compression_type, + ) + + def register_json( + self, + name: str, + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + schema_infer_max_records: int = 1000, + file_extension: str = ".json", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, + ) -> None: + """Register a JSON file as a table. + + The registered table can be referenced from SQL statement executed + against this context. + + Args: + name: Name of the table to register. + path: Path to the JSON file. + schema: The data source schema. + schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. + """ + if table_partition_cols is None: + table_partition_cols = [] + self.ctx.register_json( + name, + str(path), + schema, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + + def register_avro( + self, + name: str, + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + file_extension: str = ".avro", + table_partition_cols: list[tuple[str, str]] | None = None, + ) -> None: + """Register an Avro file as a table. + + The registered table can be referenced from SQL statement executed against + this context. + + Args: + name: Name of the table to register. + path: Path to the Avro file. + schema: The data source schema. + file_extension: File extension to select. + table_partition_cols: Partition columns. + """ + if table_partition_cols is None: + table_partition_cols = [] + self.ctx.register_avro( + name, str(path), schema, file_extension, table_partition_cols + ) + + def register_dataset(self, name: str, dataset: pyarrow.dataset.Dataset) -> None: + """Register a :py:class:`pyarrow.dataset.Dataset` as a table. + + Args: + name: Name of the table to register. + dataset: PyArrow dataset. 
+ """ + self.ctx.register_dataset(name, dataset) + + def register_udf(self, udf: ScalarUDF) -> None: + """Register a user-defined function (UDF) with the context.""" + self.ctx.register_udf(udf._udf) + + def register_udaf(self, udaf: AggregateUDF) -> None: + """Register a user-defined aggregation function (UDAF) with the context.""" + self.ctx.register_udaf(udaf._udaf) + + def catalog(self, name: str = "datafusion") -> Catalog: + """Retrieve a catalog by name.""" + return self.ctx.catalog(name) + + @deprecated( + "Use the catalog provider interface ``SessionContext.Catalog`` to " + "examine available catalogs, schemas and tables" + ) + def tables(self) -> set[str]: + """Deprecated.""" + return self.ctx.tables() + + def table(self, name: str) -> DataFrame: + """Retrieve a previously registered table by name.""" + return DataFrame(self.ctx.table(name)) + + def table_exist(self, name: str) -> bool: + """Return whether a table with the given name exists.""" + return self.ctx.table_exist(name) + + def empty_table(self) -> DataFrame: + """Create an empty :py:class:`~datafusion.dataframe.DataFrame`.""" + return DataFrame(self.ctx.empty_table()) + + def session_id(self) -> str: + """Return an id that uniquely identifies this :py:class:`SessionContext`.""" + return self.ctx.session_id() + + def read_json( + self, + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + schema_infer_max_records: int = 1000, + file_extension: str = ".json", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, + ) -> DataFrame: + """Read a line-delimited JSON data source. + + Args: + path: Path to the JSON file. + schema: The data source schema. + schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. + + Returns: + DataFrame representation of the read JSON files. + """ + if table_partition_cols is None: + table_partition_cols = [] + return DataFrame( + self.ctx.read_json( + str(path), + schema, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + ) + + def read_csv( + self, + path: str | pathlib.Path | list[str] | list[pathlib.Path], + schema: pyarrow.Schema | None = None, + has_header: bool = True, + delimiter: str = ",", + schema_infer_max_records: int = 1000, + file_extension: str = ".csv", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, + ) -> DataFrame: + """Read a CSV data source. + + Args: + path: Path to the CSV file + schema: An optional schema representing the CSV files. If None, the + CSV reader will try to infer it based on data in file. + has_header: Whether the CSV file have a header. If schema inference + is run on a file with no headers, default column names are + created. + delimiter: An optional column delimiter. + schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. 
+ + Returns: + DataFrame representation of the read CSV files + """ + if table_partition_cols is None: + table_partition_cols = [] + + path = [str(p) for p in path] if isinstance(path, list) else str(path) + + return DataFrame( + self.ctx.read_csv( + path, + schema, + has_header, + delimiter, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + ) + + def read_parquet( + self, + path: str | pathlib.Path, + table_partition_cols: list[tuple[str, str]] | None = None, + parquet_pruning: bool = True, + file_extension: str = ".parquet", + skip_metadata: bool = True, + schema: pyarrow.Schema | None = None, + file_sort_order: list[list[Expr]] | None = None, + ) -> DataFrame: + """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. + + Args: + path: Path to the Parquet file. + table_partition_cols: Partition columns. + parquet_pruning: Whether the parquet reader should use the predicate + to prune row groups. + file_extension: File extension; only files with this extension are + selected for data input. + skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + schema: An optional schema representing the parquet files. If None, + the parquet reader will try to infer it based on data in the + file. + file_sort_order: Sort order for the file. + + Returns: + DataFrame representation of the read Parquet files + """ + if table_partition_cols is None: + table_partition_cols = [] + return DataFrame( + self.ctx.read_parquet( + str(path), + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + schema, + file_sort_order, + ) + ) + + def read_avro( + self, + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + file_partition_cols: list[tuple[str, str]] | None = None, + file_extension: str = ".avro", + ) -> DataFrame: + """Create a :py:class:`DataFrame` for reading Avro data source. + + Args: + path: Path to the Avro file. + schema: The data source schema. + file_partition_cols: Partition columns. + file_extension: File extension to select. + + Returns: + DataFrame representation of the read Avro file + """ + if file_partition_cols is None: + file_partition_cols = [] + return DataFrame( + self.ctx.read_avro(str(path), schema, file_partition_cols, file_extension) + ) + + def read_table(self, table: Table) -> DataFrame: + """Creates a :py:class:`~datafusion.dataframe.DataFrame` from a table. + + For a :py:class:`~datafusion.catalog.Table` such as a + :py:class:`~datafusion.catalog.ListingTable`, create a + :py:class:`~datafusion.dataframe.DataFrame`. + """ + return DataFrame(self.ctx.read_table(table)) + + def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream: + """Execute the ``plan`` and return the results.""" + return RecordBatchStream(self.ctx.execute(plan, partitions)) diff --git a/py-denormalized/python/denormalized/datafusion/dataframe.py b/py-denormalized/python/denormalized/datafusion/dataframe.py new file mode 100644 index 0000000..4a50545 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/dataframe.py @@ -0,0 +1,572 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""":py:class:`DataFrame` is one of the core concepts in DataFusion. + +See :ref:`user_guide_concepts` in the online documentation for more information. +""" + +from __future__ import annotations + +from typing import Any, List, TYPE_CHECKING +from datafusion.record_batch import RecordBatchStream +from typing_extensions import deprecated + +if TYPE_CHECKING: + import pyarrow as pa + import pandas as pd + import polars as pl + import pathlib + from typing import Callable + +from denormalized._internal import DataFrame as DataFrameInternal +from denormalized.datafusion.expr import Expr, SortExpr, sort_or_default +from denormalized._internal import ( + LogicalPlan, + ExecutionPlan, +) + + +class DataFrame: + """Two dimensional table representation of data. + + See :ref:`user_guide_concepts` in the online documentation for more information. + """ + + def __init__(self, df: DataFrameInternal) -> None: + """This constructor is not to be used by the end user. + + See :py:class:`~datafusion.context.SessionContext` for methods to + create a :py:class:`DataFrame`. + """ + self.df = df + + def __getitem__(self, key: str | List[str]) -> DataFrame: + """Return a new :py:class`DataFrame` with the specified column or columns. + + Args: + key: Column name or list of column names to select. + + Returns: + DataFrame with the specified column or columns. + """ + return DataFrame(self.df.__getitem__(key)) + + def __repr__(self) -> str: + """Return a string representation of the DataFrame. + + Returns: + String representation of the DataFrame. + """ + return self.df.__repr__() + + def _repr_html_(self) -> str: + return self.df._repr_html_() + + def describe(self) -> DataFrame: + """Return the statistics for this DataFrame. + + Only summarized numeric datatypes at the moments and returns nulls + for non-numeric datatypes. + + The output format is modeled after pandas. + + Returns: + A summary DataFrame containing statistics. + """ + return DataFrame(self.df.describe()) + + def schema(self) -> pa.Schema: + """Return the :py:class:`pyarrow.Schema` of this DataFrame. + + The output schema contains information on the name, data type, and + nullability for each column. + + Returns: + Describing schema of the DataFrame + """ + return self.df.schema() + + def select_columns(self, *args: str) -> DataFrame: + """Filter the DataFrame by columns. + + Returns: + DataFrame only containing the specified columns. + """ + return self.select(*args) + + def select(self, *exprs: Expr | str) -> DataFrame: + """Project arbitrary expressions into a new :py:class:`DataFrame`. + + Args: + exprs: Either column names or :py:class:`~datafusion.expr.Expr` to select. + + Returns: + DataFrame after projection. It has one column for each expression. + + Example usage: + + The following example will return 3 columns from the original dataframe. + The first two columns will be the original column ``a`` and ``b`` since the + string "a" is assumed to refer to column selection. 
Also a duplicate of + column ``a`` will be returned with the column name ``alternate_a``:: + + df = df.select("a", col("b"), col("a").alias("alternate_a")) + + """ + exprs_internal = [ + Expr.column(arg).expr if isinstance(arg, str) else arg.expr for arg in exprs + ] + return DataFrame(self.df.select(*exprs_internal)) + + def filter(self, *predicates: Expr) -> DataFrame: + """Return a DataFrame for which ``predicate`` evaluates to ``True``. + + Rows for which ``predicate`` evaluates to ``False`` or ``None`` are filtered + out. If more than one predicate is provided, these predicates will be + combined as a logical AND. If more complex logic is required, see the + logical operations in :py:mod:`~datafusion.functions`. + + Args: + predicates: Predicate expression(s) to filter the DataFrame. + + Returns: + DataFrame after filtering. + """ + df = self.df + for p in predicates: + df = df.filter(p.expr) + return DataFrame(df) + + def with_column(self, name: str, expr: Expr) -> DataFrame: + """Add an additional column to the DataFrame. + + Args: + name: Name of the column to add. + expr: Expression to compute the column. + + Returns: + DataFrame with the new column. + """ + return DataFrame(self.df.with_column(name, expr.expr)) + + def with_column_renamed(self, old_name: str, new_name: str) -> DataFrame: + r"""Rename one column by applying a new projection. + + This is a no-op if the column to be renamed does not exist. + + The method supports case sensitive rename with wrapping column name + into one the following symbols (" or ' or \`). + + Args: + old_name: Old column name. + new_name: New column name. + + Returns: + DataFrame with the column renamed. + """ + return DataFrame(self.df.with_column_renamed(old_name, new_name)) + + def aggregate( + self, group_by: list[Expr] | Expr, aggs: list[Expr] | Expr + ) -> DataFrame: + """Aggregates the rows of the current DataFrame. + + Args: + group_by: List of expressions to group by. + aggs: List of expressions to aggregate. + + Returns: + DataFrame after aggregation. + """ + group_by = group_by if isinstance(group_by, list) else [group_by] + aggs = aggs if isinstance(aggs, list) else [aggs] + + group_by = [e.expr for e in group_by] + aggs = [e.expr for e in aggs] + return DataFrame(self.df.aggregate(group_by, aggs)) + + def sort(self, *exprs: Expr | SortExpr) -> DataFrame: + """Sort the DataFrame by the specified sorting expressions. + + Note that any expression can be turned into a sort expression by + calling its` ``sort`` method. + + Args: + exprs: Sort expressions, applied in order. + + Returns: + DataFrame after sorting. + """ + exprs_raw = [sort_or_default(expr) for expr in exprs] + return DataFrame(self.df.sort(*exprs_raw)) + + def limit(self, count: int, offset: int = 0) -> DataFrame: + """Return a new :py:class:`DataFrame` with a limited number of rows. + + Args: + count: Number of rows to limit the DataFrame to. + offset: Number of rows to skip. + + Returns: + DataFrame after limiting. + """ + return DataFrame(self.df.limit(count, offset)) + + def collect(self) -> list[pa.RecordBatch]: + """Execute this :py:class:`DataFrame` and collect results into memory. + + Prior to calling ``collect``, modifying a DataFrme simply updates a plan + (no actual computation is performed). Calling ``collect`` triggers the + computation. + + Returns: + List of :py:class:`pyarrow.RecordBatch` collected from the DataFrame. + """ + return self.df.collect() + + def cache(self) -> DataFrame: + """Cache the DataFrame as a memory table. 
+ + Returns: + Cached DataFrame. + """ + return DataFrame(self.df.cache()) + + def collect_partitioned(self) -> list[list[pa.RecordBatch]]: + """Execute this DataFrame and collect all partitioned results. + + This operation returns :py:class:`pyarrow.RecordBatch` maintaining the input + partitioning. + + Returns: + List of list of :py:class:`RecordBatch` collected from the + DataFrame. + """ + return self.df.collect_partitioned() + + def show(self, num: int = 20) -> None: + """Execute the DataFrame and print the result to the console. + + Args: + num: Number of lines to show. + """ + self.df.show(num) + + def distinct(self) -> DataFrame: + """Return a new :py:class:`DataFrame` with all duplicated rows removed. + + Returns: + DataFrame after removing duplicates. + """ + return DataFrame(self.df.distinct()) + + def join( + self, + right: DataFrame, + join_keys: tuple[list[str], list[str]], + how: str, + ) -> DataFrame: + """Join this :py:class:`DataFrame` with another :py:class:`DataFrame`. + + Join keys are a pair of lists of column names in the left and right + dataframes, respectively. These lists must have the same length. + + Args: + right: Other DataFrame to join with. + join_keys: Tuple of two lists of column names to join on. + how: Type of join to perform. Supported types are "inner", "left", + "right", "full", "semi", "anti". + + Returns: + DataFrame after join. + """ + return DataFrame(self.df.join(right.df, join_keys, how)) + + def explain(self, verbose: bool = False, analyze: bool = False) -> DataFrame: + """Return a DataFrame with the explanation of its plan so far. + + If ``analyze`` is specified, runs the plan and reports metrics. + + Args: + verbose: If ``True``, more details will be included. + analyze: If ``Tru`e``, the plan will run and metrics reported. + + Returns: + DataFrame with the explanation of its plan. + """ + return DataFrame(self.df.explain(verbose, analyze)) + + def logical_plan(self) -> LogicalPlan: + """Return the unoptimized ``LogicalPlan``. + + Returns: + Unoptimized logical plan. + """ + return self.df.logical_plan() + + def optimized_logical_plan(self) -> LogicalPlan: + """Return the optimized ``LogicalPlan``. + + Returns: + Optimized logical plan. + """ + return self.df.optimized_logical_plan() + + def execution_plan(self) -> ExecutionPlan: + """Return the execution/physical plan. + + Returns: + Execution plan. + """ + return self.df.execution_plan() + + def repartition(self, num: int) -> DataFrame: + """Repartition a DataFrame into ``num`` partitions. + + The batches allocation uses a round-robin algorithm. + + Args: + num: Number of partitions to repartition the DataFrame into. + + Returns: + Repartitioned DataFrame. + """ + return DataFrame(self.df.repartition(num)) + + def repartition_by_hash(self, *exprs: Expr, num: int) -> DataFrame: + """Repartition a DataFrame using a hash partitioning scheme. + + Args: + exprs: Expressions to evaluate and perform hashing on. + num: Number of partitions to repartition the DataFrame into. + + Returns: + Repartitioned DataFrame. + """ + exprs = [expr.expr for expr in exprs] + return DataFrame(self.df.repartition_by_hash(*exprs, num=num)) + + def union(self, other: DataFrame, distinct: bool = False) -> DataFrame: + """Calculate the union of two :py:class:`DataFrame`. + + The two :py:class:`DataFrame` must have exactly the same schema. + + Args: + other: DataFrame to union with. + distinct: If ``True``, duplicate rows will be removed. + + Returns: + DataFrame after union. 
+ """ + return DataFrame(self.df.union(other.df, distinct)) + + def union_distinct(self, other: DataFrame) -> DataFrame: + """Calculate the distinct union of two :py:class:`DataFrame`. + + The two :py:class:`DataFrame` must have exactly the same schema. + Any duplicate rows are discarded. + + Args: + other: DataFrame to union with. + + Returns: + DataFrame after union. + """ + return DataFrame(self.df.union_distinct(other.df)) + + def intersect(self, other: DataFrame) -> DataFrame: + """Calculate the intersection of two :py:class:`DataFrame`. + + The two :py:class:`DataFrame` must have exactly the same schema. + + Args: + other: DataFrame to intersect with. + + Returns: + DataFrame after intersection. + """ + return DataFrame(self.df.intersect(other.df)) + + def except_all(self, other: DataFrame) -> DataFrame: + """Calculate the exception of two :py:class:`DataFrame`. + + The two :py:class:`DataFrame` must have exactly the same schema. + + Args: + other: DataFrame to calculate exception with. + + Returns: + DataFrame after exception. + """ + return DataFrame(self.df.except_all(other.df)) + + def write_csv(self, path: str | pathlib.Path, with_header: bool = False) -> None: + """Execute the :py:class:`DataFrame` and write the results to a CSV file. + + Args: + path: Path of the CSV file to write. + with_header: If true, output the CSV header row. + """ + self.df.write_csv(str(path), with_header) + + def write_parquet( + self, + path: str | pathlib.Path, + compression: str = "uncompressed", + compression_level: int | None = None, + ) -> None: + """Execute the :py:class:`DataFrame` and write the results to a Parquet file. + + Args: + path: Path of the Parquet file to write. + compression: Compression type to use. + compression_level: Compression level to use. + """ + self.df.write_parquet(str(path), compression, compression_level) + + def write_json(self, path: str | pathlib.Path) -> None: + """Execute the :py:class:`DataFrame` and write the results to a JSON file. + + Args: + path: Path of the JSON file to write. + """ + self.df.write_json(str(path)) + + def to_arrow_table(self) -> pa.Table: + """Execute the :py:class:`DataFrame` and convert it into an Arrow Table. + + Returns: + Arrow Table. + """ + return self.df.to_arrow_table() + + def execute_stream(self) -> RecordBatchStream: + """Executes this DataFrame and returns a stream over a single partition. + + Returns: + Record Batch Stream over a single partition. + """ + return RecordBatchStream(self.df.execute_stream()) + + def execute_stream_partitioned(self) -> list[RecordBatchStream]: + """Executes this DataFrame and returns a stream for each partition. + + Returns: + One record batch stream per partition. + """ + streams = self.df.execute_stream_partitioned() + return [RecordBatchStream(rbs) for rbs in streams] + + def to_pandas(self) -> pd.DataFrame: + """Execute the :py:class:`DataFrame` and convert it into a Pandas DataFrame. + + Returns: + Pandas DataFrame. + """ + return self.df.to_pandas() + + def to_pylist(self) -> list[dict[str, Any]]: + """Execute the :py:class:`DataFrame` and convert it into a list of dictionaries. + + Returns: + List of dictionaries. + """ + return self.df.to_pylist() + + def to_pydict(self) -> dict[str, list[Any]]: + """Execute the :py:class:`DataFrame` and convert it into a dictionary of lists. + + Returns: + Dictionary of lists. + """ + return self.df.to_pydict() + + def to_polars(self) -> pl.DataFrame: + """Execute the :py:class:`DataFrame` and convert it into a Polars DataFrame. 
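+
+        Requires the ``polars`` package to be installed. For example::
+
+            pl_df = df.to_polars()
+            print(pl_df.shape)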
+ + Returns: + Polars DataFrame. + """ + return self.df.to_polars() + + def count(self) -> int: + """Return the total number of rows in this :py:class:`DataFrame`. + + Note that this method will actually run a plan to calculate the + count, which may be slow for large or complicated DataFrames. + + Returns: + Number of rows in the DataFrame. + """ + return self.df.count() + + @deprecated("Use :py:func:`unnest_columns` instead.") + def unnest_column(self, column: str, preserve_nulls: bool = True) -> DataFrame: + """See :py:func:`unnest_columns`.""" + return DataFrame(self.df.unnest_column(column, preserve_nulls=preserve_nulls)) + + def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFrame: + """Expand columns of arrays into a single row per array element. + + Args: + columns: Column names to perform unnest operation on. + preserve_nulls: If False, rows with null entries will not be + returned. + + Returns: + A DataFrame with the columns expanded. + """ + columns = [c for c in columns] + return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls)) + + def __arrow_c_stream__(self, requested_schema: pa.Schema) -> Any: + """Export an Arrow PyCapsule Stream. + + This will execute and collect the DataFrame. We will attempt to respect the + requested schema, but only trivial transformations will be applied such as only + returning the fields listed in the requested schema if their data types match + those in the DataFrame. + + Args: + requested_schema: Attempt to provide the DataFrame using this schema. + + Returns: + Arrow PyCapsule object. + """ + return self.df.__arrow_c_stream__(requested_schema) + + def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame: + """Apply a function to the current DataFrame which returns another DataFrame. + + This is useful for chaining together multiple functions. For example:: + + def add_3(df: DataFrame) -> DataFrame: + return df.with_column("modified", lit(3)) + + def within_limit(df: DataFrame, limit: int) -> DataFrame: + return df.filter(col("a") < lit(limit)).distinct() + + df = df.transform(modify_df).transform(within_limit, 4) + + Args: + func: A callable function that takes a DataFrame as it's first argument + args: Zero or more arguments to pass to `func` + + Returns: + DataFrame: After applying func to the original dataframe. + """ + return func(self, *args) diff --git a/py-denormalized/python/denormalized/datafusion/expr.py b/py-denormalized/python/denormalized/datafusion/expr.py new file mode 100644 index 0000000..a858a66 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/expr.py @@ -0,0 +1,718 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""This module supports expressions, one of the core concepts in DataFusion. 
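+
+Expressions are typically built from column references and literal values and
+then combined with operators. A minimal sketch using only the classes defined
+in this module (column name is illustrative)::
+
+    from denormalized.datafusion.expr import Expr
+
+    expr = (Expr.column("a") + Expr.literal(1)).alias("a_plus_one")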
+ +See :ref:`Expressions` in the online documentation for more details. +""" + +from __future__ import annotations + +from typing import Any, Optional, Type + +import pyarrow as pa +from denormalized.datafusion.common import DataTypeMap, NullTreatment, RexType +from typing_extensions import deprecated + +from denormalized._internal import LogicalPlan +from denormalized._internal import expr as expr_internal +from denormalized._internal import functions as functions_internal + +# The following are imported from the internal representation. We may choose to +# give these all proper wrappers, or to simply leave as is. These were added +# in order to support passing the `test_imports` unit test. +# Tim Saucer note: It is not clear to me what the use case is for exposing +# these definitions to the end user. + +Alias = expr_internal.Alias +Analyze = expr_internal.Analyze +Aggregate = expr_internal.Aggregate +AggregateFunction = expr_internal.AggregateFunction +Between = expr_internal.Between +BinaryExpr = expr_internal.BinaryExpr +Case = expr_internal.Case +Cast = expr_internal.Cast +Column = expr_internal.Column +CreateMemoryTable = expr_internal.CreateMemoryTable +CreateView = expr_internal.CreateView +CrossJoin = expr_internal.CrossJoin +Distinct = expr_internal.Distinct +DropTable = expr_internal.DropTable +EmptyRelation = expr_internal.EmptyRelation +Exists = expr_internal.Exists +Explain = expr_internal.Explain +Extension = expr_internal.Extension +Filter = expr_internal.Filter +GroupingSet = expr_internal.GroupingSet +Join = expr_internal.Join +ILike = expr_internal.ILike +InList = expr_internal.InList +InSubquery = expr_internal.InSubquery +IsFalse = expr_internal.IsFalse +IsNotTrue = expr_internal.IsNotTrue +IsNull = expr_internal.IsNull +IsTrue = expr_internal.IsTrue +IsUnknown = expr_internal.IsUnknown +IsNotFalse = expr_internal.IsNotFalse +IsNotNull = expr_internal.IsNotNull +IsNotUnknown = expr_internal.IsNotUnknown +JoinConstraint = expr_internal.JoinConstraint +JoinType = expr_internal.JoinType +Like = expr_internal.Like +Limit = expr_internal.Limit +Literal = expr_internal.Literal +Negative = expr_internal.Negative +Not = expr_internal.Not +Partitioning = expr_internal.Partitioning +Placeholder = expr_internal.Placeholder +Projection = expr_internal.Projection +Repartition = expr_internal.Repartition +ScalarSubquery = expr_internal.ScalarSubquery +ScalarVariable = expr_internal.ScalarVariable +SimilarTo = expr_internal.SimilarTo +Sort = expr_internal.Sort +Subquery = expr_internal.Subquery +SubqueryAlias = expr_internal.SubqueryAlias +TableScan = expr_internal.TableScan +TryCast = expr_internal.TryCast +Union = expr_internal.Union +Unnest = expr_internal.Unnest +UnnestExpr = expr_internal.UnnestExpr +Window = expr_internal.Window + +__all__ = [ + "Expr", + "Column", + "Literal", + "BinaryExpr", + "Literal", + "AggregateFunction", + "Not", + "IsNotNull", + "IsNull", + "IsTrue", + "IsFalse", + "IsUnknown", + "IsNotTrue", + "IsNotFalse", + "IsNotUnknown", + "Negative", + "Like", + "ILike", + "SimilarTo", + "ScalarVariable", + "Alias", + "InList", + "Exists", + "Subquery", + "InSubquery", + "ScalarSubquery", + "Placeholder", + "GroupingSet", + "Case", + "CaseBuilder", + "Cast", + "TryCast", + "Between", + "Explain", + "Limit", + "Aggregate", + "Sort", + "SortExpr", + "Analyze", + "EmptyRelation", + "Join", + "JoinType", + "JoinConstraint", + "CrossJoin", + "Union", + "Unnest", + "UnnestExpr", + "Extension", + "Filter", + "Projection", + "TableScan", + "CreateMemoryTable", + 
"CreateView", + "Distinct", + "SubqueryAlias", + "DropTable", + "Partitioning", + "Repartition", + "Window", + "WindowFrame", + "WindowFrameBound", +] + + +def expr_list_to_raw_expr_list( + expr_list: Optional[list[Expr]], +) -> Optional[list[expr_internal.Expr]]: + """Helper function to convert an optional list to raw expressions.""" + return [e.expr for e in expr_list] if expr_list is not None else None + + +def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: + """Helper function to return a default Sort if an Expr is provided.""" + if isinstance(e, SortExpr): + return e.raw_sort + return SortExpr(e.expr, True, True).raw_sort + + +def sort_list_to_raw_sort_list( + sort_list: Optional[list[Expr | SortExpr]], +) -> Optional[list[expr_internal.SortExpr]]: + """Helper function to return an optional sort list to raw variant.""" + return [sort_or_default(e) for e in sort_list] if sort_list is not None else None + + +class Expr: + """Expression object. + + Expressions are one of the core concepts in DataFusion. See + :ref:`Expressions` in the online documentation for more information. + """ + + def __init__(self, expr: expr_internal.Expr) -> None: + """This constructor should not be called by the end user.""" + self.expr = expr + + def to_variant(self) -> Any: + """Convert this expression into a python object if possible.""" + return self.expr.to_variant() + + @deprecated( + "display_name() is deprecated. Use :py:meth:`~Expr.schema_name` instead" + ) + def display_name(self) -> str: + """Returns the name of this expression as it should appear in a schema. + + This name will not include any CAST expressions. + """ + return self.schema_name() + + def schema_name(self) -> str: + """Returns the name of this expression as it should appear in a schema. + + This name will not include any CAST expressions. + """ + return self.expr.schema_name() + + def canonical_name(self) -> str: + """Returns a complete string representation of this expression.""" + return self.expr.canonical_name() + + def variant_name(self) -> str: + """Returns the name of the Expr variant. + + Ex: ``IsNotNull``, ``Literal``, ``BinaryExpr``, etc + """ + return self.expr.variant_name() + + def __richcmp__(self, other: Expr, op: int) -> Expr: + """Comparison operator.""" + return Expr(self.expr.__richcmp__(other, op)) + + def __repr__(self) -> str: + """Generate a string representation of this expression.""" + return self.expr.__repr__() + + def __add__(self, rhs: Any) -> Expr: + """Addition operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__add__(rhs.expr)) + + def __sub__(self, rhs: Any) -> Expr: + """Subtraction operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__sub__(rhs.expr)) + + def __truediv__(self, rhs: Any) -> Expr: + """Division operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__truediv__(rhs.expr)) + + def __mul__(self, rhs: Any) -> Expr: + """Multiplication operator. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__mul__(rhs.expr)) + + def __mod__(self, rhs: Any) -> Expr: + """Modulo operator (%). 
+ + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__mod__(rhs.expr)) + + def __and__(self, rhs: Expr) -> Expr: + """Logical AND.""" + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__and__(rhs.expr)) + + def __or__(self, rhs: Expr) -> Expr: + """Logical OR.""" + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__or__(rhs.expr)) + + def __invert__(self) -> Expr: + """Binary not (~).""" + return Expr(self.expr.__invert__()) + + def __getitem__(self, key: str | int) -> Expr: + """Retrieve sub-object. + + If ``key`` is a string, returns the subfield of the struct. + If ``key`` is an integer, retrieves the element in the array. Note that the + element index begins at ``0``, unlike `array_element` which begins at ``1``. + """ + if isinstance(key, int): + return Expr( + functions_internal.array_element(self.expr, Expr.literal(key + 1).expr) + ) + return Expr(self.expr.__getitem__(key)) + + def __eq__(self, rhs: Any) -> Expr: + """Equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__eq__(rhs.expr)) + + def __ne__(self, rhs: Any) -> Expr: + """Not equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__ne__(rhs.expr)) + + def __ge__(self, rhs: Any) -> Expr: + """Greater than or equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__ge__(rhs.expr)) + + def __gt__(self, rhs: Any) -> Expr: + """Greater than. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__gt__(rhs.expr)) + + def __le__(self, rhs: Any) -> Expr: + """Less than or equal to. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__le__(rhs.expr)) + + def __lt__(self, rhs: Any) -> Expr: + """Less than. + + Accepts either an expression or any valid PyArrow scalar literal value. + """ + if not isinstance(rhs, Expr): + rhs = Expr.literal(rhs) + return Expr(self.expr.__lt__(rhs.expr)) + + __radd__ = __add__ + __rand__ = __and__ + __rmod__ = __mod__ + __rmul__ = __mul__ + __ror__ = __or__ + __rsub__ = __sub__ + __rtruediv__ = __truediv__ + + @staticmethod + def literal(value: Any) -> Expr: + """Creates a new expression representing a scalar value. + + ``value`` must be a valid PyArrow scalar value or easily castable to one. + """ + if not isinstance(value, pa.Scalar): + value = pa.scalar(value) + return Expr(expr_internal.Expr.literal(value)) + + @staticmethod + def column(value: str) -> Expr: + """Creates a new expression representing a column.""" + return Expr(expr_internal.Expr.column(value)) + + def alias(self, name: str) -> Expr: + """Assign a name to the expression.""" + return Expr(self.expr.alias(name)) + + def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: + """Creates a sort :py:class:`Expr` from an existing :py:class:`Expr`. + + Args: + ascending: If true, sort in ascending order. + nulls_first: Return null values first. 
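+
+        A short sketch of sorting a DataFrame with this method (column name
+        is illustrative)::
+
+            df = df.sort(col("a").sort(ascending=False, nulls_first=False))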
+ """ + return SortExpr(self.expr, ascending=ascending, nulls_first=nulls_first) + + def is_null(self) -> Expr: + """Returns ``True`` if this expression is null.""" + return Expr(self.expr.is_null()) + + def is_not_null(self) -> Expr: + """Returns ``True`` if this expression is not null.""" + return Expr(self.expr.is_not_null()) + + _to_pyarrow_types = { + float: pa.float64(), + int: pa.int64(), + str: pa.string(), + bool: pa.bool_(), + } + + def cast( + self, to: pa.DataType[Any] | Type[float] | Type[int] | Type[str] | Type[bool] + ) -> Expr: + """Cast to a new data type.""" + if not isinstance(to, pa.DataType): + try: + to = self._to_pyarrow_types[to] + except KeyError: + raise TypeError( + "Expected instance of pyarrow.DataType or builtins.type" + ) + + return Expr(self.expr.cast(to)) + + def between(self, low: Any, high: Any, negated: bool = False) -> Expr: + """Returns ``True`` if this expression is between a given range. + + Args: + low: lower bound of the range (inclusive). + high: higher bound of the range (inclusive). + negated: negates whether the expression is between a given range + """ + if not isinstance(low, Expr): + low = Expr.literal(low) + + if not isinstance(high, Expr): + high = Expr.literal(high) + + return Expr(self.expr.between(low.expr, high.expr, negated=negated)) + + def rex_type(self) -> RexType: + """Return the Rex Type of this expression. + + A Rex (Row Expression) specifies a single row of data.That specification + could include user defined functions or types. RexType identifies the + row as one of the possible valid ``RexType``. + """ + return self.expr.rex_type() + + def types(self) -> DataTypeMap: + """Return the ``DataTypeMap``. + + Returns: + DataTypeMap which represents the PythonType, Arrow DataType, and + SqlType Enum which this expression represents. + """ + return self.expr.types() + + def python_value(self) -> Any: + """Extracts the Expr value into a PyObject. + + This is only valid for literal expressions. + + Returns: + Python object representing literal value of the expression. + """ + return self.expr.python_value() + + def rex_call_operands(self) -> list[Expr]: + """Return the operands of the expression based on it's variant type. + + Row expressions, Rex(s), operate on the concept of operands. Different + variants of Expressions, Expr(s), store those operands in different + datastructures. This function examines the Expr variant and returns + the operands to the calling logic. + """ + return [Expr(e) for e in self.expr.rex_call_operands()] + + def rex_call_operator(self) -> str: + """Extracts the operator associated with a row expression type call.""" + return self.expr.rex_call_operator() + + def column_name(self, plan: LogicalPlan) -> str: + """Compute the output column name based on the provided logical plan.""" + return self.expr.column_name(plan) + + def order_by(self, *exprs: Expr | SortExpr) -> ExprFuncBuilder: + """Set the ordering for a window or aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + """ + return ExprFuncBuilder(self.expr.order_by([sort_or_default(e) for e in exprs])) + + def filter(self, filter: Expr) -> ExprFuncBuilder: + """Filter an aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. 
If used on any other + type of expression, an error will be generated when ``build()`` is called. + """ + return ExprFuncBuilder(self.expr.filter(filter.expr)) + + def distinct(self) -> ExprFuncBuilder: + """Only evaluate distinct values for an aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + """ + return ExprFuncBuilder(self.expr.distinct()) + + def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: + """Set the treatment for ``null`` values for a window or aggregate function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + """ + return ExprFuncBuilder(self.expr.null_treatment(null_treatment.value)) + + def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder: + """Set the partitioning for a window function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + """ + return ExprFuncBuilder( + self.expr.partition_by(list(e.expr for e in partition_by)) + ) + + def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: + """Set the frame fora window function. + + This function will create an :py:class:`ExprFuncBuilder` that can be used to + set parameters for either window or aggregate functions. If used on any other + type of expression, an error will be generated when ``build()`` is called. + """ + return ExprFuncBuilder(self.expr.window_frame(window_frame.window_frame)) + + +class ExprFuncBuilder: + def __init__(self, builder: expr_internal.ExprFuncBuilder): + self.builder = builder + + def order_by(self, *exprs: Expr) -> ExprFuncBuilder: + """Set the ordering for a window or aggregate function. + + Values given in ``exprs`` must be sort expressions. You can convert any other + expression to a sort expression using `.sort()`. 
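+
+        A hedged sketch, assuming ``functions`` is imported as ``f`` and the
+        column names are illustrative::
+
+            expr = f.first_value(col("a")).order_by(col("ts").sort()).build()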
+ """ + return ExprFuncBuilder( + self.builder.order_by([sort_or_default(e) for e in exprs]) + ) + + def filter(self, filter: Expr) -> ExprFuncBuilder: + """Filter values during aggregation.""" + return ExprFuncBuilder(self.builder.filter(filter.expr)) + + def distinct(self) -> ExprFuncBuilder: + """Only evaluate distinct values during aggregation.""" + return ExprFuncBuilder(self.builder.distinct()) + + def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder: + """Set how nulls are treated for either window or aggregate functions.""" + return ExprFuncBuilder(self.builder.null_treatment(null_treatment.value)) + + def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder: + """Set partitioning for window functions.""" + return ExprFuncBuilder( + self.builder.partition_by(list(e.expr for e in partition_by)) + ) + + def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder: + """Set window frame for window functions.""" + return ExprFuncBuilder(self.builder.window_frame(window_frame.window_frame)) + + def build(self) -> Expr: + """Create an expression from a Function Builder.""" + return Expr(self.builder.build()) + + +class WindowFrame: + """Defines a window frame for performing window operations.""" + + def __init__( + self, units: str, start_bound: Optional[Any], end_bound: Optional[Any] + ) -> None: + """Construct a window frame using the given parameters. + + Args: + units: Should be one of ``rows``, ``range``, or ``groups``. + start_bound: Sets the preceding bound. Must be >= 0. If none, this + will be set to unbounded. If unit type is ``groups``, this + parameter must be set. + end_bound: Sets the following bound. Must be >= 0. If none, this + will be set to unbounded. If unit type is ``groups``, this + parameter must be set. + """ + if not isinstance(start_bound, pa.Scalar) and start_bound is not None: + start_bound = pa.scalar(start_bound) + if units == "rows" or units == "groups": + start_bound = start_bound.cast(pa.uint64()) + if not isinstance(end_bound, pa.Scalar) and end_bound is not None: + end_bound = pa.scalar(end_bound) + if units == "rows" or units == "groups": + end_bound = end_bound.cast(pa.uint64()) + self.window_frame = expr_internal.WindowFrame(units, start_bound, end_bound) + + def get_frame_units(self) -> str: + """Returns the window frame units for the bounds.""" + return self.window_frame.get_frame_units() + + def get_lower_bound(self) -> WindowFrameBound: + """Returns starting bound.""" + return WindowFrameBound(self.window_frame.get_lower_bound()) + + def get_upper_bound(self): + """Returns end bound.""" + return WindowFrameBound(self.window_frame.get_upper_bound()) + + +class WindowFrameBound: + """Defines a single window frame bound. + + :py:class:`WindowFrame` typically requires a start and end bound. 
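+
+    A brief sketch, with the expected results hedged against the bound
+    descriptions in :py:class:`WindowFrame`::
+
+        frame = WindowFrame("rows", 2, None)
+        frame.get_lower_bound().is_preceding()   # expected: True
+        frame.get_upper_bound().is_unbounded()   # expected: True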
+ """ + + def __init__(self, frame_bound: expr_internal.WindowFrameBound) -> None: + """Constructs a window frame bound.""" + self.frame_bound = frame_bound + + def get_offset(self) -> int | None: + """Returns the offset of the window frame.""" + return self.frame_bound.get_offset() + + def is_current_row(self) -> bool: + """Returns if the frame bound is current row.""" + return self.frame_bound.is_current_row() + + def is_following(self) -> bool: + """Returns if the frame bound is following.""" + return self.frame_bound.is_following() + + def is_preceding(self) -> bool: + """Returns if the frame bound is preceding.""" + return self.frame_bound.is_preceding() + + def is_unbounded(self) -> bool: + """Returns if the frame bound is unbounded.""" + return self.frame_bound.is_unbounded() + + +class CaseBuilder: + """Builder class for constructing case statements. + + An example usage would be as follows:: + + import datafusion.functions as f + from datafusion import lit, col + df.select( + f.case(col("column_a") + .when(lit(1), lit("One")) + .when(lit(2), lit("Two")) + .otherwise(lit("Unknown")) + ) + """ + + def __init__(self, case_builder: expr_internal.CaseBuilder) -> None: + """Constructs a case builder. + + This is not typically called by the end user directly. See + :py:func:`datafusion.functions.case` instead. + """ + self.case_builder = case_builder + + def when(self, when_expr: Expr, then_expr: Expr) -> CaseBuilder: + """Add a case to match against.""" + return CaseBuilder(self.case_builder.when(when_expr.expr, then_expr.expr)) + + def otherwise(self, else_expr: Expr) -> Expr: + """Set a default value for the case statement.""" + return Expr(self.case_builder.otherwise(else_expr.expr)) + + def end(self) -> Expr: + """Finish building a case statement. + + Any non-matching cases will end in a `null` value. + """ + return Expr(self.case_builder.end()) + + +class SortExpr: + """Used to specify sorting on either a DataFrame or function.""" + + def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: + """This constructor should not be called by the end user.""" + self.raw_sort = expr_internal.SortExpr(expr, ascending, nulls_first) + + def expr(self) -> Expr: + """Return the raw expr backing the SortExpr.""" + return Expr(self.raw_sort.expr()) + + def ascending(self) -> bool: + """Return ascending property.""" + return self.raw_sort.ascending() + + def nulls_first(self) -> bool: + """Return nulls_first property.""" + return self.raw_sort.nulls_first() + + def __repr__(self) -> str: + """Generate a string representation of this expression.""" + return self.raw_sort.__repr__() diff --git a/py-denormalized/python/denormalized/datafusion/functions.py b/py-denormalized/python/denormalized/datafusion/functions.py new file mode 100644 index 0000000..291c578 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/functions.py @@ -0,0 +1,2659 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""User functions for operating on :py:class:`~datafusion.expr.Expr`.""" + +from __future__ import annotations + +from denormalized._internal import functions as f +from denormalized.datafusion.expr import ( + CaseBuilder, + Expr, + WindowFrame, + SortExpr, + sort_list_to_raw_sort_list, + expr_list_to_raw_expr_list, +) +from datafusion.context import SessionContext +from datafusion.common import NullTreatment + +from typing import Any, Optional + +import pyarrow as pa + +__all__ = [ + "abs", + "acos", + "acosh", + "alias", + "approx_distinct", + "approx_median", + "approx_percentile_cont", + "approx_percentile_cont_with_weight", + "array", + "array_agg", + "array_append", + "array_cat", + "array_concat", + "array_dims", + "array_distinct", + "array_element", + "array_except", + "array_extract", + "array_has", + "array_has_all", + "array_has_any", + "array_indexof", + "array_intersect", + "array_join", + "array_length", + "array_ndims", + "array_pop_back", + "array_pop_front", + "array_position", + "array_positions", + "array_prepend", + "array_push_back", + "array_push_front", + "array_remove", + "array_remove_all", + "array_remove_n", + "array_repeat", + "array_replace", + "array_replace_all", + "array_replace_n", + "array_resize", + "array_slice", + "array_sort", + "array_to_string", + "array_union", + "arrow_typeof", + "ascii", + "asin", + "asinh", + "atan", + "atan2", + "atanh", + "avg", + "bit_and", + "bit_length", + "bit_or", + "bit_xor", + "bool_and", + "bool_or", + "btrim", + "case", + "cbrt", + "ceil", + "char_length", + "character_length", + "chr", + "coalesce", + "col", + "concat", + "concat_ws", + "corr", + "cos", + "cosh", + "cot", + "count", + "count_star", + "covar", + "covar_pop", + "covar_samp", + "current_date", + "current_time", + "date_bin", + "date_part", + "date_trunc", + "datepart", + "datetrunc", + "decode", + "degrees", + "digest", + "encode", + "ends_with", + "exp", + "factorial", + "find_in_set", + "first_value", + "flatten", + "floor", + "from_unixtime", + "gcd", + "in_list", + "initcap", + "isnan", + "iszero", + "last_value", + "lcm", + "left", + "length", + "levenshtein", + "list_append", + "list_dims", + "list_distinct", + "list_element", + "list_except", + "list_extract", + "list_indexof", + "list_intersect", + "list_join", + "list_length", + "list_ndims", + "list_position", + "list_positions", + "list_prepend", + "list_push_back", + "list_push_front", + "list_remove", + "list_remove_all", + "list_remove_n", + "list_replace", + "list_replace_all", + "list_replace_n", + "list_resize", + "list_slice", + "list_sort", + "list_to_string", + "list_union", + "ln", + "log", + "log10", + "log2", + "lower", + "lpad", + "ltrim", + "make_array", + "make_date", + "max", + "md5", + "mean", + "median", + "min", + "named_struct", + "nanvl", + "now", + "nth_value", + "nullif", + "octet_length", + "order_by", + "overlay", + "pi", + "pow", + "power", + "radians", + "random", + "range", + "regexp_like", + "regexp_match", + "regexp_replace", + "regr_avgx", + "regr_avgy", + "regr_count", + "regr_intercept", + "regr_r2", + "regr_slope", + "regr_sxx", + "regr_sxy", + 
"regr_syy", + "repeat", + "replace", + "reverse", + "right", + "round", + "rpad", + "rtrim", + "sha224", + "sha256", + "sha384", + "sha512", + "signum", + "sin", + "sinh", + "split_part", + "sqrt", + "starts_with", + "stddev", + "stddev_pop", + "stddev_samp", + "string_agg", + "strpos", + "struct", + "substr", + "substr_index", + "substring", + "sum", + "tan", + "tanh", + "to_hex", + "to_timestamp", + "to_timestamp_micros", + "to_timestamp_millis", + "to_timestamp_seconds", + "to_unixtime", + "translate", + "trim", + "trunc", + "upper", + "uuid", + "var", + "var_pop", + "var_samp", + "var_sample", + "when", + # Window Functions + "window", + "lead", + "lag", + "row_number", + "rank", + "dense_rank", + "percent_rank", + "cume_dist", + "ntile", +] + + +def isnan(expr: Expr) -> Expr: + """Returns true if a given number is +NaN or -NaN otherwise returns false.""" + return Expr(f.isnan(expr.expr)) + + +def nullif(expr1: Expr, expr2: Expr) -> Expr: + """Returns NULL if expr1 equals expr2; otherwise it returns expr1. + + This can be used to perform the inverse operation of the COALESCE expression. + """ + return Expr(f.nullif(expr1.expr, expr2.expr)) + + +def encode(input: Expr, encoding: Expr) -> Expr: + """Encode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" + return Expr(f.encode(input.expr, encoding.expr)) + + +def decode(input: Expr, encoding: Expr) -> Expr: + """Decode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" + return Expr(f.decode(input.expr, encoding.expr)) + + +def array_to_string(expr: Expr, delimiter: Expr) -> Expr: + """Converts each element to its text representation.""" + return Expr(f.array_to_string(expr.expr, delimiter.expr)) + + +def array_join(expr: Expr, delimiter: Expr) -> Expr: + """Converts each element to its text representation. + + This is an alias for :py:func:`array_to_string`. + """ + return array_to_string(expr, delimiter) + + +def list_to_string(expr: Expr, delimiter: Expr) -> Expr: + """Converts each element to its text representation. + + This is an alias for :py:func:`array_to_string`. + """ + return array_to_string(expr, delimiter) + + +def list_join(expr: Expr, delimiter: Expr) -> Expr: + """Converts each element to its text representation. + + This is an alias for :py:func:`array_to_string`. + """ + return array_to_string(expr, delimiter) + + +def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr: + """Returns whether the argument is contained within the list ``values``.""" + values = [v.expr for v in values] + return Expr(f.in_list(arg.expr, values, negated)) + + +def digest(value: Expr, method: Expr) -> Expr: + """Computes the binary hash of an expression using the specified algorithm. + + Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, + blake2b, and blake3. + """ + return Expr(f.digest(value.expr, method.expr)) + + +def concat(*args: Expr) -> Expr: + """Concatenates the text representations of all the arguments. + + NULL arguments are ignored. + """ + args = [arg.expr for arg in args] + return Expr(f.concat(args)) + + +def concat_ws(separator: str, *args: Expr) -> Expr: + """Concatenates the list ``args`` with the separator. + + ``NULL`` arguments are ignored. ``separator`` should not be ``NULL``. 
+ """ + args = [arg.expr for arg in args] + return Expr(f.concat_ws(separator, args)) + + +def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: + """Creates a new sort expression.""" + return SortExpr(expr.expr, ascending=ascending, nulls_first=nulls_first) + + +def alias(expr: Expr, name: str) -> Expr: + """Creates an alias expression.""" + return Expr(f.alias(expr.expr, name)) + + +def col(name: str) -> Expr: + """Creates a column reference expression.""" + return Expr(f.col(name)) + + +def count_star(filter: Optional[Expr] = None) -> Expr: + """Create a COUNT(1) aggregate expression. + + This aggregate function will count all of the rows in the partition. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``distinct``, and ``null_treatment``. + + Args: + filter: If provided, only count rows for which the filter is True + """ + return count(Expr.literal(1), filter=filter) + + +def case(expr: Expr) -> CaseBuilder: + """Create a case expression. + + Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the + expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for + detailed usage. + """ + return CaseBuilder(f.case(expr.expr)) + + +def when(when: Expr, then: Expr) -> CaseBuilder: + """Create a case expression that has no base expression. + + Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the + expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for + detailed usage. + """ + return CaseBuilder(f.when(when.expr, then.expr)) + + +def window( + name: str, + args: list[Expr], + partition_by: list[Expr] | None = None, + order_by: list[Expr | SortExpr] | None = None, + window_frame: WindowFrame | None = None, + ctx: SessionContext | None = None, +) -> Expr: + """Creates a new Window function expression. + + This interface will soon be deprecated. Instead of using this interface, + users should call the window functions directly. For example, to perform a + lag use:: + + df.select(functions.lag(col("a")).partition_by(col("b")).build()) + """ + args = [a.expr for a in args] + partition_by = expr_list_to_raw_expr_list(partition_by) + order_by_raw = sort_list_to_raw_sort_list(order_by) + window_frame = window_frame.window_frame if window_frame is not None else None + return Expr(f.window(name, args, partition_by, order_by_raw, window_frame, ctx)) + + +# scalar functions +def abs(arg: Expr) -> Expr: + """Return the absolute value of a given number. + + Returns: + -------- + Expr + A new expression representing the absolute value of the input expression. + """ + return Expr(f.abs(arg.expr)) + + +def acos(arg: Expr) -> Expr: + """Returns the arc cosine or inverse cosine of a number. + + Returns: + -------- + Expr + A new expression representing the arc cosine of the input expression. 
+ """ + return Expr(f.acos(arg.expr)) + + +def acosh(arg: Expr) -> Expr: + """Returns inverse hyperbolic cosine.""" + return Expr(f.acosh(arg.expr)) + + +def ascii(arg: Expr) -> Expr: + """Returns the numeric code of the first character of the argument.""" + return Expr(f.ascii(arg.expr)) + + +def asin(arg: Expr) -> Expr: + """Returns the arc sine or inverse sine of a number.""" + return Expr(f.asin(arg.expr)) + + +def asinh(arg: Expr) -> Expr: + """Returns inverse hyperbolic sine.""" + return Expr(f.asinh(arg.expr)) + + +def atan(arg: Expr) -> Expr: + """Returns inverse tangent of a number.""" + return Expr(f.atan(arg.expr)) + + +def atanh(arg: Expr) -> Expr: + """Returns inverse hyperbolic tangent.""" + return Expr(f.atanh(arg.expr)) + + +def atan2(y: Expr, x: Expr) -> Expr: + """Returns inverse tangent of a division given in the argument.""" + return Expr(f.atan2(y.expr, x.expr)) + + +def bit_length(arg: Expr) -> Expr: + """Returns the number of bits in the string argument.""" + return Expr(f.bit_length(arg.expr)) + + +def btrim(arg: Expr) -> Expr: + """Removes all characters, spaces by default, from both sides of a string.""" + return Expr(f.btrim(arg.expr)) + + +def cbrt(arg: Expr) -> Expr: + """Returns the cube root of a number.""" + return Expr(f.cbrt(arg.expr)) + + +def ceil(arg: Expr) -> Expr: + """Returns the nearest integer greater than or equal to argument.""" + return Expr(f.ceil(arg.expr)) + + +def character_length(arg: Expr) -> Expr: + """Returns the number of characters in the argument.""" + return Expr(f.character_length(arg.expr)) + + +def length(string: Expr) -> Expr: + """The number of characters in the ``string``.""" + return Expr(f.length(string.expr)) + + +def char_length(string: Expr) -> Expr: + """The number of characters in the ``string``.""" + return Expr(f.char_length(string.expr)) + + +def chr(arg: Expr) -> Expr: + """Converts the Unicode code point to a UTF8 character.""" + return Expr(f.chr(arg.expr)) + + +def coalesce(*args: Expr) -> Expr: + """Returns the value of the first expr in ``args`` which is not NULL.""" + args = [arg.expr for arg in args] + return Expr(f.coalesce(*args)) + + +def cos(arg: Expr) -> Expr: + """Returns the cosine of the argument.""" + return Expr(f.cos(arg.expr)) + + +def cosh(arg: Expr) -> Expr: + """Returns the hyperbolic cosine of the argument.""" + return Expr(f.cosh(arg.expr)) + + +def cot(arg: Expr) -> Expr: + """Returns the cotangent of the argument.""" + return Expr(f.cot(arg.expr)) + + +def degrees(arg: Expr) -> Expr: + """Converts the argument from radians to degrees.""" + return Expr(f.degrees(arg.expr)) + + +def ends_with(arg: Expr, suffix: Expr) -> Expr: + """Returns true if the ``string`` ends with the ``suffix``, false otherwise.""" + return Expr(f.ends_with(arg.expr, suffix.expr)) + + +def exp(arg: Expr) -> Expr: + """Returns the exponential of the argument.""" + return Expr(f.exp(arg.expr)) + + +def factorial(arg: Expr) -> Expr: + """Returns the factorial of the argument.""" + return Expr(f.factorial(arg.expr)) + + +def find_in_set(string: Expr, string_list: Expr) -> Expr: + """Find a string in a list of strings. + + Returns a value in the range of 1 to N if the string is in the string list + ``string_list`` consisting of N substrings. + + The string list is a string composed of substrings separated by ``,`` characters. 
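+
+    A small sketch (values are illustrative)::
+
+        # "b" is the second substring, so this is expected to evaluate to 2
+        idx = find_in_set(lit("b"), lit("a,b,c"))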
+ """ + return Expr(f.find_in_set(string.expr, string_list.expr)) + + +def floor(arg: Expr) -> Expr: + """Returns the nearest integer less than or equal to the argument.""" + return Expr(f.floor(arg.expr)) + + +def gcd(x: Expr, y: Expr) -> Expr: + """Returns the greatest common divisor.""" + return Expr(f.gcd(x.expr, y.expr)) + + +def initcap(string: Expr) -> Expr: + """Set the initial letter of each word to capital. + + Converts the first letter of each word in ``string`` to uppercase and the remaining + characters to lowercase. + """ + return Expr(f.initcap(string.expr)) + + +def instr(string: Expr, substring: Expr) -> Expr: + """Finds the position from where the ``substring`` matches the ``string``. + + This is an alias for :py:func:`strpos`. + """ + return strpos(string, substring) + + +def iszero(arg: Expr) -> Expr: + """Returns true if a given number is +0.0 or -0.0 otherwise returns false.""" + return Expr(f.iszero(arg.expr)) + + +def lcm(x: Expr, y: Expr) -> Expr: + """Returns the least common multiple.""" + return Expr(f.lcm(x.expr, y.expr)) + + +def left(string: Expr, n: Expr) -> Expr: + """Returns the first ``n`` characters in the ``string``.""" + return Expr(f.left(string.expr, n.expr)) + + +def levenshtein(string1: Expr, string2: Expr) -> Expr: + """Returns the Levenshtein distance between the two given strings.""" + return Expr(f.levenshtein(string1.expr, string2.expr)) + + +def ln(arg: Expr) -> Expr: + """Returns the natural logarithm (base e) of the argument.""" + return Expr(f.ln(arg.expr)) + + +def log(base: Expr, num: Expr) -> Expr: + """Returns the logarithm of a number for a particular ``base``.""" + return Expr(f.log(base.expr, num.expr)) + + +def log10(arg: Expr) -> Expr: + """Base 10 logarithm of the argument.""" + return Expr(f.log10(arg.expr)) + + +def log2(arg: Expr) -> Expr: + """Base 2 logarithm of the argument.""" + return Expr(f.log2(arg.expr)) + + +def lower(arg: Expr) -> Expr: + """Converts a string to lowercase.""" + return Expr(f.lower(arg.expr)) + + +def lpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr: + """Add left padding to a string. + + Extends the string to length length by prepending the characters fill (a + space by default). If the string is already longer than length then it is + truncated (on the right). + """ + characters = characters if characters is not None else Expr.literal(" ") + return Expr(f.lpad(string.expr, count.expr, characters.expr)) + + +def ltrim(arg: Expr) -> Expr: + """Removes all characters, spaces by default, from the beginning of a string.""" + return Expr(f.ltrim(arg.expr)) + + +def md5(arg: Expr) -> Expr: + """Computes an MD5 128-bit checksum for a string expression.""" + return Expr(f.md5(arg.expr)) + + +def nanvl(x: Expr, y: Expr) -> Expr: + """Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``.""" + return Expr(f.nanvl(x.expr, y.expr)) + + +def octet_length(arg: Expr) -> Expr: + """Returns the number of bytes of a string.""" + return Expr(f.octet_length(arg.expr)) + + +def overlay( + string: Expr, substring: Expr, start: Expr, length: Expr | None = None +) -> Expr: + """Replace a substring with a new substring. + + Replace the substring of string that starts at the ``start``'th character and + extends for ``length`` characters with new substring. 
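+
+    A brief sketch (values are illustrative)::
+
+        # replace the 4 characters starting at position 2 with "hom";
+        # expected to yield "Thomas"
+        expr = overlay(lit("Txxxxas"), lit("hom"), lit(2), lit(4))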
+ """ + if length is None: + return Expr(f.overlay(string.expr, substring.expr, start.expr)) + return Expr(f.overlay(string.expr, substring.expr, start.expr, length.expr)) + + +def pi() -> Expr: + """Returns an approximate value of π.""" + return Expr(f.pi()) + + +def position(string: Expr, substring: Expr) -> Expr: + """Finds the position from where the ``substring`` matches the ``string``. + + This is an alias for :py:func:`strpos`. + """ + return strpos(string, substring) + + +def power(base: Expr, exponent: Expr) -> Expr: + """Returns ``base`` raised to the power of ``exponent``.""" + return Expr(f.power(base.expr, exponent.expr)) + + +def pow(base: Expr, exponent: Expr) -> Expr: + """Returns ``base`` raised to the power of ``exponent``. + + This is an alias of :py:func:`power`. + """ + return power(base, exponent) + + +def radians(arg: Expr) -> Expr: + """Converts the argument from degrees to radians.""" + return Expr(f.radians(arg.expr)) + + +def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: + """Find if any regular expression (regex) matches exist. + + Tests a string using a regular expression returning true if at least one match, + false otherwise. + """ + if flags is not None: + flags = flags.expr + return Expr(f.regexp_like(string.expr, regex.expr, flags)) + + +def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr: + """Perform regular expression (regex) matching. + + Returns an array with each element containing the leftmost-first match of the + corresponding index in ``regex`` to string in ``string``. + """ + if flags is not None: + flags = flags.expr + return Expr(f.regexp_match(string.expr, regex.expr, flags)) + + +def regexp_replace( + string: Expr, pattern: Expr, replacement: Expr, flags: Expr | None = None +) -> Expr: + """Replaces substring(s) matching a PCRE-like regular expression. + + The full list of supported features and syntax can be found at + + + Supported flags with the addition of 'g' can be found at + + """ + if flags is not None: + flags = flags.expr + return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags)) + + +def repeat(string: Expr, n: Expr) -> Expr: + """Repeats the ``string`` to ``n`` times.""" + return Expr(f.repeat(string.expr, n.expr)) + + +def replace(string: Expr, from_val: Expr, to_val: Expr) -> Expr: + """Replaces all occurrences of ``from_val`` with ``to_val`` in the ``string``.""" + return Expr(f.replace(string.expr, from_val.expr, to_val.expr)) + + +def reverse(arg: Expr) -> Expr: + """Reverse the string argument.""" + return Expr(f.reverse(arg.expr)) + + +def right(string: Expr, n: Expr) -> Expr: + """Returns the last ``n`` characters in the ``string``.""" + return Expr(f.right(string.expr, n.expr)) + + +def round(value: Expr, decimal_places: Expr = Expr.literal(0)) -> Expr: + """Round the argument to the nearest integer. + + If the optional ``decimal_places`` is specified, round to the nearest number of + decimal places. You can specify a negative number of decimal places. For example + ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``. + """ + return Expr(f.round(value.expr, decimal_places.expr)) + + +def rpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr: + """Add right padding to a string. + + Extends the string to length length by appending the characters fill (a space + by default). If the string is already longer than length then it is truncated. 
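+
+    For example (values are illustrative)::
+
+        # expected to yield "ab***", padding on the right to length 5
+        expr = rpad(lit("ab"), lit(5), lit("*"))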
+ """ + characters = characters if characters is not None else Expr.literal(" ") + return Expr(f.rpad(string.expr, count.expr, characters.expr)) + + +def rtrim(arg: Expr) -> Expr: + """Removes all characters, spaces by default, from the end of a string.""" + return Expr(f.rtrim(arg.expr)) + + +def sha224(arg: Expr) -> Expr: + """Computes the SHA-224 hash of a binary string.""" + return Expr(f.sha224(arg.expr)) + + +def sha256(arg: Expr) -> Expr: + """Computes the SHA-256 hash of a binary string.""" + return Expr(f.sha256(arg.expr)) + + +def sha384(arg: Expr) -> Expr: + """Computes the SHA-384 hash of a binary string.""" + return Expr(f.sha384(arg.expr)) + + +def sha512(arg: Expr) -> Expr: + """Computes the SHA-512 hash of a binary string.""" + return Expr(f.sha512(arg.expr)) + + +def signum(arg: Expr) -> Expr: + """Returns the sign of the argument (-1, 0, +1).""" + return Expr(f.signum(arg.expr)) + + +def sin(arg: Expr) -> Expr: + """Returns the sine of the argument.""" + return Expr(f.sin(arg.expr)) + + +def sinh(arg: Expr) -> Expr: + """Returns the hyperbolic sine of the argument.""" + return Expr(f.sinh(arg.expr)) + + +def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr: + """Split a string and return one part. + + Splits a string based on a delimiter and picks out the desired field based + on the index. + """ + return Expr(f.split_part(string.expr, delimiter.expr, index.expr)) + + +def sqrt(arg: Expr) -> Expr: + """Returns the square root of the argument.""" + return Expr(f.sqrt(arg.expr)) + + +def starts_with(string: Expr, prefix: Expr) -> Expr: + """Returns true if string starts with prefix.""" + return Expr(f.starts_with(string.expr, prefix.expr)) + + +def strpos(string: Expr, substring: Expr) -> Expr: + """Finds the position from where the ``substring`` matches the ``string``.""" + return Expr(f.strpos(string.expr, substring.expr)) + + +def substr(string: Expr, position: Expr) -> Expr: + """Substring from the ``position`` to the end.""" + return Expr(f.substr(string.expr, position.expr)) + + +def substr_index(string: Expr, delimiter: Expr, count: Expr) -> Expr: + """Returns an indexed substring. + + The return will be the ``string`` from before ``count`` occurrences of + ``delimiter``. + """ + return Expr(f.substr_index(string.expr, delimiter.expr, count.expr)) + + +def substring(string: Expr, position: Expr, length: Expr) -> Expr: + """Substring from the ``position`` with ``length`` characters.""" + return Expr(f.substring(string.expr, position.expr, length.expr)) + + +def tan(arg: Expr) -> Expr: + """Returns the tangent of the argument.""" + return Expr(f.tan(arg.expr)) + + +def tanh(arg: Expr) -> Expr: + """Returns the hyperbolic tangent of the argument.""" + return Expr(f.tanh(arg.expr)) + + +def to_hex(arg: Expr) -> Expr: + """Converts an integer to a hexadecimal string.""" + return Expr(f.to_hex(arg.expr)) + + +def now() -> Expr: + """Returns the current timestamp in nanoseconds. + + This will use the same value for all instances of now() in same statement. + """ + return Expr(f.now()) + + +def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: + """Converts a string and optional formats to a ``Timestamp`` in nanoseconds. + + For usage of ``formatters`` see the rust chrono package ``strftime`` package. 
+ + [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) + """ + if formatters is None: + return f.to_timestamp(arg.expr) + + formatters = [f.expr for f in formatters] + return Expr(f.to_timestamp(arg.expr, *formatters)) + + +def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: + """Converts a string and optional formats to a ``Timestamp`` in milliseconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + """ + return Expr(f.to_timestamp_millis(arg.expr, *formatters)) + + +def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: + """Converts a string and optional formats to a ``Timestamp`` in microseconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + """ + return Expr(f.to_timestamp_micros(arg.expr, *formatters)) + + +def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: + """Converts a string and optional formats to a ``Timestamp`` in nanoseconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + """ + return Expr(f.to_timestamp_nanos(arg.expr, *formatters)) + + +def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: + """Converts a string and optional formats to a ``Timestamp`` in seconds. + + See :py:func:`to_timestamp` for a description on how to use formatters. + """ + return Expr(f.to_timestamp_seconds(arg.expr, *formatters)) + + +def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr: + """Converts a string and optional formats to a Unixtime.""" + args = [f.expr for f in format_arguments] + return Expr(f.to_unixtime(string.expr, *args)) + + +def current_date() -> Expr: + """Returns current UTC date as a Date32 value.""" + return Expr(f.current_date()) + + +def current_time() -> Expr: + """Returns current UTC time as a Time64 value.""" + return Expr(f.current_time()) + + +def datepart(part: Expr, date: Expr) -> Expr: + """Return a specified part of a date. + + This is an alias for :py:func:`date_part`. + """ + return date_part(part, date) + + +def date_part(part: Expr, date: Expr) -> Expr: + """Extracts a subfield from the date.""" + return Expr(f.date_part(part.expr, date.expr)) + + +def date_trunc(part: Expr, date: Expr) -> Expr: + """Truncates the date to a specified level of precision.""" + return Expr(f.date_trunc(part.expr, date.expr)) + + +def datetrunc(part: Expr, date: Expr) -> Expr: + """Truncates the date to a specified level of precision. + + This is an alias for :py:func:`date_trunc`. 
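+
+    For example (column name is illustrative)::
+
+        # truncate timestamps in column "ts" to the start of their month
+        expr = datetrunc(lit("month"), col("ts"))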
+ """ + return date_trunc(part, date) + + +def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr: + """Coerces an arbitrary timestamp to the start of the nearest specified interval.""" + return Expr(f.date_bin(stride.expr, source.expr, origin.expr)) + + +def make_date(year: Expr, month: Expr, day: Expr) -> Expr: + """Make a date from year, month and day component parts.""" + return Expr(f.make_date(year.expr, month.expr, day.expr)) + + +def translate(string: Expr, from_val: Expr, to_val: Expr) -> Expr: + """Replaces the characters in ``from_val`` with the counterpart in ``to_val``.""" + return Expr(f.translate(string.expr, from_val.expr, to_val.expr)) + + +def trim(arg: Expr) -> Expr: + """Removes all characters, spaces by default, from both sides of a string.""" + return Expr(f.trim(arg.expr)) + + +def trunc(num: Expr, precision: Expr | None = None) -> Expr: + """Truncate the number toward zero with optional precision.""" + if precision is not None: + return Expr(f.trunc(num.expr, precision.expr)) + return Expr(f.trunc(num.expr)) + + +def upper(arg: Expr) -> Expr: + """Converts a string to uppercase.""" + return Expr(f.upper(arg.expr)) + + +def make_array(*args: Expr) -> Expr: + """Returns an array using the specified input expressions.""" + args = [arg.expr for arg in args] + return Expr(f.make_array(args)) + + +def array(*args: Expr) -> Expr: + """Returns an array using the specified input expressions. + + This is an alias for :py:func:`make_array`. + """ + return make_array(*args) + + +def range(start: Expr, stop: Expr, step: Expr) -> Expr: + """Create a list of values in the range between start and stop.""" + return Expr(f.range(start.expr, stop.expr, step.expr)) + + +def uuid(arg: Expr) -> Expr: + """Returns uuid v4 as a string value.""" + return Expr(f.uuid(arg.expr)) + + +def struct(*args: Expr) -> Expr: + """Returns a struct with the given arguments.""" + args = [arg.expr for arg in args] + return Expr(f.struct(*args)) + + +def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr: + """Returns a struct with the given names and arguments pairs.""" + name_pair_exprs = [[Expr.literal(pair[0]), pair[1]] for pair in name_pairs] + + # flatten + name_pairs = [x.expr for xs in name_pair_exprs for x in xs] + return Expr(f.named_struct(*name_pairs)) + + +def from_unixtime(arg: Expr) -> Expr: + """Converts an integer to RFC3339 timestamp format string.""" + return Expr(f.from_unixtime(arg.expr)) + + +def arrow_typeof(arg: Expr) -> Expr: + """Returns the Arrow type of the expression.""" + return Expr(f.arrow_typeof(arg.expr)) + + +def random() -> Expr: + """Returns a random value in the range ``0.0 <= x < 1.0``.""" + return Expr(f.random()) + + +def array_append(array: Expr, element: Expr) -> Expr: + """Appends an element to the end of an array.""" + return Expr(f.array_append(array.expr, element.expr)) + + +def array_push_back(array: Expr, element: Expr) -> Expr: + """Appends an element to the end of an array. + + This is an alias for :py:func:`array_append`. + """ + return array_append(array, element) + + +def list_append(array: Expr, element: Expr) -> Expr: + """Appends an element to the end of an array. + + This is an alias for :py:func:`array_append`. + """ + return array_append(array, element) + + +def list_push_back(array: Expr, element: Expr) -> Expr: + """Appends an element to the end of an array. + + This is an alias for :py:func:`array_append`. 
+ """ + return array_append(array, element) + + +def array_concat(*args: Expr) -> Expr: + """Concatenates the input arrays.""" + args = [arg.expr for arg in args] + return Expr(f.array_concat(args)) + + +def array_cat(*args: Expr) -> Expr: + """Concatenates the input arrays. + + This is an alias for :py:func:`array_concat`. + """ + return array_concat(*args) + + +def array_dims(array: Expr) -> Expr: + """Returns an array of the array's dimensions.""" + return Expr(f.array_dims(array.expr)) + + +def array_distinct(array: Expr) -> Expr: + """Returns distinct values from the array after removing duplicates.""" + return Expr(f.array_distinct(array.expr)) + + +def list_distinct(array: Expr) -> Expr: + """Returns distinct values from the array after removing duplicates. + + This is an alias for :py:func:`array_distinct`. + """ + return array_distinct(array) + + +def list_dims(array: Expr) -> Expr: + """Returns an array of the array's dimensions. + + This is an alias for :py:func:`array_dims`. + """ + return array_dims(array) + + +def array_element(array: Expr, n: Expr) -> Expr: + """Extracts the element with the index n from the array.""" + return Expr(f.array_element(array.expr, n.expr)) + + +def array_extract(array: Expr, n: Expr) -> Expr: + """Extracts the element with the index n from the array. + + This is an alias for :py:func:`array_element`. + """ + return array_element(array, n) + + +def list_element(array: Expr, n: Expr) -> Expr: + """Extracts the element with the index n from the array. + + This is an alias for :py:func:`array_element`. + """ + return array_element(array, n) + + +def list_extract(array: Expr, n: Expr) -> Expr: + """Extracts the element with the index n from the array. + + This is an alias for :py:func:`array_element`. + """ + return array_element(array, n) + + +def array_length(array: Expr) -> Expr: + """Returns the length of the array.""" + return Expr(f.array_length(array.expr)) + + +def list_length(array: Expr) -> Expr: + """Returns the length of the array. + + This is an alias for :py:func:`array_length`. + """ + return array_length(array) + + +def array_has(first_array: Expr, second_array: Expr) -> Expr: + """Returns true if the element appears in the first array, otherwise false.""" + return Expr(f.array_has(first_array.expr, second_array.expr)) + + +def array_has_all(first_array: Expr, second_array: Expr) -> Expr: + """Determines if there is complete overlap ``second_array`` in ``first_array``. + + Returns true if each element of the second array appears in the first array. + Otherwise, it returns false. + """ + return Expr(f.array_has_all(first_array.expr, second_array.expr)) + + +def array_has_any(first_array: Expr, second_array: Expr) -> Expr: + """Determine if there is an overlap between ``first_array`` and ``second_array``. + + Returns true if at least one element of the second array appears in the first + array. Otherwise, it returns false. + """ + return Expr(f.array_has_any(first_array.expr, second_array.expr)) + + +def array_position(array: Expr, element: Expr, index: int | None = 1) -> Expr: + """Return the position of the first occurrence of ``element`` in ``array``.""" + return Expr(f.array_position(array.expr, element.expr, index)) + + +def array_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr: + """Return the position of the first occurrence of ``element`` in ``array``. + + This is an alias for :py:func:`array_position`. 
+ """ + return array_position(array, element, index) + + +def list_position(array: Expr, element: Expr, index: int | None = 1) -> Expr: + """Return the position of the first occurrence of ``element`` in ``array``. + + This is an alias for :py:func:`array_position`. + """ + return array_position(array, element, index) + + +def list_indexof(array: Expr, element: Expr, index: int | None = 1) -> Expr: + """Return the position of the first occurrence of ``element`` in ``array``. + + This is an alias for :py:func:`array_position`. + """ + return array_position(array, element, index) + + +def array_positions(array: Expr, element: Expr) -> Expr: + """Searches for an element in the array and returns all occurrences.""" + return Expr(f.array_positions(array.expr, element.expr)) + + +def list_positions(array: Expr, element: Expr) -> Expr: + """Searches for an element in the array and returns all occurrences. + + This is an alias for :py:func:`array_positions`. + """ + return array_positions(array, element) + + +def array_ndims(array: Expr) -> Expr: + """Returns the number of dimensions of the array.""" + return Expr(f.array_ndims(array.expr)) + + +def list_ndims(array: Expr) -> Expr: + """Returns the number of dimensions of the array. + + This is an alias for :py:func:`array_ndims`. + """ + return array_ndims(array) + + +def array_prepend(element: Expr, array: Expr) -> Expr: + """Prepends an element to the beginning of an array.""" + return Expr(f.array_prepend(element.expr, array.expr)) + + +def array_push_front(element: Expr, array: Expr) -> Expr: + """Prepends an element to the beginning of an array. + + This is an alias for :py:func:`array_prepend`. + """ + return array_prepend(element, array) + + +def list_prepend(element: Expr, array: Expr) -> Expr: + """Prepends an element to the beginning of an array. + + This is an alias for :py:func:`array_prepend`. + """ + return array_prepend(element, array) + + +def list_push_front(element: Expr, array: Expr) -> Expr: + """Prepends an element to the beginning of an array. + + This is an alias for :py:func:`array_prepend`. + """ + return array_prepend(element, array) + + +def array_pop_back(array: Expr) -> Expr: + """Returns the array without the last element.""" + return Expr(f.array_pop_back(array.expr)) + + +def array_pop_front(array: Expr) -> Expr: + """Returns the array without the first element.""" + return Expr(f.array_pop_front(array.expr)) + + +def array_remove(array: Expr, element: Expr) -> Expr: + """Removes the first element from the array equal to the given value.""" + return Expr(f.array_remove(array.expr, element.expr)) + + +def list_remove(array: Expr, element: Expr) -> Expr: + """Removes the first element from the array equal to the given value. + + This is an alias for :py:func:`array_remove`. + """ + return array_remove(array, element) + + +def array_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: + """Removes the first ``max`` elements from the array equal to the given value.""" + return Expr(f.array_remove_n(array.expr, element.expr, max.expr)) + + +def list_remove_n(array: Expr, element: Expr, max: Expr) -> Expr: + """Removes the first ``max`` elements from the array equal to the given value. + + This is an alias for :py:func:`array_remove_n`. 
+ """ + return array_remove_n(array, element, max) + + +def array_remove_all(array: Expr, element: Expr) -> Expr: + """Removes all elements from the array equal to the given value.""" + return Expr(f.array_remove_all(array.expr, element.expr)) + + +def list_remove_all(array: Expr, element: Expr) -> Expr: + """Removes all elements from the array equal to the given value. + + This is an alias for :py:func:`array_remove_all`. + """ + return array_remove_all(array, element) + + +def array_repeat(element: Expr, count: Expr) -> Expr: + """Returns an array containing ``element`` ``count`` times.""" + return Expr(f.array_repeat(element.expr, count.expr)) + + +def array_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr: + """Replaces the first occurrence of ``from_val`` with ``to_val``.""" + return Expr(f.array_replace(array.expr, from_val.expr, to_val.expr)) + + +def list_replace(array: Expr, from_val: Expr, to_val: Expr) -> Expr: + """Replaces the first occurrence of ``from_val`` with ``to_val``. + + This is an alias for :py:func:`array_replace`. + """ + return array_replace(array, from_val, to_val) + + +def array_replace_n(array: Expr, from_val: Expr, to_val: Expr, max: Expr) -> Expr: + """Replace ``n`` occurrences of ``from_val`` with ``to_val``. + + Replaces the first ``max`` occurrences of the specified element with another + specified element. + """ + return Expr(f.array_replace_n(array.expr, from_val.expr, to_val.expr, max.expr)) + + +def list_replace_n(array: Expr, from_val: Expr, to_val: Expr, max: Expr) -> Expr: + """Replace ``n`` occurrences of ``from_val`` with ``to_val``. + + Replaces the first ``max`` occurrences of the specified element with another + specified element. + + This is an alias for :py:func:`array_replace_n`. + """ + return array_replace_n(array, from_val, to_val, max) + + +def array_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr: + """Replaces all occurrences of ``from_val`` with ``to_val``.""" + return Expr(f.array_replace_all(array.expr, from_val.expr, to_val.expr)) + + +def list_replace_all(array: Expr, from_val: Expr, to_val: Expr) -> Expr: + """Replaces all occurrences of ``from_val`` with ``to_val``. + + This is an alias for :py:func:`array_replace_all`. + """ + return array_replace_all(array, from_val, to_val) + + +def array_sort(array: Expr, descending: bool = False, null_first: bool = False) -> Expr: + """Sort an array. + + Args: + array: The input array to sort. + descending: If True, sorts in descending order. + null_first: If True, nulls will be returned at the beginning of the array. + """ + desc = "DESC" if descending else "ASC" + nulls_first = "NULLS FIRST" if null_first else "NULLS LAST" + return Expr( + f.array_sort( + array.expr, Expr.literal(desc).expr, Expr.literal(nulls_first).expr + ) + ) + + +def list_sort(array: Expr, descending: bool = False, null_first: bool = False) -> Expr: + """This is an alias for :py:func:`array_sort`.""" + return array_sort(array, descending=descending, null_first=null_first) + + +def array_slice( + array: Expr, begin: Expr, end: Expr, stride: Expr | None = None +) -> Expr: + """Returns a slice of the array.""" + if stride is not None: + stride = stride.expr + return Expr(f.array_slice(array.expr, begin.expr, end.expr, stride)) + + +def list_slice(array: Expr, begin: Expr, end: Expr, stride: Expr | None = None) -> Expr: + """Returns a slice of the array. + + This is an alias for :py:func:`array_slice`. 
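A short sketch of how the array construction, sorting, and slicing helpers above compose, again assuming the `col`/`lit` re-exports mirror upstream datafusion-python:

# Illustrative sketch; `col`/`lit` re-exports and column names are assumptions.
from denormalized.datafusion import col, lit
from denormalized.datafusion import functions as f

arr = f.make_array(col("a"), col("b"), col("c"))
sorted_arr = f.array_sort(arr, descending=True)       # largest values first
top_two = f.array_slice(sorted_arr, lit(1), lit(2))   # indices are 1-based and inclusive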
+ """ + return array_slice(array, begin, end, stride) + + +def array_intersect(array1: Expr, array2: Expr) -> Expr: + """Returns the intersection of ``array1`` and ``array2``.""" + return Expr(f.array_intersect(array1.expr, array2.expr)) + + +def list_intersect(array1: Expr, array2: Expr) -> Expr: + """Returns an the intersection of ``array1`` and ``array2``. + + This is an alias for :py:func:`array_intersect`. + """ + return array_intersect(array1, array2) + + +def array_union(array1: Expr, array2: Expr) -> Expr: + """Returns an array of the elements in the union of array1 and array2. + + Duplicate rows will not be returned. + """ + return Expr(f.array_union(array1.expr, array2.expr)) + + +def list_union(array1: Expr, array2: Expr) -> Expr: + """Returns an array of the elements in the union of array1 and array2. + + Duplicate rows will not be returned. + + This is an alias for :py:func:`array_union`. + """ + return array_union(array1, array2) + + +def array_except(array1: Expr, array2: Expr) -> Expr: + """Returns the elements that appear in ``array1`` but not in ``array2``.""" + return Expr(f.array_except(array1.expr, array2.expr)) + + +def list_except(array1: Expr, array2: Expr) -> Expr: + """Returns the elements that appear in ``array1`` but not in the ``array2``. + + This is an alias for :py:func:`array_except`. + """ + return array_except(array1, array2) + + +def array_resize(array: Expr, size: Expr, value: Expr) -> Expr: + """Returns an array with the specified size filled. + + If ``size`` is greater than the ``array`` length, the additional entries will + be filled with the given ``value``. + """ + return Expr(f.array_resize(array.expr, size.expr, value.expr)) + + +def list_resize(array: Expr, size: Expr, value: Expr) -> Expr: + """Returns an array with the specified size filled. + + If ``size`` is greater than the ``array`` length, the additional entries will be + filled with the given ``value``. This is an alias for :py:func:`array_resize`. + """ + return array_resize(array, size, value) + + +def flatten(array: Expr) -> Expr: + """Flattens an array of arrays into a single array.""" + return Expr(f.flatten(array.expr)) + + +# aggregate functions +def approx_distinct( + expression: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Returns the approximate number of distinct values. + + This aggregate function is similar to :py:func:`count` with distinct set, but it + will approximate the number of distinct entries. It may return significantly faster + than :py:func:`count` for some DataFrames. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + expression: Values to check for distinct entries + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.approx_distinct(expression.expr, filter=filter_raw)) + + +def approx_median(expression: Expr, filter: Optional[Expr] = None) -> Expr: + """Returns the approximate median value. + + This aggregate function is similar to :py:func:`median`, but it will only + approximate the median. It may return significantly faster for some DataFrames. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by`` and ``null_treatment``, and ``distinct``. 
+
+    Args:
+        expression: Values to find the median for
+        filter: If provided, only compute against rows for which the filter is True
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(f.approx_median(expression.expr, filter=filter_raw))
+
+
+def approx_percentile_cont(
+    expression: Expr,
+    percentile: float,
+    num_centroids: Optional[int] = None,
+    filter: Optional[Expr] = None,
+) -> Expr:
+    """Returns the value that is approximately at a given percentile of ``expression``.
+
+    This aggregate function assumes the input values form a continuous distribution.
+    Suppose you have a DataFrame which consists of 100 different test scores. If you
+    called this function with a percentile of 0.9, it would return the value of the
+    test score that is above 90% of the other test scores. The returned value may be
+    between two of the values.
+
+    This function uses the [t-digest](https://arxiv.org/abs/1902.04023) algorithm to
+    compute the percentile. You can limit the number of bins used in this algorithm by
+    setting the ``num_centroids`` parameter.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
+
+    Args:
+        expression: Values for which to find the approximate percentile
+        percentile: This must be between 0.0 and 1.0, inclusive
+        num_centroids: Max bin size for the t-digest algorithm
+        filter: If provided, only compute against rows for which the filter is True
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(
+        f.approx_percentile_cont(
+            expression.expr, percentile, num_centroids=num_centroids, filter=filter_raw
+        )
+    )
+
+
+def approx_percentile_cont_with_weight(
+    expression: Expr, weight: Expr, percentile: float, filter: Optional[Expr] = None
+) -> Expr:
+    """Returns the value of the weighted approximate percentile.
+
+    This aggregate function is similar to :py:func:`approx_percentile_cont` except that
+    it uses the associated weights.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
+
+    Args:
+        expression: Values for which to find the approximate percentile
+        weight: Relative weight for each of the values in ``expression``
+        percentile: This must be between 0.0 and 1.0, inclusive
+        filter: If provided, only compute against rows for which the filter is True
+
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(
+        f.approx_percentile_cont_with_weight(
+            expression.expr, weight.expr, percentile, filter=filter_raw
+        )
+    )
+
+
+def array_agg(
+    expression: Expr,
+    distinct: bool = False,
+    filter: Optional[Expr] = None,
+    order_by: Optional[list[Expr | SortExpr]] = None,
+) -> Expr:
+    """Aggregate values into an array.
+
+    Currently ``distinct`` and ``order_by`` cannot be used together. As a work around,
+    consider :py:func:`array_sort` after aggregation.
+    [Issue Tracker](https://github.com/apache/datafusion/issues/12371)
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the option ``null_treatment``.
+ + Args: + expression: Values to combine into an array + distinct: If True, a single entry for each distinct value will be in the result + filter: If provided, only compute against rows for which the filter is True + order_by: Order the resultant array values + """ + order_by_raw = sort_list_to_raw_sort_list(order_by) + filter_raw = filter.expr if filter is not None else None + + return Expr( + f.array_agg( + expression.expr, distinct=distinct, filter=filter_raw, order_by=order_by_raw + ) + ) + + +def avg( + expression: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Returns the average value. + + This aggregate function expects a numeric expression and will return a float. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + expression: Values to combine into an array + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.avg(expression.expr, filter=filter_raw)) + + +def corr(value_y: Expr, value_x: Expr, filter: Optional[Expr] = None) -> Expr: + """Returns the correlation coefficient between ``value1`` and ``value2``. + + This aggregate function expects both values to be numeric and will return a float. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + value_y: The dependent variable for correlation + value_x: The independent variable for correlation + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.corr(value_y.expr, value_x.expr, filter=filter_raw)) + + +def count( + expressions: Expr | list[Expr] | None = None, + distinct: bool = False, + filter: Optional[Expr] = None, +) -> Expr: + """Returns the number of rows that match the given arguments. + + This aggregate function will count the non-null rows provided in the expression. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by`` and ``null_treatment``. + + Args: + expressions: Argument to perform bitwise calculation on + distinct: If True, a single entry for each distinct value will be in the result + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + if expressions is None: + args = [Expr.literal(1).expr] + elif isinstance(expressions, list): + args = [arg.expr for arg in expressions] + else: + args = [expressions.expr] + + return Expr(f.count(*args, distinct=distinct, filter=filter_raw)) + + +def covar_pop(value_y: Expr, value_x: Expr, filter: Optional[Expr] = None) -> Expr: + """Computes the population covariance. + + This aggregate function expects both values to be numeric and will return a float. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. 
+ + Args: + value_y: The dependent variable for covariance + value_x: The independent variable for covariance + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.covar_pop(value_y.expr, value_x.expr, filter=filter_raw)) + + +def covar_samp(value_y: Expr, value_x: Expr, filter: Optional[Expr] = None) -> Expr: + """Computes the sample covariance. + + This aggregate function expects both values to be numeric and will return a float. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + value_y: The dependent variable for covariance + value_x: The independent variable for covariance + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.covar_samp(value_y.expr, value_x.expr, filter=filter_raw)) + + +def covar(value_y: Expr, value_x: Expr, filter: Optional[Expr] = None) -> Expr: + """Computes the sample covariance. + + This is an alias for :py:func:`covar_samp`. + """ + return covar_samp(value_y, value_x, filter) + + +def max(expression: Expr, filter: Optional[Expr] = None) -> Expr: + """Aggregate function that returns the maximum value of the argument. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + expression: The value to find the maximum of + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.max(expression.expr, filter=filter_raw)) + + +def mean(expression: Expr, filter: Optional[Expr] = None) -> Expr: + """Returns the average (mean) value of the argument. + + This is an alias for :py:func:`avg`. + """ + return avg(expression, filter) + + +def median( + expression: Expr, distinct: bool = False, filter: Optional[Expr] = None +) -> Expr: + """Computes the median of a set of numbers. + + This aggregate function returns the median value of the expression for the given + aggregate function. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by`` and ``null_treatment``. + + Args: + expression: The value to compute the median of + distinct: If True, a single entry for each distinct value will be in the result + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.median(expression.expr, distinct=distinct, filter=filter_raw)) + + +def min(expression: Expr, filter: Optional[Expr] = None) -> Expr: + """Returns the minimum value of the argument. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + expression: The value to find the minimum of + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.min(expression.expr, filter=filter_raw)) + + +def sum( + expression: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Computes the sum of a set of numbers. + + This aggregate function expects a numeric expression. 
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
+
+    Args:
+        expression: Values to sum
+        filter: If provided, only compute against rows for which the filter is True
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(f.sum(expression.expr, filter=filter_raw))
+
+
+def stddev(expression: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the standard deviation of the argument.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
+
+    Args:
+        expression: The values to compute the standard deviation of
+        filter: If provided, only compute against rows for which the filter is True
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(f.stddev(expression.expr, filter=filter_raw))
+
+
+def stddev_pop(expression: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the population standard deviation of the argument.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
+
+    Args:
+        expression: The values to compute the population standard deviation of
+        filter: If provided, only compute against rows for which the filter is True
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(f.stddev_pop(expression.expr, filter=filter_raw))
+
+
+def stddev_samp(arg: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the sample standard deviation of the argument.
+
+    This is an alias for :py:func:`stddev`.
+    """
+    return stddev(arg, filter=filter)
+
+
+def var(expression: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the sample variance of the argument.
+
+    This is an alias for :py:func:`var_samp`.
+    """
+    return var_samp(expression, filter)
+
+
+def var_pop(expression: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the population variance of the argument.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
+
+    Args:
+        expression: The variable to compute the variance for
+        filter: If provided, only compute against rows for which the filter is True
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(f.var_pop(expression.expr, filter=filter_raw))
+
+
+def var_samp(expression: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the sample variance of the argument.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
+
+    Args:
+        expression: The variable to compute the variance for
+        filter: If provided, only compute against rows for which the filter is True
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(f.var_sample(expression.expr, filter=filter_raw))
+
+
+def var_sample(expression: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the sample variance of the argument.
+
+    This is an alias for :py:func:`var_samp`.
+    """
+    return var_samp(expression, filter)
+
+
+def regr_avgx(
+    y: Expr,
+    x: Expr,
+    filter: Optional[Expr] = None,
+) -> Expr:
+    """Computes the average of the independent variable ``x``.
+
+    This is a linear regression aggregate function. Only non-null pairs of the inputs
+    are evaluated.
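To make the ``filter`` and ``distinct`` arguments concrete, a small sketch combining the aggregate helpers above (the `col`/`lit` re-exports, column names, and the comparison operator on ``Expr`` are assumptions based on upstream datafusion-python):

# Illustrative sketch; imports and column names are assumptions.
from denormalized.datafusion import col, lit
from denormalized.datafusion import functions as f

large_orders = col("amount") > lit(100.0)                  # boolean filter expression
total = f.sum(col("amount"), filter=large_orders)          # sum only the large orders
spread = f.stddev(col("amount"))                           # sample standard deviation
n_customers = f.count(col("customer_id"), distinct=True)   # distinct count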
+ + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_avgx(y.expr, x.expr, filter=filter_raw)) + + +def regr_avgy( + y: Expr, + x: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Computes the average of the dependent variable ``y``. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_avgy(y.expr, x.expr, filter=filter_raw)) + + +def regr_count( + y: Expr, + x: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Counts the number of rows in which both expressions are not null. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_count(y.expr, x.expr, filter=filter_raw)) + + +def regr_intercept( + y: Expr, + x: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Computes the intercept from the linear regression. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_intercept(y.expr, x.expr, filter=filter_raw)) + + +def regr_r2( + y: Expr, + x: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Computes the R-squared value from linear regression. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_r2(y.expr, x.expr, filter=filter_raw)) + + +def regr_slope( + y: Expr, + x: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Computes the slope from linear regression. + + This is a linear regression aggregate function. 
Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_slope(y.expr, x.expr, filter=filter_raw)) + + +def regr_sxx( + y: Expr, + x: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Computes the sum of squares of the independent variable ``x``. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_sxx(y.expr, x.expr, filter=filter_raw)) + + +def regr_sxy( + y: Expr, + x: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Computes the sum of products of pairs of numbers. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_sxy(y.expr, x.expr, filter=filter_raw)) + + +def regr_syy( + y: Expr, + x: Expr, + filter: Optional[Expr] = None, +) -> Expr: + """Computes the sum of squares of the dependent variable ``y``. + + This is a linear regression aggregate function. Only non-null pairs of the inputs + are evaluated. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + y: The linear regression dependent variable + x: The linear regression independent variable + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + + return Expr(f.regr_syy(y.expr, x.expr, filter=filter_raw)) + + +def first_value( + expression: Expr, + filter: Optional[Expr] = None, + order_by: Optional[list[Expr | SortExpr]] = None, + null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS, +) -> Expr: + """Returns the first value in a group of values. + + This aggregate function will return the first value in the partition. + + If using the builder functions described in ref:`_aggregation` this function ignores + the option ``distinct``. + + Args: + expression: Argument to perform bitwise calculation on + filter: If provided, only compute against rows for which the filter is True + order_by: Set the ordering of the expression to evaluate + null_treatment: Assign whether to respect or ignull null values. 
+    """
+    order_by_raw = sort_list_to_raw_sort_list(order_by)
+    filter_raw = filter.expr if filter is not None else None
+
+    return Expr(
+        f.first_value(
+            expression.expr,
+            filter=filter_raw,
+            order_by=order_by_raw,
+            null_treatment=null_treatment.value,
+        )
+    )
+
+
+def last_value(
+    expression: Expr,
+    filter: Optional[Expr] = None,
+    order_by: Optional[list[Expr | SortExpr]] = None,
+    null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS,
+) -> Expr:
+    """Returns the last value in a group of values.
+
+    This aggregate function will return the last value in the partition.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the option ``distinct``.
+
+    Args:
+        expression: Values from which to return the last
+        filter: If provided, only compute against rows for which the filter is True
+        order_by: Set the ordering of the expression to evaluate
+        null_treatment: Assign whether to respect or ignore null values.
+    """
+    order_by_raw = sort_list_to_raw_sort_list(order_by)
+    filter_raw = filter.expr if filter is not None else None
+
+    return Expr(
+        f.last_value(
+            expression.expr,
+            filter=filter_raw,
+            order_by=order_by_raw,
+            null_treatment=null_treatment.value,
+        )
+    )
+
+
+def nth_value(
+    expression: Expr,
+    n: int,
+    filter: Optional[Expr] = None,
+    order_by: Optional[list[Expr | SortExpr]] = None,
+    null_treatment: NullTreatment = NullTreatment.RESPECT_NULLS,
+) -> Expr:
+    """Returns the n-th value in a group of values.
+
+    This aggregate function will return the n-th value in the partition.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the option ``distinct``.
+
+    Args:
+        expression: Values from which to return the n-th
+        n: Index of value to return. Starts at 1.
+        filter: If provided, only compute against rows for which the filter is True
+        order_by: Set the ordering of the expression to evaluate
+        null_treatment: Assign whether to respect or ignore null values.
+    """
+    order_by_raw = sort_list_to_raw_sort_list(order_by)
+    filter_raw = filter.expr if filter is not None else None
+
+    return Expr(
+        f.nth_value(
+            expression.expr,
+            n,
+            filter=filter_raw,
+            order_by=order_by_raw,
+            null_treatment=null_treatment.value,
+        )
+    )
+
+
+def bit_and(expression: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the bitwise AND of the argument.
+
+    This aggregate function will bitwise compare every value in the input partition.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
+
+    Args:
+        expression: Argument to perform bitwise calculation on
+        filter: If provided, only compute against rows for which the filter is True
+    """
+    filter_raw = filter.expr if filter is not None else None
+    return Expr(f.bit_and(expression.expr, filter=filter_raw))
+
+
+def bit_or(expression: Expr, filter: Optional[Expr] = None) -> Expr:
+    """Computes the bitwise OR of the argument.
+
+    This aggregate function will bitwise compare every value in the input partition.
+
+    If using the builder functions described in ref:`_aggregation` this function ignores
+    the options ``order_by``, ``null_treatment``, and ``distinct``.
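A short sketch of the ordered-value aggregates above (``first_value``, ``last_value``, ``nth_value``). It assumes ``Expr.sort()`` builds a ``SortExpr`` as in upstream datafusion-python and that `col` is re-exported; column names are illustrative:

# Illustrative sketch; Expr.sort() and the `col` re-export are assumptions.
from denormalized.datafusion import col
from denormalized.datafusion import functions as f

by_time = [col("event_time").sort(ascending=True)]
opening = f.first_value(col("price"), order_by=by_time)
closing = f.last_value(col("price"), order_by=by_time)
second = f.nth_value(col("price"), 2, order_by=by_time)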
+ + Args: + expression: Argument to perform bitwise calculation on + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.bit_or(expression.expr, filter=filter_raw)) + + +def bit_xor( + expression: Expr, distinct: bool = False, filter: Optional[Expr] = None +) -> Expr: + """Computes the bitwise XOR of the argument. + + This aggregate function will bitwise compare every value in the input partition. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by`` and ``null_treatment``. + + Args: + expression: Argument to perform bitwise calculation on + distinct: If True, evaluate each unique value of expression only once + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.bit_xor(expression.expr, distinct=distinct, filter=filter_raw)) + + +def bool_and(expression: Expr, filter: Optional[Expr] = None) -> Expr: + """Computes the boolean AND of the argument. + + This aggregate function will compare every value in the input partition. These are + expected to be boolean values. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + expression: Argument to perform calculation on + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.bool_and(expression.expr, filter=filter_raw)) + + +def bool_or(expression: Expr, filter: Optional[Expr] = None) -> Expr: + """Computes the boolean OR of the argument. + + This aggregate function will compare every value in the input partition. These are + expected to be boolean values. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``order_by``, ``null_treatment``, and ``distinct``. + + Args: + expression: Argument to perform calculation on + filter: If provided, only compute against rows for which the filter is True + """ + filter_raw = filter.expr if filter is not None else None + return Expr(f.bool_or(expression.expr, filter=filter_raw)) + + +def lead( + arg: Expr, + shift_offset: int = 1, + default_value: Optional[Any] = None, + partition_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Create a lead window function. + + Lead operation will return the argument that is in the next shift_offset-th row in + the partition. For example ``lead(col("b"), shift_offset=3, default_value=5)`` will + return the 3rd following value in column ``b``. At the end of the partition, where + no futher values can be returned it will return the default value of 5. + + Here is an example of both the ``lead`` and :py:func:`datafusion.functions.lag` + functions on a simple DataFrame:: + + +--------+------+-----+ + | points | lead | lag | + +--------+------+-----+ + | 100 | 100 | | + | 100 | 50 | 100 | + | 50 | 25 | 100 | + | 25 | | 50 | + +--------+------+-----+ + + To set window function parameters use the window builder approach described in the + ref:`_window_functions` online documentation. + + Args: + arg: Value to return + shift_offset: Number of rows following the current row. + default_value: Value to return if shift_offet row does not exist. + partition_by: Expressions to partition the window frame on. 
+ order_by: Set ordering within the window frame. + """ + if not isinstance(default_value, pa.Scalar) and default_value is not None: + default_value = pa.scalar(default_value) + + partition_cols = ( + [col.expr for col in partition_by] if partition_by is not None else None + ) + order_by_raw = sort_list_to_raw_sort_list(order_by) + + return Expr( + f.lead( + arg.expr, + shift_offset, + default_value, + partition_by=partition_cols, + order_by=order_by_raw, + ) + ) + + +def lag( + arg: Expr, + shift_offset: int = 1, + default_value: Optional[Any] = None, + partition_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Create a lag window function. + + Lag operation will return the argument that is in the previous shift_offset-th row + in the partition. For example ``lag(col("b"), shift_offset=3, default_value=5)`` + will return the 3rd previous value in column ``b``. At the beginnig of the + partition, where no values can be returned it will return the default value of 5. + + Here is an example of both the ``lag`` and :py:func:`datafusion.functions.lead` + functions on a simple DataFrame:: + + +--------+------+-----+ + | points | lead | lag | + +--------+------+-----+ + | 100 | 100 | | + | 100 | 50 | 100 | + | 50 | 25 | 100 | + | 25 | | 50 | + +--------+------+-----+ + + Args: + arg: Value to return + shift_offset: Number of rows before the current row. + default_value: Value to return if shift_offet row does not exist. + partition_by: Expressions to partition the window frame on. + order_by: Set ordering within the window frame. + """ + if not isinstance(default_value, pa.Scalar): + default_value = pa.scalar(default_value) + + partition_cols = ( + [col.expr for col in partition_by] if partition_by is not None else None + ) + order_by_raw = sort_list_to_raw_sort_list(order_by) + + return Expr( + f.lag( + arg.expr, + shift_offset, + default_value, + partition_by=partition_cols, + order_by=order_by_raw, + ) + ) + + +def row_number( + partition_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Create a row number window function. + + Returns the row number of the window function. + + Here is an example of the ``row_number`` on a simple DataFrame:: + + +--------+------------+ + | points | row number | + +--------+------------+ + | 100 | 1 | + | 100 | 2 | + | 50 | 3 | + | 25 | 4 | + +--------+------------+ + + Args: + partition_by: Expressions to partition the window frame on. + order_by: Set ordering within the window frame. + """ + partition_cols = ( + [col.expr for col in partition_by] if partition_by is not None else None + ) + order_by_raw = sort_list_to_raw_sort_list(order_by) + + return Expr( + f.row_number( + partition_by=partition_cols, + order_by=order_by_raw, + ) + ) + + +def rank( + partition_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Create a rank window function. + + Returns the rank based upon the window order. Consecutive equal values will receive + the same rank, but the next different value will not be consecutive but rather the + number of rows that preceed it plus one. This is similar to Olympic medals. If two + people tie for gold, the next place is bronze. There would be no silver medal. Here + is an example of a dataframe with a window ordered by descending ``points`` and the + associated rank. 
+ + You should set ``order_by`` to produce meaningful results:: + + +--------+------+ + | points | rank | + +--------+------+ + | 100 | 1 | + | 100 | 1 | + | 50 | 3 | + | 25 | 4 | + +--------+------+ + + Args: + partition_by: Expressions to partition the window frame on. + order_by: Set ordering within the window frame. + """ + partition_cols = ( + [col.expr for col in partition_by] if partition_by is not None else None + ) + order_by_raw = sort_list_to_raw_sort_list(order_by) + + return Expr( + f.rank( + partition_by=partition_cols, + order_by=order_by_raw, + ) + ) + + +def dense_rank( + partition_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Create a dense_rank window function. + + This window function is similar to :py:func:`rank` except that the returned values + will be consecutive. Here is an example of a dataframe with a window ordered by + descending ``points`` and the associated dense rank:: + + +--------+------------+ + | points | dense_rank | + +--------+------------+ + | 100 | 1 | + | 100 | 1 | + | 50 | 2 | + | 25 | 3 | + +--------+------------+ + + Args: + partition_by: Expressions to partition the window frame on. + order_by: Set ordering within the window frame. + """ + partition_cols = ( + [col.expr for col in partition_by] if partition_by is not None else None + ) + order_by_raw = sort_list_to_raw_sort_list(order_by) + + return Expr( + f.dense_rank( + partition_by=partition_cols, + order_by=order_by_raw, + ) + ) + + +def percent_rank( + partition_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Create a percent_rank window function. + + This window function is similar to :py:func:`rank` except that the returned values + are the percentage from 0.0 to 1.0 from first to last. Here is an example of a + dataframe with a window ordered by descending ``points`` and the associated percent + rank:: + + +--------+--------------+ + | points | percent_rank | + +--------+--------------+ + | 100 | 0.0 | + | 100 | 0.0 | + | 50 | 0.666667 | + | 25 | 1.0 | + +--------+--------------+ + + Args: + partition_by: Expressions to partition the window frame on. + order_by: Set ordering within the window frame. + """ + partition_cols = ( + [col.expr for col in partition_by] if partition_by is not None else None + ) + order_by_raw = sort_list_to_raw_sort_list(order_by) + + return Expr( + f.percent_rank( + partition_by=partition_cols, + order_by=order_by_raw, + ) + ) + + +def cume_dist( + partition_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Create a cumulative distribution window function. + + This window function is similar to :py:func:`rank` except that the returned values + are the ratio of the row number to the total numebr of rows. Here is an example of a + dataframe with a window ordered by descending ``points`` and the associated + cumulative distribution:: + + +--------+-----------+ + | points | cume_dist | + +--------+-----------+ + | 100 | 0.5 | + | 100 | 0.5 | + | 50 | 0.75 | + | 25 | 1.0 | + +--------+-----------+ + + Args: + partition_by: Expressions to partition the window frame on. + order_by: Set ordering within the window frame. 
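And a corresponding sketch of the window helpers above (``lag``, ``lead``, ``rank``), under the same assumptions about the `col` re-export and ``Expr.sort()``:

# Illustrative sketch; imports, column names, and Expr.sort() are assumptions.
from denormalized.datafusion import col
from denormalized.datafusion import functions as f

order = [col("points").sort(ascending=False)]
prev_points = f.lag(col("points"), shift_offset=1, order_by=order)
next_points = f.lead(col("points"), shift_offset=1, order_by=order)
place = f.rank(partition_by=[col("team")], order_by=order)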
+ """ + partition_cols = ( + [col.expr for col in partition_by] if partition_by is not None else None + ) + order_by_raw = sort_list_to_raw_sort_list(order_by) + + return Expr( + f.cume_dist( + partition_by=partition_cols, + order_by=order_by_raw, + ) + ) + + +def ntile( + groups: int, + partition_by: Optional[list[Expr]] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Create a n-tile window function. + + This window function orders the window frame into a give number of groups based on + the ordering criteria. It then returns which group the current row is assigned to. + Here is an example of a dataframe with a window ordered by descending ``points`` + and the associated n-tile function:: + + +--------+-------+ + | points | ntile | + +--------+-------+ + | 120 | 1 | + | 100 | 1 | + | 80 | 2 | + | 60 | 2 | + | 40 | 3 | + | 20 | 3 | + +--------+-------+ + + Args: + groups: Number of groups for the n-tile to be divided into. + partition_by: Expressions to partition the window frame on. + order_by: Set ordering within the window frame. + """ + partition_cols = ( + [col.expr for col in partition_by] if partition_by is not None else None + ) + order_by_raw = sort_list_to_raw_sort_list(order_by) + + return Expr( + f.ntile( + Expr.literal(groups).expr, + partition_by=partition_cols, + order_by=order_by_raw, + ) + ) + + +def string_agg( + expression: Expr, + delimiter: str, + filter: Optional[Expr] = None, + order_by: Optional[list[Expr | SortExpr]] = None, +) -> Expr: + """Concatenates the input strings. + + This aggregate function will concatenate input strings, ignoring null values, and + seperating them with the specified delimiter. Non-string values will be converted to + their string equivalents. + + If using the builder functions described in ref:`_aggregation` this function ignores + the options ``distinct`` and ``null_treatment``. + + Args: + expression: Argument to perform bitwise calculation on + delimiter: Text to place between each value of expression + filter: If provided, only compute against rows for which the filter is True + order_by: Set the ordering of the expression to evaluate + """ + order_by_raw = sort_list_to_raw_sort_list(order_by) + filter_raw = filter.expr if filter is not None else None + + return Expr( + f.string_agg( + expression.expr, + delimiter, + filter=filter_raw, + order_by=order_by_raw, + ) + ) diff --git a/py-denormalized/python/denormalized/datafusion/input/__init__.py b/py-denormalized/python/denormalized/datafusion/input/__init__.py new file mode 100644 index 0000000..f85ce21 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/input/__init__.py @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""This package provides for input sources. 
+ +The primary class used within DataFusion is ``LocationInputPlugin``. +""" + +from .location import LocationInputPlugin + +__all__ = [ + LocationInputPlugin, +] diff --git a/py-denormalized/python/denormalized/datafusion/input/base.py b/py-denormalized/python/denormalized/datafusion/input/base.py new file mode 100644 index 0000000..4eba197 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/input/base.py @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""This module provides ``BaseInputSource``. + +A user can extend this to provide a custom input source. +""" + +from abc import ABC, abstractmethod +from typing import Any + +from datafusion.common import SqlTable + + +class BaseInputSource(ABC): + """Base Input Source class. + + If a consuming library would like to provider their own InputSource this is + the class they should extend to write their own. + + Once completed the Plugin InputSource can be registered with the + SessionContext to ensure that it will be used in order + to obtain the SqlTable information from the custom datasource. + """ + + @abstractmethod + def is_correct_input(self, input_item: Any, table_name: str, **kwargs) -> bool: + """Returns `True` if the input is valid.""" + pass + + @abstractmethod + def build_table(self, input_item: Any, table_name: str, **kwarg) -> SqlTable: + """Create a table from the input source.""" + pass diff --git a/py-denormalized/python/denormalized/datafusion/input/location.py b/py-denormalized/python/denormalized/datafusion/input/location.py new file mode 100644 index 0000000..b274539 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/input/location.py @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""The default input source for DataFusion.""" + +import os +import glob +from typing import Any + +from datafusion.common import DataTypeMap, SqlTable +from datafusion.input.base import BaseInputSource + + +class LocationInputPlugin(BaseInputSource): + """Input Plugin for everything. 
+ + This can be read in from a file (on disk, remote etc.). + """ + + def is_correct_input(self, input_item: Any, table_name: str, **kwargs): + """Returns `True` if the input is valid.""" + return isinstance(input_item, str) + + def build_table( + self, + input_file: str, + table_name: str, + **kwargs, + ) -> SqlTable: + """Create a table from the input source.""" + _, extension = os.path.splitext(input_file) + format = extension.lstrip(".").lower() + num_rows = 0 # Total number of rows in the file. Used for statistics + columns = [] + if format == "parquet": + import pyarrow.parquet as pq + + # Read the Parquet metadata + metadata = pq.read_metadata(input_file) + num_rows = metadata.num_rows + # Iterate through the schema and build the SqlTable + for col in metadata.schema: + columns.append( + ( + col.name, + DataTypeMap.from_parquet_type_str(col.physical_type), + ) + ) + elif format == "csv": + import csv + + # Consume header row and count number of rows for statistics. + # TODO: Possibly makes sense to have the eager number of rows + # calculated as a configuration since you must read the entire file + # to get that information. However, this should only be occurring + # at table creation time and therefore shouldn't + # slow down query performance. + with open(input_file, "r") as file: + reader = csv.reader(file) + header_row = next(reader) + print(header_row) + for _ in reader: + num_rows += 1 + # TODO: Need to actually consume this row into reasonable columns + raise RuntimeError("TODO: Currently unable to support CSV input files.") + else: + raise RuntimeError( + f"Input of format: `{format}` is currently not supported.\ + Only Parquet and CSV." + ) + + # Input could possibly be multiple files. Create a list if so + input_files = glob.glob(input_file) + + return SqlTable(table_name, columns, num_rows, input_files) diff --git a/py-denormalized/python/denormalized/datafusion/object_store.py b/py-denormalized/python/denormalized/datafusion/object_store.py new file mode 100644 index 0000000..3a3371e --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/object_store.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Object store functionality.""" + +from denormalized._internal import object_store + +AmazonS3 = object_store.AmazonS3 +GoogleCloud = object_store.GoogleCloud +LocalFileSystem = object_store.LocalFileSystem +MicrosoftAzure = object_store.MicrosoftAzure + +__all__ = [ + "AmazonS3", + "GoogleCloud", + "LocalFileSystem", + "MicrosoftAzure", +] + + +def __getattr__(name): + return getattr(object_store, name) diff --git a/py-denormalized/python/denormalized/datafusion/py.typed b/py-denormalized/python/denormalized/datafusion/py.typed new file mode 100644 index 0000000..d216be4 --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/py.typed @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/py-denormalized/python/denormalized/datafusion/record_batch.py b/py-denormalized/python/denormalized/datafusion/record_batch.py new file mode 100644 index 0000000..e0e436e --- /dev/null +++ b/py-denormalized/python/denormalized/datafusion/record_batch.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""This module provides the classes for handling record batches. + +These are typically the result of dataframe +:py:func:`datafusion.dataframe.execute_stream` operations. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import pyarrow + import denormalized._internal as df_internal + import typing_extensions + + +class RecordBatch: + """This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`.""" + + def __init__(self, record_batch: df_internal.RecordBatch) -> None: + """This constructor is generally not called by the end user. + + See the :py:class:`RecordBatchStream` iterator for generating this class. + """ + self.record_batch = record_batch + + def to_pyarrow(self) -> pyarrow.RecordBatch: + """Convert to :py:class:`pyarrow.RecordBatch`.""" + return self.record_batch.to_pyarrow() + + +class RecordBatchStream: + """This class represents a stream of record batches. 
+
+    These are typically the result of a
+    :py:func:`~datafusion.dataframe.DataFrame.execute_stream` operation.
+    """
+
+    def __init__(self, record_batch_stream: df_internal.RecordBatchStream) -> None:
+        """This constructor is typically not called by the end user."""
+        self.rbs = record_batch_stream
+
+    def next(self) -> RecordBatch | None:
+        """See :py:func:`__next__` for the iterator function."""
+        try:
+            next_batch = next(self)
+        except StopIteration:
+            return None
+
+        return next_batch
+
+    def __next__(self) -> RecordBatch:
+        """Iterator function."""
+        next_batch = next(self.rbs)
+        return RecordBatch(next_batch)
+
+    def __iter__(self) -> typing_extensions.Self:
+        """Iterator function."""
+        return self
diff --git a/py-denormalized/python/denormalized/datafusion/udf.py b/py-denormalized/python/denormalized/datafusion/udf.py
new file mode 100644
index 0000000..c1d45f9
--- /dev/null
+++ b/py-denormalized/python/denormalized/datafusion/udf.py
@@ -0,0 +1,248 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Provides the user defined functions for evaluation of dataframes."""
+
+from __future__ import annotations
+
+import denormalized._internal as df_internal
+from datafusion.expr import Expr
+from typing import Callable, TYPE_CHECKING, TypeVar
+from abc import ABCMeta, abstractmethod
+from typing import List
+from enum import Enum
+import pyarrow
+
+if TYPE_CHECKING:
+    _R = TypeVar("_R", bound=pyarrow.DataType)
+
+
+class Volatility(Enum):
+    """Defines how stable or volatile a function is.
+
+    When setting the volatility of a function, you can either pass this
+    enumeration or a ``str``. The ``str`` equivalent is the lower case value of the
+    name (`"immutable"`, `"stable"`, or `"volatile"`).
+    """
+
+    Immutable = 1
+    """An immutable function will always return the same output when given the
+    same input.
+
+    DataFusion will attempt to inline immutable functions during planning.
+    """
+
+    Stable = 2
+    """
+    Returns the same value for a given input within a single query.
+
+    A stable function may return different values given the same input across
+    different queries but must return the same value for a given input within a
+    query. An example of this is the ``Now`` function. DataFusion will attempt to
+    inline ``Stable`` functions during planning, when possible. For the query
+    ``select col1, now() from t1``, the query might take a while to execute, but
+    the ``now()`` column will be the same for each output row because it is
+    evaluated during planning.
+    """
+
+    Volatile = 3
+    """A volatile function may change the return value from evaluation to
+    evaluation.
+
+    Multiple invocations of a volatile function may return different results
+    when used in the same query. An example of this is the random() function.
+    DataFusion cannot evaluate such functions during planning. In the query
+    ``select col1, random() from t1``, the ``random()`` function will be evaluated
+    for each output row, resulting in a unique random value for each row.
+    """
+
+    def __str__(self):
+        """Returns the string equivalent."""
+        return self.name.lower()
+
+
+class ScalarUDF:
+    """Class for performing scalar user defined functions (UDF).
+
+    Scalar UDFs operate on a row by row basis. See also :py:class:`AggregateUDF` for
+    operating on a group of rows.
+    """
+
+    def __init__(
+        self,
+        name: str | None,
+        func: Callable[..., _R],
+        input_types: list[pyarrow.DataType],
+        return_type: _R,
+        volatility: Volatility | str,
+    ) -> None:
+        """Instantiate a scalar user defined function (UDF).
+
+        See helper method :py:func:`udf` for argument details.
+        """
+        self._udf = df_internal.ScalarUDF(
+            name, func, input_types, return_type, str(volatility)
+        )
+
+    def __call__(self, *args: Expr) -> Expr:
+        """Execute the UDF.
+
+        This function is not typically called by an end user. These calls will
+        occur during the evaluation of the dataframe.
+        """
+        args = [arg.expr for arg in args]
+        return Expr(self._udf.__call__(*args))
+
+    @staticmethod
+    def udf(
+        func: Callable[..., _R],
+        input_types: list[pyarrow.DataType],
+        return_type: _R,
+        volatility: Volatility | str,
+        name: str | None = None,
+    ) -> ScalarUDF:
+        """Create a new User Defined Function.
+
+        Args:
+            func: A callable python function.
+            input_types: The data types of the arguments to ``func``. This list
+                must be of the same length as the number of arguments.
+            return_type: The data type of the return value from the python
+                function.
+            volatility: See ``Volatility`` for allowed values.
+            name: A descriptive name for the function.
+
+        Returns:
+            A user defined scalar function, which can be used in expressions
+            against a dataframe or data stream.
+        """
+        if not callable(func):
+            raise TypeError("`func` argument must be callable")
+        if name is None:
+            name = func.__qualname__.lower()
+        return ScalarUDF(
+            name=name,
+            func=func,
+            input_types=input_types,
+            return_type=return_type,
+            volatility=volatility,
+        )
+
+
+class Accumulator(metaclass=ABCMeta):
+    """Defines how an :py:class:`AggregateUDF` accumulates values."""
+
+    @abstractmethod
+    def state(self) -> List[pyarrow.Scalar]:
+        """Return the current state."""
+        pass
+
+    @abstractmethod
+    def update(self, values: pyarrow.Array) -> None:
+        """Evaluate an array of values and update state."""
+        pass
+
+    @abstractmethod
+    def merge(self, states: List[pyarrow.Array]) -> None:
+        """Merge a set of states."""
+        pass
+
+    @abstractmethod
+    def evaluate(self) -> pyarrow.Scalar:
+        """Return the resultant value."""
+        pass
+
+
+if TYPE_CHECKING:
+    _A = TypeVar("_A", bound=(Callable[..., _R], Accumulator))
+
+
+class AggregateUDF:
+    """Class for performing aggregate user defined functions (UDAF).
+
+    Aggregate UDFs operate on a group of rows and return a single value. See
+    also :py:class:`ScalarUDF` for operating on a row by row basis.
+    """
+
+    def __init__(
+        self,
+        name: str | None,
+        accumulator: _A,
+        input_types: list[pyarrow.DataType],
+        return_type: _R,
+        state_type: list[pyarrow.DataType],
+        volatility: Volatility | str,
+    ) -> None:
+        """Instantiate a user defined aggregate function (UDAF).
+
+        See :py:func:`udaf` for a convenience function and argument
+        descriptions.
+ """ + self._udf = df_internal.AggregateUDF( + name, accumulator, input_types, return_type, state_type, str(volatility) + ) + + def __call__(self, *args: Expr) -> Expr: + """Execute the UDAF. + + This function is not typically called by an end user. These calls will + occur during the evaluation of the dataframe. + """ + args = [arg.expr for arg in args] + return Expr(self._udf.__call__(*args)) + + @staticmethod + def udaf( + accum: _A, + input_types: list[pyarrow.DataType], + return_type: _R, + state_type: list[pyarrow.DataType], + volatility: Volatility | str, + name: str | None = None, + ) -> AggregateUDF: + """Create a new User Defined Aggregate Function. + + The accumulator function must be callable and implement :py:class:`Accumulator`. + + Args: + accum: The accumulator python function. + input_types: The data types of the arguments to ``accum``. + return_type: The data type of the return value. + state_type: The data types of the intermediate accumulation. + volatility: See :py:class:`Volatility` for allowed values. + name: A descriptive name for the function. + + Returns: + A user defined aggregate function, which can be used in either data + aggregation or window function calls. + """ + if not issubclass(accum, Accumulator): + raise TypeError( + "`accum` must implement the abstract base class Accumulator" + ) + if name is None: + name = accum.__qualname__.lower() + if isinstance(input_types, pyarrow.lib.DataType): + input_types = [input_types] + return AggregateUDF( + name=name, + accumulator=accum, + input_types=input_types, + return_type=return_type, + state_type=state_type, + volatility=volatility, + ) diff --git a/py-denormalized/python/denormalized/datastream.py b/py-denormalized/python/denormalized/datastream.py index 0919902..d961e75 100644 --- a/py-denormalized/python/denormalized/datastream.py +++ b/py-denormalized/python/denormalized/datastream.py @@ -1,7 +1,7 @@ import pyarrow as pa -from datafusion import Expr from denormalized._internal import PyDataStream -from denormalized._internal import expr as internal_expr +from denormalized.datafusion import Expr +from denormalized.utils import to_internal_expr, to_internal_exprs class DataStream: @@ -48,7 +48,7 @@ def select(self, expr_list: list[Expr]) -> "DataStream": Returns: DataStream: A new DataStream with the selected columns/expressions. """ - return DataStream(self.ds.select(expr_list)) + return DataStream(self.ds.select(to_internal_exprs(expr_list))) def filter(self, predicate: Expr) -> "DataStream": """Filter the DataStream based on a predicate. @@ -59,7 +59,19 @@ def filter(self, predicate: Expr) -> "DataStream": Returns: DataStream: A new DataStream with the filter applied. """ - return DataStream(self.ds.filter(predicate)) + return DataStream(self.ds.filter(to_internal_expr(predicate))) + + def with_column(self, name: str, predicate: Expr) -> "DataStream": + """Add a new column to the DataStream. + + Args: + name (str): The name of the new column. + predicate (Expr): The expression that defines the column's values. + + Returns: + DataStream: A new DataStream with the additional column. + """ + return DataStream(self.ds.with_column(name, to_internal_expr(predicate))) def join_on( self, right: "DataStream", join_type: str, on_exprs: list[Expr] @@ -82,7 +94,7 @@ def join( join_type: str, left_cols: list[str], right_cols: list[str], - filter: Expr = None, + filter: Expr | None = None, ) -> "DataStream": """Join this DataStream with another one based on column names. 
@@ -102,16 +114,16 @@ def join(
 
     def window(
         self,
-        group_expr: list[Expr],
-        aggr_expr: list[Expr],
+        group_exprs: list[Expr],
+        aggr_exprs: list[Expr],
         window_length_millis: int,
         slide_millis: int | None = None,
     ) -> "DataStream":
         """Apply a windowing operation to the DataStream.
 
         Args:
-            group_expr (list[Expr]): The expressions to group by.
-            aggr_expr (list[Expr]): The aggregation expressions to apply.
+            group_exprs (list[Expr]): The expressions to group by.
+            aggr_exprs (list[Expr]): The aggregation expressions to apply.
             window_length_millis (int): The length of the window in
                 milliseconds.
             slide_millis (int, optional): The slide interval of the window in
                 milliseconds.
@@ -120,7 +132,12 @@ def window(
             DataStream: A new DataStream with the windowing operation applied.
         """
         return DataStream(
-            self.ds.window(group_expr, aggr_expr, window_length_millis, slide_millis)
+            self.ds.window(
+                to_internal_exprs(group_exprs),
+                to_internal_exprs(aggr_exprs),
+                window_length_millis,
+                slide_millis,
+            )
         )
 
     def print_stream(self) -> None:
diff --git a/py-denormalized/python/denormalized/utils.py b/py-denormalized/python/denormalized/utils.py
new file mode 100644
index 0000000..13a5dbf
--- /dev/null
+++ b/py-denormalized/python/denormalized/utils.py
@@ -0,0 +1,13 @@
+from denormalized._internal import expr as internal_exprs
+from denormalized.datafusion import Expr
+
+
+def to_internal_expr(expr: Expr | str) -> internal_exprs.Expr:
+    """Convert a single Expr or string to an internal expr."""
+    return Expr.column(expr).expr if isinstance(expr, str) else expr.expr
+
+def to_internal_exprs(exprs: list[Expr] | list[str]) -> list[internal_exprs.Expr]:
+    """Convert a list of Exprs or strings to a list of internal exprs."""
+    return [
+        to_internal_expr(arg) for arg in exprs
+    ]
diff --git a/py-denormalized/python/examples/stream_aggregate.py b/py-denormalized/python/examples/stream_aggregate.py
index ccd8026..dbf96b5 100644
--- a/py-denormalized/python/examples/stream_aggregate.py
+++ b/py-denormalized/python/examples/stream_aggregate.py
@@ -3,8 +3,8 @@
 import pyarrow as pa
 
 from denormalized import Context
-from denormalized._internal import expr
-from denormalized._internal import functions as f
+from denormalized.datafusion import lit, col
+from denormalized.datafusion import functions as f
 
 import signal
 import sys
@@ -23,26 +23,25 @@ def signal_handler(sig, frame):
     "reading": 0.0,
 }
 
-def sample_func(rb):
-    print("hello world2!")
-    print(len(rb))
+def sample_sink_func(rb):
+    print(rb)
 
 ctx = Context()
 
 ds = ctx.from_topic("temperature", json.dumps(sample_event), bootstrap_server)
 
 ds.window(
-    [expr.Expr.column("sensor_name")],
+    [col("sensor_name")],
     [
-        f.count(expr.Expr.column("reading"), distinct=False, filter=None).alias(
+        f.count(col("reading"), distinct=False, filter=None).alias(
             "count"
         ),
-        f.min(expr.Expr.column("reading")).alias("min"),
-        f.max(expr.Expr.column("reading")).alias("max"),
-        f.avg(expr.Expr.column("reading")).alias("average"),
+        f.min(col("reading")).alias("min"),
+        f.max(col("reading")).alias("max"),
+        f.avg(col("reading")).alias("average"),
     ],
     1000,
     None,
 ).filter(
-    expr.Expr.column("max") > (expr.Expr.literal(pa.scalar(113)))
-).sink_python(sample_func)
+    col("max") > (lit(113))
+).sink_python(sample_sink_func)
diff --git a/py-denormalized/python/examples/udf_example.py b/py-denormalized/python/examples/udf_example.py
new file mode 100644
index 0000000..563bd99
--- /dev/null
+++ b/py-denormalized/python/examples/udf_example.py
@@ -0,0 +1,60 @@
+"""Python UDF example."""
+
+import json
+import signal
+import sys
+
+import pyarrow as pa
+import pyarrow.compute as pc
+from denormalized import Context
+from denormalized.datafusion import col
+from denormalized.datafusion import functions as f
+from denormalized.datafusion import lit, udf
+
+
+def signal_handler(sig, frame):
+    sys.exit(0)
+
+signal.signal(signal.SIGINT, signal_handler)
+
+bootstrap_server = "localhost:9092"
+
+sample_event = {
+    "occurred_at_ms": 100,
+    "sensor_name": "foo",
+    "reading": 0.0,
+}
+
+def gt(lhs: pa.Array, rhs: pa.Scalar) -> pa.Array:
+    return pc.greater(lhs, rhs)
+
+greater_than_udf = udf(gt, [pa.float64(), pa.float64()], pa.bool_(), "stable")
+
+def sample_sink_func(rb: pa.RecordBatch):
+    if not len(rb):
+        return
+    print(rb)
+
+
+ctx = Context()
+ds = ctx.from_topic("temperature", json.dumps(sample_event), bootstrap_server)
+
+ds.window(
+    [col("sensor_name")],
+    [
+        f.count(col("reading"), distinct=False, filter=None).alias("count"),
+        f.min(col("reading")).alias("min"),
+        f.max(col("reading")).alias("max"),
+        f.avg(col("reading")).alias("average"),
+    ],
+    1000,
+    None,
+).with_column(
+    "greater_than",
+    greater_than_udf(
+        col("count"),
+        lit(1400.0),
+    ),
+).sink_python(
+    sample_sink_func
+)
diff --git a/py-denormalized/src/datastream.rs b/py-denormalized/src/datastream.rs
index fa5f604..da9de9d 100644
--- a/py-denormalized/src/datastream.rs
+++ b/py-denormalized/src/datastream.rs
@@ -81,6 +81,11 @@ impl PyDataStream {
         Ok(Self::new(ds))
     }
 
+    pub fn with_column(&self, name: &str, expr: PyExpr) -> Result<Self> {
+        let ds = self.ds.as_ref().clone().with_column(name, expr.into())?;
+        Ok(Self::new(ds))
+    }
+
     pub fn join_on(
         &self,
         _right: PyDataStream,
diff --git a/py-denormalized/src/lib.rs b/py-denormalized/src/lib.rs
index 4e1c285..fd92024 100644
--- a/py-denormalized/src/lib.rs
+++ b/py-denormalized/src/lib.rs
@@ -1,7 +1,5 @@
 use pyo3::prelude::*;
 
-use datafusion_python::{expr, functions};
-
 pub mod context;
 pub mod datastream;
 
@@ -15,24 +13,14 @@ pub(crate) struct TokioRuntime(tokio::runtime::Runtime);
 /// A Python module implemented in Rust.
 #[pymodule]
 fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> {
-    // Register the Tokio Runtime as a module attribute so we can reuse it
-    m.add(
-        "runtime",
-        TokioRuntime(tokio::runtime::Runtime::new().unwrap()),
-    )?;
-
     m.add_class::<datastream::PyDataStream>()?;
     m.add_class::<context::PyContext>()?;
 
-    // Register `expr` as a submodule. Matching `datafusion-expr` https://docs.rs/datafusion-expr/latest/datafusion_expr/
-    let expr = PyModule::new_bound(py, "expr")?;
-    expr::init_module(&expr)?;
-    m.add_submodule(&expr)?;
-
+    datafusion_python::_internal(py, &m)?;
    // Register the functions as a submodule
-    let funcs = PyModule::new_bound(py, "functions")?;
-    functions::init_module(&funcs)?;
-    m.add_submodule(&funcs)?;
+    // let datafusion = &PyModule::new_bound(py, "datafusion")?;
+    // datafusion_python::_internal(py, datafusion)?;
+    // m.add_submodule(datafusion)?;
 
     Ok(())
 }
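
The ``BaseInputSource`` class introduced in input/base.py is the extension point for third-party data sources, but the patch only ships the file-location plugin. Below is a minimal sketch of what a custom plugin could look like; the newline-delimited-JSON format, the ``JsonLinesInputPlugin`` name, and the ``BYTE_ARRAY`` type mapping are hypothetical placeholders, and the import paths assume the vendored package layout added by this patch rather than an API it documents.

# Hypothetical sketch (not part of this patch): a custom input source
# built on denormalized/datafusion/input/base.py.
import json
from typing import Any

from denormalized.datafusion.common import DataTypeMap, SqlTable
from denormalized.datafusion.input.base import BaseInputSource


class JsonLinesInputPlugin(BaseInputSource):
    """Expose a `.jsonl` path as a table, one JSON object per line."""

    def is_correct_input(self, input_item: Any, table_name: str, **kwargs) -> bool:
        # Only claim string paths that end in .jsonl.
        return isinstance(input_item, str) and input_item.endswith(".jsonl")

    def build_table(self, input_item: str, table_name: str, **kwargs) -> SqlTable:
        num_rows = 0
        columns = []
        with open(input_item) as f:
            for line in f:
                record = json.loads(line)
                if not columns:
                    # Placeholder type mapping, mirroring how location.py maps
                    # Parquet physical type strings; a real plugin would infer types.
                    columns = [
                        (key, DataTypeMap.from_parquet_type_str("BYTE_ARRAY"))
                        for key in record
                    ]
                num_rows += 1
        # Same constructor shape as used by LocationInputPlugin above.
        return SqlTable(table_name, columns, num_rows, [input_item])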
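
record_batch.py wraps the batches produced by a streaming execution. As a small usage sketch, assuming ``df`` is a DataFrame from the vendored datafusion API whose ``execute_stream()`` returns the ``RecordBatchStream`` defined above:

# Sketch: drain a RecordBatchStream and inspect each batch via pyarrow.
# `df` is assumed to be a denormalized.datafusion DataFrame.
for batch in df.execute_stream():
    print(batch.to_pyarrow().num_rows)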
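
udf.py also ships the aggregate counterpart to ``udf`` (``Accumulator`` plus ``AggregateUDF.udaf``), which udf_example.py does not exercise. The following is a minimal sketch of an aggregate UDF built on that API; it assumes ``udaf``, ``Accumulator``, and ``col`` are re-exported by denormalized/datafusion/__init__.py the same way ``udf`` is, and that streaming windows accept UDAF expressions in their aggregate list. Neither assumption is confirmed by this diff.

# Hypothetical sketch (not part of this patch): a sum aggregate built on the
# Accumulator / udaf API from denormalized/datafusion/udf.py.
import pyarrow as pa
import pyarrow.compute as pc

from denormalized.datafusion import Accumulator, col, udaf  # assumed re-exports


class SumReading(Accumulator):
    """Keeps a running float64 sum of the values it is fed."""

    def __init__(self) -> None:
        self._sum = pa.scalar(0.0)

    def update(self, values: pa.Array) -> None:
        # Fold one incoming batch into the running total.
        self._sum = pa.scalar(self._sum.as_py() + (pc.sum(values).as_py() or 0.0))

    def merge(self, states: list[pa.Array]) -> None:
        # Combine partial sums produced by other partitions.
        self._sum = pa.scalar(self._sum.as_py() + (pc.sum(states[0]).as_py() or 0.0))

    def state(self) -> list[pa.Scalar]:
        return [self._sum]

    def evaluate(self) -> pa.Scalar:
        return self._sum


# Signature per AggregateUDF.udaf above:
# udaf(accum, input_types, return_type, state_type, volatility, name=None)
sum_udaf = udaf(SumReading, pa.float64(), pa.float64(), [pa.float64()], "stable")

If those assumptions hold, ``sum_udaf(col("reading")).alias("total")`` could sit alongside the built-in ``f.count`` / ``f.min`` aggregates in the ``ds.window(...)`` call shown in udf_example.py.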