From 076ad58dba87be07c45a563ec105f28dea647620 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Mon, 4 Nov 2024 18:04:22 +0000 Subject: [PATCH 01/12] Expose mixed and conditional joins in pylibcudf (#17235) Expose these join types to pylibcudf, they will be useful for implement inequality joins in cudf polars. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/17235 --- cpp/include/cudf/join.hpp | 32 +- cpp/src/join/conditional_join.cu | 7 +- cpp/src/join/mixed_join.cu | 7 +- python/pylibcudf/pylibcudf/join.pxd | 76 ++++ python/pylibcudf/pylibcudf/join.pyx | 405 ++++++++++++++++++ python/pylibcudf/pylibcudf/libcudf/join.pxd | 114 +++++ python/pylibcudf/pylibcudf/tests/test_join.py | 154 ++++++- 7 files changed, 771 insertions(+), 24 deletions(-) diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index a590eb27511..afefd04d4fa 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -573,7 +573,7 @@ class distinct_hash_join { * Result: {{1}, {0}} * @endcode * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -620,7 +620,7 @@ conditional_inner_join(table_view const& left, * Result: {{0, 1, 2}, {None, 0, None}} * @endcode * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -666,7 +666,7 @@ conditional_left_join(table_view const& left, * Result: {{0, 1, 2, None, None}, {None, 0, None, 1, 2}} * @endcode * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -705,7 +705,7 @@ conditional_full_join(table_view const& left, * Result: {1} * @endcode * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -746,7 +746,7 @@ std::unique_ptr> conditional_left_semi_join( * Result: {0, 2} * @endcode * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -793,7 +793,7 @@ std::unique_ptr> conditional_left_anti_join( * Result: {{1}, {0}} * @endcode * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error If the binary predicate outputs a non-boolean result. * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not * match. * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not @@ -855,7 +855,7 @@ mixed_inner_join( * Result: {{0, 1, 2}, {None, 0, None}} * @endcode * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error If the binary predicate outputs a non-boolean result. 
* @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not * match. * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not @@ -917,7 +917,7 @@ mixed_left_join( * Result: {{0, 1, 2, None, None}, {None, 0, None, 1, 2}} * @endcode * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error If the binary predicate outputs a non-boolean result. * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not * match. * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not @@ -972,7 +972,7 @@ mixed_full_join( * Result: {1} * @endcode * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error If the binary predicate outputs a non-boolean result. * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not * match. * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not @@ -1022,7 +1022,7 @@ std::unique_ptr> mixed_left_semi_join( * Result: {0, 2} * @endcode * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error If the binary predicate outputs a non-boolean result. * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not * match. * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not @@ -1061,7 +1061,7 @@ std::unique_ptr> mixed_left_anti_join( * choose a suitable compare_nulls value AND use appropriate null-safe * operators in the expression. * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error If the binary predicate outputs a non-boolean result. * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not * match. * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not @@ -1103,7 +1103,7 @@ std::pair>> mixed_in * choose a suitable compare_nulls value AND use appropriate null-safe * operators in the expression. * - * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error If the binary predicate outputs a non-boolean result. * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not * match. * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not @@ -1142,7 +1142,7 @@ std::pair>> mixed_le * If the provided predicate returns NULL for a pair of rows * (left, right), that pair is not included in the output. * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -1167,7 +1167,7 @@ std::size_t conditional_inner_join_size( * If the provided predicate returns NULL for a pair of rows * (left, right), that pair is not included in the output. * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. 
* * @param left The left table * @param right The right table @@ -1192,7 +1192,7 @@ std::size_t conditional_left_join_size( * If the provided predicate returns NULL for a pair of rows * (left, right), that pair is not included in the output. * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table @@ -1217,7 +1217,7 @@ std::size_t conditional_left_semi_join_size( * If the provided predicate returns NULL for a pair of rows * (left, right), that pair is not included in the output. * - * @throw cudf::logic_error if the binary predicate outputs a non-boolean result. + * @throw cudf::data_type_error if the binary predicate outputs a non-boolean result. * * @param left The left table * @param right The right table diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index 40d1c925889..781fda215fd 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -178,7 +179,8 @@ conditional_join(table_view const& left, auto const parser = ast::detail::expression_parser{binary_predicate, left, right, has_nulls, stream, mr}; CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, - "The expression must produce a boolean output."); + "The expression must produce a boolean output.", + cudf::data_type_error); auto left_table = table_device_view::create(left, stream); auto right_table = table_device_view::create(right, stream); @@ -330,7 +332,8 @@ std::size_t compute_conditional_join_output_size(table_view const& left, auto const parser = ast::detail::expression_parser{binary_predicate, left, right, has_nulls, stream, mr}; CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, - "The expression must produce a boolean output."); + "The expression must produce a boolean output.", + cudf::data_type_error); auto left_table = table_device_view::create(left, stream); auto right_table = table_device_view::create(right, stream); diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index 820b81ee309..90b0d0a45ad 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -115,7 +116,8 @@ mixed_join( auto const parser = ast::detail::expression_parser{ binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, - "The expression must produce a boolean output."); + "The expression must produce a boolean output.", + cudf::data_type_error); // TODO: The non-conditional join impls start with a dictionary matching, // figure out what that is and what it's needed for (and if conditional joins @@ -381,7 +383,8 @@ compute_mixed_join_output_size(table_view const& left_equality, auto const parser = ast::detail::expression_parser{ binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, - "The expression must produce a boolean output."); + "The expression must produce a boolean output.", + cudf::data_type_error); // TODO: The non-conditional join impls start with a dictionary matching, // figure out what that is and what it's needed for (and if conditional joins diff --git a/python/pylibcudf/pylibcudf/join.pxd b/python/pylibcudf/pylibcudf/join.pxd index 
06969b4a2db..bb9162b466a 100644 --- a/python/pylibcudf/pylibcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/join.pxd @@ -3,6 +3,7 @@ from pylibcudf.libcudf.types cimport null_equality from .column cimport Column +from .expressions cimport Expression from .table cimport Table @@ -37,3 +38,78 @@ cpdef Column left_anti_join( ) cpdef Table cross_join(Table left, Table right) + +cpdef tuple conditional_inner_join( + Table left, + Table right, + Expression binary_predicate, +) + +cpdef tuple conditional_left_join( + Table left, + Table right, + Expression binary_predicate, +) + +cpdef tuple conditional_full_join( + Table left, + Table right, + Expression binary_predicate, +) + +cpdef Column conditional_left_semi_join( + Table left, + Table right, + Expression binary_predicate, +) + +cpdef Column conditional_left_anti_join( + Table left, + Table right, + Expression binary_predicate, +) + +cpdef tuple mixed_inner_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +) + +cpdef tuple mixed_left_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +) + +cpdef tuple mixed_full_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +) + +cpdef Column mixed_left_semi_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +) + +cpdef Column mixed_left_anti_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +) diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx index bc72647ea8e..0d841eee194 100644 --- a/python/pylibcudf/pylibcudf/join.pyx +++ b/python/pylibcudf/pylibcudf/join.pyx @@ -12,6 +12,7 @@ from pylibcudf.libcudf.types cimport null_equality from rmm.librmm.device_buffer cimport device_buffer from .column cimport Column +from .expressions cimport Expression from .table cimport Table @@ -214,3 +215,407 @@ cpdef Table cross_join(Table left, Table right): with nogil: result = cpp_join.cross_join(left.view(), right.view()) return Table.from_libcudf(move(result)) + + +cpdef tuple conditional_inner_join( + Table left, + Table right, + Expression binary_predicate, +): + """Perform a conditional inner join between two tables. + + For details, see :cpp:func:`conditional_inner_join`. + + Parameters + ---------- + left : Table + The left table to join. + right : Table + The right table to join. + binary_predicate : Expression + Condition to join on. + + Returns + ------- + Tuple[Column, Column] + A tuple containing the row indices from the left and right tables after the + join. + """ + cdef cpp_join.gather_map_pair_type c_result + with nogil: + c_result = cpp_join.conditional_inner_join( + left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + ) + return ( + _column_from_gather_map(move(c_result.first)), + _column_from_gather_map(move(c_result.second)), + ) + + +cpdef tuple conditional_left_join( + Table left, + Table right, + Expression binary_predicate, +): + """Perform a conditional left join between two tables. + + For details, see :cpp:func:`conditional_left_join`. + + Parameters + ---------- + left : Table + The left table to join. 
+ right : Table + The right table to join. + binary_predicate : Expression + Condition to join on. + + Returns + ------- + Tuple[Column, Column] + A tuple containing the row indices from the left and right tables after the + join. + """ + cdef cpp_join.gather_map_pair_type c_result + with nogil: + c_result = cpp_join.conditional_left_join( + left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + ) + return ( + _column_from_gather_map(move(c_result.first)), + _column_from_gather_map(move(c_result.second)), + ) + + +cpdef tuple conditional_full_join( + Table left, + Table right, + Expression binary_predicate, +): + """Perform a conditional full join between two tables. + + For details, see :cpp:func:`conditional_full_join`. + + Parameters + ---------- + left : Table + The left table to join. + right : Table + The right table to join. + binary_predicate : Expression + Condition to join on. + + Returns + ------- + Tuple[Column, Column] + A tuple containing the row indices from the left and right tables after the + join. + """ + cdef cpp_join.gather_map_pair_type c_result + with nogil: + c_result = cpp_join.conditional_full_join( + left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + ) + return ( + _column_from_gather_map(move(c_result.first)), + _column_from_gather_map(move(c_result.second)), + ) + + +cpdef Column conditional_left_semi_join( + Table left, + Table right, + Expression binary_predicate, +): + """Perform a conditional left semi join between two tables. + + For details, see :cpp:func:`conditional_left_semi_join`. + + Parameters + ---------- + left : Table + The left table to join. + right : Table + The right table to join. + binary_predicate : Expression + Condition to join on. + + Returns + ------- + Column + A column containing the row indices from the left table after the join. + """ + cdef cpp_join.gather_map_type c_result + with nogil: + c_result = cpp_join.conditional_left_semi_join( + left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + ) + return _column_from_gather_map(move(c_result)) + + +cpdef Column conditional_left_anti_join( + Table left, + Table right, + Expression binary_predicate, +): + """Perform a conditional left anti join between two tables. + + For details, see :cpp:func:`conditional_left_anti_join`. + + Parameters + ---------- + left : Table + The left table to join. + right : Table + The right table to join. + binary_predicate : Expression + Condition to join on. + + Returns + ------- + Column + A column containing the row indices from the left table after the join. + """ + cdef cpp_join.gather_map_type c_result + with nogil: + c_result = cpp_join.conditional_left_anti_join( + left.view(), right.view(), dereference(binary_predicate.c_obj.get()) + ) + return _column_from_gather_map(move(c_result)) + + +cpdef tuple mixed_inner_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +): + """Perform a mixed inner join between two tables. + + For details, see :cpp:func:`mixed_inner_join`. + + Parameters + ---------- + left_keys : Table + The left table to use for the equality join. + right_keys : Table + The right table to use for the equality join. + left_conditional : Table + The left table to use for the conditional join. + right_conditional : Table + The right table to use for the conditional join. + binary_predicate : Expression + Condition to join on. 
+ nulls_equal : NullEquality + Should nulls compare equal in the equality join? + + Returns + ------- + Tuple[Column, Column] + A tuple containing the row indices from the left and right tables after the + join. + """ + cdef cpp_join.gather_map_pair_type c_result + with nogil: + c_result = cpp_join.mixed_inner_join( + left_keys.view(), + right_keys.view(), + left_conditional.view(), + right_conditional.view(), + dereference(binary_predicate.c_obj.get()), + nulls_equal, + ) + return ( + _column_from_gather_map(move(c_result.first)), + _column_from_gather_map(move(c_result.second)), + ) + + +cpdef tuple mixed_left_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +): + """Perform a mixed left join between two tables. + + For details, see :cpp:func:`mixed_left_join`. + + Parameters + ---------- + left_keys : Table + The left table to use for the equality join. + right_keys : Table + The right table to use for the equality join. + left_conditional : Table + The left table to use for the conditional join. + right_conditional : Table + The right table to use for the conditional join. + binary_predicate : Expression + Condition to join on. + nulls_equal : NullEquality + Should nulls compare equal in the equality join? + + Returns + ------- + Tuple[Column, Column] + A tuple containing the row indices from the left and right tables after the + join. + """ + cdef cpp_join.gather_map_pair_type c_result + with nogil: + c_result = cpp_join.mixed_left_join( + left_keys.view(), + right_keys.view(), + left_conditional.view(), + right_conditional.view(), + dereference(binary_predicate.c_obj.get()), + nulls_equal, + ) + return ( + _column_from_gather_map(move(c_result.first)), + _column_from_gather_map(move(c_result.second)), + ) + + +cpdef tuple mixed_full_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +): + """Perform a mixed full join between two tables. + + For details, see :cpp:func:`mixed_full_join`. + + Parameters + ---------- + left_keys : Table + The left table to use for the equality join. + right_keys : Table + The right table to use for the equality join. + left_conditional : Table + The left table to use for the conditional join. + right_conditional : Table + The right table to use for the conditional join. + binary_predicate : Expression + Condition to join on. + nulls_equal : NullEquality + Should nulls compare equal in the equality join? + + Returns + ------- + Tuple[Column, Column] + A tuple containing the row indices from the left and right tables after the + join. + """ + cdef cpp_join.gather_map_pair_type c_result + with nogil: + c_result = cpp_join.mixed_full_join( + left_keys.view(), + right_keys.view(), + left_conditional.view(), + right_conditional.view(), + dereference(binary_predicate.c_obj.get()), + nulls_equal, + ) + return ( + _column_from_gather_map(move(c_result.first)), + _column_from_gather_map(move(c_result.second)), + ) + + +cpdef Column mixed_left_semi_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +): + """Perform a mixed left semi join between two tables. + + For details, see :cpp:func:`mixed_left_semi_join`. + + Parameters + ---------- + left_keys : Table + The left table to use for the equality join. 
+ right_keys : Table + The right table to use for the equality join. + left_conditional : Table + The left table to use for the conditional join. + right_conditional : Table + The right table to use for the conditional join. + binary_predicate : Expression + Condition to join on. + nulls_equal : NullEquality + Should nulls compare equal in the equality join? + + Returns + ------- + Column + A column containing the row indices from the left table after the join. + """ + cdef cpp_join.gather_map_type c_result + with nogil: + c_result = cpp_join.mixed_left_semi_join( + left_keys.view(), + right_keys.view(), + left_conditional.view(), + right_conditional.view(), + dereference(binary_predicate.c_obj.get()), + nulls_equal, + ) + return _column_from_gather_map(move(c_result)) + + +cpdef Column mixed_left_anti_join( + Table left_keys, + Table right_keys, + Table left_conditional, + Table right_conditional, + Expression binary_predicate, + null_equality nulls_equal +): + """Perform a mixed left anti join between two tables. + + For details, see :cpp:func:`mixed_left_anti_join`. + + Parameters + ---------- + left_keys : Table + The left table to use for the equality join. + right_keys : Table + The right table to use for the equality join. + left_conditional : Table + The left table to use for the conditional join. + right_conditional : Table + The right table to use for the conditional join. + binary_predicate : Expression + Condition to join on. + nulls_equal : NullEquality + Should nulls compare equal in the equality join? + + Returns + ------- + Column + A column containing the row indices from the left table after the join. + """ + cdef cpp_join.gather_map_type c_result + with nogil: + c_result = cpp_join.mixed_left_anti_join( + left_keys.view(), + right_keys.view(), + left_conditional.view(), + right_conditional.view(), + dereference(binary_predicate.c_obj.get()), + nulls_equal, + ) + return _column_from_gather_map(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd index 21033a0284e..f8e592c2104 100644 --- a/python/pylibcudf/pylibcudf/libcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd @@ -1,10 +1,14 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. 
+from libc.stddef cimport size_t from libcpp cimport bool from libcpp.memory cimport unique_ptr +from libcpp.optional cimport optional from libcpp.pair cimport pair from libcpp.vector cimport vector +from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.expressions cimport expression from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport null_equality, size_type @@ -74,3 +78,113 @@ cdef extern from "cudf/join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, ) except + + + cdef gather_map_pair_type conditional_inner_join( + const table_view left, + const table_view right, + const expression binary_predicate, + ) except +libcudf_exception_handler + + cdef gather_map_pair_type conditional_inner_join( + const table_view left, + const table_view right, + const expression binary_predicate, + optional[size_t] output_size + ) except +libcudf_exception_handler + + cdef gather_map_pair_type conditional_left_join( + const table_view left, + const table_view right, + const expression binary_predicate, + ) except +libcudf_exception_handler + + cdef gather_map_pair_type conditional_left_join( + const table_view left, + const table_view right, + const expression binary_predicate, + optional[size_t] output_size + ) except +libcudf_exception_handler + + cdef gather_map_pair_type conditional_full_join( + const table_view left, + const table_view right, + const expression binary_predicate, + ) except +libcudf_exception_handler + + cdef gather_map_pair_type conditional_full_join( + const table_view left, + const table_view right, + const expression binary_predicate, + optional[size_t] output_size + ) except +libcudf_exception_handler + + cdef gather_map_type conditional_left_semi_join( + const table_view left, + const table_view right, + const expression binary_predicate, + ) except +libcudf_exception_handler + + cdef gather_map_type conditional_left_semi_join( + const table_view left, + const table_view right, + const expression binary_predicate, + optional[size_t] output_size + ) except +libcudf_exception_handler + + cdef gather_map_type conditional_left_anti_join( + const table_view left, + const table_view right, + const expression binary_predicate, + ) except +libcudf_exception_handler + + cdef gather_map_type conditional_left_anti_join( + const table_view left, + const table_view right, + const expression binary_predicate, + optional[size_t] output_size + ) except +libcudf_exception_handler + + cdef gather_map_pair_type mixed_inner_join( + const table_view left_equality, + const table_view right_equality, + const table_view left_conditional, + const table_view right_conditional, + const expression binary_predicate, + null_equality compare_nulls + ) except +libcudf_exception_handler + + cdef gather_map_pair_type mixed_left_join( + const table_view left_equality, + const table_view right_equality, + const table_view left_conditional, + const table_view right_conditional, + const expression binary_predicate, + null_equality compare_nulls + ) except +libcudf_exception_handler + + cdef gather_map_pair_type mixed_full_join( + const table_view left_equality, + const table_view right_equality, + const table_view left_conditional, + const table_view right_conditional, + const expression binary_predicate, + null_equality compare_nulls + ) except +libcudf_exception_handler + + cdef gather_map_type 
mixed_left_semi_join( + const table_view left_equality, + const table_view right_equality, + const table_view left_conditional, + const table_view right_conditional, + const expression binary_predicate, + null_equality compare_nulls + ) except +libcudf_exception_handler + + cdef gather_map_type mixed_left_anti_join( + const table_view left_equality, + const table_view right_equality, + const table_view left_conditional, + const table_view right_conditional, + const expression binary_predicate, + null_equality compare_nulls + ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/tests/test_join.py b/python/pylibcudf/pylibcudf/tests/test_join.py index f43a56046a4..56cf421780b 100644 --- a/python/pylibcudf/pylibcudf/tests/test_join.py +++ b/python/pylibcudf/pylibcudf/tests/test_join.py @@ -2,17 +2,45 @@ import numpy as np import pyarrow as pa +import pytest from utils import assert_table_eq import pylibcudf as plc -def test_cross_join(): - left = pa.Table.from_arrays([[0, 1, 2], [3, 4, 5]], names=["a", "b"]) - right = pa.Table.from_arrays( - [[6, 7, 8, 9], [10, 11, 12, 13]], names=["c", "d"] +@pytest.fixture +def left(): + return pa.Table.from_arrays( + [[0, 1, 2, 100], [3, 4, 5, None]], + schema=pa.schema({"a": pa.int32(), "b": pa.int32()}), ) + +@pytest.fixture +def right(): + return pa.Table.from_arrays( + [[-1, -2, 0, 1, -3], [10, 3, 4, 5, None]], + schema=pa.schema({"c": pa.int32(), "d": pa.int32()}), + ) + + +@pytest.fixture +def expr(): + return plc.expressions.Operation( + plc.expressions.ASTOperator.LESS, + plc.expressions.ColumnReference( + 0, plc.expressions.TableReference.LEFT + ), + plc.expressions.ColumnReference( + 0, plc.expressions.TableReference.RIGHT + ), + ) + + +def test_cross_join(left, right): + # Remove the nulls so the calculation of the expected result works + left = left[:-1] + right = right[:-1] pleft = plc.interop.from_arrow(left) pright = plc.interop.from_arrow(right) @@ -27,3 +55,121 @@ def test_cross_join(): got = plc.join.cross_join(pleft, pright) assert_table_eq(expect, got) + + +sentinel = np.iinfo(np.int32).min + + +@pytest.mark.parametrize( + "join_type,expect_left,expect_right", + [ + (plc.join.conditional_inner_join, {0}, {3}), + (plc.join.conditional_left_join, {0, 1, 2, 3}, {3, sentinel}), + ( + plc.join.conditional_full_join, + {0, 1, 2, 3, sentinel}, + {0, 1, 2, 3, 4, sentinel}, + ), + ], + ids=["inner", "left", "full"], +) +def test_conditional_join( + left, right, expr, join_type, expect_left, expect_right +): + pleft = plc.interop.from_arrow(left) + pright = plc.interop.from_arrow(right) + + g_left, g_right = map(plc.interop.to_arrow, join_type(pleft, pright, expr)) + + assert set(g_left.to_pylist()) == expect_left + assert set(g_right.to_pylist()) == expect_right + + +@pytest.mark.parametrize( + "join_type,expect", + [ + (plc.join.conditional_left_semi_join, {0}), + (plc.join.conditional_left_anti_join, {1, 2, 3}), + ], + ids=["semi", "anti"], +) +def test_conditional_semianti_join(left, right, expr, join_type, expect): + pleft = plc.interop.from_arrow(left) + pright = plc.interop.from_arrow(right) + + g_left = plc.interop.to_arrow(join_type(pleft, pright, expr)) + + assert set(g_left.to_pylist()) == expect + + +@pytest.mark.parametrize( + "join_type,expect_left,expect_right", + [ + (plc.join.mixed_inner_join, set(), set()), + (plc.join.mixed_left_join, {0, 1, 2, 3}, {sentinel}), + ( + plc.join.mixed_full_join, + {0, 1, 2, 3, sentinel}, + {0, 1, 2, 3, 4, sentinel}, + ), + ], + ids=["inner", "left", "full"], +) 
+@pytest.mark.parametrize( + "null_equality", + [plc.types.NullEquality.EQUAL, plc.types.NullEquality.UNEQUAL], + ids=["nulls_equal", "nulls_not_equal"], +) +def test_mixed_join( + left, right, expr, join_type, expect_left, expect_right, null_equality +): + pleft = plc.interop.from_arrow(left) + pright = plc.interop.from_arrow(right) + + g_left, g_right = map( + plc.interop.to_arrow, + join_type( + plc.Table(pleft.columns()[1:]), + plc.Table(pright.columns()[1:]), + pleft, + pright, + expr, + null_equality, + ), + ) + + assert set(g_left.to_pylist()) == expect_left + assert set(g_right.to_pylist()) == expect_right + + +@pytest.mark.parametrize( + "join_type,expect", + [ + (plc.join.mixed_left_semi_join, set()), + (plc.join.mixed_left_anti_join, {0, 1, 2, 3}), + ], + ids=["semi", "anti"], +) +@pytest.mark.parametrize( + "null_equality", + [plc.types.NullEquality.EQUAL, plc.types.NullEquality.UNEQUAL], + ids=["nulls_equal", "nulls_not_equal"], +) +def test_mixed_semianti_join( + left, right, expr, join_type, expect, null_equality +): + pleft = plc.interop.from_arrow(left) + pright = plc.interop.from_arrow(right) + + g_left = plc.interop.to_arrow( + join_type( + plc.Table(pleft.columns()[1:]), + plc.Table(pright.columns()[1:]), + pleft, + pright, + expr, + null_equality, + ) + ) + + assert set(g_left.to_pylist()) == expect From a2001dd5c93177fbebd37e85de5d83f152869eb9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 4 Nov 2024 10:06:15 -0800 Subject: [PATCH 02/12] Use more pylibcudf.io.types enums in cudf._libs (#17237) If we consider the `pylibcudf.libcudf` namespace to eventually be more "private", this PR replaces that usage, specifically when accessing enums, with their public counterparts Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/17237 --- python/cudf/cudf/_lib/csv.pyx | 12 +-- python/cudf/cudf/_lib/json.pyx | 38 +++---- python/cudf/cudf/_lib/orc.pyx | 43 ++++---- python/cudf/cudf/_lib/parquet.pyx | 138 +++++++++++------------- python/pylibcudf/pylibcudf/io/types.pyx | 8 +- 5 files changed, 111 insertions(+), 128 deletions(-) diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index 9ad96f610b3..c09e06bfc59 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -28,7 +28,7 @@ from pylibcudf.libcudf.io.csv cimport ( write_csv as cpp_write_csv, ) from pylibcudf.libcudf.io.data_sink cimport data_sink -from pylibcudf.libcudf.io.types cimport compression_type, sink_info +from pylibcudf.libcudf.io.types cimport sink_info from pylibcudf.libcudf.table.table_view cimport table_view from cudf._lib.io.utils cimport make_sink_info @@ -148,13 +148,13 @@ def read_csv( byte_range = (0, 0) if compression is None: - c_compression = compression_type.NONE + c_compression = plc.io.types.CompressionType.NONE else: compression_map = { - "infer": compression_type.AUTO, - "gzip": compression_type.GZIP, - "bz2": compression_type.BZIP2, - "zip": compression_type.ZIP, + "infer": plc.io.types.CompressionType.AUTO, + "gzip": plc.io.types.CompressionType.GZIP, + "bz2": plc.io.types.CompressionType.BZIP2, + "zip": plc.io.types.CompressionType.ZIP, } c_compression = compression_map[compression] diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 9bbbcf60dcf..fb149603960 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -9,10 +9,6 @@ 
from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -cimport pylibcudf.libcudf.io.types as cudf_io_types -from pylibcudf.io.types cimport compression_type -from pylibcudf.libcudf.io.json cimport json_recovery_mode_t -from pylibcudf.libcudf.io.types cimport compression_type from pylibcudf.libcudf.types cimport data_type, type_id from pylibcudf.types cimport DataType @@ -24,15 +20,6 @@ from cudf._lib.utils cimport _data_from_columns, data_from_pylibcudf_io import pylibcudf as plc -cdef json_recovery_mode_t _get_json_recovery_mode(object on_bad_lines): - if on_bad_lines.lower() == "error": - return json_recovery_mode_t.FAIL - elif on_bad_lines.lower() == "recover": - return json_recovery_mode_t.RECOVER_WITH_NULL - else: - raise TypeError(f"Invalid parameter for {on_bad_lines=}") - - cpdef read_json(object filepaths_or_buffers, object dtype, bool lines, @@ -41,7 +28,7 @@ cpdef read_json(object filepaths_or_buffers, bool keep_quotes, bool mixed_types_as_string, bool prune_columns, - object on_bad_lines): + str on_bad_lines): """ Cython function to call into libcudf API, see `read_json`. @@ -64,19 +51,24 @@ cpdef read_json(object filepaths_or_buffers, filepaths_or_buffers[idx] = filepaths_or_buffers[idx].encode() # Setup arguments - cdef cudf_io_types.compression_type c_compression - if compression is not None: if compression == 'gzip': - c_compression = cudf_io_types.compression_type.GZIP + c_compression = plc.io.types.CompressionType.GZIP elif compression == 'bz2': - c_compression = cudf_io_types.compression_type.BZIP2 + c_compression = plc.io.types.CompressionType.BZIP2 elif compression == 'zip': - c_compression = cudf_io_types.compression_type.ZIP + c_compression = plc.io.types.CompressionType.ZIP else: - c_compression = cudf_io_types.compression_type.AUTO + c_compression = plc.io.types.CompressionType.AUTO + else: + c_compression = plc.io.types.CompressionType.NONE + + if on_bad_lines.lower() == "error": + c_on_bad_lines = plc.io.types.JSONRecoveryMode.FAIL + elif on_bad_lines.lower() == "recover": + c_on_bad_lines = plc.io.types.JSONRecoveryMode.RECOVER_WITH_NULL else: - c_compression = cudf_io_types.compression_type.NONE + raise TypeError(f"Invalid parameter for {on_bad_lines=}") processed_dtypes = None @@ -108,7 +100,7 @@ cpdef read_json(object filepaths_or_buffers, keep_quotes = keep_quotes, mixed_types_as_string = mixed_types_as_string, prune_columns = prune_columns, - recovery_mode = _get_json_recovery_mode(on_bad_lines) + recovery_mode = c_on_bad_lines ) df = cudf.DataFrame._from_data( *_data_from_columns( @@ -130,7 +122,7 @@ cpdef read_json(object filepaths_or_buffers, keep_quotes = keep_quotes, mixed_types_as_string = mixed_types_as_string, prune_columns = prune_columns, - recovery_mode = _get_json_recovery_mode(on_bad_lines) + recovery_mode = c_on_bad_lines ) df = cudf.DataFrame._from_data( diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index f88c48ce989..32a5e463916 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -15,7 +15,6 @@ try: except ImportError: import json -cimport pylibcudf.libcudf.io.types as cudf_io_types cimport pylibcudf.libcudf.lists.lists_column_view as cpp_lists_column_view from pylibcudf.libcudf.io.data_sink cimport data_sink from pylibcudf.libcudf.io.orc cimport ( @@ -26,7 +25,6 @@ from pylibcudf.libcudf.io.orc cimport ( ) from pylibcudf.libcudf.io.types cimport ( column_in_metadata, - compression_type, sink_info, table_input_metadata, ) @@ -137,22 +135,23 @@ cpdef read_orc(object 
filepaths_or_buffers, return data, index -cdef compression_type _get_comp_type(object compression): +def _get_comp_type(object compression): if compression is None or compression is False: - return compression_type.NONE + return plc.io.types.CompressionType.NONE compression = str(compression).upper() if compression == "SNAPPY": - return compression_type.SNAPPY + return plc.io.types.CompressionType.SNAPPY elif compression == "ZLIB": - return compression_type.ZLIB + return plc.io.types.CompressionType.ZLIB elif compression == "ZSTD": - return compression_type.ZSTD + return plc.io.types.CompressionType.ZSTD elif compression == "LZ4": - return compression_type.LZ4 + return plc.io.types.CompressionType.LZ4 else: raise ValueError(f"Unsupported `compression` type {compression}") + cdef tuple _get_index_from_metadata( vector[map[string, string]] user_data, object names, @@ -210,7 +209,8 @@ cdef tuple _get_index_from_metadata( range_idx ) -cdef cudf_io_types.statistics_freq _get_orc_stat_freq(object statistics): + +def _get_orc_stat_freq(str statistics): """ Convert ORC statistics terms to CUDF convention: - ORC "STRIPE" == CUDF "ROWGROUP" @@ -218,11 +218,11 @@ cdef cudf_io_types.statistics_freq _get_orc_stat_freq(object statistics): """ statistics = str(statistics).upper() if statistics == "NONE": - return cudf_io_types.statistics_freq.STATISTICS_NONE + return plc.io.types.StatisticsFreq.STATISTICS_NONE elif statistics == "STRIPE": - return cudf_io_types.statistics_freq.STATISTICS_ROWGROUP + return plc.io.types.StatisticsFreq.STATISTICS_ROWGROUP elif statistics == "ROWGROUP": - return cudf_io_types.statistics_freq.STATISTICS_PAGE + return plc.io.types.StatisticsFreq.STATISTICS_PAGE else: raise ValueError(f"Unsupported `statistics_freq` type {statistics}") @@ -232,7 +232,7 @@ def write_orc( table, object path_or_buf, object compression="snappy", - object statistics="ROWGROUP", + str statistics="ROWGROUP", object stripe_size_bytes=None, object stripe_size_rows=None, object row_index_stride=None, @@ -246,7 +246,6 @@ def write_orc( -------- cudf.read_orc """ - cdef compression_type compression_ = _get_comp_type(compression) cdef unique_ptr[data_sink] data_sink_c cdef sink_info sink_info_c = make_sink_info(path_or_buf, data_sink_c) cdef table_input_metadata tbl_meta @@ -289,7 +288,7 @@ def write_orc( sink_info_c, tv ).metadata(tbl_meta) .key_value_metadata(move(user_data)) - .compression(compression_) + .compression(_get_comp_type(compression)) .enable_statistics(_get_orc_stat_freq(statistics)) .build() ) @@ -330,8 +329,8 @@ cdef class ORCWriter: cdef unique_ptr[orc_chunked_writer] writer cdef sink_info sink cdef unique_ptr[data_sink] _data_sink - cdef cudf_io_types.statistics_freq stat_freq - cdef compression_type comp_type + cdef str statistics + cdef object compression cdef object index cdef table_input_metadata tbl_meta cdef object cols_as_map_type @@ -343,15 +342,15 @@ cdef class ORCWriter: object path, object index=None, object compression="snappy", - object statistics="ROWGROUP", + str statistics="ROWGROUP", object cols_as_map_type=None, object stripe_size_bytes=None, object stripe_size_rows=None, object row_index_stride=None): self.sink = make_sink_info(path, self._data_sink) - self.stat_freq = _get_orc_stat_freq(statistics) - self.comp_type = _get_comp_type(compression) + self.statistics = statistics + self.compression = compression self.index = index self.cols_as_map_type = cols_as_map_type \ if cols_as_map_type is None else set(cols_as_map_type) @@ -429,8 +428,8 @@ cdef class ORCWriter: 
chunked_orc_writer_options.builder(self.sink) .metadata(self.tbl_meta) .key_value_metadata(move(user_data)) - .compression(self.comp_type) - .enable_statistics(self.stat_freq) + .compression(_get_comp_type(self.compression)) + .enable_statistics(_get_orc_stat_freq(self.statistics)) .build() ) if self.stripe_size_bytes is not None: diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index fa2690c7f21..1212637d330 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -31,10 +31,9 @@ from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport move from libcpp.vector cimport vector -cimport pylibcudf.libcudf.io.data_sink as cudf_io_data_sink -cimport pylibcudf.libcudf.io.types as cudf_io_types from pylibcudf.expressions cimport Expression from pylibcudf.io.parquet cimport ChunkedParquetReader +from pylibcudf.libcudf.io.data_sink cimport data_sink from pylibcudf.libcudf.io.parquet cimport ( chunked_parquet_writer_options, merge_row_group_metadata as parquet_merge_metadata, @@ -47,8 +46,14 @@ from pylibcudf.libcudf.io.parquet_metadata cimport ( read_parquet_metadata as parquet_metadata_reader, ) from pylibcudf.libcudf.io.types cimport ( + source_info, + sink_info, column_in_metadata, table_input_metadata, + partition_info, + statistics_freq, + compression_type, + dictionary_policy, ) from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type @@ -377,7 +382,7 @@ cpdef read_parquet_metadata(filepaths_or_buffers): cudf.io.parquet.read_parquet cudf.io.parquet.to_parquet """ - cdef cudf_io_types.source_info source = make_source_info(filepaths_or_buffers) + cdef source_info source = make_source_info(filepaths_or_buffers) args = move(source) @@ -466,8 +471,8 @@ def write_parquet( cdef vector[map[string, string]] user_data cdef table_view tv - cdef vector[unique_ptr[cudf_io_data_sink.data_sink]] _data_sinks - cdef cudf_io_types.sink_info sink = make_sinks_info( + cdef vector[unique_ptr[data_sink]] _data_sinks + cdef sink_info sink = make_sinks_info( filepaths_or_buffers, _data_sinks ) @@ -531,19 +536,19 @@ def write_parquet( "Valid values are '1.0' and '2.0'" ) - cdef cudf_io_types.dictionary_policy dict_policy = ( - cudf_io_types.dictionary_policy.ADAPTIVE + dict_policy = ( + plc.io.types.DictionaryPolicy.ADAPTIVE if use_dictionary - else cudf_io_types.dictionary_policy.NEVER + else plc.io.types.DictionaryPolicy.NEVER ) - cdef cudf_io_types.compression_type comp_type = _get_comp_type(compression) - cdef cudf_io_types.statistics_freq stat_freq = _get_stat_freq(statistics) + comp_type = _get_comp_type(compression) + stat_freq = _get_stat_freq(statistics) cdef unique_ptr[vector[uint8_t]] out_metadata_c cdef vector[string] c_column_chunks_file_paths cdef bool _int96_timestamps = int96_timestamps - cdef vector[cudf_io_types.partition_info] partitions + cdef vector[partition_info] partitions # Perform write cdef parquet_writer_options args = move( @@ -563,7 +568,7 @@ def write_parquet( partitions.reserve(len(partitions_info)) for part in partitions_info: partitions.push_back( - cudf_io_types.partition_info(part[0], part[1]) + partition_info(part[0], part[1]) ) args.set_partitions(move(partitions)) if metadata_file_path is not None: @@ -646,17 +651,17 @@ cdef class ParquetWriter: cdef bool initialized cdef unique_ptr[cpp_parquet_chunked_writer] writer cdef table_input_metadata tbl_meta - cdef cudf_io_types.sink_info sink - cdef vector[unique_ptr[cudf_io_data_sink.data_sink]] 
_data_sink - cdef cudf_io_types.statistics_freq stat_freq - cdef cudf_io_types.compression_type comp_type + cdef sink_info sink + cdef vector[unique_ptr[data_sink]] _data_sink + cdef str statistics + cdef object compression cdef object index cdef size_t row_group_size_bytes cdef size_type row_group_size_rows cdef size_t max_page_size_bytes cdef size_type max_page_size_rows cdef size_t max_dictionary_size - cdef cudf_io_types.dictionary_policy dict_policy + cdef bool use_dictionary cdef bool write_arrow_schema def __cinit__(self, object filepath_or_buffer, object index=None, @@ -674,8 +679,8 @@ cdef class ParquetWriter: else [filepath_or_buffer] ) self.sink = make_sinks_info(filepaths_or_buffers, self._data_sink) - self.stat_freq = _get_stat_freq(statistics) - self.comp_type = _get_comp_type(compression) + self.statistics = statistics + self.compression = compression self.index = index self.initialized = False self.row_group_size_bytes = row_group_size_bytes @@ -683,11 +688,7 @@ cdef class ParquetWriter: self.max_page_size_bytes = max_page_size_bytes self.max_page_size_rows = max_page_size_rows self.max_dictionary_size = max_dictionary_size - self.dict_policy = ( - cudf_io_types.dictionary_policy.ADAPTIVE - if use_dictionary - else cudf_io_types.dictionary_policy.NEVER - ) + self.use_dictionary = use_dictionary self.write_arrow_schema = store_schema def write_table(self, table, object partitions_info=None): @@ -706,11 +707,11 @@ cdef class ParquetWriter: else: tv = table_view_from_table(table, ignore_index=True) - cdef vector[cudf_io_types.partition_info] partitions + cdef vector[partition_info] partitions if partitions_info is not None: for part in partitions_info: partitions.push_back( - cudf_io_types.partition_info(part[0], part[1]) + partition_info(part[0], part[1]) ) with nogil: @@ -795,13 +796,20 @@ cdef class ParquetWriter: user_data = vector[map[string, string]](num_partitions, tmp_user_data) cdef chunked_parquet_writer_options args + cdef compression_type comp_type = _get_comp_type(self.compression) + cdef statistics_freq stat_freq = _get_stat_freq(self.statistics) + cdef dictionary_policy dict_policy = ( + plc.io.types.DictionaryPolicy.ADAPTIVE + if self.use_dictionary + else plc.io.types.DictionaryPolicy.NEVER + ) with nogil: args = move( chunked_parquet_writer_options.builder(self.sink) .metadata(self.tbl_meta) .key_value_metadata(move(user_data)) - .compression(self.comp_type) - .stats_level(self.stat_freq) + .compression(comp_type) + .stats_level(stat_freq) .row_group_size_bytes(self.row_group_size_bytes) .row_group_size_rows(self.row_group_size_rows) .max_page_size_bytes(self.max_page_size_bytes) @@ -810,7 +818,7 @@ cdef class ParquetWriter: .write_arrow_schema(self.write_arrow_schema) .build() ) - args.set_dictionary_policy(self.dict_policy) + args.set_dictionary_policy(dict_policy) self.writer.reset(new cpp_parquet_chunked_writer(args)) self.initialized = True @@ -838,56 +846,28 @@ cpdef merge_filemetadata(object filemetadata_list): return np.asarray(out_metadata_py) -cdef cudf_io_types.statistics_freq _get_stat_freq(object statistics): - statistics = str(statistics).upper() - if statistics == "NONE": - return cudf_io_types.statistics_freq.STATISTICS_NONE - elif statistics == "ROWGROUP": - return cudf_io_types.statistics_freq.STATISTICS_ROWGROUP - elif statistics == "PAGE": - return cudf_io_types.statistics_freq.STATISTICS_PAGE - elif statistics == "COLUMN": - return cudf_io_types.statistics_freq.STATISTICS_COLUMN - else: +cdef statistics_freq _get_stat_freq(str 
statistics): + result = getattr( + plc.io.types.StatisticsFreq, + f"STATISTICS_{statistics.upper()}", + None + ) + if result is None: raise ValueError("Unsupported `statistics_freq` type") + return result -cdef cudf_io_types.compression_type _get_comp_type(object compression): +cdef compression_type _get_comp_type(object compression): if compression is None: - return cudf_io_types.compression_type.NONE - - compression = str(compression).upper() - if compression == "SNAPPY": - return cudf_io_types.compression_type.SNAPPY - elif compression == "ZSTD": - return cudf_io_types.compression_type.ZSTD - elif compression == "LZ4": - return cudf_io_types.compression_type.LZ4 - else: + return plc.io.types.CompressionType.NONE + result = getattr( + plc.io.types.CompressionType, + str(compression).upper(), + None + ) + if result is None: raise ValueError("Unsupported `compression` type") - - -cdef cudf_io_types.column_encoding _get_encoding_type(object encoding): - if encoding is None: - return cudf_io_types.column_encoding.USE_DEFAULT - - enc = str(encoding).upper() - if enc == "PLAIN": - return cudf_io_types.column_encoding.PLAIN - elif enc == "DICTIONARY": - return cudf_io_types.column_encoding.DICTIONARY - elif enc == "DELTA_BINARY_PACKED": - return cudf_io_types.column_encoding.DELTA_BINARY_PACKED - elif enc == "DELTA_LENGTH_BYTE_ARRAY": - return cudf_io_types.column_encoding.DELTA_LENGTH_BYTE_ARRAY - elif enc == "DELTA_BYTE_ARRAY": - return cudf_io_types.column_encoding.DELTA_BYTE_ARRAY - elif enc == "BYTE_STREAM_SPLIT": - return cudf_io_types.column_encoding.BYTE_STREAM_SPLIT - elif enc == "USE_DEFAULT": - return cudf_io_types.column_encoding.USE_DEFAULT - else: - raise ValueError("Unsupported `column_encoding` type") + return result cdef _set_col_metadata( @@ -914,7 +894,15 @@ cdef _set_col_metadata( col_meta.set_skip_compression(True) if column_encoding is not None and full_path in column_encoding: - col_meta.set_encoding(_get_encoding_type(column_encoding[full_path])) + encoding = column_encoding[full_path] + if encoding is None: + c_encoding = plc.io.types.ColumnEncoding.USE_DEFAULT + else: + enc = str(encoding).upper() + c_encoding = getattr(plc.io.types.ColumnEncoding, enc, None) + if c_encoding is None: + raise ValueError("Unsupported `column_encoding` type") + col_meta.set_encoding(c_encoding) if column_type_length is not None and full_path in column_type_length: col_meta.set_output_as_binary(True) diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 563a02761da..967d05e7057 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -23,8 +23,12 @@ import os from pylibcudf.libcudf.io.json import \ json_recovery_mode_t as JSONRecoveryMode # no-cython-lint -from pylibcudf.libcudf.io.types import \ - compression_type as CompressionType # no-cython-lint +from pylibcudf.libcudf.io.types import ( + compression_type as CompressionType, # no-cython-lint + column_encoding as ColumnEncoding, # no-cython-lint + dictionary_policy as DictionaryPolicy, # no-cython-lint + statistics_freq as StatisticsFreq, # no-cython-lint +) cdef class TableWithMetadata: From 1d25d14b718541145b45cf25c80b55321a9e2c32 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 4 Nov 2024 14:16:05 -0600 Subject: [PATCH 03/12] Fix discoverability of submodules inside `pd.util` (#17215) Fixes: #17166 This PR fixes the discoverability of the submodules of attributes and modules inside `pd.util`. 
Somehow `importlib.import_module("pandas.util").__dict__` doesn't display submodules and only root level attributes. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/17215 --- python/cudf/cudf/pandas/_wrappers/pandas.py | 28 ++++++++++++++----- .../cudf_pandas_tests/test_cudf_pandas.py | 18 ++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 6d03063fa27..05e7d159c63 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -75,13 +75,27 @@ def _pandas_util_dir(): # In pandas 2.0, pandas.util contains public APIs under # __getattr__ but no __dir__ to find them # https://github.com/pandas-dev/pandas/blob/2.2.x/pandas/util/__init__.py - return list(importlib.import_module("pandas.util").__dict__.keys()) + [ - "hash_array", - "hash_pandas_object", - "Appender", - "Substitution", - "cache_readonly", - ] + res = list( + set( + list(importlib.import_module("pandas.util").__dict__.keys()) + + [ + "Appender", + "Substitution", + "_exceptions", + "_print_versions", + "cache_readonly", + "hash_array", + "hash_pandas_object", + "version", + "_tester", + "_validators", + "_decorators", + ] + ) + ) + if cudf.core._compat.PANDAS_GE_220: + res.append("capitalize_first_letter") + return res pd.util.__dir__ = _pandas_util_dir diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 7aefdc386bb..3e7d1cf3c4c 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1759,3 +1759,21 @@ def test_fallback_raises_error(monkeypatch): monkeycontext.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True") with pytest.raises(ProxyFallbackError): pd.Series(range(2)).astype(object) + + +@pytest.mark.parametrize( + "attrs", + [ + "_exceptions", + "version", + "_print_versions", + "capitalize_first_letter", + "_validators", + "_decorators", + ], +) +def test_cudf_pandas_util_version(attrs): + if not PANDAS_GE_220 and attrs == "capitalize_first_letter": + assert not hasattr(pd.util, attrs) + else: + assert hasattr(pd.util, attrs) From 45563b363d62b0f27f3d371e880142748a62eec5 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Mon, 4 Nov 2024 15:06:35 -0600 Subject: [PATCH 04/12] Refactor Dask cuDF legacy code (#17205) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "legacy" DataFrame API is now deprecated (https://github.com/dask/dask/pull/11437). The main purpose of this PR is to start isolating legacy code in Dask cuDF. **Old layout**: ``` dask_cudf/ ├── expr/ │ ├── _collection.py │ ├── _expr.py │ ├── _groupby.py ├── io/ │ ├── tests/ │ ├── ... │ ├── parquet.py │ ├── ... ├── tests/ ├── accessors.py ├── backends.py ├── core.py ├── groupby.py ├── sorting.py ``` **New layout**: ``` dask_cudf/ ├── _expr/ │ ├── accessors.py │ ├── collection.py │ ├── expr.py │ ├── groupby.py ├── _legacy/ │ ├── io/ │ ├── core.py │ ├── groupby.py │ ├── sorting.py ├── io/ │ ├── tests/ │ ├── ... │ ├── parquet.py │ ├── ... ├── tests/ ├── backends.py ├── core.py ``` **Notes** - This PR adds some backward compatibility to the expr-based API that was previously missing: The user can now import collection classes from `dask_cudf.core` (previously led to a "silent" bug when query-planning was enabled). 
- The user can also import various IO functions from `dask_cudf.io` (and sub-modules like `dask_cudf.io.parquet`), but they will typically get a deprecation warning. - This PR is still technically "breaking" in the sense that the user can no longer import *some* functions/classes from `dask_cudf.io.*`. Also, the `groupby`, `sorting`, and `accessors` modules have simply moved. It *should* be uncommon for down-stream code to import from these modules. It's also worth noting that query-planning was already causing problems for these users if they *were* doing this. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/cudf/pull/17205 --- python/dask_cudf/dask_cudf/__init__.py | 51 +- python/dask_cudf/dask_cudf/_expr/__init__.py | 1 + .../dask_cudf/{ => _expr}/accessors.py | 0 .../_collection.py => _expr/collection.py} | 33 +- python/dask_cudf/dask_cudf/_expr/expr.py | 210 +++++ python/dask_cudf/dask_cudf/_expr/groupby.py | 335 ++++++++ .../dask_cudf/dask_cudf/_legacy/__init__.py | 1 + python/dask_cudf/dask_cudf/_legacy/core.py | 711 ++++++++++++++++ .../dask_cudf/{ => _legacy}/groupby.py | 2 +- .../dask_cudf/_legacy/io/__init__.py | 11 + python/dask_cudf/dask_cudf/_legacy/io/csv.py | 222 +++++ python/dask_cudf/dask_cudf/_legacy/io/json.py | 209 +++++ python/dask_cudf/dask_cudf/_legacy/io/orc.py | 199 +++++ .../dask_cudf/dask_cudf/_legacy/io/parquet.py | 513 ++++++++++++ python/dask_cudf/dask_cudf/_legacy/io/text.py | 54 ++ .../dask_cudf/{ => _legacy}/sorting.py | 0 python/dask_cudf/dask_cudf/backends.py | 29 +- python/dask_cudf/dask_cudf/core.py | 760 +----------------- python/dask_cudf/dask_cudf/expr/__init__.py | 25 - python/dask_cudf/dask_cudf/expr/_expr.py | 511 ------------ python/dask_cudf/dask_cudf/expr/_groupby.py | 123 --- python/dask_cudf/dask_cudf/io/__init__.py | 39 +- python/dask_cudf/dask_cudf/io/csv.py | 226 +----- python/dask_cudf/dask_cudf/io/json.py | 213 +---- python/dask_cudf/dask_cudf/io/orc.py | 212 +---- python/dask_cudf/dask_cudf/io/parquet.py | 594 +++----------- .../dask_cudf/dask_cudf/io/tests/test_csv.py | 15 + .../dask_cudf/dask_cudf/io/tests/test_json.py | 15 + .../dask_cudf/dask_cudf/io/tests/test_orc.py | 18 + .../dask_cudf/io/tests/test_parquet.py | 39 +- .../dask_cudf/dask_cudf/io/tests/test_text.py | 12 + python/dask_cudf/dask_cudf/io/text.py | 58 +- python/dask_cudf/dask_cudf/tests/test_core.py | 24 - .../dask_cudf/dask_cudf/tests/test_groupby.py | 2 +- python/dask_cudf/dask_cudf/tests/utils.py | 2 +- 35 files changed, 2795 insertions(+), 2674 deletions(-) create mode 100644 python/dask_cudf/dask_cudf/_expr/__init__.py rename python/dask_cudf/dask_cudf/{ => _expr}/accessors.py (100%) rename python/dask_cudf/dask_cudf/{expr/_collection.py => _expr/collection.py} (88%) create mode 100644 python/dask_cudf/dask_cudf/_expr/expr.py create mode 100644 python/dask_cudf/dask_cudf/_expr/groupby.py create mode 100644 python/dask_cudf/dask_cudf/_legacy/__init__.py create mode 100644 python/dask_cudf/dask_cudf/_legacy/core.py rename python/dask_cudf/dask_cudf/{ => _legacy}/groupby.py (99%) create mode 100644 python/dask_cudf/dask_cudf/_legacy/io/__init__.py create mode 100644 python/dask_cudf/dask_cudf/_legacy/io/csv.py create mode 100644 python/dask_cudf/dask_cudf/_legacy/io/json.py create mode 100644 python/dask_cudf/dask_cudf/_legacy/io/orc.py create mode 100644 python/dask_cudf/dask_cudf/_legacy/io/parquet.py create mode 100644 
python/dask_cudf/dask_cudf/_legacy/io/text.py rename python/dask_cudf/dask_cudf/{ => _legacy}/sorting.py (100%) delete mode 100644 python/dask_cudf/dask_cudf/expr/__init__.py delete mode 100644 python/dask_cudf/dask_cudf/expr/_expr.py delete mode 100644 python/dask_cudf/dask_cudf/expr/_groupby.py diff --git a/python/dask_cudf/dask_cudf/__init__.py b/python/dask_cudf/dask_cudf/__init__.py index f9df22cc436..cc17e71039a 100644 --- a/python/dask_cudf/dask_cudf/__init__.py +++ b/python/dask_cudf/dask_cudf/__init__.py @@ -1,21 +1,19 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. -from dask import config - -# For dask>2024.2.0, we can silence the loud deprecation -# warning before importing `dask.dataframe` (this won't -# do anything for dask==2024.2.0) -config.set({"dataframe.query-planning-warning": False}) +import warnings +from importlib import import_module -import dask.dataframe as dd # noqa: E402 +from dask import config +import dask.dataframe as dd from dask.dataframe import from_delayed # noqa: E402 import cudf # noqa: E402 from . import backends # noqa: E402, F401 from ._version import __git_commit__, __version__ # noqa: E402, F401 -from .core import concat, from_cudf, from_dask_dataframe # noqa: E402 -from .expr import QUERY_PLANNING_ON # noqa: E402 +from .core import concat, from_cudf, DataFrame, Index, Series # noqa: F401 + +QUERY_PLANNING_ON = dd.DASK_EXPR_ENABLED def read_csv(*args, **kwargs): @@ -38,26 +36,44 @@ def read_parquet(*args, **kwargs): return dd.read_parquet(*args, **kwargs) -def raise_not_implemented_error(attr_name): +def _deprecated_api(old_api, new_api=None, rec=None): def inner_func(*args, **kwargs): + if new_api: + # Use alternative + msg = f"{old_api} is now deprecated. " + msg += rec or f"Please use {new_api} instead." + warnings.warn(msg, FutureWarning) + new_attr = new_api.split(".") + module = import_module(".".join(new_attr[:-1])) + return getattr(module, new_attr[-1])(*args, **kwargs) + + # No alternative - raise an error raise NotImplementedError( - f"Top-level {attr_name} API is not available for dask-expr." + f"{old_api} is no longer supported. " + (rec or "") ) return inner_func if QUERY_PLANNING_ON: - from .expr._collection import DataFrame, Index, Series + from ._expr.expr import _patch_dask_expr + from . import io # noqa: F401 - groupby_agg = raise_not_implemented_error("groupby_agg") + groupby_agg = _deprecated_api("dask_cudf.groupby_agg") read_text = DataFrame.read_text - to_orc = raise_not_implemented_error("to_orc") + _patch_dask_expr() else: - from .core import DataFrame, Index, Series # noqa: F401 - from .groupby import groupby_agg # noqa: F401 - from .io import read_text, to_orc # noqa: F401 + from ._legacy.groupby import groupby_agg # noqa: F401 + from ._legacy.io import read_text # noqa: F401 + from . import io # noqa: F401 + + +to_orc = _deprecated_api( + "dask_cudf.to_orc", + new_api="dask_cudf._legacy.io.to_orc", + rec="Please use DataFrame.to_orc instead.", +) __all__ = [ @@ -65,7 +81,6 @@ def inner_func(*args, **kwargs): "Series", "Index", "from_cudf", - "from_dask_dataframe", "concat", "from_delayed", ] diff --git a/python/dask_cudf/dask_cudf/_expr/__init__.py b/python/dask_cudf/dask_cudf/_expr/__init__.py new file mode 100644 index 00000000000..3c827d4ff59 --- /dev/null +++ b/python/dask_cudf/dask_cudf/_expr/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
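The `_deprecated_api` shim in `dask_cudf/__init__.py` above either forwards a deprecated entry point to its replacement while emitting a `FutureWarning`, or raises `NotImplementedError` when no replacement exists (for example `dask_cudf.groupby_agg` when query planning is enabled). A minimal usage sketch of how that surfaces to users, assuming the refactored layout from this patch and a cuDF-capable GPU environment; the frame contents and the `orc_out` path are placeholders, not part of the patch:

```
import warnings

import cudf
import dask_cudf

# Small illustrative collection (one partition).
ddf = dask_cudf.from_cudf(cudf.DataFrame({"a": [1, 2, 3]}), npartitions=1)

# Deprecated top-level writer: emits a FutureWarning, then forwards the call
# to dask_cudf._legacy.io.to_orc.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    dask_cudf.to_orc(ddf, "orc_out")
assert any(issubclass(w.category, FutureWarning) for w in caught)

# Recommended replacement going forward.
ddf.to_orc("orc_out")
```

Per the PR notes, the legacy IO functions re-exported from `dask_cudf.io` (and submodules such as `dask_cudf.io.parquet`) follow the same pattern when query planning is enabled: they warn and forward rather than fail outright.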
diff --git a/python/dask_cudf/dask_cudf/accessors.py b/python/dask_cudf/dask_cudf/_expr/accessors.py similarity index 100% rename from python/dask_cudf/dask_cudf/accessors.py rename to python/dask_cudf/dask_cudf/_expr/accessors.py diff --git a/python/dask_cudf/dask_cudf/expr/_collection.py b/python/dask_cudf/dask_cudf/_expr/collection.py similarity index 88% rename from python/dask_cudf/dask_cudf/expr/_collection.py rename to python/dask_cudf/dask_cudf/_expr/collection.py index 907abaa2bfc..fdf7d8630e9 100644 --- a/python/dask_cudf/dask_cudf/expr/_collection.py +++ b/python/dask_cudf/dask_cudf/_expr/collection.py @@ -34,22 +34,6 @@ class CudfFrameBase(FrameBase): - def to_dask_dataframe(self, **kwargs): - """Create a dask.dataframe object from a dask_cudf object - - WARNING: This API is deprecated, and may not work properly. - Please use `*.to_backend("pandas")` to convert the - underlying data to pandas. - """ - - warnings.warn( - "The `to_dask_dataframe` API is now deprecated. " - "Please use `*.to_backend('pandas')` instead.", - FutureWarning, - ) - - return self.to_backend("pandas", **kwargs) - def _prepare_cov_corr(self, min_periods, numeric_only): # Upstream version of this method sets min_periods # to 2 by default (which is not supported by cudf) @@ -94,7 +78,7 @@ def var( def rename_axis( self, mapper=no_default, index=no_default, columns=no_default, axis=0 ): - from dask_cudf.expr._expr import RenameAxisCudf + from dask_cudf._expr.expr import RenameAxisCudf return new_collection( RenameAxisCudf( @@ -136,7 +120,7 @@ def groupby( dropna=None, **kwargs, ): - from dask_cudf.expr._groupby import GroupBy + from dask_cudf._expr.groupby import GroupBy if isinstance(by, FrameBase) and not isinstance(by, DXSeries): raise ValueError( @@ -169,13 +153,16 @@ def groupby( ) def to_orc(self, *args, **kwargs): - return self.to_legacy_dataframe().to_orc(*args, **kwargs) + from dask_cudf._legacy.io import to_orc + + return to_orc(self, *args, **kwargs) + # return self.to_legacy_dataframe().to_orc(*args, **kwargs) @staticmethod def read_text(*args, **kwargs): from dask_expr import from_legacy_dataframe - from dask_cudf.io.text import read_text as legacy_read_text + from dask_cudf._legacy.io.text import read_text as legacy_read_text ddf = legacy_read_text(*args, **kwargs) return from_legacy_dataframe(ddf) @@ -183,19 +170,19 @@ def read_text(*args, **kwargs): class Series(DXSeries, CudfFrameBase): def groupby(self, by, **kwargs): - from dask_cudf.expr._groupby import SeriesGroupBy + from dask_cudf._expr.groupby import SeriesGroupBy return SeriesGroupBy(self, by, **kwargs) @cached_property def list(self): - from dask_cudf.accessors import ListMethods + from dask_cudf._expr.accessors import ListMethods return ListMethods(self) @cached_property def struct(self): - from dask_cudf.accessors import StructMethods + from dask_cudf._expr.accessors import StructMethods return StructMethods(self) diff --git a/python/dask_cudf/dask_cudf/_expr/expr.py b/python/dask_cudf/dask_cudf/_expr/expr.py new file mode 100644 index 00000000000..8b91e53604c --- /dev/null +++ b/python/dask_cudf/dask_cudf/_expr/expr.py @@ -0,0 +1,210 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+import functools + +import dask_expr._shuffle as _shuffle_module +from dask_expr import new_collection +from dask_expr._cumulative import CumulativeBlockwise +from dask_expr._expr import Elemwise, Expr, RenameAxis, VarColumns +from dask_expr._reductions import Reduction, Var + +from dask.dataframe.core import ( + is_dataframe_like, + make_meta, + meta_nonempty, +) +from dask.dataframe.dispatch import is_categorical_dtype +from dask.typing import no_default + +import cudf + +## +## Custom expressions +## + + +class RenameAxisCudf(RenameAxis): + # TODO: Remove this after rename_axis is supported in cudf + # (See: https://github.com/rapidsai/cudf/issues/16895) + @staticmethod + def operation(df, index=no_default, **kwargs): + if index != no_default: + df.index.name = index + return df + raise NotImplementedError( + "Only `index` is supported for the cudf backend" + ) + + +class ToCudfBackend(Elemwise): + # TODO: Inherit from ToBackend when rapids-dask-dependency + # is pinned to dask>=2024.8.1 + _parameters = ["frame", "options"] + _projection_passthrough = True + _filter_passthrough = True + _preserves_partitioning_information = True + + @staticmethod + def operation(df, options): + from dask_cudf.backends import to_cudf_dispatch + + return to_cudf_dispatch(df, **options) + + def _simplify_down(self): + if isinstance( + self.frame._meta, (cudf.DataFrame, cudf.Series, cudf.Index) + ): + # We already have cudf data + return self.frame + + +## +## Custom expression patching +## + + +# This can be removed after cudf#15176 is addressed. +# See: https://github.com/rapidsai/cudf/issues/15176 +class PatchCumulativeBlockwise(CumulativeBlockwise): + @property + def _args(self) -> list: + return self.operands[:1] + + @property + def _kwargs(self) -> dict: + # Must pass axis and skipna as kwargs in cudf + return {"axis": self.axis, "skipna": self.skipna} + + +# The upstream Var code uses `Series.values`, and relies on numpy +# for most of the logic. Unfortunately, cudf -> cupy conversion +# is not supported for data containing null values. Therefore, +# we must implement our own version of Var for now. This logic +# is mostly copied from dask-cudf. 
+ + +class VarCudf(Reduction): + # Uses the parallel version of Welford's online algorithm (Chan '79) + # (http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf) + _parameters = [ + "frame", + "skipna", + "ddof", + "numeric_only", + "split_every", + ] + _defaults = { + "skipna": True, + "ddof": 1, + "numeric_only": False, + "split_every": False, + } + + @functools.cached_property + def _meta(self): + return make_meta( + meta_nonempty(self.frame._meta).var( + skipna=self.skipna, numeric_only=self.numeric_only + ) + ) + + @property + def chunk_kwargs(self): + return dict(skipna=self.skipna, numeric_only=self.numeric_only) + + @property + def combine_kwargs(self): + return {} + + @property + def aggregate_kwargs(self): + return dict(ddof=self.ddof) + + @classmethod + def reduction_chunk(cls, x, skipna=True, numeric_only=False): + kwargs = {"numeric_only": numeric_only} if is_dataframe_like(x) else {} + if skipna or numeric_only: + n = x.count(**kwargs) + kwargs["skipna"] = skipna + avg = x.mean(**kwargs) + else: + # Not skipping nulls, so might as well + # avoid the full `count` operation + n = len(x) + kwargs["skipna"] = skipna + avg = x.sum(**kwargs) / n + if numeric_only: + # Workaround for cudf bug + # (see: https://github.com/rapidsai/cudf/issues/13731) + x = x[n.index] + m2 = ((x - avg) ** 2).sum(**kwargs) + return n, avg, m2 + + @classmethod + def reduction_combine(cls, parts): + n, avg, m2 = parts[0] + for i in range(1, len(parts)): + n_a, avg_a, m2_a = n, avg, m2 + n_b, avg_b, m2_b = parts[i] + n = n_a + n_b + avg = (n_a * avg_a + n_b * avg_b) / n + delta = avg_b - avg_a + m2 = m2_a + m2_b + delta**2 * n_a * n_b / n + return n, avg, m2 + + @classmethod + def reduction_aggregate(cls, vals, ddof=1): + vals = cls.reduction_combine(vals) + n, _, m2 = vals + return m2 / (n - ddof) + + +def _patched_var( + self, + axis=0, + skipna=True, + ddof=1, + numeric_only=False, + split_every=False, +): + if axis == 0: + if hasattr(self._meta, "to_pandas"): + return VarCudf(self, skipna, ddof, numeric_only, split_every) + else: + return Var(self, skipna, ddof, numeric_only, split_every) + elif axis == 1: + return VarColumns(self, skipna, ddof, numeric_only) + else: + raise ValueError(f"axis={axis} not supported. Please specify 0 or 1") + + +# Temporary work-around for missing cudf + categorical support +# See: https://github.com/rapidsai/cudf/issues/11795 +# TODO: Fix RepartitionQuantiles and remove this in cudf>24.06 + +_original_get_divisions = _shuffle_module._get_divisions + + +def _patched_get_divisions(frame, other, *args, **kwargs): + # NOTE: The following two lines contains the "patch" + # (we simply convert the partitioning column to pandas) + if is_categorical_dtype(other._meta.dtype) and hasattr( + other.frame._meta, "to_pandas" + ): + other = new_collection(other).to_backend("pandas")._expr + + # Call "original" function + return _original_get_divisions(frame, other, *args, **kwargs) + + +_PATCHED = False + + +def _patch_dask_expr(): + global _PATCHED + + if not _PATCHED: + CumulativeBlockwise._args = PatchCumulativeBlockwise._args + CumulativeBlockwise._kwargs = PatchCumulativeBlockwise._kwargs + Expr.var = _patched_var + _shuffle_module._get_divisions = _patched_get_divisions + _PATCHED = True diff --git a/python/dask_cudf/dask_cudf/_expr/groupby.py b/python/dask_cudf/dask_cudf/_expr/groupby.py new file mode 100644 index 00000000000..0242fac6e72 --- /dev/null +++ b/python/dask_cudf/dask_cudf/_expr/groupby.py @@ -0,0 +1,335 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+import functools + +import pandas as pd +from dask_expr._collection import new_collection +from dask_expr._groupby import ( + DecomposableGroupbyAggregation, + GroupBy as DXGroupBy, + GroupbyAggregation, + SeriesGroupBy as DXSeriesGroupBy, + SingleAggregation, +) +from dask_expr._util import is_scalar + +from dask.dataframe.core import _concat +from dask.dataframe.groupby import Aggregation + +from cudf.core.groupby.groupby import _deprecate_collect + +## +## Fused groupby aggregations +## + + +def _get_spec_info(gb): + if isinstance(gb.arg, (dict, list)): + aggs = gb.arg.copy() + else: + aggs = gb.arg + + if gb._slice and not isinstance(aggs, dict): + aggs = {gb._slice: aggs} + + gb_cols = gb._by_columns + if isinstance(gb_cols, str): + gb_cols = [gb_cols] + columns = [c for c in gb.frame.columns if c not in gb_cols] + if not isinstance(aggs, dict): + aggs = {col: aggs for col in columns} + + # Assert if our output will have a MultiIndex; this will be the case if + # any value in the `aggs` dict is not a string (i.e. multiple/named + # aggregations per column) + str_cols_out = True + aggs_renames = {} + for col in aggs: + if isinstance(aggs[col], str) or callable(aggs[col]): + aggs[col] = [aggs[col]] + elif isinstance(aggs[col], dict): + str_cols_out = False + col_aggs = [] + for k, v in aggs[col].items(): + aggs_renames[col, v] = k + col_aggs.append(v) + aggs[col] = col_aggs + else: + str_cols_out = False + if col in gb_cols: + columns.append(col) + + return { + "aggs": aggs, + "columns": columns, + "str_cols_out": str_cols_out, + "aggs_renames": aggs_renames, + } + + +def _get_meta(gb): + spec_info = gb.spec_info + gb_cols = gb._by_columns + aggs = spec_info["aggs"].copy() + aggs_renames = spec_info["aggs_renames"] + if spec_info["str_cols_out"]: + # Metadata should use `str` for dict values if that is + # what the user originally specified (column names will + # be str, rather than tuples). 
+ for col in aggs: + aggs[col] = aggs[col][0] + _meta = gb.frame._meta.groupby(gb_cols).agg(aggs) + if aggs_renames: + col_array = [] + agg_array = [] + for col, agg in _meta.columns: + col_array.append(col) + agg_array.append(aggs_renames.get((col, agg), agg)) + _meta.columns = pd.MultiIndex.from_arrays([col_array, agg_array]) + return _meta + + +class DecomposableCudfGroupbyAgg(DecomposableGroupbyAggregation): + sep = "___" + + @functools.cached_property + def spec_info(self): + return _get_spec_info(self) + + @functools.cached_property + def _meta(self): + return _get_meta(self) + + @property + def shuffle_by_index(self): + return False # We always group by column(s) + + @classmethod + def chunk(cls, df, *by, **kwargs): + from dask_cudf._legacy.groupby import _groupby_partition_agg + + return _groupby_partition_agg(df, **kwargs) + + @classmethod + def combine(cls, inputs, **kwargs): + from dask_cudf._legacy.groupby import _tree_node_agg + + return _tree_node_agg(_concat(inputs), **kwargs) + + @classmethod + def aggregate(cls, inputs, **kwargs): + from dask_cudf._legacy.groupby import _finalize_gb_agg + + return _finalize_gb_agg(_concat(inputs), **kwargs) + + @property + def chunk_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + return { + "gb_cols": self._by_columns, + "aggs": self.spec_info["aggs"], + "columns": self.spec_info["columns"], + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + } + + @property + def combine_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + return { + "gb_cols": self._by_columns, + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + } + + @property + def aggregate_kwargs(self) -> dict: + dropna = True if self.dropna is None else self.dropna + final_columns = self._slice or self._meta.columns + return { + "gb_cols": self._by_columns, + "aggs": self.spec_info["aggs"], + "columns": self.spec_info["columns"], + "final_columns": final_columns, + "as_index": True, + "dropna": dropna, + "sort": self.sort, + "sep": self.sep, + "str_cols_out": self.spec_info["str_cols_out"], + "aggs_renames": self.spec_info["aggs_renames"], + } + + +class CudfGroupbyAgg(GroupbyAggregation): + @functools.cached_property + def spec_info(self): + return _get_spec_info(self) + + @functools.cached_property + def _meta(self): + return _get_meta(self) + + def _lower(self): + return DecomposableCudfGroupbyAgg( + self.frame, + self.arg, + self.observed, + self.dropna, + self.split_every, + self.split_out, + self.sort, + self.shuffle_method, + self._slice, + *self.by, + ) + + +def _maybe_get_custom_expr( + gb, + aggs, + split_every=None, + split_out=None, + shuffle_method=None, + **kwargs, +): + from dask_cudf._legacy.groupby import ( + OPTIMIZED_AGGS, + _aggs_optimized, + _redirect_aggs, + ) + + if kwargs: + # Unsupported key-word arguments + return None + + if not hasattr(gb.obj._meta, "to_pandas"): + # Not cuDF-backed data + return None + + _aggs = _redirect_aggs(aggs) + if not _aggs_optimized(_aggs, OPTIMIZED_AGGS): + # One or more aggregations are unsupported + return None + + return CudfGroupbyAgg( + gb.obj.expr, + _aggs, + gb.observed, + gb.dropna, + split_every, + split_out, + gb.sort, + shuffle_method, + gb._slice, + *gb.by, + ) + + +## +## Custom groupby classes +## + + +class ListAgg(SingleAggregation): + @staticmethod + def groupby_chunk(arg): + return arg.agg(list) + + @staticmethod + def groupby_aggregate(arg): + gb = arg.agg(list) + if gb.ndim > 1: + for col in gb.columns: + gb[col] = 
gb[col].list.concat() + return gb + else: + return gb.list.concat() + + +list_aggregation = Aggregation( + name="list", + chunk=ListAgg.groupby_chunk, + agg=ListAgg.groupby_aggregate, +) + + +def _translate_arg(arg): + # Helper function to translate args so that + # they can be processed correctly by upstream + # dask & dask-expr. Right now, the only necessary + # translation is list aggregations. + if isinstance(arg, dict): + return {k: _translate_arg(v) for k, v in arg.items()} + elif isinstance(arg, list): + return [_translate_arg(x) for x in arg] + elif arg in ("collect", "list", list): + return list_aggregation + else: + return arg + + +# We define our own GroupBy classes in Dask cuDF for +# the following reasons: +# (1) We want to use a custom `aggregate` algorithm +# that performs multiple aggregations on the +# same dataframe partition at once. The upstream +# algorithm breaks distinct aggregations into +# separate tasks. +# (2) We need to work around missing `observed=False` +# support: +# https://github.com/rapidsai/cudf/issues/15173 + + +class GroupBy(DXGroupBy): + def __init__(self, *args, observed=None, **kwargs): + observed = observed if observed is not None else True + super().__init__(*args, observed=observed, **kwargs) + + def __getitem__(self, key): + if is_scalar(key): + return SeriesGroupBy( + self.obj, + by=self.by, + slice=key, + sort=self.sort, + dropna=self.dropna, + observed=self.observed, + ) + g = GroupBy( + self.obj, + by=self.by, + slice=key, + sort=self.sort, + dropna=self.dropna, + observed=self.observed, + group_keys=self.group_keys, + ) + return g + + def collect(self, **kwargs): + _deprecate_collect() + return self._single_agg(ListAgg, **kwargs) + + def aggregate(self, arg, fused=True, **kwargs): + if ( + fused + and (expr := _maybe_get_custom_expr(self, arg, **kwargs)) + is not None + ): + return new_collection(expr) + else: + return super().aggregate(_translate_arg(arg), **kwargs) + + +class SeriesGroupBy(DXSeriesGroupBy): + def __init__(self, *args, observed=None, **kwargs): + observed = observed if observed is not None else True + super().__init__(*args, observed=observed, **kwargs) + + def collect(self, **kwargs): + _deprecate_collect() + return self._single_agg(ListAgg, **kwargs) + + def aggregate(self, arg, **kwargs): + return super().aggregate(_translate_arg(arg), **kwargs) diff --git a/python/dask_cudf/dask_cudf/_legacy/__init__.py b/python/dask_cudf/dask_cudf/_legacy/__init__.py new file mode 100644 index 00000000000..3c827d4ff59 --- /dev/null +++ b/python/dask_cudf/dask_cudf/_legacy/__init__.py @@ -0,0 +1 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. diff --git a/python/dask_cudf/dask_cudf/_legacy/core.py b/python/dask_cudf/dask_cudf/_legacy/core.py new file mode 100644 index 00000000000..d6beb775a5e --- /dev/null +++ b/python/dask_cudf/dask_cudf/_legacy/core.py @@ -0,0 +1,711 @@ +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
+ +import math +import warnings + +import numpy as np +import pandas as pd +from tlz import partition_all + +from dask import dataframe as dd +from dask.base import normalize_token, tokenize +from dask.dataframe.core import ( + Scalar, + handle_out, + make_meta as dask_make_meta, + map_partitions, +) +from dask.dataframe.utils import raise_on_meta_error +from dask.highlevelgraph import HighLevelGraph +from dask.utils import M, OperatorMethodMixin, apply, derived_from, funcname + +import cudf +from cudf import _lib as libcudf +from cudf.utils.performance_tracking import _dask_cudf_performance_tracking + +from dask_cudf._expr.accessors import ListMethods, StructMethods +from dask_cudf._legacy import sorting +from dask_cudf._legacy.sorting import ( + _deprecate_shuffle_kwarg, + _get_shuffle_method, +) + + +class _Frame(dd.core._Frame, OperatorMethodMixin): + """Superclass for DataFrame and Series + + Parameters + ---------- + dsk : dict + The dask graph to compute this DataFrame + name : str + The key prefix that specifies which keys in the dask comprise this + particular DataFrame / Series + meta : cudf.DataFrame, cudf.Series, or cudf.Index + An empty cudf object with names, dtypes, and indices matching the + expected output. + divisions : tuple of index values + Values along which we partition our blocks on the index + """ + + def _is_partition_type(self, meta): + return isinstance(meta, self._partition_type) + + def __repr__(self): + s = "" + return s % (type(self).__name__, len(self.dask), self.npartitions) + + +normalize_token.register(_Frame, lambda a: a._name) + + +class DataFrame(_Frame, dd.core.DataFrame): + """ + A distributed Dask DataFrame where the backing dataframe is a + :class:`cuDF DataFrame `. + + Typically you would not construct this object directly, but rather + use one of Dask-cuDF's IO routines. + + Most operations on :doc:`Dask DataFrames ` are + supported, with many of the same caveats. 
+ + """ + + _partition_type = cudf.DataFrame + + @_dask_cudf_performance_tracking + def _assign_column(self, k, v): + def assigner(df, k, v): + out = df.copy() + out[k] = v + return out + + meta = assigner(self._meta, k, dask_make_meta(v)) + return self.map_partitions(assigner, k, v, meta=meta) + + @_dask_cudf_performance_tracking + def apply_rows(self, func, incols, outcols, kwargs=None, cache_key=None): + import uuid + + if kwargs is None: + kwargs = {} + + if cache_key is None: + cache_key = uuid.uuid4() + + def do_apply_rows(df, func, incols, outcols, kwargs): + return df.apply_rows( + func, incols, outcols, kwargs, cache_key=cache_key + ) + + meta = do_apply_rows(self._meta, func, incols, outcols, kwargs) + return self.map_partitions( + do_apply_rows, func, incols, outcols, kwargs, meta=meta + ) + + @_deprecate_shuffle_kwarg + @_dask_cudf_performance_tracking + def merge(self, other, shuffle_method=None, **kwargs): + on = kwargs.pop("on", None) + if isinstance(on, tuple): + on = list(on) + return super().merge( + other, + on=on, + shuffle_method=_get_shuffle_method(shuffle_method), + **kwargs, + ) + + @_deprecate_shuffle_kwarg + @_dask_cudf_performance_tracking + def join(self, other, shuffle_method=None, **kwargs): + # CuDF doesn't support "right" join yet + how = kwargs.pop("how", "left") + if how == "right": + return other.join(other=self, how="left", **kwargs) + + on = kwargs.pop("on", None) + if isinstance(on, tuple): + on = list(on) + return super().join( + other, + how=how, + on=on, + shuffle_method=_get_shuffle_method(shuffle_method), + **kwargs, + ) + + @_deprecate_shuffle_kwarg + @_dask_cudf_performance_tracking + def set_index( + self, + other, + sorted=False, + divisions=None, + shuffle_method=None, + **kwargs, + ): + pre_sorted = sorted + del sorted + + if divisions == "quantile": + warnings.warn( + "Using divisions='quantile' is now deprecated. 
" + "Please raise an issue on github if you believe " + "this feature is necessary.", + FutureWarning, + ) + + if ( + divisions == "quantile" + or isinstance(divisions, (cudf.DataFrame, cudf.Series)) + or ( + isinstance(other, str) + and cudf.api.types.is_string_dtype(self[other].dtype) + ) + ): + # Let upstream-dask handle "pre-sorted" case + if pre_sorted: + return dd.shuffle.set_sorted_index( + self, other, divisions=divisions, **kwargs + ) + + by = other + if not isinstance(other, list): + by = [by] + if len(by) > 1: + raise ValueError("Dask does not support MultiIndex (yet).") + if divisions == "quantile": + divisions = None + + # Use dask_cudf's sort_values + df = self.sort_values( + by, + max_branch=kwargs.get("max_branch", None), + divisions=divisions, + set_divisions=True, + ignore_index=True, + shuffle_method=shuffle_method, + ) + + # Ignore divisions if its a dataframe + if isinstance(divisions, cudf.DataFrame): + divisions = None + + # Set index and repartition + df2 = df.map_partitions( + sorting.set_index_post, + index_name=other, + drop=kwargs.get("drop", True), + column_dtype=df.columns.dtype, + ) + npartitions = kwargs.get("npartitions", self.npartitions) + partition_size = kwargs.get("partition_size", None) + if partition_size: + return df2.repartition(partition_size=partition_size) + if not divisions and df2.npartitions != npartitions: + return df2.repartition(npartitions=npartitions) + if divisions and df2.npartitions != len(divisions) - 1: + return df2.repartition(divisions=divisions) + return df2 + + return super().set_index( + other, + sorted=pre_sorted, + shuffle_method=_get_shuffle_method(shuffle_method), + divisions=divisions, + **kwargs, + ) + + @_deprecate_shuffle_kwarg + @_dask_cudf_performance_tracking + def sort_values( + self, + by, + ignore_index=False, + max_branch=None, + divisions=None, + set_divisions=False, + ascending=True, + na_position="last", + sort_function=None, + sort_function_kwargs=None, + shuffle_method=None, + **kwargs, + ): + if kwargs: + raise ValueError( + f"Unsupported input arguments passed : {list(kwargs.keys())}" + ) + + df = sorting.sort_values( + self, + by, + max_branch=max_branch, + divisions=divisions, + set_divisions=set_divisions, + ignore_index=ignore_index, + ascending=ascending, + na_position=na_position, + shuffle_method=shuffle_method, + sort_function=sort_function, + sort_function_kwargs=sort_function_kwargs, + ) + + if ignore_index: + return df.reset_index(drop=True) + return df + + @_dask_cudf_performance_tracking + def to_parquet(self, path, *args, **kwargs): + """Calls dask.dataframe.io.to_parquet with CudfEngine backend""" + from dask_cudf._legacy.io import to_parquet + + return to_parquet(self, path, *args, **kwargs) + + @_dask_cudf_performance_tracking + def to_orc(self, path, **kwargs): + """Calls dask_cudf._legacy.io.to_orc""" + from dask_cudf._legacy.io import to_orc + + return to_orc(self, path, **kwargs) + + @derived_from(pd.DataFrame) + @_dask_cudf_performance_tracking + def var( + self, + axis=None, + skipna=True, + ddof=1, + split_every=False, + dtype=None, + out=None, + naive=False, + numeric_only=False, + ): + axis = self._validate_axis(axis) + meta = self._meta_nonempty.var( + axis=axis, skipna=skipna, numeric_only=numeric_only + ) + if axis == 1: + result = map_partitions( + M.var, + self, + meta=meta, + token=self._token_prefix + "var", + axis=axis, + skipna=skipna, + ddof=ddof, + numeric_only=numeric_only, + ) + return handle_out(out, result) + elif naive: + return _naive_var(self, meta, skipna, ddof, 
split_every, out) + else: + return _parallel_var(self, meta, skipna, split_every, out) + + @_deprecate_shuffle_kwarg + @_dask_cudf_performance_tracking + def shuffle(self, *args, shuffle_method=None, **kwargs): + """Wraps dask.dataframe DataFrame.shuffle method""" + return super().shuffle( + *args, shuffle_method=_get_shuffle_method(shuffle_method), **kwargs + ) + + @_dask_cudf_performance_tracking + def groupby(self, by=None, **kwargs): + from .groupby import CudfDataFrameGroupBy + + return CudfDataFrameGroupBy(self, by=by, **kwargs) + + +@_dask_cudf_performance_tracking +def sum_of_squares(x): + x = x.astype("f8")._column + outcol = libcudf.reduce.reduce("sum_of_squares", x) + return cudf.Series._from_column(outcol) + + +@_dask_cudf_performance_tracking +def var_aggregate(x2, x, n, ddof): + try: + with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + result = (x2 / n) - (x / n) ** 2 + if ddof != 0: + result = result * n / (n - ddof) + return result + except ZeroDivisionError: + return np.float64(np.nan) + + +@_dask_cudf_performance_tracking +def nlargest_agg(x, **kwargs): + return cudf.concat(x).nlargest(**kwargs) + + +@_dask_cudf_performance_tracking +def nsmallest_agg(x, **kwargs): + return cudf.concat(x).nsmallest(**kwargs) + + +class Series(_Frame, dd.core.Series): + _partition_type = cudf.Series + + @_dask_cudf_performance_tracking + def count(self, split_every=False): + return reduction( + [self], + chunk=M.count, + aggregate=np.sum, + split_every=split_every, + meta="i8", + ) + + @_dask_cudf_performance_tracking + def mean(self, split_every=False): + sum = self.sum(split_every=split_every) + n = self.count(split_every=split_every) + return sum / n + + @derived_from(pd.DataFrame) + @_dask_cudf_performance_tracking + def var( + self, + axis=None, + skipna=True, + ddof=1, + split_every=False, + dtype=None, + out=None, + naive=False, + ): + axis = self._validate_axis(axis) + meta = self._meta_nonempty.var(axis=axis, skipna=skipna) + if axis == 1: + result = map_partitions( + M.var, + self, + meta=meta, + token=self._token_prefix + "var", + axis=axis, + skipna=skipna, + ddof=ddof, + ) + return handle_out(out, result) + elif naive: + return _naive_var(self, meta, skipna, ddof, split_every, out) + else: + return _parallel_var(self, meta, skipna, split_every, out) + + @_dask_cudf_performance_tracking + def groupby(self, *args, **kwargs): + from .groupby import CudfSeriesGroupBy + + return CudfSeriesGroupBy(self, *args, **kwargs) + + @property # type: ignore + @_dask_cudf_performance_tracking + def list(self): + return ListMethods(self) + + @property # type: ignore + @_dask_cudf_performance_tracking + def struct(self): + return StructMethods(self) + + +class Index(Series, dd.core.Index): + _partition_type = cudf.Index # type: ignore + + +@_dask_cudf_performance_tracking +def _naive_var(ddf, meta, skipna, ddof, split_every, out): + num = ddf._get_numeric_data() + x = 1.0 * num.sum(skipna=skipna, split_every=split_every) + x2 = 1.0 * (num**2).sum(skipna=skipna, split_every=split_every) + n = num.count(split_every=split_every) + name = ddf._token_prefix + "var" + result = map_partitions( + var_aggregate, x2, x, n, token=name, meta=meta, ddof=ddof + ) + if isinstance(ddf, DataFrame): + result.divisions = (min(ddf.columns), max(ddf.columns)) + return handle_out(out, result) + + +@_dask_cudf_performance_tracking +def _parallel_var(ddf, meta, skipna, split_every, out): + def _local_var(x, skipna): + if skipna: + n = x.count() + avg = x.mean(skipna=skipna) + else: + # Not 
skipping nulls, so might as well + # avoid the full `count` operation + n = len(x) + avg = x.sum(skipna=skipna) / n + m2 = ((x - avg) ** 2).sum(skipna=skipna) + return n, avg, m2 + + def _aggregate_var(parts): + n, avg, m2 = parts[0] + for i in range(1, len(parts)): + n_a, avg_a, m2_a = n, avg, m2 + n_b, avg_b, m2_b = parts[i] + n = n_a + n_b + avg = (n_a * avg_a + n_b * avg_b) / n + delta = avg_b - avg_a + m2 = m2_a + m2_b + delta**2 * n_a * n_b / n + return n, avg, m2 + + def _finalize_var(vals): + n, _, m2 = vals + return m2 / (n - 1) + + # Build graph + nparts = ddf.npartitions + if not split_every: + split_every = nparts + name = "var-" + tokenize(skipna, split_every, out) + local_name = "local-" + name + num = ddf._get_numeric_data() + dsk = { + (local_name, n, 0): (_local_var, (num._name, n), skipna) + for n in range(nparts) + } + + # Use reduction tree + widths = [nparts] + while nparts > 1: + nparts = math.ceil(nparts / split_every) + widths.append(nparts) + height = len(widths) + for depth in range(1, height): + for group in range(widths[depth]): + p_max = widths[depth - 1] + lstart = split_every * group + lstop = min(lstart + split_every, p_max) + node_list = [ + (local_name, p, depth - 1) for p in range(lstart, lstop) + ] + dsk[(local_name, group, depth)] = (_aggregate_var, node_list) + if height == 1: + group = depth = 0 + dsk[(name, 0)] = (_finalize_var, (local_name, group, depth)) + + graph = HighLevelGraph.from_collections(name, dsk, dependencies=[num, ddf]) + result = dd.core.new_dd_object(graph, name, meta, (None, None)) + if isinstance(ddf, DataFrame): + result.divisions = (min(ddf.columns), max(ddf.columns)) + return handle_out(out, result) + + +@_dask_cudf_performance_tracking +def _extract_meta(x): + """ + Extract internal cache data (``_meta``) from dask_cudf objects + """ + if isinstance(x, (Scalar, _Frame)): + return x._meta + elif isinstance(x, list): + return [_extract_meta(_x) for _x in x] + elif isinstance(x, tuple): + return tuple(_extract_meta(_x) for _x in x) + elif isinstance(x, dict): + return {k: _extract_meta(v) for k, v in x.items()} + return x + + +@_dask_cudf_performance_tracking +def _emulate(func, *args, **kwargs): + """ + Apply a function using args / kwargs. If arguments contain dd.DataFrame / + dd.Series, using internal cache (``_meta``) for calculation + """ + with raise_on_meta_error(funcname(func)): + return func(*_extract_meta(args), **_extract_meta(kwargs)) + + +@_dask_cudf_performance_tracking +def align_partitions(args): + """Align partitions between dask_cudf objects. + + Note that if all divisions are unknown, but have equal npartitions, then + they will be passed through unchanged. + """ + dfs = [df for df in args if isinstance(df, _Frame)] + if not dfs: + return args + + divisions = dfs[0].divisions + if not all(df.divisions == divisions for df in dfs): + raise NotImplementedError("Aligning mismatched partitions") + return args + + +@_dask_cudf_performance_tracking +def reduction( + args, + chunk=None, + aggregate=None, + combine=None, + meta=None, + token=None, + chunk_kwargs=None, + aggregate_kwargs=None, + combine_kwargs=None, + split_every=None, + **kwargs, +): + """Generic tree reduction operation. + + Parameters + ---------- + args : + Positional arguments for the `chunk` function. All `dask.dataframe` + objects should be partitioned and indexed equivalently. 
+ chunk : function [block-per-arg] -> block + Function to operate on each block of data + aggregate : function list-of-blocks -> block + Function to operate on the list of results of chunk + combine : function list-of-blocks -> block, optional + Function to operate on intermediate lists of results of chunk + in a tree-reduction. If not provided, defaults to aggregate. + $META + token : str, optional + The name to use for the output keys. + chunk_kwargs : dict, optional + Keywords for the chunk function only. + aggregate_kwargs : dict, optional + Keywords for the aggregate function only. + combine_kwargs : dict, optional + Keywords for the combine function only. + split_every : int, optional + Group partitions into groups of this size while performing a + tree-reduction. If set to False, no tree-reduction will be used, + and all intermediates will be concatenated and passed to ``aggregate``. + Default is 8. + kwargs : + All remaining keywords will be passed to ``chunk``, ``aggregate``, and + ``combine``. + """ + if chunk_kwargs is None: + chunk_kwargs = dict() + if aggregate_kwargs is None: + aggregate_kwargs = dict() + chunk_kwargs.update(kwargs) + aggregate_kwargs.update(kwargs) + + if combine is None: + if combine_kwargs: + raise ValueError("`combine_kwargs` provided with no `combine`") + combine = aggregate + combine_kwargs = aggregate_kwargs + else: + if combine_kwargs is None: + combine_kwargs = dict() + combine_kwargs.update(kwargs) + + if not isinstance(args, (tuple, list)): + args = [args] + + npartitions = {arg.npartitions for arg in args if isinstance(arg, _Frame)} + if len(npartitions) > 1: + raise ValueError("All arguments must have same number of partitions") + npartitions = npartitions.pop() + + if split_every is None: + split_every = 8 + elif split_every is False: + split_every = npartitions + elif split_every < 2 or not isinstance(split_every, int): + raise ValueError("split_every must be an integer >= 2") + + token_key = tokenize( + token or (chunk, aggregate), + meta, + args, + chunk_kwargs, + aggregate_kwargs, + combine_kwargs, + split_every, + ) + + # Chunk + a = f"{token or funcname(chunk)}-chunk-{token_key}" + if len(args) == 1 and isinstance(args[0], _Frame) and not chunk_kwargs: + dsk = { + (a, 0, i): (chunk, key) + for i, key in enumerate(args[0].__dask_keys__()) + } + else: + dsk = { + (a, 0, i): ( + apply, + chunk, + [(x._name, i) if isinstance(x, _Frame) else x for x in args], + chunk_kwargs, + ) + for i in range(args[0].npartitions) + } + + # Combine + b = f"{token or funcname(combine)}-combine-{token_key}" + k = npartitions + depth = 0 + while k > split_every: + for part_i, inds in enumerate(partition_all(split_every, range(k))): + conc = (list, [(a, depth, i) for i in inds]) + dsk[(b, depth + 1, part_i)] = ( + (apply, combine, [conc], combine_kwargs) + if combine_kwargs + else (combine, conc) + ) + k = part_i + 1 + a = b + depth += 1 + + # Aggregate + b = f"{token or funcname(aggregate)}-agg-{token_key}" + conc = (list, [(a, depth, i) for i in range(k)]) + if aggregate_kwargs: + dsk[(b, 0)] = (apply, aggregate, [conc], aggregate_kwargs) + else: + dsk[(b, 0)] = (aggregate, conc) + + if meta is None: + meta_chunk = _emulate(apply, chunk, args, chunk_kwargs) + meta = _emulate(apply, aggregate, [[meta_chunk]], aggregate_kwargs) + meta = dask_make_meta(meta) + + graph = HighLevelGraph.from_collections(b, dsk, dependencies=args) + return dd.core.new_dd_object(graph, b, meta, (None, None)) + + +for name in ( + "add", + "sub", + "mul", + "truediv", + "floordiv", + 
"mod", + "pow", + "radd", + "rsub", + "rmul", + "rtruediv", + "rfloordiv", + "rmod", + "rpow", +): + meth = getattr(cudf.DataFrame, name) + DataFrame._bind_operator_method(name, meth, original=cudf.Series) + + meth = getattr(cudf.Series, name) + Series._bind_operator_method(name, meth, original=cudf.Series) + +for name in ("lt", "gt", "le", "ge", "ne", "eq"): + meth = getattr(cudf.Series, name) + Series._bind_comparison_method(name, meth, original=cudf.Series) diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/_legacy/groupby.py similarity index 99% rename from python/dask_cudf/dask_cudf/groupby.py rename to python/dask_cudf/dask_cudf/_legacy/groupby.py index bbbcde17b51..7e01e91476d 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/_legacy/groupby.py @@ -18,7 +18,7 @@ from cudf.core.groupby.groupby import _deprecate_collect from cudf.utils.performance_tracking import _dask_cudf_performance_tracking -from dask_cudf.sorting import _deprecate_shuffle_kwarg +from dask_cudf._legacy.sorting import _deprecate_shuffle_kwarg # aggregations that are dask-cudf optimized OPTIMIZED_AGGS = ( diff --git a/python/dask_cudf/dask_cudf/_legacy/io/__init__.py b/python/dask_cudf/dask_cudf/_legacy/io/__init__.py new file mode 100644 index 00000000000..0421bd755f4 --- /dev/null +++ b/python/dask_cudf/dask_cudf/_legacy/io/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2018-2024, NVIDIA CORPORATION. + +from .csv import read_csv # noqa: F401 +from .json import read_json # noqa: F401 +from .orc import read_orc, to_orc # noqa: F401 +from .text import read_text # noqa: F401 + +try: + from .parquet import read_parquet, to_parquet # noqa: F401 +except ImportError: + pass diff --git a/python/dask_cudf/dask_cudf/_legacy/io/csv.py b/python/dask_cudf/dask_cudf/_legacy/io/csv.py new file mode 100644 index 00000000000..fa5400344f9 --- /dev/null +++ b/python/dask_cudf/dask_cudf/_legacy/io/csv.py @@ -0,0 +1,222 @@ +# Copyright (c) 2020-2023, NVIDIA CORPORATION. + +import os +from glob import glob +from warnings import warn + +from fsspec.utils import infer_compression + +from dask import dataframe as dd +from dask.base import tokenize +from dask.dataframe.io.csv import make_reader +from dask.utils import apply, parse_bytes + +import cudf + + +def read_csv(path, blocksize="default", **kwargs): + """ + Read CSV files into a :class:`.DataFrame`. + + This API parallelizes the :func:`cudf:cudf.read_csv` function in + the following ways: + + It supports loading many files at once using globstrings: + + >>> import dask_cudf + >>> df = dask_cudf.read_csv("myfiles.*.csv") + + In some cases it can break up large files: + + >>> df = dask_cudf.read_csv("largefile.csv", blocksize="256 MiB") + + It can read CSV files from external resources (e.g. S3, HTTP, FTP) + + >>> df = dask_cudf.read_csv("s3://bucket/myfiles.*.csv") + >>> df = dask_cudf.read_csv("https://www.mycloud.com/sample.csv") + + Internally ``read_csv`` uses :func:`cudf:cudf.read_csv` and + supports many of the same keyword arguments with the same + performance guarantees. See the docstring for + :func:`cudf:cudf.read_csv` for more information on available + keyword arguments. + + Parameters + ---------- + path : str, path object, or file-like object + Either a path to a file (a str, :py:class:`pathlib.Path`, or + py._path.local.LocalPath), URL (including http, ftp, and S3 + locations), or any object with a read() method (such as + builtin :py:func:`open` file handler function or + :py:class:`~io.StringIO`). 
+ blocksize : int or str, default "256 MiB" + The target task partition size. If ``None``, a single block + is used for each file. + **kwargs : dict + Passthrough key-word arguments that are sent to + :func:`cudf:cudf.read_csv`. + + Notes + ----- + If any of `skipfooter`/`skiprows`/`nrows` are passed, + `blocksize` will default to None. + + Examples + -------- + >>> import dask_cudf + >>> ddf = dask_cudf.read_csv("sample.csv", usecols=["a", "b"]) + >>> ddf.compute() + a b + 0 1 hi + 1 2 hello + 2 3 ai + + """ + + # Handle `chunksize` deprecation + if "chunksize" in kwargs: + chunksize = kwargs.pop("chunksize", "default") + warn( + "`chunksize` is deprecated and will be removed in the future. " + "Please use `blocksize` instead.", + FutureWarning, + ) + if blocksize == "default": + blocksize = chunksize + + # Set default `blocksize` + if blocksize == "default": + if ( + kwargs.get("skipfooter", 0) != 0 + or kwargs.get("skiprows", 0) != 0 + or kwargs.get("nrows", None) is not None + ): + # Cannot read in blocks if skipfooter, + # skiprows or nrows is passed. + blocksize = None + else: + blocksize = "256 MiB" + + if "://" in str(path): + func = make_reader(cudf.read_csv, "read_csv", "CSV") + return func(path, blocksize=blocksize, **kwargs) + else: + return _internal_read_csv(path=path, blocksize=blocksize, **kwargs) + + +def _internal_read_csv(path, blocksize="256 MiB", **kwargs): + if isinstance(blocksize, str): + blocksize = parse_bytes(blocksize) + + if isinstance(path, list): + filenames = path + elif isinstance(path, str): + filenames = sorted(glob(path)) + elif hasattr(path, "__fspath__"): + filenames = sorted(glob(path.__fspath__())) + else: + raise TypeError(f"Path type not understood:{type(path)}") + + if not filenames: + msg = f"A file in: {filenames} does not exist." 
+ raise FileNotFoundError(msg) + + name = "read-csv-" + tokenize( + path, tokenize, **kwargs + ) # TODO: get last modified time + + compression = kwargs.get("compression", "infer") + + if compression == "infer": + # Infer compression from first path by default + compression = infer_compression(filenames[0]) + + if compression and blocksize: + # compressed CSVs reading must read the entire file + kwargs.pop("byte_range", None) + warn( + "Warning %s compression does not support breaking apart files\n" + "Please ensure that each individual file can fit in memory and\n" + "use the keyword ``blocksize=None to remove this message``\n" + "Setting ``blocksize=(size of file)``" % compression + ) + blocksize = None + + if blocksize is None: + return read_csv_without_blocksize(path, **kwargs) + + # Let dask.dataframe generate meta + dask_reader = make_reader(cudf.read_csv, "read_csv", "CSV") + kwargs1 = kwargs.copy() + usecols = kwargs1.pop("usecols", None) + dtype = kwargs1.pop("dtype", None) + meta = dask_reader(filenames[0], **kwargs1)._meta + names = meta.columns + if usecols or dtype: + # Regenerate meta with original kwargs if + # `usecols` or `dtype` was specified + meta = dask_reader(filenames[0], **kwargs)._meta + + dsk = {} + i = 0 + dtypes = meta.dtypes.values + + for fn in filenames: + size = os.path.getsize(fn) + for start in range(0, size, blocksize): + kwargs2 = kwargs.copy() + kwargs2["byte_range"] = ( + start, + blocksize, + ) # specify which chunk of the file we care about + if start != 0: + kwargs2["names"] = names # no header in the middle of the file + kwargs2["header"] = None + dsk[(name, i)] = (apply, _read_csv, [fn, dtypes], kwargs2) + + i += 1 + + divisions = [None] * (len(dsk) + 1) + return dd.core.new_dd_object(dsk, name, meta, divisions) + + +def _read_csv(fn, dtypes=None, **kwargs): + return cudf.read_csv(fn, **kwargs) + + +def read_csv_without_blocksize(path, **kwargs): + """Read entire CSV with optional compression (gzip/zip) + + Parameters + ---------- + path : str + path to files (support for glob) + """ + if isinstance(path, list): + filenames = path + elif isinstance(path, str): + filenames = sorted(glob(path)) + elif hasattr(path, "__fspath__"): + filenames = sorted(glob(path.__fspath__())) + else: + raise TypeError(f"Path type not understood:{type(path)}") + + name = "read-csv-" + tokenize(path, **kwargs) + + meta_kwargs = kwargs.copy() + if "skipfooter" in meta_kwargs: + meta_kwargs.pop("skipfooter") + if "nrows" in meta_kwargs: + meta_kwargs.pop("nrows") + # Read "head" of first file (first 5 rows). + # Convert to empty df for metadata. + meta = cudf.read_csv(filenames[0], nrows=5, **meta_kwargs).iloc[:0] + + graph = { + (name, i): (apply, cudf.read_csv, [fn], kwargs) + for i, fn in enumerate(filenames) + } + + divisions = [None] * (len(filenames) + 1) + + return dd.core.new_dd_object(graph, name, meta, divisions) diff --git a/python/dask_cudf/dask_cudf/_legacy/io/json.py b/python/dask_cudf/dask_cudf/_legacy/io/json.py new file mode 100644 index 00000000000..98c5ceedb76 --- /dev/null +++ b/python/dask_cudf/dask_cudf/_legacy/io/json.py @@ -0,0 +1,209 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
+ +from functools import partial + +import numpy as np +from fsspec.core import get_compression, get_fs_token_paths + +import dask +from dask.utils import parse_bytes + +import cudf +from cudf.core.column import as_column +from cudf.utils.ioutils import _is_local_filesystem + +from dask_cudf.backends import _default_backend + + +def _read_json_partition( + paths, + fs=None, + include_path_column=False, + path_converter=None, + **kwargs, +): + # Transfer all data up front for remote storage + sources = ( + paths + if fs is None + else fs.cat_ranges( + paths, + [0] * len(paths), + fs.sizes(paths), + ) + ) + + if include_path_column: + # Add "path" column. + # Must iterate over sources sequentially + if not isinstance(include_path_column, str): + include_path_column = "path" + converted_paths = ( + paths + if path_converter is None + else [path_converter(path) for path in paths] + ) + dfs = [] + for i, source in enumerate(sources): + df = cudf.read_json(source, **kwargs) + df[include_path_column] = as_column( + converted_paths[i], length=len(df) + ) + dfs.append(df) + return cudf.concat(dfs) + else: + # Pass sources directly to cudf + return cudf.read_json(sources, **kwargs) + + +def read_json( + url_path, + engine="auto", + blocksize=None, + orient="records", + lines=None, + compression="infer", + aggregate_files=True, + **kwargs, +): + """Read JSON data into a :class:`.DataFrame`. + + This function wraps :func:`dask.dataframe.read_json`, and passes + ``engine=partial(cudf.read_json, engine="auto")`` by default. + + Parameters + ---------- + url_path : str, list of str + Location to read from. If a string, can include a glob character to + find a set of file names. + Supports protocol specifications such as ``"s3://"``. + engine : str or Callable, default "auto" + + If str, this value will be used as the ``engine`` argument + when :func:`cudf.read_json` is used to create each partition. + If a :obj:`~collections.abc.Callable`, this value will be used as the + underlying function used to create each partition from JSON + data. The default value is "auto", so that + ``engine=partial(cudf.read_json, engine="auto")`` will be + passed to :func:`dask.dataframe.read_json` by default. + aggregate_files : bool or int + Whether to map multiple files to each output partition. If True, + the `blocksize` argument will be used to determine the number of + files in each partition. If any one file is larger than `blocksize`, + the `aggregate_files` argument will be ignored. If an integer value + is specified, the `blocksize` argument will be ignored, and that + number of files will be mapped to each partition. Default is True. + **kwargs : + Key-word arguments to pass through to :func:`dask.dataframe.read_json`. + + Returns + ------- + :class:`.DataFrame` + + Examples + -------- + Load single file + + >>> from dask_cudf import read_json + >>> read_json('myfile.json') # doctest: +SKIP + + Load large line-delimited JSON files using partitions of approx + 256MB size + + >>> read_json('data/file*.csv', blocksize=2**28) # doctest: +SKIP + + Load nested JSON data + + >>> read_json('myfile.json') # doctest: +SKIP + + See Also + -------- + dask.dataframe.read_json + + """ + + if lines is None: + lines = orient == "records" + if orient != "records" and lines: + raise ValueError( + 'Line-delimited JSON is only available with orient="records".' + ) + if blocksize and (orient != "records" or not lines): + raise ValueError( + "JSON file chunking only allowed for JSON-lines" + "input (orient='records', lines=True)." 
+ ) + + inputs = [] + if aggregate_files and blocksize or int(aggregate_files) > 1: + # Attempt custom read if we are mapping multiple files + # to each output partition. Otherwise, upstream logic + # is sufficient. + + storage_options = kwargs.get("storage_options", {}) + fs, _, paths = get_fs_token_paths( + url_path, mode="rb", storage_options=storage_options + ) + if isinstance(aggregate_files, int) and aggregate_files > 1: + # Map a static file count to each partition + inputs = [ + paths[offset : offset + aggregate_files] + for offset in range(0, len(paths), aggregate_files) + ] + elif aggregate_files is True and blocksize: + # Map files dynamically (using blocksize) + file_sizes = fs.sizes(paths) # NOTE: This can be slow + blocksize = parse_bytes(blocksize) + if all([file_size <= blocksize for file_size in file_sizes]): + counts = np.unique( + np.floor(np.cumsum(file_sizes) / blocksize), + return_counts=True, + )[1] + offsets = np.concatenate([[0], counts.cumsum()]) + inputs = [ + paths[offsets[i] : offsets[i + 1]] + for i in range(len(offsets) - 1) + ] + + if inputs: + # Inputs were successfully populated. + # Use custom _read_json_partition function + # to generate each partition. + + compression = get_compression( + url_path[0] if isinstance(url_path, list) else url_path, + compression, + ) + _kwargs = dict( + orient=orient, + lines=lines, + compression=compression, + include_path_column=kwargs.get("include_path_column", False), + path_converter=kwargs.get("path_converter"), + ) + if not _is_local_filesystem(fs): + _kwargs["fs"] = fs + # TODO: Generate meta more efficiently + meta = _read_json_partition(inputs[0][:1], **_kwargs) + return dask.dataframe.from_map( + _read_json_partition, + inputs, + meta=meta, + **_kwargs, + ) + + # Fall back to dask.dataframe.read_json + return _default_backend( + dask.dataframe.read_json, + url_path, + engine=( + partial(cudf.read_json, engine=engine) + if isinstance(engine, str) + else engine + ), + blocksize=blocksize, + orient=orient, + lines=lines, + compression=compression, + **kwargs, + ) diff --git a/python/dask_cudf/dask_cudf/_legacy/io/orc.py b/python/dask_cudf/dask_cudf/_legacy/io/orc.py new file mode 100644 index 00000000000..bed69f038b0 --- /dev/null +++ b/python/dask_cudf/dask_cudf/_legacy/io/orc.py @@ -0,0 +1,199 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. + +from io import BufferedWriter, IOBase + +from fsspec.core import get_fs_token_paths +from fsspec.utils import stringify_path +from pyarrow import orc as orc + +from dask import dataframe as dd +from dask.base import tokenize +from dask.dataframe.io.utils import _get_pyarrow_dtypes + +import cudf + + +def _read_orc_stripe(fs, path, stripe, columns, kwargs=None): + """Pull out specific columns from specific stripe""" + if kwargs is None: + kwargs = {} + with fs.open(path, "rb") as f: + df_stripe = cudf.read_orc( + f, stripes=[stripe], columns=columns, **kwargs + ) + return df_stripe + + +def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): + """Read ORC files into a :class:`.DataFrame`. + + Note that this function is mostly borrowed from upstream Dask. + + Parameters + ---------- + path : str or list[str] + Location of file(s), which can be a full URL with protocol specifier, + and may include glob character if a single string. + columns : None or list[str] + Columns to load. If None, loads all. 
+ filters : None or list of tuple or list of lists of tuples + If not None, specifies a filter predicate used to filter out + row groups using statistics stored for each row group as + Parquet metadata. Row groups that do not match the given + filter predicate are not read. The predicate is expressed in + `disjunctive normal form (DNF) + `__ + like ``[[('x', '=', 0), ...], ...]``. DNF allows arbitrary + boolean logical combinations of single column predicates. The + innermost tuples each describe a single column predicate. The + list of inner predicates is interpreted as a conjunction + (AND), forming a more selective and multiple column predicate. + Finally, the outermost list combines these filters as a + disjunction (OR). Predicates may also be passed as a list of + tuples. This form is interpreted as a single conjunction. To + express OR in predicates, one must use the (preferred) + notation of list of lists of tuples. + storage_options : None or dict + Further parameters to pass to the bytes backend. + + See Also + -------- + dask.dataframe.read_orc + + Returns + ------- + dask_cudf.DataFrame + + """ + + storage_options = storage_options or {} + fs, fs_token, paths = get_fs_token_paths( + path, mode="rb", storage_options=storage_options + ) + schema = None + nstripes_per_file = [] + for path in paths: + with fs.open(path, "rb") as f: + o = orc.ORCFile(f) + if schema is None: + schema = o.schema + elif schema != o.schema: + raise ValueError( + "Incompatible schemas while parsing ORC files" + ) + nstripes_per_file.append(o.nstripes) + schema = _get_pyarrow_dtypes(schema, categories=None) + if columns is not None: + ex = set(columns) - set(schema) + if ex: + raise ValueError( + f"Requested columns ({ex}) not in schema ({set(schema)})" + ) + else: + columns = list(schema) + + with fs.open(paths[0], "rb") as f: + meta = cudf.read_orc( + f, + stripes=[0] if nstripes_per_file[0] else None, + columns=columns, + **kwargs, + ) + + name = "read-orc-" + tokenize(fs_token, path, columns, filters, **kwargs) + dsk = {} + N = 0 + for path, n in zip(paths, nstripes_per_file): + for stripe in ( + range(n) + if filters is None + else cudf.io.orc._filter_stripes(filters, path) + ): + dsk[(name, N)] = ( + _read_orc_stripe, + fs, + path, + stripe, + columns, + kwargs, + ) + N += 1 + + divisions = [None] * (len(dsk) + 1) + return dd.core.new_dd_object(dsk, name, meta, divisions) + + +def write_orc_partition(df, path, fs, filename, compression="snappy"): + full_path = fs.sep.join([path, filename]) + with fs.open(full_path, mode="wb") as out_file: + if not isinstance(out_file, IOBase): + out_file = BufferedWriter(out_file) + cudf.io.to_orc(df, out_file, compression=compression) + return full_path + + +def to_orc( + df, + path, + write_index=True, + storage_options=None, + compression="snappy", + compute=True, + **kwargs, +): + """ + Write a :class:`.DataFrame` to ORC file(s) (one file per partition). + + Parameters + ---------- + df : DataFrame + path : str or pathlib.Path + Destination directory for data. Prepend with protocol like ``s3://`` + or ``hdfs://`` for remote data. + write_index : boolean, optional + Whether or not to write the index. Defaults to True. + storage_options : None or dict + Further parameters to pass to the bytes backend. + compression : string or dict, optional + compute : bool, optional + If True (default) then the result is computed immediately. If + False then a :class:`~dask.delayed.Delayed` object is returned + for future computation. 
+ + """ + + from dask import compute as dask_compute, delayed + + # TODO: Use upstream dask implementation once available + # (see: Dask Issue#5596) + + if hasattr(path, "name"): + path = stringify_path(path) + fs, _, _ = get_fs_token_paths( + path, mode="wb", storage_options=storage_options + ) + # Trim any protocol information from the path before forwarding + path = fs._strip_protocol(path) + + if write_index: + df = df.reset_index() + else: + # Not writing index - might as well drop it + df = df.reset_index(drop=True) + + fs.mkdirs(path, exist_ok=True) + + # Use i_offset and df.npartitions to define file-name list + filenames = ["part.%i.orc" % i for i in range(df.npartitions)] + + # write parts + dwrite = delayed(write_orc_partition) + parts = [ + dwrite(d, path, fs, filename, compression=compression) + for d, filename in zip(df.to_delayed(), filenames) + ] + + if compute: + return dask_compute(*parts) + + return delayed(list)(parts) diff --git a/python/dask_cudf/dask_cudf/_legacy/io/parquet.py b/python/dask_cudf/dask_cudf/_legacy/io/parquet.py new file mode 100644 index 00000000000..39ac6474958 --- /dev/null +++ b/python/dask_cudf/dask_cudf/_legacy/io/parquet.py @@ -0,0 +1,513 @@ +# Copyright (c) 2019-2024, NVIDIA CORPORATION. +import itertools +import warnings +from functools import partial +from io import BufferedWriter, BytesIO, IOBase + +import numpy as np +import pandas as pd +from pyarrow import dataset as pa_ds, parquet as pq + +from dask import dataframe as dd +from dask.dataframe.io.parquet.arrow import ArrowDatasetEngine + +try: + from dask.dataframe.io.parquet import ( + create_metadata_file as create_metadata_file_dd, + ) +except ImportError: + create_metadata_file_dd = None + +import cudf +from cudf.core.column import CategoricalColumn, as_column +from cudf.io import write_to_dataset +from cudf.io.parquet import _apply_post_filters, _normalize_filters +from cudf.utils.dtypes import cudf_dtype_from_pa_type + + +class CudfEngine(ArrowDatasetEngine): + @classmethod + def _create_dd_meta(cls, dataset_info, **kwargs): + # Start with pandas-version of meta + meta_pd = super()._create_dd_meta(dataset_info, **kwargs) + + # Convert to cudf + # (drop unsupported timezone information) + for k, v in meta_pd.dtypes.items(): + if isinstance(v, pd.DatetimeTZDtype) and v.tz is not None: + meta_pd[k] = meta_pd[k].dt.tz_localize(None) + meta_cudf = cudf.from_pandas(meta_pd) + + # Re-set "object" dtypes to align with pa schema + kwargs = dataset_info.get("kwargs", {}) + set_object_dtypes_from_pa_schema( + meta_cudf, + kwargs.get("schema", None), + ) + + return meta_cudf + + @classmethod + def multi_support(cls): + # Assert that this class is CudfEngine + # and that multi-part reading is supported + return cls == CudfEngine + + @classmethod + def _read_paths( + cls, + paths, + fs, + columns=None, + row_groups=None, + filters=None, + partitions=None, + partitioning=None, + partition_keys=None, + open_file_options=None, + dataset_kwargs=None, + **kwargs, + ): + # Simplify row_groups if all None + if row_groups == [None for path in paths]: + row_groups = None + + # Make sure we read in the columns needed for row-wise + # filtering after IO. This means that one or more columns + # will be dropped almost immediately after IO. However, + # we do NEED these columns for accurate filtering. 
+ filters = _normalize_filters(filters) + projected_columns = None + if columns and filters: + projected_columns = [c for c in columns if c is not None] + columns = sorted( + set(v[0] for v in itertools.chain.from_iterable(filters)) + | set(projected_columns) + ) + + dataset_kwargs = dataset_kwargs or {} + dataset_kwargs["partitioning"] = partitioning or "hive" + + # Use cudf to read in data + try: + df = cudf.read_parquet( + paths, + engine="cudf", + columns=columns, + row_groups=row_groups if row_groups else None, + dataset_kwargs=dataset_kwargs, + categorical_partitions=False, + filesystem=fs, + **kwargs, + ) + except RuntimeError as err: + # TODO: Remove try/except after null-schema issue is resolved + # (See: https://github.com/rapidsai/cudf/issues/12702) + if len(paths) > 1: + df = cudf.concat( + [ + cudf.read_parquet( + path, + engine="cudf", + columns=columns, + row_groups=row_groups[i] if row_groups else None, + dataset_kwargs=dataset_kwargs, + categorical_partitions=False, + filesystem=fs, + **kwargs, + ) + for i, path in enumerate(paths) + ] + ) + else: + raise err + + # Apply filters (if any are defined) + df = _apply_post_filters(df, filters) + + if projected_columns: + # Elements of `projected_columns` may now be in the index. + # We must filter these names from our projection + projected_columns = [ + col for col in projected_columns if col in df._column_names + ] + df = df[projected_columns] + + if partitions and partition_keys is None: + # Use `HivePartitioning` by default + ds = pa_ds.dataset( + paths, + filesystem=fs, + **dataset_kwargs, + ) + frag = next(ds.get_fragments()) + if frag: + # Extract hive-partition keys, and make sure they + # are ordered the same as they are in `partitions` + raw_keys = pa_ds._get_partition_keys(frag.partition_expression) + partition_keys = [ + (hive_part.name, raw_keys[hive_part.name]) + for hive_part in partitions + ] + + if partition_keys: + if partitions is None: + raise ValueError("Must pass partition sets") + + for i, (name, index2) in enumerate(partition_keys): + if len(partitions[i].keys): + # Build a categorical column from `codes` directly + # (since the category is often a larger dtype) + codes = as_column( + partitions[i].keys.get_loc(index2), + length=len(df), + ) + df[name] = CategoricalColumn( + data=None, + size=codes.size, + dtype=cudf.CategoricalDtype( + categories=partitions[i].keys, ordered=False + ), + offset=codes.offset, + children=(codes,), + ) + elif name not in df.columns: + # Add non-categorical partition column + df[name] = as_column(index2, length=len(df)) + + return df + + @classmethod + def read_partition( + cls, + fs, + pieces, + columns, + index, + categories=(), + partitions=(), + filters=None, + partitioning=None, + schema=None, + open_file_options=None, + **kwargs, + ): + if columns is not None: + columns = [c for c in columns] + if isinstance(index, list): + columns += index + + dataset_kwargs = kwargs.get("dataset", {}) + partitioning = partitioning or dataset_kwargs.get("partitioning", None) + if isinstance(partitioning, dict): + partitioning = pa_ds.partitioning(**partitioning) + + # Check if we are actually selecting any columns + read_columns = columns + if schema and columns: + ignored = set(schema.names) - set(columns) + if not ignored: + read_columns = None + + if not isinstance(pieces, list): + pieces = [pieces] + + # Extract supported kwargs from `kwargs` + read_kwargs = kwargs.get("read", {}) + read_kwargs.update(open_file_options or {}) + check_file_size = read_kwargs.pop("check_file_size", 
None) + + # Wrap reading logic in a `try` block so that we can + # inform the user that the `read_parquet` partition + # size is too large for the available memory + try: + # Assume multi-piece read + paths = [] + rgs = [] + last_partition_keys = None + dfs = [] + + for i, piece in enumerate(pieces): + (path, row_group, partition_keys) = piece + row_group = None if row_group == [None] else row_group + + # File-size check to help "protect" users from change + # to up-stream `split_row_groups` default. We only + # check the file size if this partition corresponds + # to a full file, and `check_file_size` is defined + if check_file_size and len(pieces) == 1 and row_group is None: + file_size = fs.size(path) + if file_size > check_file_size: + warnings.warn( + f"A large parquet file ({file_size}B) is being " + f"used to create a DataFrame partition in " + f"read_parquet. This may cause out of memory " + f"exceptions in operations downstream. See the " + f"notes on split_row_groups in the read_parquet " + f"documentation. Setting split_row_groups " + f"explicitly will silence this warning." + ) + + if i > 0 and partition_keys != last_partition_keys: + dfs.append( + cls._read_paths( + paths, + fs, + columns=read_columns, + row_groups=rgs if rgs else None, + filters=filters, + partitions=partitions, + partitioning=partitioning, + partition_keys=last_partition_keys, + dataset_kwargs=dataset_kwargs, + **read_kwargs, + ) + ) + paths = [] + rgs = [] + last_partition_keys = None + paths.append(path) + rgs.append( + [row_group] + if not isinstance(row_group, list) + and row_group is not None + else row_group + ) + last_partition_keys = partition_keys + + dfs.append( + cls._read_paths( + paths, + fs, + columns=read_columns, + row_groups=rgs if rgs else None, + filters=filters, + partitions=partitions, + partitioning=partitioning, + partition_keys=last_partition_keys, + dataset_kwargs=dataset_kwargs, + **read_kwargs, + ) + ) + df = cudf.concat(dfs) if len(dfs) > 1 else dfs[0] + + # Re-set "object" dtypes align with pa schema + set_object_dtypes_from_pa_schema(df, schema) + + if index and (index[0] in df.columns): + df = df.set_index(index[0]) + elif index is False and df.index.names != [None]: + # If index=False, we shouldn't have a named index + df.reset_index(inplace=True) + + except MemoryError as err: + raise MemoryError( + "Parquet data was larger than the available GPU memory!\n\n" + "See the notes on split_row_groups in the read_parquet " + "documentation.\n\n" + "Original Error: " + str(err) + ) + raise err + + return df + + @staticmethod + def write_partition( + df, + path, + fs, + filename, + partition_on, + return_metadata, + fmd=None, + compression="snappy", + index_cols=None, + **kwargs, + ): + preserve_index = False + if len(index_cols) and set(index_cols).issubset(set(df.columns)): + df.set_index(index_cols, drop=True, inplace=True) + preserve_index = True + if partition_on: + md = write_to_dataset( + df=df, + root_path=path, + compression=compression, + filename=filename, + partition_cols=partition_on, + fs=fs, + preserve_index=preserve_index, + return_metadata=return_metadata, + statistics=kwargs.get("statistics", "ROWGROUP"), + int96_timestamps=kwargs.get("int96_timestamps", False), + row_group_size_bytes=kwargs.get("row_group_size_bytes", None), + row_group_size_rows=kwargs.get("row_group_size_rows", None), + max_page_size_bytes=kwargs.get("max_page_size_bytes", None), + max_page_size_rows=kwargs.get("max_page_size_rows", None), + storage_options=kwargs.get("storage_options", None), + 
) + else: + with fs.open(fs.sep.join([path, filename]), mode="wb") as out_file: + if not isinstance(out_file, IOBase): + out_file = BufferedWriter(out_file) + md = df.to_parquet( + path=out_file, + engine=kwargs.get("engine", "cudf"), + index=kwargs.get("index", None), + partition_cols=kwargs.get("partition_cols", None), + partition_file_name=kwargs.get( + "partition_file_name", None + ), + partition_offsets=kwargs.get("partition_offsets", None), + statistics=kwargs.get("statistics", "ROWGROUP"), + int96_timestamps=kwargs.get("int96_timestamps", False), + row_group_size_bytes=kwargs.get( + "row_group_size_bytes", None + ), + row_group_size_rows=kwargs.get( + "row_group_size_rows", None + ), + storage_options=kwargs.get("storage_options", None), + metadata_file_path=filename if return_metadata else None, + ) + # Return the schema needed to write the metadata + if return_metadata: + return [{"meta": md}] + else: + return [] + + @staticmethod + def write_metadata(parts, fmd, fs, path, append=False, **kwargs): + if parts: + # Aggregate metadata and write to _metadata file + metadata_path = fs.sep.join([path, "_metadata"]) + _meta = [] + if append and fmd is not None: + # Convert to bytes: + if isinstance(fmd, pq.FileMetaData): + with BytesIO() as myio: + fmd.write_metadata_file(myio) + myio.seek(0) + fmd = np.frombuffer(myio.read(), dtype="uint8") + _meta = [fmd] + _meta.extend([parts[i][0]["meta"] for i in range(len(parts))]) + _meta = ( + cudf.io.merge_parquet_filemetadata(_meta) + if len(_meta) > 1 + else _meta[0] + ) + with fs.open(metadata_path, "wb") as fil: + fil.write(memoryview(_meta)) + + @classmethod + def collect_file_metadata(cls, path, fs, file_path): + with fs.open(path, "rb") as f: + meta = pq.ParquetFile(f).metadata + if file_path: + meta.set_file_path(file_path) + with BytesIO() as myio: + meta.write_metadata_file(myio) + myio.seek(0) + meta = np.frombuffer(myio.read(), dtype="uint8") + return meta + + @classmethod + def aggregate_metadata(cls, meta_list, fs, out_path): + meta = ( + cudf.io.merge_parquet_filemetadata(meta_list) + if len(meta_list) > 1 + else meta_list[0] + ) + if out_path: + metadata_path = fs.sep.join([out_path, "_metadata"]) + with fs.open(metadata_path, "wb") as fil: + fil.write(memoryview(meta)) + return None + else: + return meta + + +def set_object_dtypes_from_pa_schema(df, schema): + # Simple utility to modify cudf DataFrame + # "object" dtypes to agree with a specific + # pyarrow schema. + if schema: + for col_name, col in df._data.items(): + if col_name is None: + # Pyarrow cannot handle `None` as a field name. + # However, this should be a simple range index that + # we can ignore anyway + continue + typ = cudf_dtype_from_pa_type(schema.field(col_name).type) + if ( + col_name in schema.names + and not isinstance(typ, (cudf.ListDtype, cudf.StructDtype)) + and isinstance(col, cudf.core.column.StringColumn) + ): + df._data[col_name] = col.astype(typ) + + +def read_parquet(path, columns=None, **kwargs): + """ + Read parquet files into a :class:`.DataFrame`. + + Calls :func:`dask.dataframe.read_parquet` with ``engine=CudfEngine`` + to coordinate the execution of :func:`cudf.read_parquet`, and to + ultimately create a :class:`.DataFrame` collection. + + See the :func:`dask.dataframe.read_parquet` documentation for + all available options. 
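Before the examples below, a hedged sketch of a typical call; the dataset path, column names, and filter value are made up. Projected columns and DNF filters are forwarded to ``CudfEngine``, which reads with ``cudf.read_parquet`` and applies the row-wise filters after IO:

    # Sketch only: path, columns, and filter are illustrative.
    import dask_cudf

    ddf = dask_cudf.read_parquet(
        "/path/to/dataset/",
        columns=["x", "y"],
        filters=[("year", "=", 2024)],
    )
    print(ddf.npartitions)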
+ + Examples + -------- + >>> from dask_cudf import read_parquet + >>> df = read_parquet("/path/to/dataset/") # doctest: +SKIP + + When dealing with one or more large parquet files having an + in-memory footprint >15% device memory, the ``split_row_groups`` + argument should be used to map Parquet **row-groups** to DataFrame + partitions (instead of **files** to partitions). For example, the + following code will map each row-group to a distinct partition: + + >>> df = read_parquet(..., split_row_groups=True) # doctest: +SKIP + + To map **multiple** row-groups to each partition, an integer can be + passed to ``split_row_groups`` to specify the **maximum** number of + row-groups allowed in each output partition: + + >>> df = read_parquet(..., split_row_groups=10) # doctest: +SKIP + + See Also + -------- + cudf.read_parquet + dask.dataframe.read_parquet + """ + if isinstance(columns, str): + columns = [columns] + + # Set "check_file_size" option to determine whether we + # should check the parquet-file size. This check is meant + # to "protect" users from `split_row_groups` default changes + check_file_size = kwargs.pop("check_file_size", 500_000_000) + if ( + check_file_size + and ("split_row_groups" not in kwargs) + and ("chunksize" not in kwargs) + ): + # User is not specifying `split_row_groups` or `chunksize`, + # so we should warn them if/when a file is ~>0.5GB on disk. + # They can set `split_row_groups` explicitly to silence/skip + # this check + if "read" not in kwargs: + kwargs["read"] = {} + kwargs["read"]["check_file_size"] = check_file_size + + return dd.read_parquet(path, columns=columns, engine=CudfEngine, **kwargs) + + +to_parquet = partial(dd.to_parquet, engine=CudfEngine) + +if create_metadata_file_dd is None: + create_metadata_file = create_metadata_file_dd +else: + create_metadata_file = partial(create_metadata_file_dd, engine=CudfEngine) diff --git a/python/dask_cudf/dask_cudf/_legacy/io/text.py b/python/dask_cudf/dask_cudf/_legacy/io/text.py new file mode 100644 index 00000000000..9cdb7c5220b --- /dev/null +++ b/python/dask_cudf/dask_cudf/_legacy/io/text.py @@ -0,0 +1,54 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. + +import os +from glob import glob + +import dask.dataframe as dd +from dask.base import tokenize +from dask.utils import apply, parse_bytes + +import cudf + + +def read_text(path, chunksize="256 MiB", **kwargs): + if isinstance(chunksize, str): + chunksize = parse_bytes(chunksize) + + if isinstance(path, list): + filenames = path + elif isinstance(path, str): + filenames = sorted(glob(path)) + elif hasattr(path, "__fspath__"): + filenames = sorted(glob(path.__fspath__())) + else: + raise TypeError(f"Path type not understood:{type(path)}") + + if not filenames: + msg = f"A file in: {filenames} does not exist." 
+ raise FileNotFoundError(msg) + + name = "read-text-" + tokenize(path, tokenize, **kwargs) + + if chunksize: + dsk = {} + i = 0 + for fn in filenames: + size = os.path.getsize(fn) + for start in range(0, size, chunksize): + kwargs1 = kwargs.copy() + kwargs1["byte_range"] = ( + start, + chunksize, + ) # specify which chunk of the file we care about + + dsk[(name, i)] = (apply, cudf.read_text, [fn], kwargs1) + i += 1 + else: + dsk = { + (name, i): (apply, cudf.read_text, [fn], kwargs) + for i, fn in enumerate(filenames) + } + + meta = cudf.Series([], dtype="O") + divisions = [None] * (len(dsk) + 1) + return dd.core.new_dd_object(dsk, name, meta, divisions) diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/_legacy/sorting.py similarity index 100% rename from python/dask_cudf/dask_cudf/sorting.py rename to python/dask_cudf/dask_cudf/_legacy/sorting.py diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index bead964a0ef..fb02e0ac772 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -46,7 +46,7 @@ from cudf.api.types import is_string_dtype from cudf.utils.performance_tracking import _dask_cudf_performance_tracking -from .core import DataFrame, Index, Series +from ._legacy.core import DataFrame, Index, Series get_parallel_type.register(cudf.DataFrame, lambda _: DataFrame) get_parallel_type.register(cudf.Series, lambda _: Series) @@ -574,7 +574,7 @@ class CudfBackendEntrypoint(DataFrameBackendEntrypoint): >>> with dask.config.set({"dataframe.backend": "cudf"}): ... ddf = dd.from_dict({"a": range(10)}) >>> type(ddf) - + """ @classmethod @@ -610,7 +610,7 @@ def from_dict( @staticmethod def read_parquet(*args, engine=None, **kwargs): - from dask_cudf.io.parquet import CudfEngine + from dask_cudf._legacy.io.parquet import CudfEngine _raise_unsupported_parquet_kwargs(**kwargs) return _default_backend( @@ -622,19 +622,19 @@ def read_parquet(*args, engine=None, **kwargs): @staticmethod def read_json(*args, **kwargs): - from dask_cudf.io.json import read_json + from dask_cudf._legacy.io.json import read_json return read_json(*args, **kwargs) @staticmethod def read_orc(*args, **kwargs): - from dask_cudf.io import read_orc + from dask_cudf._legacy.io import read_orc return read_orc(*args, **kwargs) @staticmethod def read_csv(*args, **kwargs): - from dask_cudf.io import read_csv + from dask_cudf._legacy.io import read_csv return read_csv(*args, **kwargs) @@ -674,7 +674,7 @@ class CudfDXBackendEntrypoint(DataFrameBackendEntrypoint): def to_backend(data, **kwargs): import dask_expr as dx - from dask_cudf.expr._expr import ToCudfBackend + from dask_cudf._expr.expr import ToCudfBackend return dx.new_collection(ToCudfBackend(data, kwargs)) @@ -710,7 +710,7 @@ def read_parquet(path, *args, filesystem="fsspec", engine=None, **kwargs): and filesystem.lower() == "fsspec" ): # Default "fsspec" filesystem - from dask_cudf.io.parquet import CudfEngine + from dask_cudf._legacy.io.parquet import CudfEngine _raise_unsupported_parquet_kwargs(**kwargs) return _default_backend( @@ -736,7 +736,7 @@ def read_parquet(path, *args, filesystem="fsspec", engine=None, **kwargs): from dask.core import flatten from dask.dataframe.utils import pyarrow_strings_enabled - from dask_cudf.expr._expr import CudfReadParquetPyarrowFS + from dask_cudf.io.parquet import CudfReadParquetPyarrowFS if args: raise ValueError(f"Unexpected positional arguments: {args}") @@ -862,7 +862,7 @@ def read_csv( @staticmethod def 
read_json(*args, **kwargs): - from dask_cudf.io.json import read_json as read_json_impl + from dask_cudf._legacy.io.json import read_json as read_json_impl return read_json_impl(*args, **kwargs) @@ -870,14 +870,7 @@ def read_json(*args, **kwargs): def read_orc(*args, **kwargs): from dask_expr import from_legacy_dataframe - from dask_cudf.io.orc import read_orc as legacy_read_orc + from dask_cudf._legacy.io.orc import read_orc as legacy_read_orc ddf = legacy_read_orc(*args, **kwargs) return from_legacy_dataframe(ddf) - - -# Import/register cudf-specific classes for dask-expr -try: - import dask_cudf.expr # noqa: F401 -except ImportError: - pass diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 3181c8d69ec..7d6d5c05cbe 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -1,705 +1,25 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. -import math import textwrap -import warnings -import numpy as np -import pandas as pd -from tlz import partition_all - -from dask import dataframe as dd -from dask.base import normalize_token, tokenize -from dask.dataframe.core import ( - Scalar, - handle_out, - make_meta as dask_make_meta, - map_partitions, -) -from dask.dataframe.utils import raise_on_meta_error -from dask.highlevelgraph import HighLevelGraph -from dask.utils import M, OperatorMethodMixin, apply, derived_from, funcname +import dask.dataframe as dd +from dask.tokenize import tokenize import cudf -from cudf import _lib as libcudf from cudf.utils.performance_tracking import _dask_cudf_performance_tracking -from dask_cudf import sorting -from dask_cudf.accessors import ListMethods, StructMethods -from dask_cudf.sorting import _deprecate_shuffle_kwarg, _get_shuffle_method - - -class _Frame(dd.core._Frame, OperatorMethodMixin): - """Superclass for DataFrame and Series - - Parameters - ---------- - dsk : dict - The dask graph to compute this DataFrame - name : str - The key prefix that specifies which keys in the dask comprise this - particular DataFrame / Series - meta : cudf.DataFrame, cudf.Series, or cudf.Index - An empty cudf object with names, dtypes, and indices matching the - expected output. - divisions : tuple of index values - Values along which we partition our blocks on the index - """ - - def _is_partition_type(self, meta): - return isinstance(meta, self._partition_type) - - def __repr__(self): - s = "" - return s % (type(self).__name__, len(self.dask), self.npartitions) - - @_dask_cudf_performance_tracking - def to_dask_dataframe(self, **kwargs): - """Create a dask.dataframe object from a dask_cudf object - - WARNING: This API is deprecated, and may not work properly - when query-planning is active. Please use `*.to_backend("pandas")` - to convert the underlying data to pandas. - """ - - warnings.warn( - "The `to_dask_dataframe` API is now deprecated. " - "Please use `*.to_backend('pandas')` instead.", - FutureWarning, - ) - - return self.to_backend("pandas", **kwargs) - - -concat = dd.concat - - -normalize_token.register(_Frame, lambda a: a._name) - - -class DataFrame(_Frame, dd.core.DataFrame): - """ - A distributed Dask DataFrame where the backing dataframe is a - :class:`cuDF DataFrame `. - - Typically you would not construct this object directly, but rather - use one of Dask-cuDF's IO routines. - - Most operations on :doc:`Dask DataFrames ` are - supported, with many of the same caveats. 
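A minimal sketch of how such a collection is usually obtained and used; the data below is invented for illustration, and ``from_cudf`` is the non-IO entry point:

    # Sketch only: the frame contents are illustrative.
    import cudf
    import dask_cudf

    gdf = cudf.DataFrame({"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]})
    ddf = dask_cudf.from_cudf(gdf, npartitions=2)

    # Operations compose lazily across partitions; compute() materializes
    # a single cudf object on the client.
    print(ddf.groupby("b").a.sum().compute())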
- - """ - - _partition_type = cudf.DataFrame - - @_dask_cudf_performance_tracking - def _assign_column(self, k, v): - def assigner(df, k, v): - out = df.copy() - out[k] = v - return out - - meta = assigner(self._meta, k, dask_make_meta(v)) - return self.map_partitions(assigner, k, v, meta=meta) - - @_dask_cudf_performance_tracking - def apply_rows(self, func, incols, outcols, kwargs=None, cache_key=None): - import uuid - - if kwargs is None: - kwargs = {} - - if cache_key is None: - cache_key = uuid.uuid4() - - def do_apply_rows(df, func, incols, outcols, kwargs): - return df.apply_rows( - func, incols, outcols, kwargs, cache_key=cache_key - ) - - meta = do_apply_rows(self._meta, func, incols, outcols, kwargs) - return self.map_partitions( - do_apply_rows, func, incols, outcols, kwargs, meta=meta - ) - - @_deprecate_shuffle_kwarg - @_dask_cudf_performance_tracking - def merge(self, other, shuffle_method=None, **kwargs): - on = kwargs.pop("on", None) - if isinstance(on, tuple): - on = list(on) - return super().merge( - other, - on=on, - shuffle_method=_get_shuffle_method(shuffle_method), - **kwargs, - ) - - @_deprecate_shuffle_kwarg - @_dask_cudf_performance_tracking - def join(self, other, shuffle_method=None, **kwargs): - # CuDF doesn't support "right" join yet - how = kwargs.pop("how", "left") - if how == "right": - return other.join(other=self, how="left", **kwargs) - - on = kwargs.pop("on", None) - if isinstance(on, tuple): - on = list(on) - return super().join( - other, - how=how, - on=on, - shuffle_method=_get_shuffle_method(shuffle_method), - **kwargs, - ) - - @_deprecate_shuffle_kwarg - @_dask_cudf_performance_tracking - def set_index( - self, - other, - sorted=False, - divisions=None, - shuffle_method=None, - **kwargs, - ): - pre_sorted = sorted - del sorted - - if divisions == "quantile": - warnings.warn( - "Using divisions='quantile' is now deprecated. 
" - "Please raise an issue on github if you believe " - "this feature is necessary.", - FutureWarning, - ) - - if ( - divisions == "quantile" - or isinstance(divisions, (cudf.DataFrame, cudf.Series)) - or ( - isinstance(other, str) - and cudf.api.types.is_string_dtype(self[other].dtype) - ) - ): - # Let upstream-dask handle "pre-sorted" case - if pre_sorted: - return dd.shuffle.set_sorted_index( - self, other, divisions=divisions, **kwargs - ) - - by = other - if not isinstance(other, list): - by = [by] - if len(by) > 1: - raise ValueError("Dask does not support MultiIndex (yet).") - if divisions == "quantile": - divisions = None - - # Use dask_cudf's sort_values - df = self.sort_values( - by, - max_branch=kwargs.get("max_branch", None), - divisions=divisions, - set_divisions=True, - ignore_index=True, - shuffle_method=shuffle_method, - ) - - # Ignore divisions if its a dataframe - if isinstance(divisions, cudf.DataFrame): - divisions = None - - # Set index and repartition - df2 = df.map_partitions( - sorting.set_index_post, - index_name=other, - drop=kwargs.get("drop", True), - column_dtype=df.columns.dtype, - ) - npartitions = kwargs.get("npartitions", self.npartitions) - partition_size = kwargs.get("partition_size", None) - if partition_size: - return df2.repartition(partition_size=partition_size) - if not divisions and df2.npartitions != npartitions: - return df2.repartition(npartitions=npartitions) - if divisions and df2.npartitions != len(divisions) - 1: - return df2.repartition(divisions=divisions) - return df2 - - return super().set_index( - other, - sorted=pre_sorted, - shuffle_method=_get_shuffle_method(shuffle_method), - divisions=divisions, - **kwargs, - ) - - @_deprecate_shuffle_kwarg - @_dask_cudf_performance_tracking - def sort_values( - self, - by, - ignore_index=False, - max_branch=None, - divisions=None, - set_divisions=False, - ascending=True, - na_position="last", - sort_function=None, - sort_function_kwargs=None, - shuffle_method=None, - **kwargs, - ): - if kwargs: - raise ValueError( - f"Unsupported input arguments passed : {list(kwargs.keys())}" - ) - - df = sorting.sort_values( - self, - by, - max_branch=max_branch, - divisions=divisions, - set_divisions=set_divisions, - ignore_index=ignore_index, - ascending=ascending, - na_position=na_position, - shuffle_method=shuffle_method, - sort_function=sort_function, - sort_function_kwargs=sort_function_kwargs, - ) - - if ignore_index: - return df.reset_index(drop=True) - return df - - @_dask_cudf_performance_tracking - def to_parquet(self, path, *args, **kwargs): - """Calls dask.dataframe.io.to_parquet with CudfEngine backend""" - from dask_cudf.io import to_parquet - - return to_parquet(self, path, *args, **kwargs) - - @_dask_cudf_performance_tracking - def to_orc(self, path, **kwargs): - """Calls dask_cudf.io.to_orc""" - from dask_cudf.io import to_orc - - return to_orc(self, path, **kwargs) - - @derived_from(pd.DataFrame) - @_dask_cudf_performance_tracking - def var( - self, - axis=None, - skipna=True, - ddof=1, - split_every=False, - dtype=None, - out=None, - naive=False, - numeric_only=False, - ): - axis = self._validate_axis(axis) - meta = self._meta_nonempty.var( - axis=axis, skipna=skipna, numeric_only=numeric_only - ) - if axis == 1: - result = map_partitions( - M.var, - self, - meta=meta, - token=self._token_prefix + "var", - axis=axis, - skipna=skipna, - ddof=ddof, - numeric_only=numeric_only, - ) - return handle_out(out, result) - elif naive: - return _naive_var(self, meta, skipna, ddof, split_every, out) - 
else: - return _parallel_var(self, meta, skipna, split_every, out) - - @_deprecate_shuffle_kwarg - @_dask_cudf_performance_tracking - def shuffle(self, *args, shuffle_method=None, **kwargs): - """Wraps dask.dataframe DataFrame.shuffle method""" - return super().shuffle( - *args, shuffle_method=_get_shuffle_method(shuffle_method), **kwargs - ) - - @_dask_cudf_performance_tracking - def groupby(self, by=None, **kwargs): - from .groupby import CudfDataFrameGroupBy - - return CudfDataFrameGroupBy(self, by=by, **kwargs) - - -@_dask_cudf_performance_tracking -def sum_of_squares(x): - x = x.astype("f8")._column - outcol = libcudf.reduce.reduce("sum_of_squares", x) - return cudf.Series._from_column(outcol) - - -@_dask_cudf_performance_tracking -def var_aggregate(x2, x, n, ddof): - try: - with warnings.catch_warnings(record=True): - warnings.simplefilter("always") - result = (x2 / n) - (x / n) ** 2 - if ddof != 0: - result = result * n / (n - ddof) - return result - except ZeroDivisionError: - return np.float64(np.nan) - - -@_dask_cudf_performance_tracking -def nlargest_agg(x, **kwargs): - return cudf.concat(x).nlargest(**kwargs) - - -@_dask_cudf_performance_tracking -def nsmallest_agg(x, **kwargs): - return cudf.concat(x).nsmallest(**kwargs) - - -class Series(_Frame, dd.core.Series): - _partition_type = cudf.Series - - @_dask_cudf_performance_tracking - def count(self, split_every=False): - return reduction( - [self], - chunk=M.count, - aggregate=np.sum, - split_every=split_every, - meta="i8", - ) - - @_dask_cudf_performance_tracking - def mean(self, split_every=False): - sum = self.sum(split_every=split_every) - n = self.count(split_every=split_every) - return sum / n - - @derived_from(pd.DataFrame) - @_dask_cudf_performance_tracking - def var( - self, - axis=None, - skipna=True, - ddof=1, - split_every=False, - dtype=None, - out=None, - naive=False, - ): - axis = self._validate_axis(axis) - meta = self._meta_nonempty.var(axis=axis, skipna=skipna) - if axis == 1: - result = map_partitions( - M.var, - self, - meta=meta, - token=self._token_prefix + "var", - axis=axis, - skipna=skipna, - ddof=ddof, - ) - return handle_out(out, result) - elif naive: - return _naive_var(self, meta, skipna, ddof, split_every, out) - else: - return _parallel_var(self, meta, skipna, split_every, out) - - @_dask_cudf_performance_tracking - def groupby(self, *args, **kwargs): - from .groupby import CudfSeriesGroupBy - - return CudfSeriesGroupBy(self, *args, **kwargs) - - @property # type: ignore - @_dask_cudf_performance_tracking - def list(self): - return ListMethods(self) - - @property # type: ignore - @_dask_cudf_performance_tracking - def struct(self): - return StructMethods(self) - - -class Index(Series, dd.core.Index): - _partition_type = cudf.Index # type: ignore - - -@_dask_cudf_performance_tracking -def _naive_var(ddf, meta, skipna, ddof, split_every, out): - num = ddf._get_numeric_data() - x = 1.0 * num.sum(skipna=skipna, split_every=split_every) - x2 = 1.0 * (num**2).sum(skipna=skipna, split_every=split_every) - n = num.count(split_every=split_every) - name = ddf._token_prefix + "var" - result = map_partitions( - var_aggregate, x2, x, n, token=name, meta=meta, ddof=ddof - ) - if isinstance(ddf, DataFrame): - result.divisions = (min(ddf.columns), max(ddf.columns)) - return handle_out(out, result) - - -@_dask_cudf_performance_tracking -def _parallel_var(ddf, meta, skipna, split_every, out): - def _local_var(x, skipna): - if skipna: - n = x.count() - avg = x.mean(skipna=skipna) - else: - # Not skipping nulls, so 
might as well - # avoid the full `count` operation - n = len(x) - avg = x.sum(skipna=skipna) / n - m2 = ((x - avg) ** 2).sum(skipna=skipna) - return n, avg, m2 - - def _aggregate_var(parts): - n, avg, m2 = parts[0] - for i in range(1, len(parts)): - n_a, avg_a, m2_a = n, avg, m2 - n_b, avg_b, m2_b = parts[i] - n = n_a + n_b - avg = (n_a * avg_a + n_b * avg_b) / n - delta = avg_b - avg_a - m2 = m2_a + m2_b + delta**2 * n_a * n_b / n - return n, avg, m2 - - def _finalize_var(vals): - n, _, m2 = vals - return m2 / (n - 1) - - # Build graph - nparts = ddf.npartitions - if not split_every: - split_every = nparts - name = "var-" + tokenize(skipna, split_every, out) - local_name = "local-" + name - num = ddf._get_numeric_data() - dsk = { - (local_name, n, 0): (_local_var, (num._name, n), skipna) - for n in range(nparts) - } - - # Use reduction tree - widths = [nparts] - while nparts > 1: - nparts = math.ceil(nparts / split_every) - widths.append(nparts) - height = len(widths) - for depth in range(1, height): - for group in range(widths[depth]): - p_max = widths[depth - 1] - lstart = split_every * group - lstop = min(lstart + split_every, p_max) - node_list = [ - (local_name, p, depth - 1) for p in range(lstart, lstop) - ] - dsk[(local_name, group, depth)] = (_aggregate_var, node_list) - if height == 1: - group = depth = 0 - dsk[(name, 0)] = (_finalize_var, (local_name, group, depth)) - - graph = HighLevelGraph.from_collections(name, dsk, dependencies=[num, ddf]) - result = dd.core.new_dd_object(graph, name, meta, (None, None)) - if isinstance(ddf, DataFrame): - result.divisions = (min(ddf.columns), max(ddf.columns)) - return handle_out(out, result) - - -@_dask_cudf_performance_tracking -def _extract_meta(x): - """ - Extract internal cache data (``_meta``) from dask_cudf objects - """ - if isinstance(x, (Scalar, _Frame)): - return x._meta - elif isinstance(x, list): - return [_extract_meta(_x) for _x in x] - elif isinstance(x, tuple): - return tuple(_extract_meta(_x) for _x in x) - elif isinstance(x, dict): - return {k: _extract_meta(v) for k, v in x.items()} - return x - - -@_dask_cudf_performance_tracking -def _emulate(func, *args, **kwargs): - """ - Apply a function using args / kwargs. If arguments contain dd.DataFrame / - dd.Series, using internal cache (``_meta``) for calculation - """ - with raise_on_meta_error(funcname(func)): - return func(*_extract_meta(args), **_extract_meta(kwargs)) - - -@_dask_cudf_performance_tracking -def align_partitions(args): - """Align partitions between dask_cudf objects. - - Note that if all divisions are unknown, but have equal npartitions, then - they will be passed through unchanged. - """ - dfs = [df for df in args if isinstance(df, _Frame)] - if not dfs: - return args - - divisions = dfs[0].divisions - if not all(df.divisions == divisions for df in dfs): - raise NotImplementedError("Aligning mismatched partitions") - return args - - -@_dask_cudf_performance_tracking -def reduction( - args, - chunk=None, - aggregate=None, - combine=None, - meta=None, - token=None, - chunk_kwargs=None, - aggregate_kwargs=None, - combine_kwargs=None, - split_every=None, - **kwargs, -): - """Generic tree reduction operation. - - Parameters - ---------- - args : - Positional arguments for the `chunk` function. All `dask.dataframe` - objects should be partitioned and indexed equivalently. 
- chunk : function [block-per-arg] -> block - Function to operate on each block of data - aggregate : function list-of-blocks -> block - Function to operate on the list of results of chunk - combine : function list-of-blocks -> block, optional - Function to operate on intermediate lists of results of chunk - in a tree-reduction. If not provided, defaults to aggregate. - $META - token : str, optional - The name to use for the output keys. - chunk_kwargs : dict, optional - Keywords for the chunk function only. - aggregate_kwargs : dict, optional - Keywords for the aggregate function only. - combine_kwargs : dict, optional - Keywords for the combine function only. - split_every : int, optional - Group partitions into groups of this size while performing a - tree-reduction. If set to False, no tree-reduction will be used, - and all intermediates will be concatenated and passed to ``aggregate``. - Default is 8. - kwargs : - All remaining keywords will be passed to ``chunk``, ``aggregate``, and - ``combine``. - """ - if chunk_kwargs is None: - chunk_kwargs = dict() - if aggregate_kwargs is None: - aggregate_kwargs = dict() - chunk_kwargs.update(kwargs) - aggregate_kwargs.update(kwargs) - - if combine is None: - if combine_kwargs: - raise ValueError("`combine_kwargs` provided with no `combine`") - combine = aggregate - combine_kwargs = aggregate_kwargs - else: - if combine_kwargs is None: - combine_kwargs = dict() - combine_kwargs.update(kwargs) - - if not isinstance(args, (tuple, list)): - args = [args] - - npartitions = {arg.npartitions for arg in args if isinstance(arg, _Frame)} - if len(npartitions) > 1: - raise ValueError("All arguments must have same number of partitions") - npartitions = npartitions.pop() - - if split_every is None: - split_every = 8 - elif split_every is False: - split_every = npartitions - elif split_every < 2 or not isinstance(split_every, int): - raise ValueError("split_every must be an integer >= 2") - - token_key = tokenize( - token or (chunk, aggregate), - meta, - args, - chunk_kwargs, - aggregate_kwargs, - combine_kwargs, - split_every, +# This module provides backward compatibility for legacy import patterns. 
+if dd.DASK_EXPR_ENABLED: + from dask_cudf._expr.collection import ( # noqa: E402 + DataFrame, + Index, + Series, ) +else: + from dask_cudf._legacy.core import DataFrame, Index, Series # noqa: F401 - # Chunk - a = f"{token or funcname(chunk)}-chunk-{token_key}" - if len(args) == 1 and isinstance(args[0], _Frame) and not chunk_kwargs: - dsk = { - (a, 0, i): (chunk, key) - for i, key in enumerate(args[0].__dask_keys__()) - } - else: - dsk = { - (a, 0, i): ( - apply, - chunk, - [(x._name, i) if isinstance(x, _Frame) else x for x in args], - chunk_kwargs, - ) - for i in range(args[0].npartitions) - } - # Combine - b = f"{token or funcname(combine)}-combine-{token_key}" - k = npartitions - depth = 0 - while k > split_every: - for part_i, inds in enumerate(partition_all(split_every, range(k))): - conc = (list, [(a, depth, i) for i in inds]) - dsk[(b, depth + 1, part_i)] = ( - (apply, combine, [conc], combine_kwargs) - if combine_kwargs - else (combine, conc) - ) - k = part_i + 1 - a = b - depth += 1 - - # Aggregate - b = f"{token or funcname(aggregate)}-agg-{token_key}" - conc = (list, [(a, depth, i) for i in range(k)]) - if aggregate_kwargs: - dsk[(b, 0)] = (apply, aggregate, [conc], aggregate_kwargs) - else: - dsk[(b, 0)] = (aggregate, conc) - - if meta is None: - meta_chunk = _emulate(apply, chunk, args, chunk_kwargs) - meta = _emulate(apply, aggregate, [[meta_chunk]], aggregate_kwargs) - meta = dask_make_meta(meta) - - graph = HighLevelGraph.from_collections(b, dsk, dependencies=args) - return dd.core.new_dd_object(graph, b, meta, (None, None)) +concat = dd.concat # noqa: F401 @_dask_cudf_performance_tracking @@ -744,59 +64,3 @@ def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None): # since dask-expr does not provide a docstring for from_pandas. + textwrap.dedent(dd.from_pandas.__doc__ or "") ) - - -@_dask_cudf_performance_tracking -def from_dask_dataframe(df): - """ - Convert a Dask :class:`dask.dataframe.DataFrame` to a Dask-cuDF - one. - - WARNING: This API is deprecated, and may not work properly - when query-planning is active. Please use `*.to_backend("cudf")` - to convert the underlying data to cudf. - - Parameters - ---------- - df : dask.dataframe.DataFrame - The Dask dataframe to convert - - Returns - ------- - dask_cudf.DataFrame : A new Dask collection backed by cuDF objects - """ - - warnings.warn( - "The `from_dask_dataframe` API is now deprecated. " - "Please use `*.to_backend('cudf')` instead.", - FutureWarning, - ) - - return df.to_backend("cudf") - - -for name in ( - "add", - "sub", - "mul", - "truediv", - "floordiv", - "mod", - "pow", - "radd", - "rsub", - "rmul", - "rtruediv", - "rfloordiv", - "rmod", - "rpow", -): - meth = getattr(cudf.DataFrame, name) - DataFrame._bind_operator_method(name, meth, original=cudf.Series) - - meth = getattr(cudf.Series, name) - Series._bind_operator_method(name, meth, original=cudf.Series) - -for name in ("lt", "gt", "le", "ge", "ne", "eq"): - meth = getattr(cudf.Series, name) - Series._bind_comparison_method(name, meth, original=cudf.Series) diff --git a/python/dask_cudf/dask_cudf/expr/__init__.py b/python/dask_cudf/dask_cudf/expr/__init__.py deleted file mode 100644 index 6dadadd5263..00000000000 --- a/python/dask_cudf/dask_cudf/expr/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -from dask import config - -# Check if dask-dataframe is using dask-expr. 
-# For dask>=2024.3.0, a null value will default to True -QUERY_PLANNING_ON = config.get("dataframe.query-planning", None) is not False - -# Register custom expressions and collections -if QUERY_PLANNING_ON: - # Broadly avoid "p2p" and "disk" defaults for now - config.set({"dataframe.shuffle.method": "tasks"}) - - try: - import dask_cudf.expr._collection # noqa: F401 - import dask_cudf.expr._expr # noqa: F401 - - except ImportError as err: - # Dask *should* raise an error before this. - # However, we can still raise here to be certain. - raise RuntimeError( - "Failed to register the 'cudf' backend for dask-expr." - " Please make sure you have dask-expr installed.\n" - f"Error Message: {err}" - ) diff --git a/python/dask_cudf/dask_cudf/expr/_expr.py b/python/dask_cudf/dask_cudf/expr/_expr.py deleted file mode 100644 index c7cf66fbffd..00000000000 --- a/python/dask_cudf/dask_cudf/expr/_expr.py +++ /dev/null @@ -1,511 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -import functools - -import dask_expr._shuffle as _shuffle_module -import pandas as pd -from dask_expr import new_collection -from dask_expr._cumulative import CumulativeBlockwise -from dask_expr._expr import Elemwise, Expr, RenameAxis, VarColumns -from dask_expr._groupby import ( - DecomposableGroupbyAggregation, - GroupbyAggregation, -) -from dask_expr._reductions import Reduction, Var -from dask_expr.io.io import FusedParquetIO -from dask_expr.io.parquet import FragmentWrapper, ReadParquetPyarrowFS - -from dask.dataframe.core import ( - _concat, - is_dataframe_like, - make_meta, - meta_nonempty, -) -from dask.dataframe.dispatch import is_categorical_dtype -from dask.typing import no_default - -import cudf - -## -## Custom expressions -## - - -def _get_spec_info(gb): - if isinstance(gb.arg, (dict, list)): - aggs = gb.arg.copy() - else: - aggs = gb.arg - - if gb._slice and not isinstance(aggs, dict): - aggs = {gb._slice: aggs} - - gb_cols = gb._by_columns - if isinstance(gb_cols, str): - gb_cols = [gb_cols] - columns = [c for c in gb.frame.columns if c not in gb_cols] - if not isinstance(aggs, dict): - aggs = {col: aggs for col in columns} - - # Assert if our output will have a MultiIndex; this will be the case if - # any value in the `aggs` dict is not a string (i.e. multiple/named - # aggregations per column) - str_cols_out = True - aggs_renames = {} - for col in aggs: - if isinstance(aggs[col], str) or callable(aggs[col]): - aggs[col] = [aggs[col]] - elif isinstance(aggs[col], dict): - str_cols_out = False - col_aggs = [] - for k, v in aggs[col].items(): - aggs_renames[col, v] = k - col_aggs.append(v) - aggs[col] = col_aggs - else: - str_cols_out = False - if col in gb_cols: - columns.append(col) - - return { - "aggs": aggs, - "columns": columns, - "str_cols_out": str_cols_out, - "aggs_renames": aggs_renames, - } - - -def _get_meta(gb): - spec_info = gb.spec_info - gb_cols = gb._by_columns - aggs = spec_info["aggs"].copy() - aggs_renames = spec_info["aggs_renames"] - if spec_info["str_cols_out"]: - # Metadata should use `str` for dict values if that is - # what the user originally specified (column names will - # be str, rather than tuples). 
- for col in aggs: - aggs[col] = aggs[col][0] - _meta = gb.frame._meta.groupby(gb_cols).agg(aggs) - if aggs_renames: - col_array = [] - agg_array = [] - for col, agg in _meta.columns: - col_array.append(col) - agg_array.append(aggs_renames.get((col, agg), agg)) - _meta.columns = pd.MultiIndex.from_arrays([col_array, agg_array]) - return _meta - - -class DecomposableCudfGroupbyAgg(DecomposableGroupbyAggregation): - sep = "___" - - @functools.cached_property - def spec_info(self): - return _get_spec_info(self) - - @functools.cached_property - def _meta(self): - return _get_meta(self) - - @property - def shuffle_by_index(self): - return False # We always group by column(s) - - @classmethod - def chunk(cls, df, *by, **kwargs): - from dask_cudf.groupby import _groupby_partition_agg - - return _groupby_partition_agg(df, **kwargs) - - @classmethod - def combine(cls, inputs, **kwargs): - from dask_cudf.groupby import _tree_node_agg - - return _tree_node_agg(_concat(inputs), **kwargs) - - @classmethod - def aggregate(cls, inputs, **kwargs): - from dask_cudf.groupby import _finalize_gb_agg - - return _finalize_gb_agg(_concat(inputs), **kwargs) - - @property - def chunk_kwargs(self) -> dict: - dropna = True if self.dropna is None else self.dropna - return { - "gb_cols": self._by_columns, - "aggs": self.spec_info["aggs"], - "columns": self.spec_info["columns"], - "dropna": dropna, - "sort": self.sort, - "sep": self.sep, - } - - @property - def combine_kwargs(self) -> dict: - dropna = True if self.dropna is None else self.dropna - return { - "gb_cols": self._by_columns, - "dropna": dropna, - "sort": self.sort, - "sep": self.sep, - } - - @property - def aggregate_kwargs(self) -> dict: - dropna = True if self.dropna is None else self.dropna - final_columns = self._slice or self._meta.columns - return { - "gb_cols": self._by_columns, - "aggs": self.spec_info["aggs"], - "columns": self.spec_info["columns"], - "final_columns": final_columns, - "as_index": True, - "dropna": dropna, - "sort": self.sort, - "sep": self.sep, - "str_cols_out": self.spec_info["str_cols_out"], - "aggs_renames": self.spec_info["aggs_renames"], - } - - -class CudfGroupbyAgg(GroupbyAggregation): - @functools.cached_property - def spec_info(self): - return _get_spec_info(self) - - @functools.cached_property - def _meta(self): - return _get_meta(self) - - def _lower(self): - return DecomposableCudfGroupbyAgg( - self.frame, - self.arg, - self.observed, - self.dropna, - self.split_every, - self.split_out, - self.sort, - self.shuffle_method, - self._slice, - *self.by, - ) - - -def _maybe_get_custom_expr( - gb, - aggs, - split_every=None, - split_out=None, - shuffle_method=None, - **kwargs, -): - from dask_cudf.groupby import ( - OPTIMIZED_AGGS, - _aggs_optimized, - _redirect_aggs, - ) - - if kwargs: - # Unsupported key-word arguments - return None - - if not hasattr(gb.obj._meta, "to_pandas"): - # Not cuDF-backed data - return None - - _aggs = _redirect_aggs(aggs) - if not _aggs_optimized(_aggs, OPTIMIZED_AGGS): - # One or more aggregations are unsupported - return None - - return CudfGroupbyAgg( - gb.obj.expr, - _aggs, - gb.observed, - gb.dropna, - split_every, - split_out, - gb.sort, - shuffle_method, - gb._slice, - *gb.by, - ) - - -class CudfFusedParquetIO(FusedParquetIO): - @staticmethod - def _load_multiple_files( - frag_filters, - columns, - schema, - *to_pandas_args, - ): - import pyarrow as pa - - from dask.base import apply, tokenize - from dask.threaded import get - - token = tokenize(frag_filters, columns, schema) - name = 
f"pq-file-{token}" - dsk = { - (name, i): ( - CudfReadParquetPyarrowFS._fragment_to_table, - frag, - filter, - columns, - schema, - ) - for i, (frag, filter) in enumerate(frag_filters) - } - dsk[name] = ( - apply, - pa.concat_tables, - [list(dsk.keys())], - {"promote_options": "permissive"}, - ) - return CudfReadParquetPyarrowFS._table_to_pandas( - get(dsk, name), - *to_pandas_args, - ) - - -class CudfReadParquetPyarrowFS(ReadParquetPyarrowFS): - @functools.cached_property - def _dataset_info(self): - from dask_cudf.io.parquet import set_object_dtypes_from_pa_schema - - dataset_info = super()._dataset_info - meta_pd = dataset_info["base_meta"] - if isinstance(meta_pd, cudf.DataFrame): - return dataset_info - - # Convert to cudf - # (drop unsupported timezone information) - for k, v in meta_pd.dtypes.items(): - if isinstance(v, pd.DatetimeTZDtype) and v.tz is not None: - meta_pd[k] = meta_pd[k].dt.tz_localize(None) - meta_cudf = cudf.from_pandas(meta_pd) - - # Re-set "object" dtypes to align with pa schema - kwargs = dataset_info.get("kwargs", {}) - set_object_dtypes_from_pa_schema( - meta_cudf, - kwargs.get("schema", None), - ) - - dataset_info["base_meta"] = meta_cudf - self.operands[type(self)._parameters.index("_dataset_info_cache")] = ( - dataset_info - ) - return dataset_info - - @staticmethod - def _table_to_pandas(table, index_name): - df = cudf.DataFrame.from_arrow(table) - if index_name is not None: - df = df.set_index(index_name) - return df - - def _filtered_task(self, index: int): - columns = self.columns.copy() - index_name = self.index.name - if self.index is not None: - index_name = self.index.name - schema = self._dataset_info["schema"].remove_metadata() - if index_name: - if columns is None: - columns = list(schema.names) - columns.append(index_name) - return ( - self._table_to_pandas, - ( - self._fragment_to_table, - FragmentWrapper(self.fragments[index], filesystem=self.fs), - self.filters, - columns, - schema, - ), - index_name, - ) - - def _tune_up(self, parent): - if self._fusion_compression_factor >= 1: - return - if isinstance(parent, CudfFusedParquetIO): - return - return parent.substitute(self, CudfFusedParquetIO(self)) - - -class RenameAxisCudf(RenameAxis): - # TODO: Remove this after rename_axis is supported in cudf - # (See: https://github.com/rapidsai/cudf/issues/16895) - @staticmethod - def operation(df, index=no_default, **kwargs): - if index != no_default: - df.index.name = index - return df - raise NotImplementedError( - "Only `index` is supported for the cudf backend" - ) - - -class ToCudfBackend(Elemwise): - # TODO: Inherit from ToBackend when rapids-dask-dependency - # is pinned to dask>=2024.8.1 - _parameters = ["frame", "options"] - _projection_passthrough = True - _filter_passthrough = True - _preserves_partitioning_information = True - - @staticmethod - def operation(df, options): - from dask_cudf.backends import to_cudf_dispatch - - return to_cudf_dispatch(df, **options) - - def _simplify_down(self): - if isinstance( - self.frame._meta, (cudf.DataFrame, cudf.Series, cudf.Index) - ): - # We already have cudf data - return self.frame - - -## -## Custom expression patching -## - - -# This can be removed after cudf#15176 is addressed. 
-# See: https://github.com/rapidsai/cudf/issues/15176 -class PatchCumulativeBlockwise(CumulativeBlockwise): - @property - def _args(self) -> list: - return self.operands[:1] - - @property - def _kwargs(self) -> dict: - # Must pass axis and skipna as kwargs in cudf - return {"axis": self.axis, "skipna": self.skipna} - - -CumulativeBlockwise._args = PatchCumulativeBlockwise._args -CumulativeBlockwise._kwargs = PatchCumulativeBlockwise._kwargs - - -# The upstream Var code uses `Series.values`, and relies on numpy -# for most of the logic. Unfortunately, cudf -> cupy conversion -# is not supported for data containing null values. Therefore, -# we must implement our own version of Var for now. This logic -# is mostly copied from dask-cudf. - - -class VarCudf(Reduction): - # Uses the parallel version of Welford's online algorithm (Chan '79) - # (http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf) - _parameters = ["frame", "skipna", "ddof", "numeric_only", "split_every"] - _defaults = { - "skipna": True, - "ddof": 1, - "numeric_only": False, - "split_every": False, - } - - @functools.cached_property - def _meta(self): - return make_meta( - meta_nonempty(self.frame._meta).var( - skipna=self.skipna, numeric_only=self.numeric_only - ) - ) - - @property - def chunk_kwargs(self): - return dict(skipna=self.skipna, numeric_only=self.numeric_only) - - @property - def combine_kwargs(self): - return {} - - @property - def aggregate_kwargs(self): - return dict(ddof=self.ddof) - - @classmethod - def reduction_chunk(cls, x, skipna=True, numeric_only=False): - kwargs = {"numeric_only": numeric_only} if is_dataframe_like(x) else {} - if skipna or numeric_only: - n = x.count(**kwargs) - kwargs["skipna"] = skipna - avg = x.mean(**kwargs) - else: - # Not skipping nulls, so might as well - # avoid the full `count` operation - n = len(x) - kwargs["skipna"] = skipna - avg = x.sum(**kwargs) / n - if numeric_only: - # Workaround for cudf bug - # (see: https://github.com/rapidsai/cudf/issues/13731) - x = x[n.index] - m2 = ((x - avg) ** 2).sum(**kwargs) - return n, avg, m2 - - @classmethod - def reduction_combine(cls, parts): - n, avg, m2 = parts[0] - for i in range(1, len(parts)): - n_a, avg_a, m2_a = n, avg, m2 - n_b, avg_b, m2_b = parts[i] - n = n_a + n_b - avg = (n_a * avg_a + n_b * avg_b) / n - delta = avg_b - avg_a - m2 = m2_a + m2_b + delta**2 * n_a * n_b / n - return n, avg, m2 - - @classmethod - def reduction_aggregate(cls, vals, ddof=1): - vals = cls.reduction_combine(vals) - n, _, m2 = vals - return m2 / (n - ddof) - - -def _patched_var( - self, axis=0, skipna=True, ddof=1, numeric_only=False, split_every=False -): - if axis == 0: - if hasattr(self._meta, "to_pandas"): - return VarCudf(self, skipna, ddof, numeric_only, split_every) - else: - return Var(self, skipna, ddof, numeric_only, split_every) - elif axis == 1: - return VarColumns(self, skipna, ddof, numeric_only) - else: - raise ValueError(f"axis={axis} not supported. 
Please specify 0 or 1") - - -Expr.var = _patched_var - - -# Temporary work-around for missing cudf + categorical support -# See: https://github.com/rapidsai/cudf/issues/11795 -# TODO: Fix RepartitionQuantiles and remove this in cudf>24.06 - -_original_get_divisions = _shuffle_module._get_divisions - - -def _patched_get_divisions(frame, other, *args, **kwargs): - # NOTE: The following two lines contains the "patch" - # (we simply convert the partitioning column to pandas) - if is_categorical_dtype(other._meta.dtype) and hasattr( - other.frame._meta, "to_pandas" - ): - other = new_collection(other).to_backend("pandas")._expr - - # Call "original" function - return _original_get_divisions(frame, other, *args, **kwargs) - - -_shuffle_module._get_divisions = _patched_get_divisions diff --git a/python/dask_cudf/dask_cudf/expr/_groupby.py b/python/dask_cudf/dask_cudf/expr/_groupby.py deleted file mode 100644 index 8a16fe7615d..00000000000 --- a/python/dask_cudf/dask_cudf/expr/_groupby.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -from dask_expr._collection import new_collection -from dask_expr._groupby import ( - GroupBy as DXGroupBy, - SeriesGroupBy as DXSeriesGroupBy, - SingleAggregation, -) -from dask_expr._util import is_scalar - -from dask.dataframe.groupby import Aggregation - -from cudf.core.groupby.groupby import _deprecate_collect - -from dask_cudf.expr._expr import _maybe_get_custom_expr - -## -## Custom groupby classes -## - - -class ListAgg(SingleAggregation): - @staticmethod - def groupby_chunk(arg): - return arg.agg(list) - - @staticmethod - def groupby_aggregate(arg): - gb = arg.agg(list) - if gb.ndim > 1: - for col in gb.columns: - gb[col] = gb[col].list.concat() - return gb - else: - return gb.list.concat() - - -list_aggregation = Aggregation( - name="list", - chunk=ListAgg.groupby_chunk, - agg=ListAgg.groupby_aggregate, -) - - -def _translate_arg(arg): - # Helper function to translate args so that - # they can be processed correctly by upstream - # dask & dask-expr. Right now, the only necessary - # translation is list aggregations. - if isinstance(arg, dict): - return {k: _translate_arg(v) for k, v in arg.items()} - elif isinstance(arg, list): - return [_translate_arg(x) for x in arg] - elif arg in ("collect", "list", list): - return list_aggregation - else: - return arg - - -# We define our own GroupBy classes in Dask cuDF for -# the following reasons: -# (1) We want to use a custom `aggregate` algorithm -# that performs multiple aggregations on the -# same dataframe partition at once. The upstream -# algorithm breaks distinct aggregations into -# separate tasks. 
-# (2) We need to work around missing `observed=False` -# support: -# https://github.com/rapidsai/cudf/issues/15173 - - -class GroupBy(DXGroupBy): - def __init__(self, *args, observed=None, **kwargs): - observed = observed if observed is not None else True - super().__init__(*args, observed=observed, **kwargs) - - def __getitem__(self, key): - if is_scalar(key): - return SeriesGroupBy( - self.obj, - by=self.by, - slice=key, - sort=self.sort, - dropna=self.dropna, - observed=self.observed, - ) - g = GroupBy( - self.obj, - by=self.by, - slice=key, - sort=self.sort, - dropna=self.dropna, - observed=self.observed, - group_keys=self.group_keys, - ) - return g - - def collect(self, **kwargs): - _deprecate_collect() - return self._single_agg(ListAgg, **kwargs) - - def aggregate(self, arg, fused=True, **kwargs): - if ( - fused - and (expr := _maybe_get_custom_expr(self, arg, **kwargs)) - is not None - ): - return new_collection(expr) - else: - return super().aggregate(_translate_arg(arg), **kwargs) - - -class SeriesGroupBy(DXSeriesGroupBy): - def __init__(self, *args, observed=None, **kwargs): - observed = observed if observed is not None else True - super().__init__(*args, observed=observed, **kwargs) - - def collect(self, **kwargs): - _deprecate_collect() - return self._single_agg(ListAgg, **kwargs) - - def aggregate(self, arg, **kwargs): - return super().aggregate(_translate_arg(arg), **kwargs) diff --git a/python/dask_cudf/dask_cudf/io/__init__.py b/python/dask_cudf/dask_cudf/io/__init__.py index 0421bd755f4..1e0f24d78ce 100644 --- a/python/dask_cudf/dask_cudf/io/__init__.py +++ b/python/dask_cudf/dask_cudf/io/__init__.py @@ -1,11 +1,32 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. -from .csv import read_csv # noqa: F401 -from .json import read_json # noqa: F401 -from .orc import read_orc, to_orc # noqa: F401 -from .text import read_text # noqa: F401 +from dask_cudf import _deprecated_api -try: - from .parquet import read_parquet, to_parquet # noqa: F401 -except ImportError: - pass +from . import csv, orc, json, parquet, text # noqa: F401 + + +read_csv = _deprecated_api( + "dask_cudf.io.read_csv", new_api="dask_cudf.read_csv" +) +read_json = _deprecated_api( + "dask_cudf.io.read_json", new_api="dask_cudf.read_json" +) +read_orc = _deprecated_api( + "dask_cudf.io.read_orc", new_api="dask_cudf.read_orc" +) +to_orc = _deprecated_api( + "dask_cudf.io.to_orc", + new_api="dask_cudf._legacy.io.to_orc", + rec="Please use the DataFrame.to_orc method instead.", +) +read_text = _deprecated_api( + "dask_cudf.io.read_text", new_api="dask_cudf.read_text" +) +read_parquet = _deprecated_api( + "dask_cudf.io.read_parquet", new_api="dask_cudf.read_parquet" +) +to_parquet = _deprecated_api( + "dask_cudf.io.to_parquet", + new_api="dask_cudf._legacy.io.parquet.to_parquet", + rec="Please use the DataFrame.to_parquet method instead.", +) diff --git a/python/dask_cudf/dask_cudf/io/csv.py b/python/dask_cudf/dask_cudf/io/csv.py index fa5400344f9..b22b31a591f 100644 --- a/python/dask_cudf/dask_cudf/io/csv.py +++ b/python/dask_cudf/dask_cudf/io/csv.py @@ -1,222 +1,8 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. 
-import os -from glob import glob -from warnings import warn +from dask_cudf import _deprecated_api -from fsspec.utils import infer_compression - -from dask import dataframe as dd -from dask.base import tokenize -from dask.dataframe.io.csv import make_reader -from dask.utils import apply, parse_bytes - -import cudf - - -def read_csv(path, blocksize="default", **kwargs): - """ - Read CSV files into a :class:`.DataFrame`. - - This API parallelizes the :func:`cudf:cudf.read_csv` function in - the following ways: - - It supports loading many files at once using globstrings: - - >>> import dask_cudf - >>> df = dask_cudf.read_csv("myfiles.*.csv") - - In some cases it can break up large files: - - >>> df = dask_cudf.read_csv("largefile.csv", blocksize="256 MiB") - - It can read CSV files from external resources (e.g. S3, HTTP, FTP) - - >>> df = dask_cudf.read_csv("s3://bucket/myfiles.*.csv") - >>> df = dask_cudf.read_csv("https://www.mycloud.com/sample.csv") - - Internally ``read_csv`` uses :func:`cudf:cudf.read_csv` and - supports many of the same keyword arguments with the same - performance guarantees. See the docstring for - :func:`cudf:cudf.read_csv` for more information on available - keyword arguments. - - Parameters - ---------- - path : str, path object, or file-like object - Either a path to a file (a str, :py:class:`pathlib.Path`, or - py._path.local.LocalPath), URL (including http, ftp, and S3 - locations), or any object with a read() method (such as - builtin :py:func:`open` file handler function or - :py:class:`~io.StringIO`). - blocksize : int or str, default "256 MiB" - The target task partition size. If ``None``, a single block - is used for each file. - **kwargs : dict - Passthrough key-word arguments that are sent to - :func:`cudf:cudf.read_csv`. - - Notes - ----- - If any of `skipfooter`/`skiprows`/`nrows` are passed, - `blocksize` will default to None. - - Examples - -------- - >>> import dask_cudf - >>> ddf = dask_cudf.read_csv("sample.csv", usecols=["a", "b"]) - >>> ddf.compute() - a b - 0 1 hi - 1 2 hello - 2 3 ai - - """ - - # Handle `chunksize` deprecation - if "chunksize" in kwargs: - chunksize = kwargs.pop("chunksize", "default") - warn( - "`chunksize` is deprecated and will be removed in the future. " - "Please use `blocksize` instead.", - FutureWarning, - ) - if blocksize == "default": - blocksize = chunksize - - # Set default `blocksize` - if blocksize == "default": - if ( - kwargs.get("skipfooter", 0) != 0 - or kwargs.get("skiprows", 0) != 0 - or kwargs.get("nrows", None) is not None - ): - # Cannot read in blocks if skipfooter, - # skiprows or nrows is passed. - blocksize = None - else: - blocksize = "256 MiB" - - if "://" in str(path): - func = make_reader(cudf.read_csv, "read_csv", "CSV") - return func(path, blocksize=blocksize, **kwargs) - else: - return _internal_read_csv(path=path, blocksize=blocksize, **kwargs) - - -def _internal_read_csv(path, blocksize="256 MiB", **kwargs): - if isinstance(blocksize, str): - blocksize = parse_bytes(blocksize) - - if isinstance(path, list): - filenames = path - elif isinstance(path, str): - filenames = sorted(glob(path)) - elif hasattr(path, "__fspath__"): - filenames = sorted(glob(path.__fspath__())) - else: - raise TypeError(f"Path type not understood:{type(path)}") - - if not filenames: - msg = f"A file in: {filenames} does not exist." 
- raise FileNotFoundError(msg) - - name = "read-csv-" + tokenize( - path, tokenize, **kwargs - ) # TODO: get last modified time - - compression = kwargs.get("compression", "infer") - - if compression == "infer": - # Infer compression from first path by default - compression = infer_compression(filenames[0]) - - if compression and blocksize: - # compressed CSVs reading must read the entire file - kwargs.pop("byte_range", None) - warn( - "Warning %s compression does not support breaking apart files\n" - "Please ensure that each individual file can fit in memory and\n" - "use the keyword ``blocksize=None to remove this message``\n" - "Setting ``blocksize=(size of file)``" % compression - ) - blocksize = None - - if blocksize is None: - return read_csv_without_blocksize(path, **kwargs) - - # Let dask.dataframe generate meta - dask_reader = make_reader(cudf.read_csv, "read_csv", "CSV") - kwargs1 = kwargs.copy() - usecols = kwargs1.pop("usecols", None) - dtype = kwargs1.pop("dtype", None) - meta = dask_reader(filenames[0], **kwargs1)._meta - names = meta.columns - if usecols or dtype: - # Regenerate meta with original kwargs if - # `usecols` or `dtype` was specified - meta = dask_reader(filenames[0], **kwargs)._meta - - dsk = {} - i = 0 - dtypes = meta.dtypes.values - - for fn in filenames: - size = os.path.getsize(fn) - for start in range(0, size, blocksize): - kwargs2 = kwargs.copy() - kwargs2["byte_range"] = ( - start, - blocksize, - ) # specify which chunk of the file we care about - if start != 0: - kwargs2["names"] = names # no header in the middle of the file - kwargs2["header"] = None - dsk[(name, i)] = (apply, _read_csv, [fn, dtypes], kwargs2) - - i += 1 - - divisions = [None] * (len(dsk) + 1) - return dd.core.new_dd_object(dsk, name, meta, divisions) - - -def _read_csv(fn, dtypes=None, **kwargs): - return cudf.read_csv(fn, **kwargs) - - -def read_csv_without_blocksize(path, **kwargs): - """Read entire CSV with optional compression (gzip/zip) - - Parameters - ---------- - path : str - path to files (support for glob) - """ - if isinstance(path, list): - filenames = path - elif isinstance(path, str): - filenames = sorted(glob(path)) - elif hasattr(path, "__fspath__"): - filenames = sorted(glob(path.__fspath__())) - else: - raise TypeError(f"Path type not understood:{type(path)}") - - name = "read-csv-" + tokenize(path, **kwargs) - - meta_kwargs = kwargs.copy() - if "skipfooter" in meta_kwargs: - meta_kwargs.pop("skipfooter") - if "nrows" in meta_kwargs: - meta_kwargs.pop("nrows") - # Read "head" of first file (first 5 rows). - # Convert to empty df for metadata. - meta = cudf.read_csv(filenames[0], nrows=5, **meta_kwargs).iloc[:0] - - graph = { - (name, i): (apply, cudf.read_csv, [fn], kwargs) - for i, fn in enumerate(filenames) - } - - divisions = [None] * (len(filenames) + 1) - - return dd.core.new_dd_object(graph, name, meta, divisions) +read_csv = _deprecated_api( + "dask_cudf.io.csv.read_csv", + new_api="dask_cudf.read_csv", +) diff --git a/python/dask_cudf/dask_cudf/io/json.py b/python/dask_cudf/dask_cudf/io/json.py index 98c5ceedb76..8f85ea54c0a 100644 --- a/python/dask_cudf/dask_cudf/io/json.py +++ b/python/dask_cudf/dask_cudf/io/json.py @@ -1,209 +1,8 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. 
-from functools import partial +from dask_cudf import _deprecated_api -import numpy as np -from fsspec.core import get_compression, get_fs_token_paths - -import dask -from dask.utils import parse_bytes - -import cudf -from cudf.core.column import as_column -from cudf.utils.ioutils import _is_local_filesystem - -from dask_cudf.backends import _default_backend - - -def _read_json_partition( - paths, - fs=None, - include_path_column=False, - path_converter=None, - **kwargs, -): - # Transfer all data up front for remote storage - sources = ( - paths - if fs is None - else fs.cat_ranges( - paths, - [0] * len(paths), - fs.sizes(paths), - ) - ) - - if include_path_column: - # Add "path" column. - # Must iterate over sources sequentially - if not isinstance(include_path_column, str): - include_path_column = "path" - converted_paths = ( - paths - if path_converter is None - else [path_converter(path) for path in paths] - ) - dfs = [] - for i, source in enumerate(sources): - df = cudf.read_json(source, **kwargs) - df[include_path_column] = as_column( - converted_paths[i], length=len(df) - ) - dfs.append(df) - return cudf.concat(dfs) - else: - # Pass sources directly to cudf - return cudf.read_json(sources, **kwargs) - - -def read_json( - url_path, - engine="auto", - blocksize=None, - orient="records", - lines=None, - compression="infer", - aggregate_files=True, - **kwargs, -): - """Read JSON data into a :class:`.DataFrame`. - - This function wraps :func:`dask.dataframe.read_json`, and passes - ``engine=partial(cudf.read_json, engine="auto")`` by default. - - Parameters - ---------- - url_path : str, list of str - Location to read from. If a string, can include a glob character to - find a set of file names. - Supports protocol specifications such as ``"s3://"``. - engine : str or Callable, default "auto" - - If str, this value will be used as the ``engine`` argument - when :func:`cudf.read_json` is used to create each partition. - If a :obj:`~collections.abc.Callable`, this value will be used as the - underlying function used to create each partition from JSON - data. The default value is "auto", so that - ``engine=partial(cudf.read_json, engine="auto")`` will be - passed to :func:`dask.dataframe.read_json` by default. - aggregate_files : bool or int - Whether to map multiple files to each output partition. If True, - the `blocksize` argument will be used to determine the number of - files in each partition. If any one file is larger than `blocksize`, - the `aggregate_files` argument will be ignored. If an integer value - is specified, the `blocksize` argument will be ignored, and that - number of files will be mapped to each partition. Default is True. - **kwargs : - Key-word arguments to pass through to :func:`dask.dataframe.read_json`. - - Returns - ------- - :class:`.DataFrame` - - Examples - -------- - Load single file - - >>> from dask_cudf import read_json - >>> read_json('myfile.json') # doctest: +SKIP - - Load large line-delimited JSON files using partitions of approx - 256MB size - - >>> read_json('data/file*.csv', blocksize=2**28) # doctest: +SKIP - - Load nested JSON data - - >>> read_json('myfile.json') # doctest: +SKIP - - See Also - -------- - dask.dataframe.read_json - - """ - - if lines is None: - lines = orient == "records" - if orient != "records" and lines: - raise ValueError( - 'Line-delimited JSON is only available with orient="records".' 
- ) - if blocksize and (orient != "records" or not lines): - raise ValueError( - "JSON file chunking only allowed for JSON-lines" - "input (orient='records', lines=True)." - ) - - inputs = [] - if aggregate_files and blocksize or int(aggregate_files) > 1: - # Attempt custom read if we are mapping multiple files - # to each output partition. Otherwise, upstream logic - # is sufficient. - - storage_options = kwargs.get("storage_options", {}) - fs, _, paths = get_fs_token_paths( - url_path, mode="rb", storage_options=storage_options - ) - if isinstance(aggregate_files, int) and aggregate_files > 1: - # Map a static file count to each partition - inputs = [ - paths[offset : offset + aggregate_files] - for offset in range(0, len(paths), aggregate_files) - ] - elif aggregate_files is True and blocksize: - # Map files dynamically (using blocksize) - file_sizes = fs.sizes(paths) # NOTE: This can be slow - blocksize = parse_bytes(blocksize) - if all([file_size <= blocksize for file_size in file_sizes]): - counts = np.unique( - np.floor(np.cumsum(file_sizes) / blocksize), - return_counts=True, - )[1] - offsets = np.concatenate([[0], counts.cumsum()]) - inputs = [ - paths[offsets[i] : offsets[i + 1]] - for i in range(len(offsets) - 1) - ] - - if inputs: - # Inputs were successfully populated. - # Use custom _read_json_partition function - # to generate each partition. - - compression = get_compression( - url_path[0] if isinstance(url_path, list) else url_path, - compression, - ) - _kwargs = dict( - orient=orient, - lines=lines, - compression=compression, - include_path_column=kwargs.get("include_path_column", False), - path_converter=kwargs.get("path_converter"), - ) - if not _is_local_filesystem(fs): - _kwargs["fs"] = fs - # TODO: Generate meta more efficiently - meta = _read_json_partition(inputs[0][:1], **_kwargs) - return dask.dataframe.from_map( - _read_json_partition, - inputs, - meta=meta, - **_kwargs, - ) - - # Fall back to dask.dataframe.read_json - return _default_backend( - dask.dataframe.read_json, - url_path, - engine=( - partial(cudf.read_json, engine=engine) - if isinstance(engine, str) - else engine - ), - blocksize=blocksize, - orient=orient, - lines=lines, - compression=compression, - **kwargs, - ) +read_json = _deprecated_api( + "dask_cudf.io.json.read_json", + new_api="dask_cudf.read_json", +) diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index bed69f038b0..5219cdacc31 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -1,199 +1,13 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from io import BufferedWriter, IOBase - -from fsspec.core import get_fs_token_paths -from fsspec.utils import stringify_path -from pyarrow import orc as orc - -from dask import dataframe as dd -from dask.base import tokenize -from dask.dataframe.io.utils import _get_pyarrow_dtypes - -import cudf - - -def _read_orc_stripe(fs, path, stripe, columns, kwargs=None): - """Pull out specific columns from specific stripe""" - if kwargs is None: - kwargs = {} - with fs.open(path, "rb") as f: - df_stripe = cudf.read_orc( - f, stripes=[stripe], columns=columns, **kwargs - ) - return df_stripe - - -def read_orc(path, columns=None, filters=None, storage_options=None, **kwargs): - """Read ORC files into a :class:`.DataFrame`. - - Note that this function is mostly borrowed from upstream Dask. 
- - Parameters - ---------- - path : str or list[str] - Location of file(s), which can be a full URL with protocol specifier, - and may include glob character if a single string. - columns : None or list[str] - Columns to load. If None, loads all. - filters : None or list of tuple or list of lists of tuples - If not None, specifies a filter predicate used to filter out - row groups using statistics stored for each row group as - Parquet metadata. Row groups that do not match the given - filter predicate are not read. The predicate is expressed in - `disjunctive normal form (DNF) - `__ - like ``[[('x', '=', 0), ...], ...]``. DNF allows arbitrary - boolean logical combinations of single column predicates. The - innermost tuples each describe a single column predicate. The - list of inner predicates is interpreted as a conjunction - (AND), forming a more selective and multiple column predicate. - Finally, the outermost list combines these filters as a - disjunction (OR). Predicates may also be passed as a list of - tuples. This form is interpreted as a single conjunction. To - express OR in predicates, one must use the (preferred) - notation of list of lists of tuples. - storage_options : None or dict - Further parameters to pass to the bytes backend. - - See Also - -------- - dask.dataframe.read_orc - - Returns - ------- - dask_cudf.DataFrame - - """ - - storage_options = storage_options or {} - fs, fs_token, paths = get_fs_token_paths( - path, mode="rb", storage_options=storage_options - ) - schema = None - nstripes_per_file = [] - for path in paths: - with fs.open(path, "rb") as f: - o = orc.ORCFile(f) - if schema is None: - schema = o.schema - elif schema != o.schema: - raise ValueError( - "Incompatible schemas while parsing ORC files" - ) - nstripes_per_file.append(o.nstripes) - schema = _get_pyarrow_dtypes(schema, categories=None) - if columns is not None: - ex = set(columns) - set(schema) - if ex: - raise ValueError( - f"Requested columns ({ex}) not in schema ({set(schema)})" - ) - else: - columns = list(schema) - - with fs.open(paths[0], "rb") as f: - meta = cudf.read_orc( - f, - stripes=[0] if nstripes_per_file[0] else None, - columns=columns, - **kwargs, - ) - - name = "read-orc-" + tokenize(fs_token, path, columns, filters, **kwargs) - dsk = {} - N = 0 - for path, n in zip(paths, nstripes_per_file): - for stripe in ( - range(n) - if filters is None - else cudf.io.orc._filter_stripes(filters, path) - ): - dsk[(name, N)] = ( - _read_orc_stripe, - fs, - path, - stripe, - columns, - kwargs, - ) - N += 1 - - divisions = [None] * (len(dsk) + 1) - return dd.core.new_dd_object(dsk, name, meta, divisions) - - -def write_orc_partition(df, path, fs, filename, compression="snappy"): - full_path = fs.sep.join([path, filename]) - with fs.open(full_path, mode="wb") as out_file: - if not isinstance(out_file, IOBase): - out_file = BufferedWriter(out_file) - cudf.io.to_orc(df, out_file, compression=compression) - return full_path - - -def to_orc( - df, - path, - write_index=True, - storage_options=None, - compression="snappy", - compute=True, - **kwargs, -): - """ - Write a :class:`.DataFrame` to ORC file(s) (one file per partition). - - Parameters - ---------- - df : DataFrame - path : str or pathlib.Path - Destination directory for data. Prepend with protocol like ``s3://`` - or ``hdfs://`` for remote data. - write_index : boolean, optional - Whether or not to write the index. Defaults to True. - storage_options : None or dict - Further parameters to pass to the bytes backend. 
- compression : string or dict, optional - compute : bool, optional - If True (default) then the result is computed immediately. If - False then a :class:`~dask.delayed.Delayed` object is returned - for future computation. - - """ - - from dask import compute as dask_compute, delayed - - # TODO: Use upstream dask implementation once available - # (see: Dask Issue#5596) - - if hasattr(path, "name"): - path = stringify_path(path) - fs, _, _ = get_fs_token_paths( - path, mode="wb", storage_options=storage_options - ) - # Trim any protocol information from the path before forwarding - path = fs._strip_protocol(path) - - if write_index: - df = df.reset_index() - else: - # Not writing index - might as well drop it - df = df.reset_index(drop=True) - - fs.mkdirs(path, exist_ok=True) - - # Use i_offset and df.npartitions to define file-name list - filenames = ["part.%i.orc" % i for i in range(df.npartitions)] - - # write parts - dwrite = delayed(write_orc_partition) - parts = [ - dwrite(d, path, fs, filename, compression=compression) - for d, filename in zip(df.to_delayed(), filenames) - ] - - if compute: - return dask_compute(*parts) - - return delayed(list)(parts) +# Copyright (c) 2024, NVIDIA CORPORATION. + +from dask_cudf import _deprecated_api + +read_orc = _deprecated_api( + "dask_cudf.io.orc.read_orc", + new_api="dask_cudf.read_orc", +) +to_orc = _deprecated_api( + "dask_cudf.io.orc.to_orc", + new_api="dask_cudf._legacy.io.orc.to_orc", + rec="Please use the DataFrame.to_orc method instead.", +) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 39ac6474958..48cea7266af 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -1,35 +1,66 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -import itertools -import warnings -from functools import partial -from io import BufferedWriter, BytesIO, IOBase +# Copyright (c) 2024, NVIDIA CORPORATION. 
+import functools -import numpy as np import pandas as pd -from pyarrow import dataset as pa_ds, parquet as pq +from dask_expr.io.io import FusedParquetIO +from dask_expr.io.parquet import FragmentWrapper, ReadParquetPyarrowFS -from dask import dataframe as dd -from dask.dataframe.io.parquet.arrow import ArrowDatasetEngine +import cudf -try: - from dask.dataframe.io.parquet import ( - create_metadata_file as create_metadata_file_dd, - ) -except ImportError: - create_metadata_file_dd = None +from dask_cudf import _deprecated_api + +# Dask-expr imports CudfEngine from this module +from dask_cudf._legacy.io.parquet import CudfEngine # noqa: F401 + + +class CudfFusedParquetIO(FusedParquetIO): + @staticmethod + def _load_multiple_files( + frag_filters, + columns, + schema, + *to_pandas_args, + ): + import pyarrow as pa + + from dask.base import apply, tokenize + from dask.threaded import get + + token = tokenize(frag_filters, columns, schema) + name = f"pq-file-{token}" + dsk = { + (name, i): ( + CudfReadParquetPyarrowFS._fragment_to_table, + frag, + filter, + columns, + schema, + ) + for i, (frag, filter) in enumerate(frag_filters) + } + dsk[name] = ( + apply, + pa.concat_tables, + [list(dsk.keys())], + {"promote_options": "permissive"}, + ) + return CudfReadParquetPyarrowFS._table_to_pandas( + get(dsk, name), + *to_pandas_args, + ) -import cudf -from cudf.core.column import CategoricalColumn, as_column -from cudf.io import write_to_dataset -from cudf.io.parquet import _apply_post_filters, _normalize_filters -from cudf.utils.dtypes import cudf_dtype_from_pa_type +class CudfReadParquetPyarrowFS(ReadParquetPyarrowFS): + @functools.cached_property + def _dataset_info(self): + from dask_cudf._legacy.io.parquet import ( + set_object_dtypes_from_pa_schema, + ) -class CudfEngine(ArrowDatasetEngine): - @classmethod - def _create_dd_meta(cls, dataset_info, **kwargs): - # Start with pandas-version of meta - meta_pd = super()._create_dd_meta(dataset_info, **kwargs) + dataset_info = super()._dataset_info + meta_pd = dataset_info["base_meta"] + if isinstance(meta_pd, cudf.DataFrame): + return dataset_info # Convert to cudf # (drop unsupported timezone information) @@ -45,469 +76,60 @@ def _create_dd_meta(cls, dataset_info, **kwargs): kwargs.get("schema", None), ) - return meta_cudf - - @classmethod - def multi_support(cls): - # Assert that this class is CudfEngine - # and that multi-part reading is supported - return cls == CudfEngine - - @classmethod - def _read_paths( - cls, - paths, - fs, - columns=None, - row_groups=None, - filters=None, - partitions=None, - partitioning=None, - partition_keys=None, - open_file_options=None, - dataset_kwargs=None, - **kwargs, - ): - # Simplify row_groups if all None - if row_groups == [None for path in paths]: - row_groups = None - - # Make sure we read in the columns needed for row-wise - # filtering after IO. This means that one or more columns - # will be dropped almost immediately after IO. However, - # we do NEED these columns for accurate filtering. 
- filters = _normalize_filters(filters) - projected_columns = None - if columns and filters: - projected_columns = [c for c in columns if c is not None] - columns = sorted( - set(v[0] for v in itertools.chain.from_iterable(filters)) - | set(projected_columns) - ) - - dataset_kwargs = dataset_kwargs or {} - dataset_kwargs["partitioning"] = partitioning or "hive" - - # Use cudf to read in data - try: - df = cudf.read_parquet( - paths, - engine="cudf", - columns=columns, - row_groups=row_groups if row_groups else None, - dataset_kwargs=dataset_kwargs, - categorical_partitions=False, - filesystem=fs, - **kwargs, - ) - except RuntimeError as err: - # TODO: Remove try/except after null-schema issue is resolved - # (See: https://github.com/rapidsai/cudf/issues/12702) - if len(paths) > 1: - df = cudf.concat( - [ - cudf.read_parquet( - path, - engine="cudf", - columns=columns, - row_groups=row_groups[i] if row_groups else None, - dataset_kwargs=dataset_kwargs, - categorical_partitions=False, - filesystem=fs, - **kwargs, - ) - for i, path in enumerate(paths) - ] - ) - else: - raise err - - # Apply filters (if any are defined) - df = _apply_post_filters(df, filters) - - if projected_columns: - # Elements of `projected_columns` may now be in the index. - # We must filter these names from our projection - projected_columns = [ - col for col in projected_columns if col in df._column_names - ] - df = df[projected_columns] - - if partitions and partition_keys is None: - # Use `HivePartitioning` by default - ds = pa_ds.dataset( - paths, - filesystem=fs, - **dataset_kwargs, - ) - frag = next(ds.get_fragments()) - if frag: - # Extract hive-partition keys, and make sure they - # are ordered the same as they are in `partitions` - raw_keys = pa_ds._get_partition_keys(frag.partition_expression) - partition_keys = [ - (hive_part.name, raw_keys[hive_part.name]) - for hive_part in partitions - ] - - if partition_keys: - if partitions is None: - raise ValueError("Must pass partition sets") - - for i, (name, index2) in enumerate(partition_keys): - if len(partitions[i].keys): - # Build a categorical column from `codes` directly - # (since the category is often a larger dtype) - codes = as_column( - partitions[i].keys.get_loc(index2), - length=len(df), - ) - df[name] = CategoricalColumn( - data=None, - size=codes.size, - dtype=cudf.CategoricalDtype( - categories=partitions[i].keys, ordered=False - ), - offset=codes.offset, - children=(codes,), - ) - elif name not in df.columns: - # Add non-categorical partition column - df[name] = as_column(index2, length=len(df)) - - return df - - @classmethod - def read_partition( - cls, - fs, - pieces, - columns, - index, - categories=(), - partitions=(), - filters=None, - partitioning=None, - schema=None, - open_file_options=None, - **kwargs, - ): - if columns is not None: - columns = [c for c in columns] - if isinstance(index, list): - columns += index - - dataset_kwargs = kwargs.get("dataset", {}) - partitioning = partitioning or dataset_kwargs.get("partitioning", None) - if isinstance(partitioning, dict): - partitioning = pa_ds.partitioning(**partitioning) - - # Check if we are actually selecting any columns - read_columns = columns - if schema and columns: - ignored = set(schema.names) - set(columns) - if not ignored: - read_columns = None - - if not isinstance(pieces, list): - pieces = [pieces] - - # Extract supported kwargs from `kwargs` - read_kwargs = kwargs.get("read", {}) - read_kwargs.update(open_file_options or {}) - check_file_size = read_kwargs.pop("check_file_size", 
None) - - # Wrap reading logic in a `try` block so that we can - # inform the user that the `read_parquet` partition - # size is too large for the available memory - try: - # Assume multi-piece read - paths = [] - rgs = [] - last_partition_keys = None - dfs = [] - - for i, piece in enumerate(pieces): - (path, row_group, partition_keys) = piece - row_group = None if row_group == [None] else row_group - - # File-size check to help "protect" users from change - # to up-stream `split_row_groups` default. We only - # check the file size if this partition corresponds - # to a full file, and `check_file_size` is defined - if check_file_size and len(pieces) == 1 and row_group is None: - file_size = fs.size(path) - if file_size > check_file_size: - warnings.warn( - f"A large parquet file ({file_size}B) is being " - f"used to create a DataFrame partition in " - f"read_parquet. This may cause out of memory " - f"exceptions in operations downstream. See the " - f"notes on split_row_groups in the read_parquet " - f"documentation. Setting split_row_groups " - f"explicitly will silence this warning." - ) - - if i > 0 and partition_keys != last_partition_keys: - dfs.append( - cls._read_paths( - paths, - fs, - columns=read_columns, - row_groups=rgs if rgs else None, - filters=filters, - partitions=partitions, - partitioning=partitioning, - partition_keys=last_partition_keys, - dataset_kwargs=dataset_kwargs, - **read_kwargs, - ) - ) - paths = [] - rgs = [] - last_partition_keys = None - paths.append(path) - rgs.append( - [row_group] - if not isinstance(row_group, list) - and row_group is not None - else row_group - ) - last_partition_keys = partition_keys - - dfs.append( - cls._read_paths( - paths, - fs, - columns=read_columns, - row_groups=rgs if rgs else None, - filters=filters, - partitions=partitions, - partitioning=partitioning, - partition_keys=last_partition_keys, - dataset_kwargs=dataset_kwargs, - **read_kwargs, - ) - ) - df = cudf.concat(dfs) if len(dfs) > 1 else dfs[0] - - # Re-set "object" dtypes align with pa schema - set_object_dtypes_from_pa_schema(df, schema) - - if index and (index[0] in df.columns): - df = df.set_index(index[0]) - elif index is False and df.index.names != [None]: - # If index=False, we shouldn't have a named index - df.reset_index(inplace=True) - - except MemoryError as err: - raise MemoryError( - "Parquet data was larger than the available GPU memory!\n\n" - "See the notes on split_row_groups in the read_parquet " - "documentation.\n\n" - "Original Error: " + str(err) - ) - raise err - - return df - - @staticmethod - def write_partition( - df, - path, - fs, - filename, - partition_on, - return_metadata, - fmd=None, - compression="snappy", - index_cols=None, - **kwargs, - ): - preserve_index = False - if len(index_cols) and set(index_cols).issubset(set(df.columns)): - df.set_index(index_cols, drop=True, inplace=True) - preserve_index = True - if partition_on: - md = write_to_dataset( - df=df, - root_path=path, - compression=compression, - filename=filename, - partition_cols=partition_on, - fs=fs, - preserve_index=preserve_index, - return_metadata=return_metadata, - statistics=kwargs.get("statistics", "ROWGROUP"), - int96_timestamps=kwargs.get("int96_timestamps", False), - row_group_size_bytes=kwargs.get("row_group_size_bytes", None), - row_group_size_rows=kwargs.get("row_group_size_rows", None), - max_page_size_bytes=kwargs.get("max_page_size_bytes", None), - max_page_size_rows=kwargs.get("max_page_size_rows", None), - storage_options=kwargs.get("storage_options", None), - 
) - else: - with fs.open(fs.sep.join([path, filename]), mode="wb") as out_file: - if not isinstance(out_file, IOBase): - out_file = BufferedWriter(out_file) - md = df.to_parquet( - path=out_file, - engine=kwargs.get("engine", "cudf"), - index=kwargs.get("index", None), - partition_cols=kwargs.get("partition_cols", None), - partition_file_name=kwargs.get( - "partition_file_name", None - ), - partition_offsets=kwargs.get("partition_offsets", None), - statistics=kwargs.get("statistics", "ROWGROUP"), - int96_timestamps=kwargs.get("int96_timestamps", False), - row_group_size_bytes=kwargs.get( - "row_group_size_bytes", None - ), - row_group_size_rows=kwargs.get( - "row_group_size_rows", None - ), - storage_options=kwargs.get("storage_options", None), - metadata_file_path=filename if return_metadata else None, - ) - # Return the schema needed to write the metadata - if return_metadata: - return [{"meta": md}] - else: - return [] + dataset_info["base_meta"] = meta_cudf + self.operands[type(self)._parameters.index("_dataset_info_cache")] = ( + dataset_info + ) + return dataset_info @staticmethod - def write_metadata(parts, fmd, fs, path, append=False, **kwargs): - if parts: - # Aggregate metadata and write to _metadata file - metadata_path = fs.sep.join([path, "_metadata"]) - _meta = [] - if append and fmd is not None: - # Convert to bytes: - if isinstance(fmd, pq.FileMetaData): - with BytesIO() as myio: - fmd.write_metadata_file(myio) - myio.seek(0) - fmd = np.frombuffer(myio.read(), dtype="uint8") - _meta = [fmd] - _meta.extend([parts[i][0]["meta"] for i in range(len(parts))]) - _meta = ( - cudf.io.merge_parquet_filemetadata(_meta) - if len(_meta) > 1 - else _meta[0] - ) - with fs.open(metadata_path, "wb") as fil: - fil.write(memoryview(_meta)) - - @classmethod - def collect_file_metadata(cls, path, fs, file_path): - with fs.open(path, "rb") as f: - meta = pq.ParquetFile(f).metadata - if file_path: - meta.set_file_path(file_path) - with BytesIO() as myio: - meta.write_metadata_file(myio) - myio.seek(0) - meta = np.frombuffer(myio.read(), dtype="uint8") - return meta + def _table_to_pandas(table, index_name): + df = cudf.DataFrame.from_arrow(table) + if index_name is not None: + df = df.set_index(index_name) + return df - @classmethod - def aggregate_metadata(cls, meta_list, fs, out_path): - meta = ( - cudf.io.merge_parquet_filemetadata(meta_list) - if len(meta_list) > 1 - else meta_list[0] + def _filtered_task(self, index: int): + columns = self.columns.copy() + index_name = self.index.name + if self.index is not None: + index_name = self.index.name + schema = self._dataset_info["schema"].remove_metadata() + if index_name: + if columns is None: + columns = list(schema.names) + columns.append(index_name) + return ( + self._table_to_pandas, + ( + self._fragment_to_table, + FragmentWrapper(self.fragments[index], filesystem=self.fs), + self.filters, + columns, + schema, + ), + index_name, ) - if out_path: - metadata_path = fs.sep.join([out_path, "_metadata"]) - with fs.open(metadata_path, "wb") as fil: - fil.write(memoryview(meta)) - return None - else: - return meta - - -def set_object_dtypes_from_pa_schema(df, schema): - # Simple utility to modify cudf DataFrame - # "object" dtypes to agree with a specific - # pyarrow schema. - if schema: - for col_name, col in df._data.items(): - if col_name is None: - # Pyarrow cannot handle `None` as a field name. 
- # However, this should be a simple range index that - # we can ignore anyway - continue - typ = cudf_dtype_from_pa_type(schema.field(col_name).type) - if ( - col_name in schema.names - and not isinstance(typ, (cudf.ListDtype, cudf.StructDtype)) - and isinstance(col, cudf.core.column.StringColumn) - ): - df._data[col_name] = col.astype(typ) - - -def read_parquet(path, columns=None, **kwargs): - """ - Read parquet files into a :class:`.DataFrame`. - - Calls :func:`dask.dataframe.read_parquet` with ``engine=CudfEngine`` - to coordinate the execution of :func:`cudf.read_parquet`, and to - ultimately create a :class:`.DataFrame` collection. - - See the :func:`dask.dataframe.read_parquet` documentation for - all available options. - - Examples - -------- - >>> from dask_cudf import read_parquet - >>> df = read_parquet("/path/to/dataset/") # doctest: +SKIP - - When dealing with one or more large parquet files having an - in-memory footprint >15% device memory, the ``split_row_groups`` - argument should be used to map Parquet **row-groups** to DataFrame - partitions (instead of **files** to partitions). For example, the - following code will map each row-group to a distinct partition: - - >>> df = read_parquet(..., split_row_groups=True) # doctest: +SKIP - - To map **multiple** row-groups to each partition, an integer can be - passed to ``split_row_groups`` to specify the **maximum** number of - row-groups allowed in each output partition: - - >>> df = read_parquet(..., split_row_groups=10) # doctest: +SKIP - - See Also - -------- - cudf.read_parquet - dask.dataframe.read_parquet - """ - if isinstance(columns, str): - columns = [columns] - - # Set "check_file_size" option to determine whether we - # should check the parquet-file size. This check is meant - # to "protect" users from `split_row_groups` default changes - check_file_size = kwargs.pop("check_file_size", 500_000_000) - if ( - check_file_size - and ("split_row_groups" not in kwargs) - and ("chunksize" not in kwargs) - ): - # User is not specifying `split_row_groups` or `chunksize`, - # so we should warn them if/when a file is ~>0.5GB on disk. 
- # They can set `split_row_groups` explicitly to silence/skip - # this check - if "read" not in kwargs: - kwargs["read"] = {} - kwargs["read"]["check_file_size"] = check_file_size - - return dd.read_parquet(path, columns=columns, engine=CudfEngine, **kwargs) - - -to_parquet = partial(dd.to_parquet, engine=CudfEngine) -if create_metadata_file_dd is None: - create_metadata_file = create_metadata_file_dd -else: - create_metadata_file = partial(create_metadata_file_dd, engine=CudfEngine) + def _tune_up(self, parent): + if self._fusion_compression_factor >= 1: + return + if isinstance(parent, CudfFusedParquetIO): + return + return parent.substitute(self, CudfFusedParquetIO(self)) + + +read_parquet = _deprecated_api( + "dask_cudf.io.parquet.read_parquet", + new_api="dask_cudf.read_parquet", +) +to_parquet = _deprecated_api( + "dask_cudf.io.parquet.to_parquet", + new_api="dask_cudf._legacy.io.parquet.to_parquet", + rec="Please use the DataFrame.to_parquet method instead.", +) +create_metadata_file = _deprecated_api( + "dask_cudf.io.parquet.create_metadata_file", + new_api="dask_cudf._legacy.io.parquet.create_metadata_file", + rec="Please raise an issue if this feature is needed.", +) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_csv.py b/python/dask_cudf/dask_cudf/io/tests/test_csv.py index a35a9f1be48..a0acb86f5a9 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_csv.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_csv.py @@ -264,3 +264,18 @@ def test_read_csv_nrows_error(csv_end_bad_lines): dask_cudf.read_csv( csv_end_bad_lines, nrows=2, blocksize="100 MiB" ).compute() + + +def test_deprecated_api_paths(tmp_path): + csv_path = str(tmp_path / "data-*.csv") + df = dask_cudf.DataFrame.from_dict({"a": range(100)}, npartitions=1) + df.to_csv(csv_path, index=False) + + # Encourage top-level read_csv import only + with pytest.warns(match="dask_cudf.io.read_csv is now deprecated"): + df2 = dask_cudf.io.read_csv(csv_path) + dd.assert_eq(df, df2, check_divisions=False) + + with pytest.warns(match="dask_cudf.io.csv.read_csv is now deprecated"): + df2 = dask_cudf.io.csv.read_csv(csv_path) + dd.assert_eq(df, df2, check_divisions=False) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_json.py b/python/dask_cudf/dask_cudf/io/tests/test_json.py index abafbffd197..f5509cf91c3 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_json.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_json.py @@ -126,3 +126,18 @@ def test_read_json_aggregate_files(tmp_path): assert name in df2.columns assert len(df2[name].compute().unique()) == df1.npartitions dd.assert_eq(df1, df2.drop(columns=[name]), check_index=False) + + +def test_deprecated_api_paths(tmp_path): + path = str(tmp_path / "data-*.json") + df = dd.from_dict({"a": range(100)}, npartitions=1) + df.to_json(path) + + # Encourage top-level read_json import only + with pytest.warns(match="dask_cudf.io.read_json is now deprecated"): + df2 = dask_cudf.io.read_json(path) + dd.assert_eq(df, df2, check_divisions=False) + + with pytest.warns(match="dask_cudf.io.json.read_json is now deprecated"): + df2 = dask_cudf.io.json.read_json(path) + dd.assert_eq(df, df2, check_divisions=False) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_orc.py b/python/dask_cudf/dask_cudf/io/tests/test_orc.py index 457e5546bd9..b6064d851ca 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_orc.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_orc.py @@ -145,3 +145,21 @@ def test_to_orc(tmpdir, dtypes, compression, compute): # the cudf dataframes (df and df_read) 
dd.assert_eq(df, ddf_read) dd.assert_eq(df_read, ddf_read) + + +def test_deprecated_api_paths(tmpdir): + df = dask_cudf.DataFrame.from_dict({"a": range(100)}, npartitions=1) + path = tmpdir.join("test.orc") + # Top-level to_orc function is deprecated + with pytest.warns(match="dask_cudf.to_orc is now deprecated"): + dask_cudf.to_orc(df, path, write_index=False) + + # Encourage top-level read_orc import only + paths = glob.glob(str(path) + "/*.orc") + with pytest.warns(match="dask_cudf.io.read_orc is now deprecated"): + df2 = dask_cudf.io.read_orc(paths) + dd.assert_eq(df, df2, check_divisions=False) + + with pytest.warns(match="dask_cudf.io.orc.read_orc is now deprecated"): + df2 = dask_cudf.io.orc.read_orc(paths) + dd.assert_eq(df, df2, check_divisions=False) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index a29cf9a342a..522a21e12a5 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -15,6 +15,7 @@ import cudf import dask_cudf +from dask_cudf._legacy.io.parquet import create_metadata_file from dask_cudf.tests.utils import ( require_dask_expr, skip_dask_expr, @@ -24,7 +25,7 @@ # Check if create_metadata_file is supported by # the current dask.dataframe version need_create_meta = pytest.mark.skipif( - dask_cudf.io.parquet.create_metadata_file is None, + create_metadata_file is None, reason="Need create_metadata_file support in dask.dataframe.", ) @@ -425,10 +426,14 @@ def test_create_metadata_file(tmpdir, partition_on): fns = glob.glob(os.path.join(tmpdir, partition_on + "=*/*.parquet")) else: fns = glob.glob(os.path.join(tmpdir, "*.parquet")) - dask_cudf.io.parquet.create_metadata_file( - fns, - split_every=3, # Force tree reduction - ) + + with pytest.warns( + match="dask_cudf.io.parquet.create_metadata_file is now deprecated" + ): + dask_cudf.io.parquet.create_metadata_file( + fns, + split_every=3, # Force tree reduction + ) # Check that we can now read the ddf # with the _metadata file present @@ -472,7 +477,7 @@ def test_create_metadata_file_inconsistent_schema(tmpdir): # Add global metadata file. # Dask-CuDF can do this without requiring schema # consistency. 
- dask_cudf.io.parquet.create_metadata_file([p0, p1]) + create_metadata_file([p0, p1]) # Check that we can still read the ddf # with the _metadata file present @@ -533,9 +538,9 @@ def test_check_file_size(tmpdir): fn = str(tmpdir.join("test.parquet")) cudf.DataFrame({"a": np.arange(1000)}).to_parquet(fn) with pytest.warns(match="large parquet file"): - # Need to use `dask_cudf.io` path + # Need to use `dask_cudf._legacy.io` path # TODO: Remove outdated `check_file_size` functionality - dask_cudf.io.read_parquet(fn, check_file_size=1).compute() + dask_cudf._legacy.io.read_parquet(fn, check_file_size=1).compute() @xfail_dask_expr("HivePartitioning cannot be hashed", lt_version="2024.3.0") @@ -664,3 +669,21 @@ def test_to_parquet_append(tmpdir, write_metadata_file): ) ddf2 = dask_cudf.read_parquet(tmpdir) dd.assert_eq(cudf.concat([df, df]), ddf2) + + +def test_deprecated_api_paths(tmpdir): + df = dask_cudf.DataFrame.from_dict({"a": range(100)}, npartitions=1) + # io.to_parquet function is deprecated + with pytest.warns(match="dask_cudf.io.to_parquet is now deprecated"): + dask_cudf.io.to_parquet(df, tmpdir) + + # Encourage top-level read_parquet import only + with pytest.warns(match="dask_cudf.io.read_parquet is now deprecated"): + df2 = dask_cudf.io.read_parquet(tmpdir) + dd.assert_eq(df, df2, check_divisions=False) + + with pytest.warns( + match="dask_cudf.io.parquet.read_parquet is now deprecated" + ): + df2 = dask_cudf.io.parquet.read_parquet(tmpdir) + dd.assert_eq(df, df2, check_divisions=False) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_text.py b/python/dask_cudf/dask_cudf/io/tests/test_text.py index 8912b7d5da6..e35b6411a9d 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_text.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_text.py @@ -34,3 +34,15 @@ def test_read_text_byte_range(offset, size): text_file, chunksize=None, delimiter=".", byte_range=(offset, size) ) dd.assert_eq(df1, df2, check_index=False) + + +def test_deprecated_api_paths(): + # Encourage top-level read_text import only + df = cudf.read_text(text_file, delimiter=".") + with pytest.warns(match="dask_cudf.io.read_text is now deprecated"): + df2 = dask_cudf.io.read_text(text_file, delimiter=".") + dd.assert_eq(df, df2, check_divisions=False) + + with pytest.warns(match="dask_cudf.io.text.read_text is now deprecated"): + df2 = dask_cudf.io.text.read_text(text_file, delimiter=".") + dd.assert_eq(df, df2, check_divisions=False) diff --git a/python/dask_cudf/dask_cudf/io/text.py b/python/dask_cudf/dask_cudf/io/text.py index 9cdb7c5220b..1caf4e81d8e 100644 --- a/python/dask_cudf/dask_cudf/io/text.py +++ b/python/dask_cudf/dask_cudf/io/text.py @@ -1,54 +1,8 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. -import os -from glob import glob +from dask_cudf import _deprecated_api -import dask.dataframe as dd -from dask.base import tokenize -from dask.utils import apply, parse_bytes - -import cudf - - -def read_text(path, chunksize="256 MiB", **kwargs): - if isinstance(chunksize, str): - chunksize = parse_bytes(chunksize) - - if isinstance(path, list): - filenames = path - elif isinstance(path, str): - filenames = sorted(glob(path)) - elif hasattr(path, "__fspath__"): - filenames = sorted(glob(path.__fspath__())) - else: - raise TypeError(f"Path type not understood:{type(path)}") - - if not filenames: - msg = f"A file in: {filenames} does not exist." 
- raise FileNotFoundError(msg) - - name = "read-text-" + tokenize(path, tokenize, **kwargs) - - if chunksize: - dsk = {} - i = 0 - for fn in filenames: - size = os.path.getsize(fn) - for start in range(0, size, chunksize): - kwargs1 = kwargs.copy() - kwargs1["byte_range"] = ( - start, - chunksize, - ) # specify which chunk of the file we care about - - dsk[(name, i)] = (apply, cudf.read_text, [fn], kwargs1) - i += 1 - else: - dsk = { - (name, i): (apply, cudf.read_text, [fn], kwargs) - for i, fn in enumerate(filenames) - } - - meta = cudf.Series([], dtype="O") - divisions = [None] * (len(dsk) + 1) - return dd.core.new_dd_object(dsk, name, meta, divisions) +read_text = _deprecated_api( + "dask_cudf.io.text.read_text", + new_api="dask_cudf.read_text", +) diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 8e42c847ddf..5130b804179 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -39,30 +39,6 @@ def test_from_dict_backend_dispatch(): dd.assert_eq(expect, ddf) -def test_to_dask_dataframe_deprecated(): - gdf = cudf.DataFrame({"a": range(100)}) - ddf = dd.from_pandas(gdf, npartitions=2) - assert isinstance(ddf._meta, cudf.DataFrame) - - with pytest.warns(FutureWarning, match="API is now deprecated"): - assert isinstance( - ddf.to_dask_dataframe()._meta, - pd.DataFrame, - ) - - -def test_from_dask_dataframe_deprecated(): - gdf = pd.DataFrame({"a": range(100)}) - ddf = dd.from_pandas(gdf, npartitions=2) - assert isinstance(ddf._meta, pd.DataFrame) - - with pytest.warns(FutureWarning, match="API is now deprecated"): - assert isinstance( - dask_cudf.from_dask_dataframe(ddf)._meta, - cudf.DataFrame, - ) - - def test_to_backend(): rng = np.random.default_rng(seed=0) data = { diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 042e69d86f4..918290aa6fa 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -13,7 +13,7 @@ from cudf.testing._utils import expect_warning_if import dask_cudf -from dask_cudf.groupby import OPTIMIZED_AGGS, _aggs_optimized +from dask_cudf._legacy.groupby import OPTIMIZED_AGGS, _aggs_optimized from dask_cudf.tests.utils import ( QUERY_PLANNING_ON, require_dask_expr, diff --git a/python/dask_cudf/dask_cudf/tests/utils.py b/python/dask_cudf/dask_cudf/tests/utils.py index 9aaf6dc8420..a9f61f75762 100644 --- a/python/dask_cudf/dask_cudf/tests/utils.py +++ b/python/dask_cudf/dask_cudf/tests/utils.py @@ -10,7 +10,7 @@ import cudf -from dask_cudf.expr import QUERY_PLANNING_ON +from dask_cudf import QUERY_PLANNING_ON if QUERY_PLANNING_ON: DASK_VERSION = Version(dask.__version__) From 9d5041c5419cd17c880961559e3a1457cdae9fcc Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Tue, 5 Nov 2024 09:56:35 -0600 Subject: [PATCH 05/12] Separate evaluation logic from `IR` objects in cudf-polars (#17175) Closes https://github.com/rapidsai/cudf/issues/17127 - This PR implements the proposal in #17127 - This change technically "breaks" with the existing `IR.evaluate` convention. 
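
For illustration, a minimal sketch of the new convention (toy classes with
illustrative names, not the real cudf-polars API): each node records its
non-child arguments at construction, `do_evaluate` consumes those followed by
the already-evaluated child frames, and the base-class `evaluate` is the only
place that encodes the recursion.

    # Toy sketch only: "Frame", "Literal" and "Filter" are hypothetical
    # stand-ins, not cudf-polars classes.
    class Frame:
        def __init__(self, rows):
            self.rows = rows

    class Node:
        children: tuple = ()
        _non_child_args: tuple = ()

        def evaluate(self, *, cache):
            # Recursion lives only here; subclasses implement do_evaluate
            # against inputs that have already been evaluated.
            return self.do_evaluate(
                *self._non_child_args,
                *(child.evaluate(cache=cache) for child in self.children),
            )

    class Literal(Node):
        def __init__(self, rows):
            self._non_child_args = (rows,)

        @classmethod
        def do_evaluate(cls, rows):
            return Frame(rows)

    class Filter(Node):
        def __init__(self, predicate, child):
            self._non_child_args = (predicate,)
            self.children = (child,)

        @classmethod
        def do_evaluate(cls, predicate, df):
            return Frame([r for r in df.rows if predicate(r)])

    assert Filter(lambda r: r > 1, Literal([1, 2, 3])).evaluate(cache={}).rows == [2, 3]

The intent, as reflected in the new `IR.evaluate` below, is that `do_evaluate`
never recurses and can therefore be invoked directly on already-evaluated
inputs.
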
Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17175 --- python/cudf_polars/cudf_polars/dsl/ir.py | 450 +++++++++++++++-------- python/cudf_polars/docs/overview.md | 6 +- 2 files changed, 298 insertions(+), 158 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 04aa74024cd..a242ff9300f 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -127,9 +127,12 @@ def broadcast(*columns: Column, target_length: int | None = None) -> list[Column class IR(Node["IR"]): """Abstract plan node, representing an unevaluated dataframe.""" - __slots__ = ("schema",) + __slots__ = ("schema", "_non_child_args") # This annotation is needed because of https://github.com/python/mypy/issues/17981 _non_child: ClassVar[tuple[str, ...]] = ("schema",) + # Concrete classes should set this up with the arguments that will + # be passed to do_evaluate. + _non_child_args: tuple[Any, ...] schema: Schema """Mapping from column names to their data types.""" @@ -146,9 +149,37 @@ def get_hashable(self) -> Hashable: schema_hash = tuple(self.schema.items()) return (type(self), schema_hash, args) + # Hacky to avoid type-checking issues, just advertise the + # signature. Both mypy and pyright complain if we have an abstract + # method that takes arbitrary *args, but the subclasses have + # tighter signatures. This complaint is correct because the + # subclass is not Liskov-substitutable for the superclass. + # However, we know do_evaluate will only be called with the + # correct arguments by "construction". + do_evaluate: Callable[..., DataFrame] + """ + Evaluate the node (given its evaluated children), and return a dataframe. + + Parameters + ---------- + args + Non child arguments followed by any evaluated dataframe inputs. + + Returns + ------- + DataFrame (on device) representing the evaluation of this plan + node. + + Raises + ------ + NotImplementedError + If evaluation fails. Ideally this should not occur, since the + translation phase should fail earlier. + """ + def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """ - Evaluate the node and return a dataframe. + Evaluate the node (recursively) and return a dataframe. Parameters ---------- @@ -156,21 +187,27 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: Mapping from cached node ids to constructed DataFrames. Used to implement evaluation of the `Cache` node. + Notes + ----- + Prefer not to override this method. Instead implement + :meth:`do_evaluate` which doesn't encode a recursion scheme + and just assumes already evaluated inputs. + Returns ------- DataFrame (on device) representing the evaluation of this plan - node. + node (and its children). Raises ------ NotImplementedError - If we couldn't evaluate things. Ideally this should not occur, - since the translation phase should pick up things that we - cannot handle. + If evaluation fails. Ideally this should not occur, since the + translation phase should fail earlier. 
""" - raise NotImplementedError( - f"Evaluation of plan {type(self).__name__}" - ) # pragma: no cover + return self.do_evaluate( + *self._non_child_args, + *(child.evaluate(cache=cache) for child in self.children), + ) class PythonScan(IR): @@ -187,6 +224,7 @@ def __init__(self, schema: Schema, options: Any, predicate: expr.NamedExpr | Non self.schema = schema self.options = options self.predicate = predicate + self._non_child_args = (schema, options, predicate) self.children = () raise NotImplementedError("PythonScan not implemented") @@ -259,6 +297,17 @@ def __init__( self.n_rows = n_rows self.row_index = row_index self.predicate = predicate + self._non_child_args = ( + schema, + typ, + reader_options, + paths, + with_columns, + skip_rows, + n_rows, + row_index, + predicate, + ) self.children = () if self.typ not in ("csv", "parquet", "ndjson"): # pragma: no cover # This line is unhittable ATM since IPC/Anonymous scan raise @@ -341,19 +390,28 @@ def get_hashable(self) -> Hashable: self.predicate, ) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate( + cls, + schema: Schema, + typ: str, + reader_options: dict[str, Any], + paths: list[str], + with_columns: list[str] | None, + skip_rows: int, + n_rows: int, + row_index: tuple[str, int] | None, + predicate: expr.NamedExpr | None, + ): """Evaluate and return a dataframe.""" - with_columns = self.with_columns - row_index = self.row_index - n_rows = self.n_rows - if self.typ == "csv": - parse_options = self.reader_options["parse_options"] + if typ == "csv": + parse_options = reader_options["parse_options"] sep = chr(parse_options["separator"]) quote = chr(parse_options["quote_char"]) eol = chr(parse_options["eol_char"]) - if self.reader_options["schema"] is not None: + if reader_options["schema"] is not None: # Reader schema provides names - column_names = list(self.reader_options["schema"]["fields"].keys()) + column_names = list(reader_options["schema"]["fields"].keys()) else: # file provides column names column_names = None @@ -380,8 +438,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: # polars skips blank lines at the beginning of the file pieces = [] read_partial = n_rows != -1 - for p in self.paths: - skiprows = self.reader_options["skip_rows"] + for p in paths: + skiprows = reader_options["skip_rows"] path = Path(p) with path.open() as f: while f.readline() == "\n": @@ -400,7 +458,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: skiprows=skiprows, comment=comment, decimal=decimal, - dtypes=self.schema, + dtypes=schema, nrows=n_rows, ) pieces.append(tbl_w_meta) @@ -419,17 +477,17 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: plc.concatenate.concatenate(list(tables)), colnames[0], ) - elif self.typ == "parquet": + elif typ == "parquet": filters = None - if self.predicate is not None and self.row_index is None: + if predicate is not None and row_index is None: # Can't apply filters during read if we have a row index. 
- filters = to_parquet_filter(self.predicate.value) + filters = to_parquet_filter(predicate.value) tbl_w_meta = plc.io.parquet.read_parquet( - plc.io.SourceInfo(self.paths), + plc.io.SourceInfo(paths), columns=with_columns, filters=filters, nrows=n_rows, - skip_rows=self.skip_rows, + skip_rows=skip_rows, ) df = DataFrame.from_table( tbl_w_meta.tbl, @@ -439,12 +497,12 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: if filters is not None: # Mask must have been applied. return df - elif self.typ == "ndjson": + elif typ == "ndjson": json_schema: list[tuple[str, str, list]] = [ - (name, typ, []) for name, typ in self.schema.items() + (name, typ, []) for name, typ in schema.items() ] plc_tbl_w_meta = plc.io.json.read_json( - plc.io.SourceInfo(self.paths), + plc.io.SourceInfo(paths), lines=True, dtypes=json_schema, prune_columns=True, @@ -454,20 +512,17 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: df = DataFrame.from_table( plc_tbl_w_meta.tbl, plc_tbl_w_meta.column_names(include_children=False) ) - col_order = list(self.schema.keys()) - # TODO: remove condition when dropping support for polars 1.0 - # https://github.com/pola-rs/polars/pull/17363 - if row_index is not None and row_index[0] in self.schema: + col_order = list(schema.keys()) + if row_index is not None: col_order.remove(row_index[0]) - if col_order is not None: - df = df.select(col_order) + df = df.select(col_order) else: raise NotImplementedError( - f"Unhandled scan type: {self.typ}" + f"Unhandled scan type: {typ}" ) # pragma: no cover; post init trips first if row_index is not None: name, offset = row_index - dtype = self.schema[name] + dtype = schema[name] step = plc.interop.from_arrow( pa.scalar(1, type=plc.interop.to_arrow(dtype)) ) @@ -482,13 +537,11 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: name=name, ) df = DataFrame([index, *df.columns]) - assert all( - c.obj.type() == self.schema[name] for name, c in df.column_map.items() - ) - if self.predicate is None: + assert all(c.obj.type() == schema[name] for name, c in df.column_map.items()) + if predicate is None: return df else: - (mask,) = broadcast(self.predicate.evaluate(df), target_length=df.num_rows) + (mask,) = broadcast(predicate.evaluate(df), target_length=df.num_rows) return df.filter(mask) @@ -508,9 +561,21 @@ def __init__(self, schema: Schema, key: int, value: IR): self.schema = schema self.key = key self.children = (value,) + self._non_child_args = (key,) + + @classmethod + def do_evaluate( + cls, key: int, df: DataFrame + ) -> DataFrame: # pragma: no cover; basic evaluation never calls this + """Evaluate and return a dataframe.""" + # Our value has already been computed for us, so let's just + # return it. + return df def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" + # We must override the recursion scheme because we don't want + # to recurse if we're in the cache. 
try: return cache[self.key] except KeyError: @@ -545,6 +610,7 @@ def __init__( self.df = df self.projection = tuple(projection) if projection is not None else None self.predicate = predicate + self._non_child_args = (schema, df, self.projection, predicate) self.children = () def get_hashable(self) -> Hashable: @@ -557,18 +623,25 @@ def get_hashable(self) -> Hashable: schema_hash = tuple(self.schema.items()) return (type(self), schema_hash, id(self.df), self.projection, self.predicate) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate( + cls, + schema: Schema, + df: Any, + projection: tuple[str, ...] | None, + predicate: expr.NamedExpr | None, + ) -> DataFrame: """Evaluate and return a dataframe.""" - pdf = pl.DataFrame._from_pydf(self.df) - if self.projection is not None: - pdf = pdf.select(self.projection) + pdf = pl.DataFrame._from_pydf(df) + if projection is not None: + pdf = pdf.select(projection) df = DataFrame.from_polars(pdf) assert all( c.obj.type() == dtype - for c, dtype in zip(df.columns, self.schema.values(), strict=True) + for c, dtype in zip(df.columns, schema.values(), strict=True) ) - if self.predicate is not None: - (mask,) = broadcast(self.predicate.evaluate(df), target_length=df.num_rows) + if predicate is not None: + (mask,) = broadcast(predicate.evaluate(df), target_length=df.num_rows) return df.filter(mask) else: return df @@ -595,14 +668,19 @@ def __init__( self.exprs = tuple(exprs) self.should_broadcast = should_broadcast self.children = (df,) + self._non_child_args = (self.exprs, should_broadcast) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate( + cls, + exprs: tuple[expr.NamedExpr, ...], + should_broadcast: bool, # noqa: FBT001 + df: DataFrame, + ) -> DataFrame: """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) # Handle any broadcasting - columns = [e.evaluate(df) for e in self.exprs] - if self.should_broadcast: + columns = [e.evaluate(df) for e in exprs] + if should_broadcast: columns = broadcast(*columns) return DataFrame(columns) @@ -625,14 +703,14 @@ def __init__( self.schema = schema self.exprs = tuple(exprs) self.children = (df,) + self._non_child_args = (self.exprs,) - def evaluate( - self, *, cache: MutableMapping[int, DataFrame] - ) -> DataFrame: # pragma: no cover; polars doesn't emit this node yet + @classmethod + def do_evaluate( + cls, exprs: tuple[expr.NamedExpr, ...], df: DataFrame + ) -> DataFrame: # pragma: no cover; not exposed by polars yet """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) - columns = broadcast(*(e.evaluate(df) for e in self.exprs)) + columns = broadcast(*(e.evaluate(df) for e in exprs)) assert all(column.obj.size() == 1 for column in columns) return DataFrame(columns) @@ -681,6 +759,13 @@ def __init__( if any(GroupBy.check_agg(a.value) > 1 for a in self.agg_requests): raise NotImplementedError("Nested aggregations in groupby") self.agg_infos = [req.collect_agg(depth=0) for req in self.agg_requests] + self._non_child_args = ( + self.keys, + self.agg_requests, + maintain_order, + options, + self.agg_infos, + ) @staticmethod def check_agg(agg: expr.Expr) -> int: @@ -710,13 +795,18 @@ def check_agg(agg: expr.Expr) -> int: else: raise NotImplementedError(f"No handler for {agg=}") - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate( + cls, + keys_in: 
Sequence[expr.NamedExpr], + agg_requests: Sequence[expr.NamedExpr], + maintain_order: bool, # noqa: FBT001 + options: Any, + agg_infos: Sequence[expr.AggInfo], + df: DataFrame, + ): """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) - keys = broadcast( - *(k.evaluate(df) for k in self.keys), target_length=df.num_rows - ) + keys = broadcast(*(k.evaluate(df) for k in keys_in), target_length=df.num_rows) sorted = ( plc.types.Sorted.YES if all(k.is_sorted for k in keys) @@ -732,7 +822,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: # TODO: uniquify requests = [] replacements: list[expr.Expr] = [] - for info in self.agg_infos: + for info in agg_infos: for pre_eval, req, rep in info.requests: if pre_eval is None: # A count aggregation, doesn't touch the column, @@ -754,12 +844,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for key, grouped_key in zip(keys, group_keys.columns(), strict=True) ] result_subs = DataFrame(raw_columns) - results = [ - req.evaluate(result_subs, mapping=mapping) for req in self.agg_requests - ] + results = [req.evaluate(result_subs, mapping=mapping) for req in agg_requests] broadcasted = broadcast(*result_keys, *results) # Handle order preservation of groups - if self.maintain_order and not sorted: + if maintain_order and not sorted: # The order we want want = plc.stream_compaction.stable_distinct( plc.Table([k.obj for k in keys]), @@ -799,7 +887,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: ordered_table.columns(), broadcasted, strict=True ) ] - return DataFrame(broadcasted).slice(self.options.slice) + return DataFrame(broadcasted).slice(options.slice) class Join(IR): @@ -841,6 +929,7 @@ def __init__( self.right_on = tuple(right_on) self.options = options self.children = (left, right) + self._non_child_args = (self.left_on, self.right_on, self.options) if any( isinstance(e.value, expr.Literal) for e in itertools.chain(self.left_on, self.right_on) @@ -886,8 +975,8 @@ def _joiners( ) assert_never(how) + @staticmethod def _reorder_maps( - self, left_rows: int, lg: plc.Column, left_policy: plc.copying.OutOfBoundsPolicy, @@ -939,10 +1028,23 @@ def _reorder_maps( [plc.types.NullOrder.AFTER, plc.types.NullOrder.AFTER], ).columns() - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate( + cls, + left_on_exprs: Sequence[expr.NamedExpr], + right_on_exprs: Sequence[expr.NamedExpr], + options: tuple[ + Literal["inner", "left", "right", "full", "semi", "anti", "cross"], + bool, + tuple[int, int] | None, + str, + bool, + ], + left: DataFrame, + right: DataFrame, + ) -> DataFrame: """Evaluate and return a dataframe.""" - left, right = (c.evaluate(cache=cache) for c in self.children) - how, join_nulls, zlice, suffix, coalesce = self.options + how, join_nulls, zlice, suffix, coalesce = options if how == "cross": # Separate implementation, since cross_join returns the # result, not the gather maps @@ -966,14 +1068,14 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: ] return DataFrame([*left_cols, *right_cols]).slice(zlice) # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184 - left_on = DataFrame(broadcast(*(e.evaluate(left) for e in self.left_on))) - right_on = DataFrame(broadcast(*(e.evaluate(right) for e in self.right_on))) + left_on = DataFrame(broadcast(*(e.evaluate(left) for e in left_on_exprs))) + right_on = 
DataFrame(broadcast(*(e.evaluate(right) for e in right_on_exprs))) null_equality = ( plc.types.NullEquality.EQUAL if join_nulls else plc.types.NullEquality.UNEQUAL ) - join_fn, left_policy, right_policy = Join._joiners(how) + join_fn, left_policy, right_policy = cls._joiners(how) if right_policy is None: # Semi join lg = join_fn(left_on.table, right_on.table, null_equality) @@ -987,7 +1089,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: lg, rg = join_fn(left_on.table, right_on.table, null_equality) if how == "left" or how == "right": # Order of left table is preserved - lg, rg = self._reorder_maps( + lg, rg = cls._reorder_maps( left.num_rows, lg, left_policy, right.num_rows, rg, right_policy ) if coalesce and how == "inner": @@ -1046,14 +1148,19 @@ def __init__( self.schema = schema self.columns = tuple(columns) self.should_broadcast = should_broadcast + self._non_child_args = (self.columns, self.should_broadcast) self.children = (df,) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate( + cls, + exprs: Sequence[expr.NamedExpr], + should_broadcast: bool, # noqa: FBT001 + df: DataFrame, + ) -> DataFrame: """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) - columns = [c.evaluate(df) for c in self.columns] - if self.should_broadcast: + columns = [c.evaluate(df) for c in exprs] + if should_broadcast: columns = broadcast(*columns, target_length=df.num_rows) else: # Polars ensures this is true, but let's make sure nothing @@ -1063,7 +1170,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: # table that might have mismatching column lengths will # never be turned into a pylibcudf Table with all columns # by the Select, which is why this is safe. 
- assert all(e.name.startswith("__POLARS_CSER_0x") for e in self.columns) + assert all(e.name.startswith("__POLARS_CSER_0x") for e in exprs) return df.with_columns(columns) @@ -1096,6 +1203,7 @@ def __init__( self.subset = subset self.zlice = zlice self.stable = stable + self._non_child_args = (keep, subset, zlice, stable) self.children = (df,) _KEEP_MAP: ClassVar[dict[str, plc.stream_compaction.DuplicateKeepOption]] = { @@ -1105,33 +1213,39 @@ def __init__( "any": plc.stream_compaction.DuplicateKeepOption.KEEP_ANY, } - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate( + cls, + keep: plc.stream_compaction.DuplicateKeepOption, + subset: frozenset[str] | None, + zlice: tuple[int, int] | None, + stable: bool, # noqa: FBT001 + df: DataFrame, + ): """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) - if self.subset is None: + if subset is None: indices = list(range(df.num_columns)) keys_sorted = all(c.is_sorted for c in df.column_map.values()) else: - indices = [i for i, k in enumerate(df.column_names) if k in self.subset] - keys_sorted = all(df.column_map[name].is_sorted for name in self.subset) + indices = [i for i, k in enumerate(df.column_names) if k in subset] + keys_sorted = all(df.column_map[name].is_sorted for name in subset) if keys_sorted: table = plc.stream_compaction.unique( df.table, indices, - self.keep, + keep, plc.types.NullEquality.EQUAL, ) else: distinct = ( plc.stream_compaction.stable_distinct - if self.stable + if stable else plc.stream_compaction.distinct ) table = distinct( df.table, indices, - self.keep, + keep, plc.types.NullEquality.EQUAL, plc.types.NanEquality.ALL_EQUAL, ) @@ -1142,9 +1256,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for new, old in zip(table.columns(), df.columns, strict=True) ] ) - if keys_sorted or self.stable: + if keys_sorted or stable: result = result.sorted_like(df) - return result.slice(self.zlice) + return result.slice(zlice) class Sort(IR): @@ -1179,29 +1293,39 @@ def __init__( self.null_order = tuple(null_order) self.stable = stable self.zlice = zlice + self._non_child_args = ( + self.by, + self.order, + self.null_order, + self.stable, + self.zlice, + ) self.children = (df,) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate( + cls, + by: Sequence[expr.NamedExpr], + order: Sequence[plc.types.Order], + null_order: Sequence[plc.types.NullOrder], + stable: bool, # noqa: FBT001 + zlice: tuple[int, int] | None, + df: DataFrame, + ) -> DataFrame: """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) - sort_keys = broadcast( - *(k.evaluate(df) for k in self.by), target_length=df.num_rows - ) + sort_keys = broadcast(*(k.evaluate(df) for k in by), target_length=df.num_rows) # TODO: More robust identification here. 
keys_in_result = { k.name: i for i, k in enumerate(sort_keys) if k.name in df.column_map and k.obj is df.column_map[k.name].obj } - do_sort = ( - plc.sorting.stable_sort_by_key if self.stable else plc.sorting.sort_by_key - ) + do_sort = plc.sorting.stable_sort_by_key if stable else plc.sorting.sort_by_key table = do_sort( df.table, plc.Table([k.obj for k in sort_keys]), - list(self.order), - list(self.null_order), + list(order), + list(null_order), ) columns: list[Column] = [] for name, c in zip(df.column_map, table.columns(), strict=True): @@ -1211,11 +1335,11 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: i = keys_in_result[name] column = column.set_sorted( is_sorted=plc.types.Sorted.YES, - order=self.order[i], - null_order=self.null_order[i], + order=order[i], + null_order=null_order[i], ) columns.append(column) - return DataFrame(columns).slice(self.zlice) + return DataFrame(columns).slice(zlice) class Slice(IR): @@ -1232,13 +1356,13 @@ def __init__(self, schema: Schema, offset: int, length: int, df: IR): self.schema = schema self.offset = offset self.length = length + self._non_child_args = (offset, length) self.children = (df,) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate(cls, offset: int, length: int, df: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) - return df.slice((self.offset, self.length)) + return df.slice((offset, length)) class Filter(IR): @@ -1252,13 +1376,13 @@ class Filter(IR): def __init__(self, schema: Schema, mask: expr.NamedExpr, df: IR): self.schema = schema self.mask = mask + self._non_child_args = (mask,) self.children = (df,) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate(cls, mask_expr: expr.NamedExpr, df: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) - (mask,) = broadcast(self.mask.evaluate(df), target_length=df.num_rows) + (mask,) = broadcast(mask_expr.evaluate(df), target_length=df.num_rows) return df.filter(mask) @@ -1270,15 +1394,15 @@ class Projection(IR): def __init__(self, schema: Schema, df: IR): self.schema = schema + self._non_child_args = (schema,) self.children = (df,) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate(cls, schema: Schema, df: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" - (child,) = self.children - df = child.evaluate(cache=cache) # This can reorder things. 
columns = broadcast( - *(df.column_map[name] for name in self.schema), target_length=df.num_rows + *(df.column_map[name] for name in schema), target_length=df.num_rows ) return DataFrame(columns) @@ -1341,33 +1465,41 @@ def __init__(self, schema: Schema, name: str, options: Any, df: IR): "Unpivot cannot cast all input columns to " f"{self.schema[value_name].id()}" ) - self.options = (tuple(indices), tuple(pivotees), variable_name, value_name) + self.options = ( + tuple(indices), + tuple(pivotees), + (variable_name, schema[variable_name]), + (value_name, schema[value_name]), + ) + self._non_child_args = (name, self.options) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate(cls, name: str, options: Any, df: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" - (child,) = self.children - if self.name == "rechunk": + if name == "rechunk": # No-op in our data model # Don't think this appears in a plan tree from python - return child.evaluate(cache=cache) # pragma: no cover - elif self.name == "rename": - df = child.evaluate(cache=cache) + return df # pragma: no cover + elif name == "rename": # final tag is "swapping" which is useful for the # optimiser (it blocks some pushdown operations) - old, new, _ = self.options + old, new, _ = options return df.rename_columns(dict(zip(old, new, strict=True))) - elif self.name == "explode": - df = child.evaluate(cache=cache) - ((to_explode,),) = self.options + elif name == "explode": + ((to_explode,),) = options index = df.column_names.index(to_explode) subset = df.column_names_set - {to_explode} return DataFrame.from_table( plc.lists.explode_outer(df.table, index), df.column_names ).sorted_like(df, subset=subset) - elif self.name == "unpivot": - indices, pivotees, variable_name, value_name = self.options + elif name == "unpivot": + ( + indices, + pivotees, + (variable_name, variable_dtype), + (value_name, value_dtype), + ) = options npiv = len(pivotees) - df = child.evaluate(cache=cache) index_columns = [ Column(col, name=name) for col, name in zip( @@ -1382,7 +1514,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: plc.interop.from_arrow( pa.array( pivotees, - type=plc.interop.to_arrow(self.schema[variable_name]), + type=plc.interop.to_arrow(variable_dtype), ), ) ] @@ -1390,10 +1522,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: df.num_rows, ).columns() value_column = plc.concatenate.concatenate( - [ - df.column_map[pivotee].astype(self.schema[value_name]).obj - for pivotee in pivotees - ] + [df.column_map[pivotee].astype(value_dtype).obj for pivotee in pivotees] ) return DataFrame( [ @@ -1417,18 +1546,20 @@ class Union(IR): def __init__(self, schema: Schema, zlice: tuple[int, int] | None, *children: IR): self.schema = schema self.zlice = zlice + self._non_child_args = (zlice,) self.children = children schema = self.children[0].schema if not all(s.schema == schema for s in self.children[1:]): raise NotImplementedError("Schema mismatch") - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate(cls, zlice: tuple[int, int] | None, *dfs: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" - # TODO: only evaluate what we need if we have a slice - dfs = [df.evaluate(cache=cache) for df in self.children] + # TODO: only evaluate what we need if we have a slice? 
return DataFrame.from_table( - plc.concatenate.concatenate([df.table for df in dfs]), dfs[0].column_names - ).slice(self.zlice) + plc.concatenate.concatenate([df.table for df in dfs]), + dfs[0].column_names, + ).slice(zlice) class HConcat(IR): @@ -1439,6 +1570,7 @@ class HConcat(IR): def __init__(self, schema: Schema, *children: IR): self.schema = schema + self._non_child_args = () self.children = children @staticmethod @@ -1469,18 +1601,22 @@ def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table: ] ) - def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: + @classmethod + def do_evaluate(cls, *dfs: DataFrame) -> DataFrame: """Evaluate and return a dataframe.""" - dfs = [df.evaluate(cache=cache) for df in self.children] max_rows = max(df.num_rows for df in dfs) # Horizontal concatenation extends shorter tables with nulls - dfs = [ - df - if df.num_rows == max_rows - else DataFrame.from_table( - self._extend_with_nulls(df.table, nrows=max_rows - df.num_rows), - df.column_names, + return DataFrame( + itertools.chain.from_iterable( + df.columns + for df in ( + df + if df.num_rows == max_rows + else DataFrame.from_table( + cls._extend_with_nulls(df.table, nrows=max_rows - df.num_rows), + df.column_names, + ) + for df in dfs + ) ) - for df in dfs - ] - return DataFrame(itertools.chain.from_iterable(df.columns for df in dfs)) + ) diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index 74b2cd4e5de..17a94c633f8 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -212,7 +212,11 @@ methods. Plan node definitions live in `cudf_polars/dsl/ir.py`, these all inherit from the base `IR` node. The evaluation of a plan node is done -by implementing the `evaluate` method. +by implementing the `do_evaluate` method. This method takes in +the non-child arguments specified in `_non_child_args`, followed by +pre-evaluated child nodes (`DataFrame` objects). To perform the +evaluation, one should use the base class (generic) `evaluate` method +which handles the recursive evaluation of child nodes. To translate the plan node, add a case handler in `translate_ir` that lives in `cudf_polars/dsl/translate.py`. From ac5b3ed1fd69abc424255b07bb66cebea5666f08 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 5 Nov 2024 18:43:44 -0500 Subject: [PATCH 06/12] Deprecate single component extraction methods in libcudf (#17221) This PR deprecates the single component extraction methods (eg. `cudf::datetime::extract_year`) that are already covered by `cudf::datetime::extract_datetime_component`. xref #17143 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - David Wendt (https://github.com/davidwendt) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/17221 --- cpp/include/cudf/datetime.hpp | 40 +++-- cpp/tests/datetime/datetime_ops_test.cpp | 209 +++++++++-------------- cpp/tests/streams/datetime_test.cpp | 30 ++-- 3 files changed, 135 insertions(+), 144 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 1eaea5b6374..1f6e86d0389 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -58,6 +58,8 @@ enum class datetime_component : uint8_t { * @brief Extracts year from any datetime type and returns an int16_t * cudf::column. 
* + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -65,7 +67,7 @@ enum class datetime_component : uint8_t { * @returns cudf::column of the extracted int16_t years * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_year( +[[deprecated]] std::unique_ptr extract_year( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -74,6 +76,8 @@ std::unique_ptr extract_year( * @brief Extracts month from any datetime type and returns an int16_t * cudf::column. * + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -81,7 +85,7 @@ std::unique_ptr extract_year( * @returns cudf::column of the extracted int16_t months * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_month( +[[deprecated]] std::unique_ptr extract_month( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -90,6 +94,8 @@ std::unique_ptr extract_month( * @brief Extracts day from any datetime type and returns an int16_t * cudf::column. * + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -97,7 +103,7 @@ std::unique_ptr extract_month( * @returns cudf::column of the extracted int16_t days * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_day( +[[deprecated]] std::unique_ptr extract_day( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -106,6 +112,8 @@ std::unique_ptr extract_day( * @brief Extracts a weekday from any datetime type and returns an int16_t * cudf::column. * + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -113,7 +121,7 @@ std::unique_ptr extract_day( * @returns cudf::column of the extracted int16_t days * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_weekday( +[[deprecated]] std::unique_ptr extract_weekday( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -122,6 +130,8 @@ std::unique_ptr extract_weekday( * @brief Extracts hour from any datetime type and returns an int16_t * cudf::column. 
* + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -129,7 +139,7 @@ std::unique_ptr extract_weekday( * @returns cudf::column of the extracted int16_t hours * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_hour( +[[deprecated]] std::unique_ptr extract_hour( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -138,6 +148,8 @@ std::unique_ptr extract_hour( * @brief Extracts minute from any datetime type and returns an int16_t * cudf::column. * + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -145,7 +157,7 @@ std::unique_ptr extract_hour( * @returns cudf::column of the extracted int16_t minutes * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_minute( +[[deprecated]] std::unique_ptr extract_minute( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -154,6 +166,8 @@ std::unique_ptr extract_minute( * @brief Extracts second from any datetime type and returns an int16_t * cudf::column. * + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -161,7 +175,7 @@ std::unique_ptr extract_minute( * @returns cudf::column of the extracted int16_t seconds * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_second( +[[deprecated]] std::unique_ptr extract_second( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -173,6 +187,8 @@ std::unique_ptr extract_second( * A millisecond fraction is only the 3 digits that make up the millisecond portion of a duration. * For example, the millisecond fraction of 1.234567890 seconds is 234. 
* + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -180,7 +196,7 @@ std::unique_ptr extract_second( * @returns cudf::column of the extracted int16_t milliseconds * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_millisecond_fraction( +[[deprecated]] std::unique_ptr extract_millisecond_fraction( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -192,6 +208,8 @@ std::unique_ptr extract_millisecond_fraction( * A microsecond fraction is only the 3 digits that make up the microsecond portion of a duration. * For example, the microsecond fraction of 1.234567890 seconds is 567. * + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -199,7 +217,7 @@ std::unique_ptr extract_millisecond_fraction( * @returns cudf::column of the extracted int16_t microseconds * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_microsecond_fraction( +[[deprecated]] std::unique_ptr extract_microsecond_fraction( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); @@ -211,6 +229,8 @@ std::unique_ptr extract_microsecond_fraction( * A nanosecond fraction is only the 3 digits that make up the nanosecond portion of a duration. * For example, the nanosecond fraction of 1.234567890 seconds is 890. 
* + * @deprecated Deprecated in 24.12, to be removed in 25.02 + * * @param column cudf::column_view of the input datetime values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column @@ -218,7 +238,7 @@ std::unique_ptr extract_microsecond_fraction( * @returns cudf::column of the extracted int16_t nanoseconds * @throw cudf::logic_error if input column datatype is not TIMESTAMP */ -std::unique_ptr extract_nanosecond_fraction( +[[deprecated]] std::unique_ptr extract_nanosecond_fraction( cudf::column_view const& column, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 44f99adc0e9..1d1deb42a51 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -52,16 +52,26 @@ TYPED_TEST(NonTimestampTest, TestThrowsOnNonTimestamp) cudf::data_type dtype{cudf::type_to_id()}; cudf::column col{dtype, 0, rmm::device_buffer{}, rmm::device_buffer{}, 0}; - EXPECT_THROW(extract_year(col), cudf::logic_error); - EXPECT_THROW(extract_month(col), cudf::logic_error); - EXPECT_THROW(extract_day(col), cudf::logic_error); - EXPECT_THROW(extract_weekday(col), cudf::logic_error); - EXPECT_THROW(extract_hour(col), cudf::logic_error); - EXPECT_THROW(extract_minute(col), cudf::logic_error); - EXPECT_THROW(extract_second(col), cudf::logic_error); - EXPECT_THROW(extract_millisecond_fraction(col), cudf::logic_error); - EXPECT_THROW(extract_microsecond_fraction(col), cudf::logic_error); - EXPECT_THROW(extract_nanosecond_fraction(col), cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::YEAR), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::MONTH), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::DAY), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::WEEKDAY), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::HOUR), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::MINUTE), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::SECOND), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::MILLISECOND), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::MICROSECOND), + cudf::logic_error); + EXPECT_THROW(extract_datetime_component(col, cudf::datetime::datetime_component::NANOSECOND), + cudf::logic_error); EXPECT_THROW(last_day_of_month(col), cudf::logic_error); EXPECT_THROW(day_of_year(col), cudf::logic_error); EXPECT_THROW(add_calendrical_months(col, *cudf::make_empty_column(cudf::type_id::INT16)), @@ -104,96 +114,6 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) 987234623 // 1970-01-01 00:00:00.987234623 GMT }; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_D), - fixed_width_column_wrapper{1965, 2018, 2023}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_s), - fixed_width_column_wrapper{1965, 2018, 2023}); - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_ms), - fixed_width_column_wrapper{1965, 2018, 2023}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_ns), - fixed_width_column_wrapper{1969, 1970, 1970}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_D), - fixed_width_column_wrapper{10, 7, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_s), - fixed_width_column_wrapper{10, 7, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_ms), - fixed_width_column_wrapper{10, 7, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_ns), - fixed_width_column_wrapper{12, 1, 1}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_D), - fixed_width_column_wrapper{26, 4, 25}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_s), - fixed_width_column_wrapper{26, 4, 25}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_ms), - fixed_width_column_wrapper{26, 4, 25}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_ns), - fixed_width_column_wrapper{31, 1, 1}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_D), - fixed_width_column_wrapper{2, 3, 3}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_s), - fixed_width_column_wrapper{2, 3, 3}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_ms), - fixed_width_column_wrapper{2, 3, 3}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_ms), - fixed_width_column_wrapper{2, 3, 3}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_D), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_s), - fixed_width_column_wrapper{14, 12, 7}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_ms), - fixed_width_column_wrapper{14, 12, 7}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_ns), - fixed_width_column_wrapper{23, 0, 0}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_D), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_s), - fixed_width_column_wrapper{1, 0, 32}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ms), - fixed_width_column_wrapper{1, 0, 32}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ns), - fixed_width_column_wrapper{59, 0, 0}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_D), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_s), - fixed_width_column_wrapper{12, 0, 12}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_ms), - fixed_width_column_wrapper{12, 0, 12}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ns), - fixed_width_column_wrapper{59, 0, 0}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_D), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_s), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_ms), - fixed_width_column_wrapper{762, 0, 929}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_ns), - fixed_width_column_wrapper{976, 23, 987}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_D), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_s), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_ms), - 
fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_ns), - fixed_width_column_wrapper{675, 432, 234}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_D), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_s), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ms), - fixed_width_column_wrapper{0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns), - fixed_width_column_wrapper{766, 424, 623}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL( *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::YEAR), fixed_width_column_wrapper{1965, 2018, 2023}); @@ -346,16 +266,29 @@ TYPED_TEST(TypedDatetimeOpsTest, TestEmptyColumns) cudf::column int16s{int16s_dtype, 0, rmm::device_buffer{}, rmm::device_buffer{}, 0}; cudf::column timestamps{timestamps_dtype, 0, rmm::device_buffer{}, rmm::device_buffer{}, 0}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps), int16s); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::YEAR), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::MONTH), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::DAY), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::WEEKDAY), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::HOUR), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::MINUTE), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::SECOND), int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::MILLISECOND), + int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::MICROSECOND), + int16s); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::NANOSECOND), + int16s); } TYPED_TEST(TypedDatetimeOpsTest, TestExtractingGeneratedDatetimeComponents) @@ -385,13 +318,27 @@ TYPED_TEST(TypedDatetimeOpsTest, TestExtractingGeneratedDatetimeComponents) expected_seconds = fixed_width_column_wrapper{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; } - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps), expected_years); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps), expected_months); - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps), expected_days); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps), expected_weekdays); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps), expected_hours); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps), expected_minutes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps), expected_seconds); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::YEAR), + expected_years); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::MONTH), + expected_months); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::DAY), + expected_days); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::WEEKDAY), + expected_weekdays); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::HOUR), + expected_hours); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::MINUTE), + expected_minutes); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::SECOND), + expected_seconds); } TYPED_TEST(TypedDatetimeOpsTest, TestExtractingGeneratedNullableDatetimeComponents) @@ -441,13 +388,27 @@ TYPED_TEST(TypedDatetimeOpsTest, TestExtractingGeneratedNullableDatetimeComponen {true, false, true, false, true, false, true, false, true, false}}; } - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps), expected_years); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps), expected_months); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps), expected_days); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps), expected_weekdays); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps), expected_hours); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps), expected_minutes); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps), expected_seconds); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::YEAR), + expected_years); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::MONTH), + expected_months); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::DAY), + expected_days); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::WEEKDAY), + expected_weekdays); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::HOUR), + expected_hours); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::MINUTE), + expected_minutes); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps, cudf::datetime::datetime_component::SECOND), + expected_seconds); } TEST_F(BasicDatetimeOpsTest, TestLastDayOfMonthWithSeconds) diff --git a/cpp/tests/streams/datetime_test.cpp b/cpp/tests/streams/datetime_test.cpp index 82629156fa6..29b302c3637 100644 --- a/cpp/tests/streams/datetime_test.cpp +++ b/cpp/tests/streams/datetime_test.cpp @@ -35,52 +35,62 @@ class DatetimeTest : public cudf::test::BaseFixture { TEST_F(DatetimeTest, ExtractYear) { - 
cudf::datetime::extract_year(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::YEAR, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractMonth) { - cudf::datetime::extract_month(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::MONTH, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractDay) { - cudf::datetime::extract_day(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::DAY, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractWeekday) { - cudf::datetime::extract_weekday(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::WEEKDAY, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractHour) { - cudf::datetime::extract_hour(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::HOUR, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractMinute) { - cudf::datetime::extract_minute(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::MINUTE, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractSecond) { - cudf::datetime::extract_second(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::SECOND, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractMillisecondFraction) { - cudf::datetime::extract_millisecond_fraction(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::MILLISECOND, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractMicrosecondFraction) { - cudf::datetime::extract_microsecond_fraction(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::MICROSECOND, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, ExtractNanosecondFraction) { - cudf::datetime::extract_nanosecond_fraction(timestamps, cudf::test::get_default_stream()); + cudf::datetime::extract_datetime_component( + timestamps, cudf::datetime::datetime_component::NANOSECOND, cudf::test::get_default_stream()); } TEST_F(DatetimeTest, LastDayOfMonth) From adf32694e7b4eb9f91e928bf6dbf0818b97bcf35 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 Nov 2024 09:26:58 -0800 Subject: [PATCH 07/12] Search for kvikio with lowercase (#17243) ## Description The case-sensitive name KvikIO is will throw off `find_package` searches, particularly after https://github.com/rapidsai/devcontainers/pull/414 make the usage consistent in devcontainers. ## Checklist - [x] I am familiar with the [Contributing Guidelines](https://github.com/rapidsai/cudf/blob/HEAD/CONTRIBUTING.md). - [x] New or existing tests cover these changes. - [x] The documentation is up to date with these changes. 
--- cpp/cmake/thirdparty/get_kvikio.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_kvikio.cmake b/cpp/cmake/thirdparty/get_kvikio.cmake index 20712beec41..c949f48505e 100644 --- a/cpp/cmake/thirdparty/get_kvikio.cmake +++ b/cpp/cmake/thirdparty/get_kvikio.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -16,7 +16,7 @@ function(find_and_configure_kvikio VERSION) rapids_cpm_find( - KvikIO ${VERSION} + kvikio ${VERSION} GLOBAL_TARGETS kvikio::kvikio CPM_ARGS GIT_REPOSITORY https://github.com/rapidsai/kvikio.git From 06b3f83b3e7f1b1364973be34f58fac4caf773f3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 6 Nov 2024 16:54:28 -0500 Subject: [PATCH 08/12] Disallow cuda-python 12.6.1 and 11.8.4 (#17253) Due to a bug in cuda-python we must disallow cuda-python 12.6.1 and 11.8.4. This PR disallows those versions. It also silences new cuda-python deprecation warnings so that our test suite passes. See https://github.com/rapidsai/build-planning/issues/116 for more information. --------- Co-authored-by: James Lamb --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 4 ++-- conda/recipes/pylibcudf/meta.yaml | 4 ++-- dependencies.yaml | 8 ++++---- python/cudf/pyproject.toml | 4 +++- python/cudf_kafka/pyproject.toml | 4 +++- python/cudf_polars/pyproject.toml | 4 +++- python/custreamz/pyproject.toml | 2 ++ python/dask_cudf/pyproject.toml | 2 ++ python/pylibcudf/pyproject.toml | 4 +++- 11 files changed, 26 insertions(+), 14 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 9d9fec97731..ace55a15c09 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -19,7 +19,7 @@ dependencies: - cramjam - cubinlinker - cuda-nvtx=11.8 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,!=11.8.4 - cuda-sanitizer-api=11.8.86 - cuda-version=11.8 - cudatoolkit diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 19e3eafd641..d20db44497e 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,!=12.6.1 - cuda-sanitizer-api - cuda-version=12.5 - cupy>=12.0.0 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 2c254415318..6debcb281b1 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -91,7 +91,7 @@ requirements: - cudatoolkit - ptxcompiler >=0.7.0 - cubinlinker # CUDA enhanced compatibility. 
- - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.7.1,<12.0a0,!=11.8.4 {% else %} - cuda-cudart - libcufile # [linux64] @@ -100,7 +100,7 @@ requirements: # TODO: Add nvjitlink here # xref: https://github.com/rapidsai/cudf/issues/12822 - cuda-nvrtc - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.0,<13.0a0,!=12.6.1 - pynvjitlink {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index 3d965f30986..92ca495f972 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -83,9 +83,9 @@ requirements: - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.7.1,<12.0a0,!=11.8.4 {% else %} - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.0,<13.0a0,!=12.6.1 {% endif %} - nvtx >=0.2.1 - packaging diff --git a/dependencies.yaml b/dependencies.yaml index 90255ca674c..cc31619c217 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -658,10 +658,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cuda-python>=12.0,<13.0a0 + - cuda-python>=12.0,<13.0a0,!=12.6.1 - matrix: {cuda: "11.*"} packages: &run_pylibcudf_packages_all_cu11 - - cuda-python>=11.7.1,<12.0a0 + - cuda-python>=11.7.1,<12.0a0,!=11.8.4 - {matrix: null, packages: *run_pylibcudf_packages_all_cu11} run_cudf: common: @@ -684,10 +684,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cuda-python>=12.0,<13.0a0 + - cuda-python>=12.0,<13.0a0,!=12.6.1 - matrix: {cuda: "11.*"} packages: &run_cudf_packages_all_cu11 - - cuda-python>=11.7.1,<12.0a0 + - cuda-python>=11.7.1,<12.0a0,!=11.8.4 - {matrix: null, packages: *run_cudf_packages_all_cu11} - output_types: conda matrices: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index b6105c17b3e..53f22a11e6b 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -20,7 +20,7 @@ requires-python = ">=3.10" dependencies = [ "cachetools", "cubinlinker", - "cuda-python>=11.7.1,<12.0a0", + "cuda-python>=11.7.1,<12.0a0,!=11.8.4", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "libcudf==24.12.*,>=0.0.0a0", @@ -90,6 +90,8 @@ filterwarnings = [ "error", "ignore:::.*xdist.*", "ignore:::.*pytest.*", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", # some third-party dependencies (e.g. 
'boto3') still using datetime.datetime.utcnow() "ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore", # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+ diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 667cd7b1db8..ec0bc0eb22b 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -51,7 +51,9 @@ rapids = ["rmm", "cudf", "dask_cudf"] addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ - "error" + "error", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", ] xfail_strict = true diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index a2c62ef9460..2e75dff5c9e 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -53,7 +53,9 @@ version = {file = "cudf_polars/VERSION"} addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ - "error" + "error", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", ] xfail_strict = true diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index a8ab05a3922..d3baf3bf4d2 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -85,6 +85,8 @@ addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", "ignore:unclosed =11.7.1,<12.0a0", + "cuda-python>=11.7.1,<12.0a0,!=11.8.4", "libcudf==24.12.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", @@ -74,6 +74,8 @@ addopts = "--tb=native --strict-config --strict-markers --import-mode=importlib" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", "ignore:::.*xdist.*", "ignore:::.*pytest.*" ] From 57900dee500a1a051393dea438d32d94ecd4de61 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 7 Nov 2024 02:47:47 +0100 Subject: [PATCH 09/12] KvikIO shared library (#17239) Update cudf to use the new KvikIO shared library: https://github.com/rapidsai/kvikio/pull/527 #### Tasks - [x] Wait for the [KvikIO shared library PR](https://github.com/rapidsai/kvikio/pull/527) to be merged. - [x] Revert the use of the [KvikIO shared library](https://github.com/rapidsai/kvikio/pull/527) in CI: https://github.com/rapidsai/cudf/commit/2d8eeafe4959357a17f6ad488811837e0a07ba65. Authors: - Mads R. B. 
Kristensen (https://github.com/madsbk) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/17239 --- ci/build_wheel_cudf.sh | 1 + ci/build_wheel_libcudf.sh | 1 + ci/build_wheel_pylibcudf.sh | 1 + dependencies.yaml | 1 + python/libcudf/libcudf/load.py | 11 +++++++++++ python/libcudf/pyproject.toml | 1 + 6 files changed, 16 insertions(+) diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index fef4416a366..ae4eb0d5c66 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -23,6 +23,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" python -m auditwheel repair \ --exclude libcudf.so \ --exclude libnvcomp.so \ + --exclude libkvikio.so \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh index b3d6778ea04..aabd3814a24 100755 --- a/ci/build_wheel_libcudf.sh +++ b/ci/build_wheel_libcudf.sh @@ -33,6 +33,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" mkdir -p ${package_dir}/final_dist python -m auditwheel repair \ --exclude libnvcomp.so.4 \ + --exclude libkvikio.so \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index 839d98846fe..c4a89f20f5f 100755 --- a/ci/build_wheel_pylibcudf.sh +++ b/ci/build_wheel_pylibcudf.sh @@ -21,6 +21,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" python -m auditwheel repair \ --exclude libcudf.so \ --exclude libnvcomp.so \ + --exclude libkvikio.so \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* diff --git a/dependencies.yaml b/dependencies.yaml index cc31619c217..41ac6ce1808 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -177,6 +177,7 @@ files: extras: table: project includes: + - depends_on_libkvikio - depends_on_nvcomp py_build_pylibcudf: output: pyproject diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py index ba134710868..bf27ecfa7f5 100644 --- a/python/libcudf/libcudf/load.py +++ b/python/libcudf/libcudf/load.py @@ -18,6 +18,17 @@ def load_library(): + try: + # libkvikio must be loaded before libcudf because libcudf references its symbols + import libkvikio + + libkvikio.load_library() + except ModuleNotFoundError: + # libcudf's runtime dependency on libkvikio may be satisfied by a natively + # installed library or a conda package, in which case the import will fail and + # we assume the library is discoverable on system paths. + pass + # Dynamically load libcudf.so. Prefer a system library if one is present to # avoid clobbering symbols that other packages might expect, but if no # other library is present use the one in the wheel. diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index c6d9ae56467..62726bb0df4 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -38,6 +38,7 @@ classifiers = [ "Environment :: GPU :: NVIDIA CUDA", ] dependencies = [ + "libkvikio==24.12.*,>=0.0.0a0", "nvidia-nvcomp==4.1.0.6", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
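For readers unfamiliar with the load-ordering issue this patch addresses, the minimal sketch below illustrates the idea behind the `load.py` change: the dependency's shared library must be opened, with its symbols made globally visible, before the library that references it. This is only an illustration using `ctypes`, not the actual `libcudf.load_library()` implementation; the helper name `load_with_dependency_first` is invented for the example, and it assumes `libkvikio.so` and `libcudf.so` are discoverable on the dynamic loader's search path.

```python
import ctypes


def load_with_dependency_first() -> ctypes.CDLL:
    """Open libkvikio before libcudf so libcudf's KvikIO symbol references resolve."""
    try:
        # RTLD_GLOBAL makes libkvikio's symbols visible to libraries loaded afterwards.
        ctypes.CDLL("libkvikio.so", mode=ctypes.RTLD_GLOBAL)
    except OSError:
        # Fall back to whatever libkvikio the system loader can resolve on its own
        # (mirrors the "natively installed library" case handled in load.py).
        pass
    # With the dependency in place, the dependent library loads without unresolved symbols.
    return ctypes.CDLL("libcudf.so", mode=ctypes.RTLD_GLOBAL)


if __name__ == "__main__":
    print("loaded:", load_with_dependency_first())
```

In the patch itself the same ordering is achieved by importing `libkvikio` and calling `libkvikio.load_library()` at the top of `libcudf.load_library()`, before `libcudf.so` is opened.
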
From 29484cb87a417e2e36c8f3b6cd2ec961abec3156 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 7 Nov 2024 00:51:59 -0600 Subject: [PATCH 10/12] Put a ceiling on cuda-python (#17264) Follow-up to #17253 Contributes to https://github.com/rapidsai/build-planning/issues/116 That PR used `!=` requirements to skip a particular version of `cuda-python` that `cudf` and `pylibcudf` were incompatible with. A newer version of `cuda-python` (12.6.2 for CUDA 12, 11.8.5 for CUDA 11) was just released, and it also causes some build issues for RAPIDS libraries: https://github.com/rapidsai/cuvs/pull/445#issuecomment-2461146449 To unblock CI across RAPIDS, this proposes **temporarily** switching to ceilings on the `cuda-python` dependency here. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17264 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 4 ++-- conda/recipes/pylibcudf/meta.yaml | 4 ++-- dependencies.yaml | 8 ++++---- python/cudf/pyproject.toml | 2 +- python/pylibcudf/pyproject.toml | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index ace55a15c09..8a64ebf40c5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -19,7 +19,7 @@ dependencies: - cramjam - cubinlinker - cuda-nvtx=11.8 -- cuda-python>=11.7.1,<12.0a0,!=11.8.4 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-sanitizer-api=11.8.86 - cuda-version=11.8 - cudatoolkit diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index d20db44497e..5f779c3170f 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-python>=12.0,<13.0a0,!=12.6.1 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-sanitizer-api - cuda-version=12.5 - cupy>=12.0.0 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 6debcb281b1..2aafcae072d 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -91,7 +91,7 @@ requirements: - cudatoolkit - ptxcompiler >=0.7.0 - cubinlinker # CUDA enhanced compatibility. 
- - cuda-python >=11.7.1,<12.0a0,!=11.8.4 + - cuda-python >=11.7.1,<12.0a0,<=11.8.3 {% else %} - cuda-cudart - libcufile # [linux64] @@ -100,7 +100,7 @@ requirements: # TODO: Add nvjitlink here # xref: https://github.com/rapidsai/cudf/issues/12822 - cuda-nvrtc - - cuda-python >=12.0,<13.0a0,!=12.6.1 + - cuda-python >=12.0,<13.0a0,<=12.6.0 - pynvjitlink {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index 92ca495f972..ec3fcd59c62 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -83,9 +83,9 @@ requirements: - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0,!=11.8.4 + - cuda-python >=11.7.1,<12.0a0,<=11.8.3 {% else %} - - cuda-python >=12.0,<13.0a0,!=12.6.1 + - cuda-python >=12.0,<13.0a0,<=12.6.0 {% endif %} - nvtx >=0.2.1 - packaging diff --git a/dependencies.yaml b/dependencies.yaml index 41ac6ce1808..4c6aefe996f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -659,10 +659,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cuda-python>=12.0,<13.0a0,!=12.6.1 + - cuda-python>=12.0,<13.0a0,<=12.6.0 - matrix: {cuda: "11.*"} packages: &run_pylibcudf_packages_all_cu11 - - cuda-python>=11.7.1,<12.0a0,!=11.8.4 + - cuda-python>=11.7.1,<12.0a0,<=11.8.3 - {matrix: null, packages: *run_pylibcudf_packages_all_cu11} run_cudf: common: @@ -685,10 +685,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cuda-python>=12.0,<13.0a0,!=12.6.1 + - cuda-python>=12.0,<13.0a0,<=12.6.0 - matrix: {cuda: "11.*"} packages: &run_cudf_packages_all_cu11 - - cuda-python>=11.7.1,<12.0a0,!=11.8.4 + - cuda-python>=11.7.1,<12.0a0,<=11.8.3 - {matrix: null, packages: *run_cudf_packages_all_cu11} - output_types: conda matrices: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 53f22a11e6b..1eadceaaccd 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -20,7 +20,7 @@ requires-python = ">=3.10" dependencies = [ "cachetools", "cubinlinker", - "cuda-python>=11.7.1,<12.0a0,!=11.8.4", + "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "libcudf==24.12.*,>=0.0.0a0", diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index e8052dfba4c..b2cec80f484 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "cuda-python>=11.7.1,<12.0a0,!=11.8.4", + "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "libcudf==24.12.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", From bbd3b43719545754e9a1f6b204aad5b143f48419 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Thu, 7 Nov 2024 01:57:47 -0800 Subject: [PATCH 11/12] Fix the example in documentation for `get_dremel_data()` (#17242) Closes #11396. 
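Before the documentation fix below, a brief aside on the constraint change in #17264 above (not part of either patch): a minimal sketch, using the `packaging` library already listed in the pyproject.toml hunks, of why the `<=` ceiling was needed. The earlier `!=` exclusion still admits the newly released 12.6.2, while the ceiling stops at 12.6.0.

```python
# Minimal sketch, not part of the patch: compare the old exclusion-style
# requirement with the ceiling introduced in #17264 using `packaging`.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

exclusion = SpecifierSet(">=12.0,<13.0a0,!=12.6.1")  # previous requirement
ceiling = SpecifierSet(">=12.0,<13.0a0,<=12.6.0")    # requirement from #17264

for candidate in ("12.6.0", "12.6.1", "12.6.2"):
    version = Version(candidate)
    print(candidate, "exclusion:", version in exclusion, "ceiling:", version in ceiling)
# 12.6.0 -> allowed by both; 12.6.1 -> rejected by both;
# 12.6.2 -> allowed by the exclusion but rejected by the ceiling.
```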
Fixes the example in the documentation of `get_dremel_data()` Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - David Wendt (https://github.com/davidwendt) - Vukasin Milovanovic (https://github.com/vuule) - Mike Wilson (https://github.com/hyperbolic2346) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/17242 --- cpp/include/cudf/lists/detail/dremel.hpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 96ee30dd261..f45da8e8d8d 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -58,7 +58,7 @@ struct dremel_data { }; /** - * @brief Get the dremel offsets and repetition and definition levels for a LIST column + * @brief Get the dremel offsets, repetition levels, and definition levels for a LIST column * * Dremel is a query system created by Google for ad hoc data analysis. The Dremel engine is * described in depth in the paper "Dremel: Interactive Analysis of Web-Scale @@ -74,7 +74,7 @@ struct dremel_data { * * http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/ * https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da - * https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet + * https://blog.x.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet * * The remainder of this documentation assumes familiarity with the Dremel concepts. * @@ -102,16 +102,17 @@ struct dremel_data { * ``` * We can represent it in cudf format with two level of offsets like this: * ``` - * Level 0 offsets = {0, 0, 3, 5, 6} + * Level 0 offsets = {0, 0, 3, 4} * Level 1 offsets = {0, 0, 3, 5, 5} * Values = {1, 2, 3, 4, 5} * ``` - * The desired result of this function is the repetition and definition level values that - * correspond to the data values: + * This function returns the dremel offsets, repetition levels, and definition level + * values that correspond to the data values: * ``` - * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} - * def = { 0 1, 2, 2, 2, 2, 2, 1 } - * rep = { 0, 0, 0, 2, 2, 1, 2, 0 } + * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} + * dremel_offsets = { 0, 1, 7, 8} + * def_levels = { 0, 1, 2, 2, 2, 2, 2, 1 } + * rep_levels = { 0, 0, 1, 2, 2, 1, 2, 0 } * ``` * * Since repetition and definition levels arrays contain a value for each empty list, the size of From e29e0ab477f4a541752a578f8769d8dd816ffbe8 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 7 Nov 2024 06:14:58 -0500 Subject: [PATCH 12/12] Move strings/numeric convert benchmarks to nvbench (#17255) Moves the `cpp/benchmarks/string/convert_numerics.cpp` and `cpp/benchmarks/string/convert_fixed_point.cpp` benchmark implementations from google-bench to nvbench. 
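Returning briefly to the corrected `get_dremel_data()` example in #17242 above (this sketch is not part of either patch): a small pure-Python calculation, assuming a non-nullable two-level LIST column, that reproduces the documented offsets and levels for `col = {[], [[], [1, 2, 3], [4, 5]], [[]]}`.

```python
# Minimal sketch, not part of the patch: reproduce the corrected dremel values
# from #17242 for a non-nullable two-level LIST column (no null handling).
def dremel_levels(rows):
    defs, reps, offsets = [], [], [0]
    for row in rows:
        if not row:                        # empty top-level row
            defs.append(0)
            reps.append(0)
        else:
            for i, inner in enumerate(row):
                first_rep = 0 if i == 0 else 1   # a new inner list within the same row
                if not inner:              # empty inner list
                    defs.append(1)
                    reps.append(first_rep)
                else:
                    for j, _ in enumerate(inner):
                        defs.append(2)     # leaf value present, fully defined
                        reps.append(first_rep if j == 0 else 2)
        offsets.append(len(defs))          # one dremel offset entry per row boundary
    return offsets, reps, defs

col = [[], [[], [1, 2, 3], [4, 5]], [[]]]
offsets, rep_levels, def_levels = dremel_levels(col)
print(offsets)     # [0, 1, 7, 8]
print(def_levels)  # [0, 1, 2, 2, 2, 2, 2, 1]
print(rep_levels)  # [0, 0, 1, 2, 2, 1, 2, 0]
```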
Authors: - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17255 --- cpp/benchmarks/CMakeLists.txt | 4 +- cpp/benchmarks/string/convert_fixed_point.cpp | 111 +++++--------- cpp/benchmarks/string/convert_numerics.cpp | 138 ++++++------------ 3 files changed, 79 insertions(+), 174 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 68781889c53..bdc360c082b 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -358,8 +358,6 @@ ConfigureBench( STRINGS_BENCH string/convert_datetime.cpp string/convert_durations.cpp - string/convert_fixed_point.cpp - string/convert_numerics.cpp string/copy.cu string/factory.cu string/filter.cpp @@ -375,6 +373,8 @@ ConfigureNVBench( string/char_types.cpp string/combine.cpp string/contains.cpp + string/convert_fixed_point.cpp + string/convert_numerics.cpp string/copy_if_else.cpp string/copy_range.cpp string/count.cpp diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp index e5bd794e405..97e114c0795 100644 --- a/cpp/benchmarks/string/convert_fixed_point.cpp +++ b/cpp/benchmarks/string/convert_fixed_point.cpp @@ -16,93 +16,48 @@ #include #include -#include #include #include #include -namespace { +#include -std::unique_ptr get_strings_column(cudf::size_type rows) -{ - auto result = - create_random_column(cudf::type_id::FLOAT32, row_count{static_cast(rows)}); - return cudf::strings::from_floats(result->view()); -} - -} // anonymous namespace - -class StringsToFixedPoint : public cudf::benchmark {}; - -template -void convert_to_fixed_point(benchmark::State& state) -{ - auto const rows = static_cast(state.range(0)); - auto const strings_col = get_strings_column(rows); - auto const strings_view = cudf::strings_column_view(strings_col->view()); - auto const dtype = cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}; - - for (auto _ : state) { - cuda_event_timer raii(state, true); - auto volatile results = cudf::strings::to_fixed_point(strings_view, dtype); - } +using Types = nvbench::type_list; - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype))); -} - -class StringsFromFixedPoint : public cudf::benchmark {}; +NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal32, "decimal32", "decimal32"); +NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal64, "decimal64", "decimal64"); -template -void convert_from_fixed_point(benchmark::State& state) +template +void bench_convert_fixed_point(nvbench::state& state, nvbench::type_list) { - auto const rows = static_cast(state.range(0)); - auto const strings_col = get_strings_column(rows); - auto const dtype = cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}; - auto const fp_col = - cudf::strings::to_fixed_point(cudf::strings_column_view(strings_col->view()), dtype); - - std::unique_ptr results = nullptr; - - for (auto _ : state) { - cuda_event_timer raii(state, true); - results = cudf::strings::from_fixed_point(fp_col->view()); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const from_num = state.get_string("dir") == "from"; + + auto const data_type = cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}; + auto const fp_col = 
create_random_column(data_type.id(), row_count{num_rows}); + + auto const strings_col = cudf::strings::from_fixed_point(fp_col->view()); + auto const sv = cudf::strings_column_view(strings_col->view()); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + if (from_num) { + state.add_global_memory_reads(num_rows * cudf::size_of(data_type)); + state.add_global_memory_writes(sv.chars_size(stream)); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::to_fixed_point(sv, data_type); }); + } else { + state.add_global_memory_reads(sv.chars_size(stream)); + state.add_global_memory_writes(num_rows * cudf::size_of(data_type)); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::from_fixed_point(fp_col->view()); }); } - - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) + - rows * cudf::size_of(dtype))); } -#define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type) \ - BENCHMARK_DEFINE_F(StringsToFixedPoint, name)(::benchmark::State & state) \ - { \ - convert_to_fixed_point(state); \ - } \ - BENCHMARK_REGISTER_F(StringsToFixedPoint, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 12, 1 << 24) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -#define CONVERT_FROM_FIXED_POINT_BMD(name, fixed_point_type) \ - BENCHMARK_DEFINE_F(StringsFromFixedPoint, name)(::benchmark::State & state) \ - { \ - convert_from_fixed_point(state); \ - } \ - BENCHMARK_REGISTER_F(StringsFromFixedPoint, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 12, 1 << 24) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal32, numeric::decimal32); -CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal64, numeric::decimal64); - -CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal32, numeric::decimal32); -CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal64, numeric::decimal64); +NVBENCH_BENCH_TYPES(bench_convert_fixed_point, NVBENCH_TYPE_AXES(Types)) + .set_name("fixed_point") + .set_type_axes_names({"DataType"}) + .add_string_axis("dir", {"to", "from"}) + .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22}); diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp index 8f875c5c80f..e1f650dd6cd 100644 --- a/cpp/benchmarks/string/convert_numerics.cpp +++ b/cpp/benchmarks/string/convert_numerics.cpp @@ -16,117 +16,67 @@ #include #include -#include #include #include #include -namespace { +#include -template -std::unique_ptr get_numerics_column(cudf::size_type rows) -{ - return create_random_column(cudf::type_to_id(), row_count{rows}); -} +namespace { template -std::unique_ptr get_strings_column(cudf::size_type rows) +std::unique_ptr get_strings_column(cudf::column_view const& nv) { - auto const numerics_col = get_numerics_column(rows); if constexpr (std::is_floating_point_v) { - return cudf::strings::from_floats(numerics_col->view()); + return cudf::strings::from_floats(nv); } else { - return cudf::strings::from_integers(numerics_col->view()); - } -} -} // anonymous namespace - -class StringsToNumeric : public cudf::benchmark {}; - -template -void convert_to_number(benchmark::State& state) -{ - auto const rows = static_cast(state.range(0)); - - auto const strings_col = get_strings_column(rows); - auto const strings_view = 
cudf::strings_column_view(strings_col->view()); - auto const col_type = cudf::type_to_id(); - - for (auto _ : state) { - cuda_event_timer raii(state, true); - if constexpr (std::is_floating_point_v) { - cudf::strings::to_floats(strings_view, cudf::data_type{col_type}); - } else { - cudf::strings::to_integers(strings_view, cudf::data_type{col_type}); - } + return cudf::strings::from_integers(nv); } - - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType))); } +} // namespace -class StringsFromNumeric : public cudf::benchmark {}; +using Types = nvbench::type_list; template -void convert_from_number(benchmark::State& state) +void bench_convert_number(nvbench::state& state, nvbench::type_list) { - auto const rows = static_cast(state.range(0)); - - auto const numerics_col = get_numerics_column(rows); - auto const numerics_view = numerics_col->view(); - - std::unique_ptr results = nullptr; - - for (auto _ : state) { - cuda_event_timer raii(state, true); - if constexpr (std::is_floating_point_v) - results = cudf::strings::from_floats(numerics_view); - else - results = cudf::strings::from_integers(numerics_view); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const from_num = state.get_string("dir") == "from"; + + auto const data_type = cudf::data_type(cudf::type_to_id()); + auto const num_col = create_random_column(data_type.id(), row_count{num_rows}); + + auto const strings_col = get_strings_column(num_col->view()); + auto const sv = cudf::strings_column_view(strings_col->view()); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + if (from_num) { + state.add_global_memory_reads(num_rows); + state.add_global_memory_writes(sv.chars_size(stream)); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + if constexpr (std::is_floating_point_v) { + cudf::strings::to_floats(sv, data_type); + } else { + cudf::strings::to_integers(sv, data_type); + } + }); + } else { + state.add_global_memory_reads(sv.chars_size(stream)); + state.add_global_memory_writes(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + if constexpr (std::is_floating_point_v) + cudf::strings::from_floats(num_col->view()); + else + cudf::strings::from_integers(num_col->view()); + }); } - - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) + - rows * sizeof(NumericType))); } -#define CONVERT_TO_NUMERICS_BD(name, type) \ - BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \ - { \ - convert_to_number(state); \ - } \ - BENCHMARK_REGISTER_F(StringsToNumeric, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 10, 1 << 17) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -#define CONVERT_FROM_NUMERICS_BD(name, type) \ - BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \ - { \ - convert_from_number(state); \ - } \ - BENCHMARK_REGISTER_F(StringsFromNumeric, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 10, 1 << 17) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -CONVERT_TO_NUMERICS_BD(strings_to_float32, float); -CONVERT_TO_NUMERICS_BD(strings_to_float64, double); -CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t); -CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t); 
-CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
-CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);
-
-CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
-CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
-CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);
+NVBENCH_BENCH_TYPES(bench_convert_number, NVBENCH_TYPE_AXES(Types))
+  .set_name("numeric")
+  .set_type_axes_names({"NumericType"})
+  .add_string_axis("dir", {"to", "from"})
+  .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
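As a closing aside (not part of the patch): the conversions these benchmarks time can also be exercised from Python through cudf's public `astype` API, which ultimately goes through libcudf's string/numeric convert routines. A minimal sketch, assuming a GPU and a working cudf installation:

```python
# Minimal sketch, not part of the patch: numeric <-> string round trip via the
# public cudf API (assumes a GPU and a working cudf installation).
import cudf

num = cudf.Series([1.25, 2.5, 3.75], dtype="float64")
as_str = num.astype("str")             # numeric -> string conversion
round_trip = as_str.astype("float64")  # string -> numeric conversion

assert round_trip.to_pandas().tolist() == [1.25, 2.5, 3.75]
print(as_str.to_pandas().tolist())
```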