From 3aa461f0c80606cef29e52858a2a733bdf131ed7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 30 Aug 2023 20:31:59 -0700
Subject: [PATCH 01/54] Initial version of scalar

---
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   |  4 ++-
 python/cudf/cudf/_lib/pylibcudf/scalar.pxd    | 25 ++++++++++++++++++
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx    | 26 +++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/scalar.pxd
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/scalar.pyx

diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index 0ce42dc43ff..cfca4add290 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -12,7 +12,9 @@
 # the License.
 # =============================================================================
 
-set(cython_sources column.pyx copying.pyx gpumemoryview.pyx table.pyx types.pyx utils.pyx)
+set(cython_sources column.pyx copying.pyx gpumemoryview.pyx scalar.pyx table.pyx types.pyx
+                   utils.pyx
+)
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
new file mode 100644
index 00000000000..410d4206555
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
@@ -0,0 +1,25 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+from libcpp cimport bool
+from libcpp.memory cimport unique_ptr
+
+from rmm._lib.memory_resource cimport DeviceMemoryResource
+
+from cudf._lib.cpp.scalar.scalar cimport scalar
+
+from .types cimport DataType
+
+
+cdef class Scalar:
+    cdef unique_ptr[scalar] c_obj
+    cdef DataType _data_type
+
+    # Holds a reference to the DeviceMemoryResource used for allocation.
+    # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is
+    # needed for deallocation
+    cdef DeviceMemoryResource mr
+
+    cdef const scalar* get(self) except *
+
+    cpdef DataType type(self)
+    cpdef bool is_valid(self)
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
new file mode 100644
index 00000000000..640877a9e27
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -0,0 +1,26 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+from rmm._lib.memory_resource cimport get_current_device_resource
+
+from .types cimport DataType
+
+
+# The DeviceMemoryResource attribute could be released prematurely
+# by the gc if the DeviceScalar is in a reference cycle. Removing
+# the tp_clear function with the no_gc_clear decoration prevents that.
+# See https://github.com/rapidsai/rmm/pull/931 for details.
+cdef class Scalar:
+
+    def __cinit__(self, *args, **kwargs):
+        self.mr = get_current_device_resource()
+
+    cdef const scalar* get(self) except *:
+        return self.c_obj.get()
+
+    cpdef DataType type(self):
+        """The type of data in the column."""
+        return self._data_type
+
+    cpdef bool is_valid(self):
+        """True if the scalar is valid, false if not"""
+        return self.get().is_valid()

From 1c4f99049c0f45d1b74c0fffe678a4d92943b706 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 30 Aug 2023 20:55:24 -0700
Subject: [PATCH 02/54] Implement basic scatter algorithm with Scalar

---
 .../cudf/cudf/_lib/cpp/libcpp/functional.pxd  |  4 +-
 python/cudf/cudf/_lib/pylibcudf/copying.pxd   |  6 +++
 python/cudf/cudf/_lib/pylibcudf/copying.pyx   | 53 +++++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/cpp/libcpp/functional.pxd b/python/cudf/cudf/_lib/cpp/libcpp/functional.pxd
index f3e2d6d0878..c38db036119 100644
--- a/python/cudf/cudf/_lib/cpp/libcpp/functional.pxd
+++ b/python/cudf/cudf/_lib/cpp/libcpp/functional.pxd
@@ -1,6 +1,8 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 
+# TODO: Can be replaced once https://github.com/cython/cython/pull/5671 is
+# merged and released
 cdef extern from "<functional>" namespace "std" nogil:
     cdef cppclass reference_wrapper[T]:
         reference_wrapper()
diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd
index d57be650710..a2232fc5d81 100644
--- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd
@@ -13,3 +13,9 @@ cpdef Table gather(
     Column gather_map,
     out_of_bounds_policy bounds_policy
 )
+
+cpdef Table scatter (
+    list source_scalars,
+    Column indices,
+    Table target,
+)
diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx
index a27b44b3107..dff3e95328d 100644
--- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx
@@ -2,6 +2,7 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
+from libcpp.vector cimport vector
 
 # TODO: We want to make cpp a more full-featured package so that we can access
 # directly from that. It will make namespacing much cleaner in pylibcudf. What
@@ -9,13 +10,16 @@ from libcpp.utility cimport move
 # cimport libcudf... libcudf.copying.algo(...)
 from cudf._lib.cpp cimport copying as cpp_copying
 from cudf._lib.cpp.copying cimport out_of_bounds_policy
+from cudf._lib.cpp.libcpp.functional cimport reference_wrapper
 
 from cudf._lib.cpp.copying import \
     out_of_bounds_policy as OutOfBoundsPolicy  # no-cython-lint
 
+from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table
 
 from .column cimport Column
+from .scalar cimport Scalar
 from .table cimport Table
 
 
@@ -55,3 +59,52 @@ cpdef Table gather(
             )
         )
     return Table.from_libcudf(move(c_result))
+
+
+ctypedef const scalar constscalar
+
+cpdef Table scatter (
+    list source_scalars,
+    Column indices,
+    Table target,
+):
+    """Scatter source_scalars into target according to the indices.
+
+    For details on the implementation, see cudf::scatter in libcudf.
+
+    Parameters
+    ----------
+    source_scalars : List[Scalar]
+        A list containing one scalar for each column in target.
+    indices : Column
+        The rows of the target into which the source_scalars should be written.
+    target : Table
+        The table into which data should be written.
+
+    Returns
+    -------
+    pylibcudf.Table
+        The result of the scatter
+    """
+    cdef unique_ptr[table] c_result
+    # TODO: This doesn't require the constscalar ctypedef
+    cdef vector[reference_wrapper[const scalar]] c_scalars
+    c_scalars.reserve(len(source_scalars))
+    cdef Scalar d_slr
+    for d_slr in source_scalars:
+        c_scalars.push_back(
+            # TODO: This requires the constscalar ctypedef
+            # Possibly the same as https://github.com/cython/cython/issues/4180
+            reference_wrapper[constscalar](d_slr.get()[0])
+        )
+
+    with nogil:
+        c_result = move(
+            cpp_copying.scatter(
+                c_scalars,
+                indices.view(),
+                target.view(),
+            )
+        )
+
+    return Table.from_libcudf(move(c_result))

From 8ad836715cf3e49415479a1810f9deaf18e33e1b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 1 Sep 2023 12:18:11 -0700
Subject: [PATCH 03/54] Build DeviceScalar around pylibcudf.Scalar

---
 python/cudf/cudf/_lib/pylibcudf/__init__.pxd |  2 +
 python/cudf/cudf/_lib/pylibcudf/__init__.py  |  2 +
 python/cudf/cudf/_lib/scalar.pxd             |  5 ++-
 python/cudf/cudf/_lib/scalar.pyx             | 39 ++++++++++----------
 4 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
index ba7822b0a54..5bcf234adea 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
@@ -4,6 +4,7 @@
 from . cimport copying
 from .column cimport Column
 from .gpumemoryview cimport gpumemoryview
+from .scalar cimport Scalar
 from .table cimport Table
 # TODO: cimport type_id once
 # https://github.com/cython/cython/issues/5609 is resolved
@@ -12,6 +13,7 @@ from .types cimport DataType
 __all__ = [
     "Column",
     "DataType",
+    "Scalar",
     "Table",
     "copying",
     "gpumemoryview",
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py
index 3edff9a53e8..7bcf024d5c1 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.py
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py
@@ -3,12 +3,14 @@
 from . import copying
 from .column import Column
 from .gpumemoryview import gpumemoryview
+from .scalar import Scalar
 from .table import Table
 from .types import DataType, TypeId
 
 __all__ = [
     "Column",
     "DataType",
+    "Scalar",
     "Table",
     "TypeId",
     "copying",
diff --git a/python/cudf/cudf/_lib/scalar.pxd b/python/cudf/cudf/_lib/scalar.pxd
index 1deed60d67d..09a91493723 100644
--- a/python/cudf/cudf/_lib/scalar.pxd
+++ b/python/cudf/cudf/_lib/scalar.pxd
@@ -1,15 +1,16 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 
 from rmm._lib.memory_resource cimport DeviceMemoryResource
 
+from cudf._lib cimport pylibcudf
 from cudf._lib.cpp.scalar.scalar cimport scalar
 
 
 cdef class DeviceScalar:
-    cdef unique_ptr[scalar] c_value
+    cdef pylibcudf.Scalar c_value
 
     # Holds a reference to the DeviceMemoryResource used for allocation.
     # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 0407785b2d8..54509802558 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -20,7 +20,6 @@ from libc.stdint cimport (
 )
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
 
 from rmm._lib.memory_resource cimport get_current_device_resource
 
@@ -41,6 +40,7 @@ from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
 from cudf._lib.interop import from_arrow, to_arrow
 
 cimport cudf._lib.cpp.types as libcudf_types
+from cudf._lib cimport pylibcudf
 from cudf._lib.cpp.scalar.scalar cimport (
     duration_scalar,
     fixed_point_scalar,
@@ -81,6 +81,7 @@ cdef class DeviceScalar:
 
     def __cinit__(self, *args, **kwargs):
         self.mr = get_current_device_resource()
+        self.c_value = pylibcudf.Scalar()
 
     def __init__(self, value, dtype):
         """
@@ -105,26 +106,26 @@ cdef class DeviceScalar:
 
         if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
             _set_decimal_from_scalar(
-                self.c_value, value, dtype, valid)
+                self.c_value.c_obj, value, dtype, valid)
         elif isinstance(dtype, cudf.ListDtype):
             _set_list_from_pylist(
-                self.c_value, value, dtype, valid)
+                self.c_value.c_obj, value, dtype, valid)
         elif isinstance(dtype, cudf.StructDtype):
-            _set_struct_from_pydict(self.c_value, value, dtype, valid)
+            _set_struct_from_pydict(self.c_value.c_obj, value, dtype, valid)
         elif pd.api.types.is_string_dtype(dtype):
-            _set_string_from_np_string(self.c_value, value, valid)
+            _set_string_from_np_string(self.c_value.c_obj, value, valid)
         elif pd.api.types.is_numeric_dtype(dtype):
-            _set_numeric_from_np_scalar(self.c_value,
+            _set_numeric_from_np_scalar(self.c_value.c_obj,
                                         value,
                                         dtype,
                                         valid)
         elif pd.api.types.is_datetime64_dtype(dtype):
             _set_datetime64_from_np_scalar(
-                self.c_value, value, dtype, valid
+                self.c_value.c_obj, value, dtype, valid
             )
         elif pd.api.types.is_timedelta64_dtype(dtype):
             _set_timedelta64_from_np_scalar(
-                self.c_value, value, dtype, valid
+                self.c_value.c_obj, value, dtype, valid
             )
         else:
             raise ValueError(
@@ -134,19 +135,19 @@ cdef class DeviceScalar:
 
     def _to_host_scalar(self):
         if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):
-            result = _get_py_decimal_from_fixed_point(self.c_value)
+            result = _get_py_decimal_from_fixed_point(self.c_value.c_obj)
         elif cudf.api.types.is_struct_dtype(self.dtype):
-            result = _get_py_dict_from_struct(self.c_value, self.dtype)
+            result = _get_py_dict_from_struct(self.c_value.c_obj, self.dtype)
         elif cudf.api.types.is_list_dtype(self.dtype):
-            result = _get_py_list_from_list(self.c_value, self.dtype)
+            result = _get_py_list_from_list(self.c_value.c_obj, self.dtype)
         elif pd.api.types.is_string_dtype(self.dtype):
-            result = _get_py_string_from_string(self.c_value)
+            result = _get_py_string_from_string(self.c_value.c_obj)
         elif pd.api.types.is_numeric_dtype(self.dtype):
-            result = _get_np_scalar_from_numeric(self.c_value)
+            result = _get_np_scalar_from_numeric(self.c_value.c_obj)
         elif pd.api.types.is_datetime64_dtype(self.dtype):
-            result = _get_np_scalar_from_timestamp64(self.c_value)
+            result = _get_np_scalar_from_timestamp64(self.c_value.c_obj)
         elif pd.api.types.is_timedelta64_dtype(self.dtype):
-            result = _get_np_scalar_from_timedelta64(self.c_value)
+            result = _get_np_scalar_from_timedelta64(self.c_value.c_obj)
         else:
             raise ValueError(
                 "Could not convert cudf::scalar to a Python value"
@@ -169,7 +170,7 @@ cdef class DeviceScalar:
         return self._to_host_scalar()
 
     cdef const scalar* get_raw_ptr(self) except *:
-        return self.c_value.get()
+        return self.c_value.c_obj.get()
 
     cpdef bool is_valid(self):
         """
@@ -194,7 +195,7 @@ cdef class DeviceScalar:
         cdef DeviceScalar s = DeviceScalar.__new__(DeviceScalar)
         cdef libcudf_types.data_type cdtype
 
-        s.c_value = move(ptr)
+        s.c_value.c_obj.swap(ptr)
         cdtype = s.get_raw_ptr()[0].type()
 
         if dtype is not None:
@@ -601,9 +602,9 @@ def _create_proxy_nat_scalar(dtype):
     if dtype.char in 'mM':
         nat = dtype.type('NaT').astype(dtype)
         if dtype.type == np.datetime64:
-            _set_datetime64_from_np_scalar(result.c_value, nat, dtype, True)
+            _set_datetime64_from_np_scalar(result.c_value.c_obj, nat, dtype, True)
         elif dtype.type == np.timedelta64:
-            _set_timedelta64_from_np_scalar(result.c_value, nat, dtype, True)
+            _set_timedelta64_from_np_scalar(result.c_value.c_obj, nat, dtype, True)
         return result
     else:
         raise TypeError('NAT only valid for datetime and timedelta')

From c1479725ddc3c2e7d4c192039fe750ab20947a89 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 1 Sep 2023 14:28:38 -0700
Subject: [PATCH 04/54] Fix some initialization

---
 python/cudf/cudf/_lib/datetime.pyx | 6 ++++--
 python/cudf/cudf/_lib/scalar.pyx   | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
index 81949dbaa20..3d96f59c4d6 100644
--- a/python/cudf/cudf/_lib/datetime.pyx
+++ b/python/cudf/cudf/_lib/datetime.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from cudf.core.buffer import acquire_spill_lock
 
@@ -10,6 +10,7 @@ from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.filling cimport calendrical_month_sequence
+from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
@@ -166,10 +167,11 @@ def date_range(DeviceScalar start, size_type n, offset):
         + offset.kwds.get("months", 0)
     )
 
+    cdef const scalar* c_start = start.c_value.get()
     with nogil:
         c_result = move(calendrical_month_sequence(
             n,
-            start.c_value.get()[0],
+            c_start[0],
             months
         ))
     return Column.from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 54509802558..ca0d08c4d60 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -132,6 +132,8 @@ cdef class DeviceScalar:
                 f"Cannot convert value of type "
                 f"{type(value).__name__} to cudf scalar"
             )
+        cdef libcudf_types.data_type cdtype = self.get_raw_ptr()[0].type()
+        self.c_value._data_type = pylibcudf.DataType.from_libcudf(cdtype)
 
     def _to_host_scalar(self):
         if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):
@@ -197,6 +199,7 @@ cdef class DeviceScalar:
 
         s.c_value.c_obj.swap(ptr)
         cdtype = s.get_raw_ptr()[0].type()
+        s.c_value._data_type = pylibcudf.DataType.from_libcudf(cdtype)
 
         if dtype is not None:
             s._dtype = dtype

From 0e5e5d38c0bf360d441be47008248afbed3a6c54 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 1 Sep 2023 14:28:51 -0700
Subject: [PATCH 05/54] Add missing no_gc_clear

---
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 640877a9e27..df35ff92e72 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
+cimport cython
+
 from rmm._lib.memory_resource cimport get_current_device_resource
 
 from .types cimport DataType
@@ -9,6 +11,7 @@ from .types cimport DataType
 # by the gc if the DeviceScalar is in a reference cycle. Removing
 # the tp_clear function with the no_gc_clear decoration prevents that.
 # See https://github.com/rapidsai/rmm/pull/931 for details.
+@cython.no_gc_clear
 cdef class Scalar:
 
     def __cinit__(self, *args, **kwargs):

From e3008840ebc7821d1504b9d4921df0afe03359db Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 1 Sep 2023 15:11:19 -0700
Subject: [PATCH 06/54] Implement from_libcudf

---
 python/cudf/cudf/_lib/pylibcudf/scalar.pxd |  3 +++
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 21 +++++++++++++++++++++
 python/cudf/cudf/_lib/scalar.pyx           |  7 ++++---
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
index 410d4206555..85d57305622 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
@@ -23,3 +23,6 @@ cdef class Scalar:
 
     cpdef DataType type(self)
     cpdef bool is_valid(self)
+
+    @staticmethod
+    cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*)
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index df35ff92e72..3886c5a5290 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -4,6 +4,8 @@ cimport cython
 
 from rmm._lib.memory_resource cimport get_current_device_resource
 
+from cudf._lib.cpp.scalar.scalar cimport scalar
+
 from .types cimport DataType
 
 
@@ -13,6 +15,12 @@ from .types cimport DataType
 # See https://github.com/rapidsai/rmm/pull/931 for details.
 @cython.no_gc_clear
 cdef class Scalar:
+    """A scalar value in device memory."""
+    # Unlike for columns, libcudf does not support scalar views. All APIs that
+    # accept scalar values accept references to the owning object rather than a
+    # special view type. As a result, pylibcudf.Scalar has a simpler structure
+    # than pylibcudf.Column because it can be a true wrapper around a libcudf
+    # column
 
     def __cinit__(self, *args, **kwargs):
         self.mr = get_current_device_resource()
@@ -27,3 +35,16 @@ cdef class Scalar:
     cpdef bool is_valid(self):
         """True if the scalar is valid, false if not"""
         return self.get().is_valid()
+
+    @staticmethod
+    cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=None):
+        """Construct a Scalar object from a libcudf scalar.
+
+        This method is for pylibcudf's functions to use to ingest outputs of
+        calling libcudf algorithms, and should generally not be needed by users
+        (even direct pylibcudf Cython users).
+        """
+        cdef Scalar s = Scalar.__new__(Scalar)
+        s.c_obj.swap(libcudf_scalar)
+        s._data_type = DataType.from_libcudf(s.get().type())
+        return s
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index ca0d08c4d60..f6350c1cb94 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -20,10 +20,12 @@ from libc.stdint cimport (
 )
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
 
 from rmm._lib.memory_resource cimport get_current_device_resource
 
 import cudf
+from cudf._lib cimport pylibcudf
 from cudf._lib.types import (
     LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
     datetime_unit_map,
@@ -178,7 +180,7 @@ cdef class DeviceScalar:
         """
         Returns if the Scalar is valid or not(i.e., <NA>).
         """
-        return self.get_raw_ptr()[0].is_valid()
+        return self.c_value.is_valid()
 
     def __repr__(self):
         if cudf.utils.utils.is_na_like(self.value):
@@ -197,9 +199,8 @@ cdef class DeviceScalar:
         cdef DeviceScalar s = DeviceScalar.__new__(DeviceScalar)
         cdef libcudf_types.data_type cdtype
 
-        s.c_value.c_obj.swap(ptr)
+        s.c_value = pylibcudf.Scalar.from_libcudf(move(ptr))
         cdtype = s.get_raw_ptr()[0].type()
-        s.c_value._data_type = pylibcudf.DataType.from_libcudf(cdtype)
 
         if dtype is not None:
             s._dtype = dtype

From 4a2a436e67195ce2ddd87b1f7e2295b96c6c0a78 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 1 Sep 2023 15:14:08 -0700
Subject: [PATCH 07/54] Remove mr from DeviceScalar

---
 python/cudf/cudf/_lib/scalar.pxd |  5 -----
 python/cudf/cudf/_lib/scalar.pyx | 10 ----------
 2 files changed, 15 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pxd b/python/cudf/cudf/_lib/scalar.pxd
index 09a91493723..ae1d350edc6 100644
--- a/python/cudf/cudf/_lib/scalar.pxd
+++ b/python/cudf/cudf/_lib/scalar.pxd
@@ -12,11 +12,6 @@ from cudf._lib.cpp.scalar.scalar cimport scalar
 cdef class DeviceScalar:
     cdef pylibcudf.Scalar c_value
 
-    # Holds a reference to the DeviceMemoryResource used for allocation.
-    # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is
-    # needed for deallocation
-    cdef DeviceMemoryResource mr
-
     cdef object _dtype
 
     cdef const scalar* get_raw_ptr(self) except *
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index f6350c1cb94..ff1a09a9c9e 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -1,7 +1,5 @@
 # Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
-cimport cython
-
 import decimal
 
 import numpy as np
@@ -22,8 +20,6 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from rmm._lib.memory_resource cimport get_current_device_resource
-
 import cudf
 from cudf._lib cimport pylibcudf
 from cudf._lib.types import (
@@ -74,15 +70,9 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
 from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
 
 
-# The DeviceMemoryResource attribute could be released prematurely
-# by the gc if the DeviceScalar is in a reference cycle. Removing
-# the tp_clear function with the no_gc_clear decoration prevents that.
-# See https://github.com/rapidsai/rmm/pull/931 for details.
-@cython.no_gc_clear
 cdef class DeviceScalar:
 
     def __cinit__(self, *args, **kwargs):
-        self.mr = get_current_device_resource()
         self.c_value = pylibcudf.Scalar()
 
     def __init__(self, value, dtype):

From abd5550be0e7727621838d207dc8146d6a8a4218 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 1 Sep 2023 16:11:36 -0700
Subject: [PATCH 08/54] Inline _set_value

---
 python/cudf/cudf/_lib/scalar.pyx | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index ff1a09a9c9e..32a7daae098 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -90,34 +90,30 @@ cdef class DeviceScalar:
             A NumPy dtype.
         """
         self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
-        self._set_value(value, self._dtype)
-
-    def _set_value(self, value, dtype):
-        # IMPORTANT: this should only ever be called from __init__
         valid = not _is_null_host_scalar(value)
 
-        if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
+        if isinstance(self._dtype, cudf.core.dtypes.DecimalDtype):
             _set_decimal_from_scalar(
-                self.c_value.c_obj, value, dtype, valid)
-        elif isinstance(dtype, cudf.ListDtype):
+                self.c_value.c_obj, value, self._dtype, valid)
+        elif isinstance(self._dtype, cudf.ListDtype):
             _set_list_from_pylist(
-                self.c_value.c_obj, value, dtype, valid)
-        elif isinstance(dtype, cudf.StructDtype):
-            _set_struct_from_pydict(self.c_value.c_obj, value, dtype, valid)
-        elif pd.api.types.is_string_dtype(dtype):
+                self.c_value.c_obj, value, self._dtype, valid)
+        elif isinstance(self._dtype, cudf.StructDtype):
+            _set_struct_from_pydict(self.c_value.c_obj, value, self._dtype, valid)
+        elif pd.api.types.is_string_dtype(self._dtype):
             _set_string_from_np_string(self.c_value.c_obj, value, valid)
-        elif pd.api.types.is_numeric_dtype(dtype):
+        elif pd.api.types.is_numeric_dtype(self._dtype):
             _set_numeric_from_np_scalar(self.c_value.c_obj,
                                         value,
-                                        dtype,
+                                        self._dtype,
                                         valid)
-        elif pd.api.types.is_datetime64_dtype(dtype):
+        elif pd.api.types.is_datetime64_dtype(self._dtype):
             _set_datetime64_from_np_scalar(
-                self.c_value.c_obj, value, dtype, valid
+                self.c_value.c_obj, value, self._dtype, valid
             )
-        elif pd.api.types.is_timedelta64_dtype(dtype):
+        elif pd.api.types.is_timedelta64_dtype(self._dtype):
             _set_timedelta64_from_np_scalar(
-                self.c_value.c_obj, value, dtype, valid
+                self.c_value.c_obj, value, self._dtype, valid
             )
         else:
             raise ValueError(

From e8f6847085aa26bb851b56e771b38dbb021ee49c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 5 Sep 2023 10:03:45 -0700
Subject: [PATCH 09/54] Add a comment

---
 python/cudf/cudf/_lib/scalar.pyx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 32a7daae098..9b2ede1b55d 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -72,6 +72,9 @@ from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
 
 cdef class DeviceScalar:
 
+    # I think this should be removable, except that currently the way that
+    # from_unique_ptr is implemented is probably dereferencing this in an
+    # invalid state. See what the best way to fix that is.
     def __cinit__(self, *args, **kwargs):
         self.c_value = pylibcudf.Scalar()
 

From 8360f8098ed022c9c74df08c8194baec6fb0016c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 5 Sep 2023 21:32:42 -0700
Subject: [PATCH 10/54] Implement from_arrow

---
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   | 27 ++++++++++++++++--
 python/cudf/cudf/_lib/pylibcudf/interop.pxd   | 10 +++++++
 python/cudf/cudf/_lib/pylibcudf/interop.pyx   | 28 +++++++++++++++++++
 3 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/interop.pxd
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/interop.pyx

diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index cfca4add290..6d01199cfc8 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -12,8 +12,8 @@
 # the License.
 # =============================================================================
 
-set(cython_sources column.pyx copying.pyx gpumemoryview.pyx scalar.pyx table.pyx types.pyx
-                   utils.pyx
+set(cython_sources column.pyx copying.pyx gpumemoryview.pyx interop.pyx scalar.pyx table.pyx
+                   types.pyx utils.pyx
 )
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
@@ -21,3 +21,26 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf
 )
+
+find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
+
+execute_process(
+  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
+  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
+  OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+set(targets_using_arrow_headers pylibcudf_interop)
+foreach(target IN LISTS targets_using_arrow_headers)
+  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
+endforeach()
+
+# TODO: Clean up this include when switching to scikit-build-core. See cudf/_lib/CMakeLists.txt for
+# more info
+find_package(NumPy REQUIRED)
+set(targets_using_numpy pylibcudf_interop)
+foreach(target IN LISTS targets_using_numpy)
+  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
+  # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24.
+  # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
+endforeach()
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pxd b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
new file mode 100644
index 00000000000..dd518aeea52
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
@@ -0,0 +1,10 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+from pyarrow.lib cimport Table as pa_Table
+
+from .table cimport Table
+
+
+cpdef Table from_arrow(
+    pa_Table pyarrow_table,
+)
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
new file mode 100644
index 00000000000..f5161074b15
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
@@ -0,0 +1,28 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+from libcpp.memory cimport shared_ptr, unique_ptr
+from libcpp.utility cimport move
+from pyarrow.lib cimport (
+    CTable as pa_CTable,
+    Table as pa_Table,
+    pyarrow_unwrap_table,
+)
+
+from cudf._lib.cpp.interop cimport from_arrow as cpp_from_arrow
+from cudf._lib.cpp.table.table cimport table
+
+from .table cimport Table
+
+
+cpdef Table from_arrow(
+    pa_Table pyarrow_table,
+):
+    cdef shared_ptr[pa_CTable] ctable = (
+        pyarrow_unwrap_table(pyarrow_table)
+    )
+    cdef unique_ptr[table] c_result
+
+    with nogil:
+        c_result = move(cpp_from_arrow(ctable.get()[0]))
+
+    return Table.from_libcudf(move(c_result))

From 88147728b705d20ea51a6c0006e2653ab95f2c3e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 6 Sep 2023 17:02:52 -0700
Subject: [PATCH 11/54] Add to package

---
 python/cudf/cudf/_lib/pylibcudf/__init__.pxd | 6 +++++-
 python/cudf/cudf/_lib/pylibcudf/__init__.py  | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
index 5bcf234adea..cf0007b9303 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
@@ -1,7 +1,10 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
 # TODO: Verify consistent usage of relative/absolute imports in pylibcudf.
-from . cimport copying
+# TODO: Cannot import interop because it introduces a build-time pyarrow header
+# dependency for everything that cimports pylibcudf. See if there's a way to
+# avoid that before polluting the whole package.
+from . cimport copying  # , interop
 from .column cimport Column
 from .gpumemoryview cimport gpumemoryview
 from .scalar cimport Scalar
@@ -17,4 +20,5 @@ __all__ = [
     "Table",
     "copying",
     "gpumemoryview",
+    # "interop",
 ]
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py
index 7bcf024d5c1..72b74a57b87 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.py
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-from . import copying
+from . import copying, interop
 from .column import Column
 from .gpumemoryview import gpumemoryview
 from .scalar import Scalar
@@ -15,4 +15,5 @@
     "TypeId",
     "copying",
     "gpumemoryview",
+    "interop",
 ]

From 3a04947dee18fc7506b80ea1be219514cbe45dd8 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 6 Sep 2023 17:04:29 -0700
Subject: [PATCH 12/54] Implement release for gpumemoryview

---
 python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd |  4 +++-
 python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx | 14 +++++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
index 713697bd139..2c40348ec4f 100644
--- a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
@@ -6,4 +6,6 @@ cdef class gpumemoryview:
     # to treat this object as something like a POD struct
     cdef readonly:
         Py_ssize_t ptr
-        object obj
+        object _obj
+
+    cpdef release(self)
diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
index fc98f087a1b..9121092ecdd 100644
--- a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
@@ -19,9 +19,21 @@ cdef class gpumemoryview:
                 "gpumemoryview must be constructed from an object supporting "
                 "the CUDA array interface"
             )
-        self.obj = obj
+        self._obj = obj
+        self._released = False
         # TODO: Need to respect readonly
         self.ptr = cai["data"][0]
 
     def __cuda_array_interface__(self):
         return self.obj.__cuda_array_interface__
+
+    @property
+    def obj(self):
+        if not self._released:
+            return self._obj
+        else:
+            raise ValueError("operation forbidden on released gpumemoryview object")
+
+    cpdef release(self):
+        self._obj = None
+        self._released = True

From 032adbb6a91dbc2fcc70daef99bd5402093bfec6 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 6 Sep 2023 17:35:59 -0700
Subject: [PATCH 13/54] Add proper constructor for scalar from pyarrow scalar

---
 python/cudf/cudf/_lib/cpp/reduce.pxd          |  5 +-
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   |  4 +-
 .../cudf/_lib/pylibcudf/gpumemoryview.pxd     |  3 ++
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx    | 49 +++++++++++++++++--
 4 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/reduce.pxd b/python/cudf/cudf/_lib/cpp/reduce.pxd
index 7952c717916..997782dec6c 100644
--- a/python/cudf/cudf/_lib/cpp/reduce.pxd
+++ b/python/cudf/cudf/_lib/cpp/reduce.pxd
@@ -1,14 +1,13 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport pair
 
-from cudf._lib.aggregation cimport reduce_aggregation, scan_aggregation
+from cudf._lib.cpp.aggregation cimport reduce_aggregation, scan_aggregation
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.types cimport data_type
-from cudf._lib.scalar cimport DeviceScalar
 
 
 cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index 6d01199cfc8..64adb38ace3 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -30,7 +30,7 @@ execute_process(
   OUTPUT_STRIP_TRAILING_WHITESPACE
 )
 
-set(targets_using_arrow_headers pylibcudf_interop)
+set(targets_using_arrow_headers pylibcudf_interop pylibcudf_scalar)
 foreach(target IN LISTS targets_using_arrow_headers)
   target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
 endforeach()
@@ -38,7 +38,7 @@ endforeach()
 # TODO: Clean up this include when switching to scikit-build-core. See cudf/_lib/CMakeLists.txt for
 # more info
 find_package(NumPy REQUIRED)
-set(targets_using_numpy pylibcudf_interop)
+set(targets_using_numpy pylibcudf_interop pylibcudf_scalar)
 foreach(target IN LISTS targets_using_numpy)
   target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
   # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24.
diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
index 2c40348ec4f..93449fd02d1 100644
--- a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
@@ -1,5 +1,7 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
+from libcpp cimport bool
+
 
 cdef class gpumemoryview:
     # TODO: Eventually probably want to make this opaque, but for now it's fine
@@ -7,5 +9,6 @@ cdef class gpumemoryview:
     cdef readonly:
         Py_ssize_t ptr
         object _obj
+        bool _released
 
     cpdef release(self)
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 3886c5a5290..292e6e9d51f 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -1,11 +1,23 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-cimport cython
+cimport pyarrow.lib
+from cython cimport no_gc_clear
+from cython.operator cimport dereference
+from libcpp.utility cimport move
+
+import pyarrow.lib
 
 from rmm._lib.memory_resource cimport get_current_device_resource
 
+from cudf._lib.cpp cimport aggregation
+from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.reduce cimport cpp_reduce
 from cudf._lib.cpp.scalar.scalar cimport scalar
+from cudf._lib.cpp.types cimport data_type
 
+from .column cimport Column
+from .interop cimport from_arrow
+from .table cimport Table
 from .types cimport DataType
 
 
@@ -13,7 +25,7 @@ from .types cimport DataType
 # by the gc if the DeviceScalar is in a reference cycle. Removing
 # the tp_clear function with the no_gc_clear decoration prevents that.
 # See https://github.com/rapidsai/rmm/pull/931 for details.
-@cython.no_gc_clear
+@no_gc_clear
 cdef class Scalar:
     """A scalar value in device memory."""
     # Unlike for columns, libcudf does not support scalar views. All APIs that
@@ -22,9 +34,40 @@ cdef class Scalar:
     # than pylibcudf.Column because it can be a true wrapper around a libcudf
     # column
 
-    def __cinit__(self, *args, **kwargs):
+    def __init__(self, pyarrow.lib.Scalar value):
         self.mr = get_current_device_resource()
 
+        # Convert the value to a cudf object via pyarrow
+        arr = pyarrow.lib.array([value.as_py()], type=value.type)
+        cdef pyarrow.lib.Table pa_tbl = pyarrow.lib.Table.from_arrays(
+            [arr], names=["scalar"]
+        )
+        cdef Table tbl = from_arrow(pa_tbl)
+
+        # TODO: The code below is a pretty hacky way to get a scalar from a
+        # single row of a column, but for now want to see if we can write a
+        # generic solution like this that works. If it does, we can consider
+        # implementing a better approach natively in libcudf.
+        cdef Column col = tbl.columns()[0]
+        cdef column_view cv = col.view()
+
+        cdef unique_ptr[scalar] c_result
+        cdef unique_ptr[aggregation.reduce_aggregation] c_agg = (
+            aggregation.make_min_aggregation[aggregation.reduce_aggregation]()
+        )
+        cdef data_type c_type = col.type().c_obj
+
+        with nogil:
+            c_result = move(cpp_reduce(
+                cv,
+                dereference(c_agg),
+                c_type
+            ))
+
+        cdef Scalar s = Scalar.from_libcudf(move(c_result))
+        self._data_type = DataType.from_libcudf(s.get().type())
+        self.c_obj.swap(s.c_obj)
+
     cdef const scalar* get(self) except *:
         return self.c_obj.get()
 

From 16f45284ed1eddf07fa4ed4f1087cc75bc4e9b72 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 6 Sep 2023 20:43:25 -0700
Subject: [PATCH 14/54] Construct pylibcudf.Scalar for numeric types

---
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx |  8 +++-
 python/cudf/cudf/_lib/scalar.pyx           | 44 ++++------------------
 2 files changed, 15 insertions(+), 37 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 292e6e9d51f..ef98a82a44e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -34,9 +34,15 @@ cdef class Scalar:
     # than pylibcudf.Column because it can be a true wrapper around a libcudf
     # column
 
-    def __init__(self, pyarrow.lib.Scalar value):
+    def __init__(self, pyarrow.lib.Scalar value=None):
         self.mr = get_current_device_resource()
 
+        if value is None:
+            # TODO: This early return is not something we really want to
+            # support, but it here for now to ease the transition of
+            # DeviceScalar.
+            return
+
         # Convert the value to a cudf object via pyarrow
         arr = pyarrow.lib.array([value.as_py()], type=value.type)
         cdef pyarrow.lib.Table pa_tbl = pyarrow.lib.Table.from_arrays(
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 9b2ede1b55d..5c9a6eeaf02 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -94,6 +94,7 @@ cdef class DeviceScalar:
         """
         self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
         valid = not _is_null_host_scalar(value)
+        set_dtype = True
 
         if isinstance(self._dtype, cudf.core.dtypes.DecimalDtype):
             _set_decimal_from_scalar(
@@ -106,10 +107,10 @@ cdef class DeviceScalar:
         elif pd.api.types.is_string_dtype(self._dtype):
             _set_string_from_np_string(self.c_value.c_obj, value, valid)
         elif pd.api.types.is_numeric_dtype(self._dtype):
-            _set_numeric_from_np_scalar(self.c_value.c_obj,
-                                        value,
-                                        self._dtype,
-                                        valid)
+            if value is NA:
+                value = None
+            pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
+            self.c_value = pylibcudf.Scalar(pa_scalar)
         elif pd.api.types.is_datetime64_dtype(self._dtype):
             _set_datetime64_from_np_scalar(
                 self.c_value.c_obj, value, self._dtype, valid
@@ -123,8 +124,10 @@ cdef class DeviceScalar:
                 f"Cannot convert value of type "
                 f"{type(value).__name__} to cudf scalar"
             )
+
         cdef libcudf_types.data_type cdtype = self.get_raw_ptr()[0].type()
-        self.c_value._data_type = pylibcudf.DataType.from_libcudf(cdtype)
+        if set_dtype:
+            self.c_value._data_type = pylibcudf.DataType.from_libcudf(cdtype)
 
     def _to_host_scalar(self):
         if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):
@@ -235,37 +238,6 @@ cdef _set_string_from_np_string(unique_ptr[scalar]& s, value, bool valid=True):
     s.reset(new string_scalar(value.encode(), valid))
 
 
-cdef _set_numeric_from_np_scalar(unique_ptr[scalar]& s,
-                                 object value,
-                                 object dtype,
-                                 bool valid=True):
-    value = value if valid else 0
-    if dtype == "int8":
-        s.reset(new numeric_scalar[int8_t](value, valid))
-    elif dtype == "int16":
-        s.reset(new numeric_scalar[int16_t](value, valid))
-    elif dtype == "int32":
-        s.reset(new numeric_scalar[int32_t](value, valid))
-    elif dtype == "int64":
-        s.reset(new numeric_scalar[int64_t](value, valid))
-    elif dtype == "uint8":
-        s.reset(new numeric_scalar[uint8_t](value, valid))
-    elif dtype == "uint16":
-        s.reset(new numeric_scalar[uint16_t](value, valid))
-    elif dtype == "uint32":
-        s.reset(new numeric_scalar[uint32_t](value, valid))
-    elif dtype == "uint64":
-        s.reset(new numeric_scalar[uint64_t](value, valid))
-    elif dtype == "float32":
-        s.reset(new numeric_scalar[float](value, valid))
-    elif dtype == "float64":
-        s.reset(new numeric_scalar[double](value, valid))
-    elif dtype == "bool":
-        s.reset(new numeric_scalar[bool](<bool>value, valid))
-    else:
-        raise ValueError(f"dtype not supported: {dtype}")
-
-
 cdef _set_datetime64_from_np_scalar(unique_ptr[scalar]& s,
                                     object value,
                                     object dtype,

From b75949aa6a42421296b392fcb5e86cfbb9d3a339 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 6 Sep 2023 20:45:48 -0700
Subject: [PATCH 15/54] Construct pylibcudf.Scalar for string types

---
 python/cudf/cudf/_lib/scalar.pyx | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 5c9a6eeaf02..388508c447b 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -105,12 +105,17 @@ cdef class DeviceScalar:
         elif isinstance(self._dtype, cudf.StructDtype):
             _set_struct_from_pydict(self.c_value.c_obj, value, self._dtype, valid)
         elif pd.api.types.is_string_dtype(self._dtype):
-            _set_string_from_np_string(self.c_value.c_obj, value, valid)
+            if value is NA:
+                value = None
+            pa_scalar = pa.scalar(value, type=pa.string())
+            self.c_value = pylibcudf.Scalar(pa_scalar)
+            set_dtype = False
         elif pd.api.types.is_numeric_dtype(self._dtype):
             if value is NA:
                 value = None
             pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
             self.c_value = pylibcudf.Scalar(pa_scalar)
+            set_dtype = False
         elif pd.api.types.is_datetime64_dtype(self._dtype):
             _set_datetime64_from_np_scalar(
                 self.c_value.c_obj, value, self._dtype, valid
@@ -233,11 +238,6 @@ cdef class DeviceScalar:
         return s
 
 
-cdef _set_string_from_np_string(unique_ptr[scalar]& s, value, bool valid=True):
-    value = value if valid else ""
-    s.reset(new string_scalar(value.encode(), valid))
-
-
 cdef _set_datetime64_from_np_scalar(unique_ptr[scalar]& s,
                                     object value,
                                     object dtype,

From 8d460469ab05c091c7d7a2cb7f9a1107a92b2adf Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 6 Sep 2023 21:08:55 -0700
Subject: [PATCH 16/54] Add comment

---
 python/cudf/cudf/_lib/scalar.pyx | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 388508c447b..fc31ac1062d 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -116,6 +116,11 @@ cdef class DeviceScalar:
             pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
             self.c_value = pylibcudf.Scalar(pa_scalar)
             set_dtype = False
+        # TODO: For datetime and timedeltas need to find a way to not overflow
+        # for large values. The problem is that numpy supports larger values,
+        # but Python's built-in objects do not, and currently in
+        # pylibcudf.Scalar.__init__ we rely on as_py to toss the scalar into an
+        # array because pa.array doesn't understand pa.Scalar instances.
         elif pd.api.types.is_datetime64_dtype(self._dtype):
             _set_datetime64_from_np_scalar(
                 self.c_value.c_obj, value, self._dtype, valid

From ce164a002d06c11e5cb3dbfe4d160de576b52213 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 7 Sep 2023 12:12:25 -0700
Subject: [PATCH 17/54] Stop using reduction in favor of get_element

---
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 32 ++++++++--------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index ef98a82a44e..60803c1c71a 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -2,18 +2,15 @@
 
 cimport pyarrow.lib
 from cython cimport no_gc_clear
-from cython.operator cimport dereference
 from libcpp.utility cimport move
 
 import pyarrow.lib
 
 from rmm._lib.memory_resource cimport get_current_device_resource
 
-from cudf._lib.cpp cimport aggregation
 from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.reduce cimport cpp_reduce
+from cudf._lib.cpp.copying cimport get_element
 from cudf._lib.cpp.scalar.scalar cimport scalar
-from cudf._lib.cpp.types cimport data_type
 
 from .column cimport Column
 from .interop cimport from_arrow
@@ -43,36 +40,29 @@ cdef class Scalar:
             # DeviceScalar.
             return
 
-        # Convert the value to a cudf object via pyarrow
+        cdef Scalar s = Scalar.from_pyarrow_scalar(value)
+        self._data_type = DataType.from_libcudf(s.get().type())
+        self.c_obj.swap(s.c_obj)
+
+    @staticmethod
+    def from_pyarrow_scalar(self, pyarrow.lib.Scalar value):
+        # Put the scalar into a column so that we can use from_arrow (no scalar
+        # implementation), then extract the zeroth element.
         arr = pyarrow.lib.array([value.as_py()], type=value.type)
         cdef pyarrow.lib.Table pa_tbl = pyarrow.lib.Table.from_arrays(
             [arr], names=["scalar"]
         )
         cdef Table tbl = from_arrow(pa_tbl)
 
-        # TODO: The code below is a pretty hacky way to get a scalar from a
-        # single row of a column, but for now want to see if we can write a
-        # generic solution like this that works. If it does, we can consider
-        # implementing a better approach natively in libcudf.
         cdef Column col = tbl.columns()[0]
         cdef column_view cv = col.view()
 
         cdef unique_ptr[scalar] c_result
-        cdef unique_ptr[aggregation.reduce_aggregation] c_agg = (
-            aggregation.make_min_aggregation[aggregation.reduce_aggregation]()
-        )
-        cdef data_type c_type = col.type().c_obj
 
         with nogil:
-            c_result = move(cpp_reduce(
-                cv,
-                dereference(c_agg),
-                c_type
-            ))
+            c_result = move(get_element(cv, 0))
 
-        cdef Scalar s = Scalar.from_libcudf(move(c_result))
-        self._data_type = DataType.from_libcudf(s.get().type())
-        self.c_obj.swap(s.c_obj)
+        return Scalar.from_libcudf(move(c_result))
 
     cdef const scalar* get(self) except *:
         return self.c_obj.get()

From a8d8cb602dfc73bf4771113b1c7a3159616ad2ec Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 09:28:46 -0700
Subject: [PATCH 18/54] Move construction from pyarrow scalar to a factory

---
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 19 ++++++++-----------
 python/cudf/cudf/_lib/scalar.pyx           |  4 ++--
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 60803c1c71a..74475f803bb 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -31,21 +31,18 @@ cdef class Scalar:
     # than pylibcudf.Column because it can be a true wrapper around a libcudf
     # column
 
-    def __init__(self, pyarrow.lib.Scalar value=None):
+    def __cinit__(self, *args, **kwargs):
         self.mr = get_current_device_resource()
 
-        if value is None:
-            # TODO: This early return is not something we really want to
-            # support, but it here for now to ease the transition of
-            # DeviceScalar.
-            return
-
-        cdef Scalar s = Scalar.from_pyarrow_scalar(value)
-        self._data_type = DataType.from_libcudf(s.get().type())
-        self.c_obj.swap(s.c_obj)
+    def __init__(self, pyarrow.lib.Scalar value=None):
+        # TODO: This case is not something we really want to
+        # support, but it here for now to ease the transition of
+        # DeviceScalar.
+        if value is not None:
+            raise ValueError("Scalar should be constructed with a factory")
 
     @staticmethod
-    def from_pyarrow_scalar(self, pyarrow.lib.Scalar value):
+    def from_pyarrow_scalar(pyarrow.lib.Scalar value):
         # Put the scalar into a column so that we can use from_arrow (no scalar
         # implementation), then extract the zeroth element.
         arr = pyarrow.lib.array([value.as_py()], type=value.type)
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index fc31ac1062d..8ceda0f887d 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -108,13 +108,13 @@ cdef class DeviceScalar:
             if value is NA:
                 value = None
             pa_scalar = pa.scalar(value, type=pa.string())
-            self.c_value = pylibcudf.Scalar(pa_scalar)
+            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
             set_dtype = False
         elif pd.api.types.is_numeric_dtype(self._dtype):
             if value is NA:
                 value = None
             pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
-            self.c_value = pylibcudf.Scalar(pa_scalar)
+            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
             set_dtype = False
         # TODO: For datetime and timedeltas need to find a way to not overflow
         # for large values. The problem is that numpy supports larger values,

From 3fd071e17e048453db657805c60475eb557313b0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 11:57:07 -0700
Subject: [PATCH 19/54] Implement from_arrow at the C level

---
 cpp/include/cudf/interop.hpp                |  12 ++
 cpp/src/interop/from_arrow.cu               | 119 ++++++++++++++++++++
 python/cudf/cudf/_lib/cpp/interop.pxd       |   6 +-
 python/cudf/cudf/_lib/pylibcudf/interop.pyx |  19 ++++
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx  |  26 +----
 5 files changed, 158 insertions(+), 24 deletions(-)

diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index e210179b147..da3380e1d44 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -145,5 +145,17 @@ std::unique_ptr<table> from_arrow(
   arrow::Table const& input,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Create `cudf::table` from given arrow Scalar input
+ *
+ * @param input arrow:Scalar that needs to be converted to `cudf::scalar`
+ * @param mr    Device memory resource used to allocate `cudf::scalar`
+ * @return cudf scalar generated from given arrow Scalar
+ */
+
+std::unique_ptr<cudf::scalar> from_arrow(
+  arrow::Scalar const& input,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */  // end of group
 }  // namespace cudf
diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 30cfee97fd8..5ad30d29868 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -13,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <arrow/array/array_base.h>
+#include <arrow/type.h>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/column/column_view.hpp>
 #include <cudf/detail/concatenate.hpp>
@@ -472,4 +474,121 @@ std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
   return detail::from_arrow(input_table, cudf::get_default_stream(), mr);
 }
 
+template <typename Functor, typename... Ts>
+constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
+                                               Functor f,
+                                               Ts&&... args)
+{
+  switch (dtype.id()) {
+    case arrow::Type::INT8:
+      return f.template operator()<arrow::Int8Type>(std::forward<Ts>(args)...);
+    case arrow::Type::INT16:
+      return f.template operator()<arrow::Int16Type>(std::forward<Ts>(args)...);
+    case arrow::Type::INT32:
+      return f.template operator()<arrow::Int32Type>(std::forward<Ts>(args)...);
+    case arrow::Type::INT64:
+      return f.template operator()<arrow::Int64Type>(std::forward<Ts>(args)...);
+    case arrow::Type::UINT8:
+      return f.template operator()<arrow::UInt8Type>(std::forward<Ts>(args)...);
+    case arrow::Type::UINT16:
+      return f.template operator()<arrow::UInt16Type>(std::forward<Ts>(args)...);
+    case arrow::Type::UINT32:
+      return f.template operator()<arrow::UInt32Type>(std::forward<Ts>(args)...);
+    case arrow::Type::UINT64:
+      return f.template operator()<arrow::UInt64Type>(std::forward<Ts>(args)...);
+    case arrow::Type::FLOAT:
+      return f.template operator()<arrow::FloatType>(std::forward<Ts>(args)...);
+    case arrow::Type::DOUBLE:
+      return f.template operator()<arrow::DoubleType>(std::forward<Ts>(args)...);
+    case arrow::Type::BOOL:
+      return f.template operator()<arrow::BooleanType>(std::forward<Ts>(args)...);
+    // case arrow::Type::TIMESTAMP_DAYS:
+    //   return f.template operator()<arrow::Type::TIMESTAMP_DAYS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::TIMESTAMP_SECONDS:
+    //   return f.template operator()<arrow::Type::TIMESTAMP_SECONDS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::TIMESTAMP_MILLISECONDS:
+    //   return f.template operator()<arrow::Type::TIMESTAMP_MILLISECONDS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::TIMESTAMP_MICROSECONDS:
+    //   return f.template operator()<arrow::Type::TIMESTAMP_MICROSECONDS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::TIMESTAMP:
+    //   return f.template operator()<arrow::TimestampType>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DURATION_DAYS:
+    //   return f.template operator()<arrow::Type::DURATION_DAYS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DURATION_SECONDS:
+    //   return f.template operator()<arrow::Type::DURATION_SECONDS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DURATION_MILLISECONDS:
+    //   return f.template operator()<arrow::Type::DURATION_MILLISECONDS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DURATION_MICROSECONDS:
+    //   return f.template operator()<arrow::Type::DURATION_MICROSECONDS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DURATION_NANOSECONDS:
+    //   return f.template operator()<arrow::Type::DURATION_NANOSECONDS>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DICTIONARY32:
+    //   return f.template operator()<arrow::Type::DICTIONARY32>(
+    //     std::forward<Ts>(args)...);
+    case arrow::Type::STRING:
+      return f.template operator()<arrow::StringType>(std::forward<Ts>(args)...);
+    // case arrow::Type::LIST:
+    //   return f.template operator()<arrow::Type::LIST>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DECIMAL32:
+    //   return f.template operator()<arrow::Type::DECIMAL32>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DECIMAL64:
+    //   return f.template operator()<arrow::Type::DECIMAL64>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::DECIMAL128:
+    //   return f.template operator()<arrow::Type::DECIMAL128>(
+    //     std::forward<Ts>(args)...);
+    // case arrow::Type::STRUCT:
+    //   return f.template operator()<arrow::Type::STRUCT>(
+    //     std::forward<Ts>(args)...);
+    default: {
+      CUDF_FAIL("Invalid type.");
+    }
+  }
+}
+
+struct ArrowArrayGenerator {
+  template <typename T>
+  auto operator()(arrow::Scalar const& scalar)
+  {
+    // Get a builder for the scalar type
+    typename arrow::TypeTraits<T>::BuilderType builder;
+
+    auto status = builder.AppendScalar(scalar);
+    if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
+
+    auto maybe_array = builder.Finish();
+    if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
+    return *maybe_array;
+  }
+};
+
+std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  auto array = arrow_type_dispatcher(*input.type, ArrowArrayGenerator{}, input);
+
+  auto field = arrow::field("", input.type);
+
+  auto table = arrow::Table::Make(arrow::schema({field}), {array});
+
+  auto cudf_table = from_arrow(*table);
+
+  auto col = cudf_table->get_column(0);
+
+  auto cv = col.view();
+  return get_element(cv, 0);
+}
+
 }  // namespace cudf
diff --git a/python/cudf/cudf/_lib/cpp/interop.pxd b/python/cudf/cudf/_lib/cpp/interop.pxd
index e81f0d617fb..bbfcd8c4a0a 100644
--- a/python/cudf/cudf/_lib/cpp/interop.pxd
+++ b/python/cudf/cudf/_lib/cpp/interop.pxd
@@ -1,12 +1,13 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-from pyarrow.lib cimport CTable
+from pyarrow.lib cimport CScalar, CTable
 
 from cudf._lib.types import cudf_to_np_types, np_to_cudf_types
 
+from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 
@@ -24,6 +25,7 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \
                                ) except +
 
     cdef unique_ptr[table] from_arrow(CTable input) except +
+    cdef unique_ptr[scalar] from_arrow(CScalar input) except +
 
     cdef cppclass column_metadata:
         column_metadata() except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
index f5161074b15..63f4ed68f7e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
@@ -3,14 +3,19 @@
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.utility cimport move
 from pyarrow.lib cimport (
+    CScalar as pa_CScalar,
     CTable as pa_CTable,
+    Scalar as pa_Scalar,
     Table as pa_Table,
+    pyarrow_unwrap_scalar,
     pyarrow_unwrap_table,
 )
 
 from cudf._lib.cpp.interop cimport from_arrow as cpp_from_arrow
+from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table
 
+from .scalar cimport Scalar
 from .table cimport Table
 
 
@@ -26,3 +31,17 @@ cpdef Table from_arrow(
         c_result = move(cpp_from_arrow(ctable.get()[0]))
 
     return Table.from_libcudf(move(c_result))
+
+
+cpdef Scalar from_arrow_scalar(
+    pa_Scalar pyarrow_scalar,
+):
+    cdef shared_ptr[pa_CScalar] cscalar = (
+        pyarrow_unwrap_scalar(pyarrow_scalar)
+    )
+    cdef unique_ptr[scalar] c_result
+
+    with nogil:
+        c_result = move(cpp_from_arrow(cscalar.get()[0]))
+
+    return Scalar.from_libcudf(move(c_result))
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 74475f803bb..3a365d7ad80 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -2,19 +2,13 @@
 
 cimport pyarrow.lib
 from cython cimport no_gc_clear
-from libcpp.utility cimport move
 
 import pyarrow.lib
 
 from rmm._lib.memory_resource cimport get_current_device_resource
 
-from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.copying cimport get_element
 from cudf._lib.cpp.scalar.scalar cimport scalar
 
-from .column cimport Column
-from .interop cimport from_arrow
-from .table cimport Table
 from .types cimport DataType
 
 
@@ -43,23 +37,11 @@ cdef class Scalar:
 
     @staticmethod
     def from_pyarrow_scalar(pyarrow.lib.Scalar value):
-        # Put the scalar into a column so that we can use from_arrow (no scalar
-        # implementation), then extract the zeroth element.
-        arr = pyarrow.lib.array([value.as_py()], type=value.type)
-        cdef pyarrow.lib.Table pa_tbl = pyarrow.lib.Table.from_arrays(
-            [arr], names=["scalar"]
-        )
-        cdef Table tbl = from_arrow(pa_tbl)
+        # Need a local import here to avoid a circular dependency because
+        # from_arrow_scalar returns a Scalar.
+        from .interop import from_arrow_scalar
 
-        cdef Column col = tbl.columns()[0]
-        cdef column_view cv = col.view()
-
-        cdef unique_ptr[scalar] c_result
-
-        with nogil:
-            c_result = move(get_element(cv, 0))
-
-        return Scalar.from_libcudf(move(c_result))
+        return from_arrow_scalar(value)
 
     cdef const scalar* get(self) except *:
         return self.c_obj.get()

From 33abe2b41243ca762a74bdd394097af030dd0120 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 12:13:50 -0700
Subject: [PATCH 20/54] Enable datetime

---
 cpp/src/interop/from_arrow.cu    |  7 +++----
 python/cudf/cudf/_lib/scalar.pyx | 13 +++++--------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 5ad30d29868..55c6cd9e1e6 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -502,9 +502,8 @@ constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
       return f.template operator()<arrow::DoubleType>(std::forward<Ts>(args)...);
     case arrow::Type::BOOL:
       return f.template operator()<arrow::BooleanType>(std::forward<Ts>(args)...);
-    // case arrow::Type::TIMESTAMP_DAYS:
-    //   return f.template operator()<arrow::Type::TIMESTAMP_DAYS>(
-    //     std::forward<Ts>(args)...);
+    case arrow::Type::TIMESTAMP:
+      return f.template operator()<arrow::TimestampType>(std::forward<Ts>(args)...);
     // case arrow::Type::TIMESTAMP_SECONDS:
     //   return f.template operator()<arrow::Type::TIMESTAMP_SECONDS>(
     //     std::forward<Ts>(args)...);
@@ -563,7 +562,7 @@ struct ArrowArrayGenerator {
   auto operator()(arrow::Scalar const& scalar)
   {
     // Get a builder for the scalar type
-    typename arrow::TypeTraits<T>::BuilderType builder;
+    typename arrow::TypeTraits<T>::BuilderType builder{scalar.type, arrow::default_memory_pool()};
 
     auto status = builder.AppendScalar(scalar);
     if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 8ceda0f887d..068c4dd1ef9 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -116,15 +116,12 @@ cdef class DeviceScalar:
             pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
             self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
             set_dtype = False
-        # TODO: For datetime and timedeltas need to find a way to not overflow
-        # for large values. The problem is that numpy supports larger values,
-        # but Python's built-in objects do not, and currently in
-        # pylibcudf.Scalar.__init__ we rely on as_py to toss the scalar into an
-        # array because pa.array doesn't understand pa.Scalar instances.
         elif pd.api.types.is_datetime64_dtype(self._dtype):
-            _set_datetime64_from_np_scalar(
-                self.c_value.c_obj, value, self._dtype, valid
-            )
+            if value is NA or value is NaT:
+                value = None
+            pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
+            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
+            set_dtype = False
         elif pd.api.types.is_timedelta64_dtype(self._dtype):
             _set_timedelta64_from_np_scalar(
                 self.c_value.c_obj, value, self._dtype, valid

From c6b595d5534f2e098941c0e5f19f7360a552066a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 12:22:25 -0700
Subject: [PATCH 21/54] Enable timedeltas

---
 cpp/src/interop/from_arrow.cu    | 29 ++---------------------------
 python/cudf/cudf/_lib/scalar.pyx |  8 +++++---
 2 files changed, 7 insertions(+), 30 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 55c6cd9e1e6..504f99d7d6b 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -504,33 +504,8 @@ constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
       return f.template operator()<arrow::BooleanType>(std::forward<Ts>(args)...);
     case arrow::Type::TIMESTAMP:
       return f.template operator()<arrow::TimestampType>(std::forward<Ts>(args)...);
-    // case arrow::Type::TIMESTAMP_SECONDS:
-    //   return f.template operator()<arrow::Type::TIMESTAMP_SECONDS>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::TIMESTAMP_MILLISECONDS:
-    //   return f.template operator()<arrow::Type::TIMESTAMP_MILLISECONDS>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::TIMESTAMP_MICROSECONDS:
-    //   return f.template operator()<arrow::Type::TIMESTAMP_MICROSECONDS>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::TIMESTAMP:
-    //   return f.template operator()<arrow::TimestampType>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::DURATION_DAYS:
-    //   return f.template operator()<arrow::Type::DURATION_DAYS>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::DURATION_SECONDS:
-    //   return f.template operator()<arrow::Type::DURATION_SECONDS>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::DURATION_MILLISECONDS:
-    //   return f.template operator()<arrow::Type::DURATION_MILLISECONDS>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::DURATION_MICROSECONDS:
-    //   return f.template operator()<arrow::Type::DURATION_MICROSECONDS>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::DURATION_NANOSECONDS:
-    //   return f.template operator()<arrow::Type::DURATION_NANOSECONDS>(
-    //     std::forward<Ts>(args)...);
+    case arrow::Type::DURATION:
+      return f.template operator()<arrow::DurationType>(std::forward<Ts>(args)...);
     // case arrow::Type::DICTIONARY32:
     //   return f.template operator()<arrow::Type::DICTIONARY32>(
     //     std::forward<Ts>(args)...);
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 068c4dd1ef9..42ef880964b 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -123,9 +123,11 @@ cdef class DeviceScalar:
             self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
             set_dtype = False
         elif pd.api.types.is_timedelta64_dtype(self._dtype):
-            _set_timedelta64_from_np_scalar(
-                self.c_value.c_obj, value, self._dtype, valid
-            )
+            if value is NA or value is NaT:
+                value = None
+            pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
+            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
+            set_dtype = False
         else:
             raise ValueError(
                 f"Cannot convert value of type "

From ec4b5106fdfd091b8eafb081ae3704927d0c17ae Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 16:20:38 -0700
Subject: [PATCH 22/54] Enable lists

---
 cpp/src/interop/from_arrow.cu    | 51 ++++++++++++++++++++++++++++++--
 python/cudf/cudf/_lib/scalar.pyx | 39 +++++++++++-------------
 2 files changed, 66 insertions(+), 24 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 504f99d7d6b..2b6f803edf0 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -511,9 +511,8 @@ constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
     //     std::forward<Ts>(args)...);
     case arrow::Type::STRING:
       return f.template operator()<arrow::StringType>(std::forward<Ts>(args)...);
-    // case arrow::Type::LIST:
-    //   return f.template operator()<arrow::Type::LIST>(
-    //     std::forward<Ts>(args)...);
+    case arrow::Type::LIST:
+      return f.template operator()<arrow::ListType>(std::forward<Ts>(args)...);
     // case arrow::Type::DECIMAL32:
     //   return f.template operator()<arrow::Type::DECIMAL32>(
     //     std::forward<Ts>(args)...);
@@ -548,6 +547,52 @@ struct ArrowArrayGenerator {
   }
 };
 
+struct BuilderGenerator {
+  template <typename T>
+  std::shared_ptr<arrow::ArrayBuilder> operator()(std::shared_ptr<arrow::DataType> const& type)
+  {
+    return std::make_shared<typename arrow::TypeTraits<T>::BuilderType>(
+      type, arrow::default_memory_pool());
+  }
+};
+
+template <>
+std::shared_ptr<arrow::ArrayBuilder> BuilderGenerator::operator()<arrow::ListType>(
+  std::shared_ptr<arrow::DataType> const& type)
+{
+  CUDF_FAIL("Not implemented");
+}
+
+template <>
+auto ArrowArrayGenerator::operator()<arrow::ListType>(arrow::Scalar const& scalar)
+{
+  std::shared_ptr<arrow::DataType> vt = scalar.type;
+  std::vector<std::shared_ptr<arrow::DataType>> types;
+
+  // TODO: Consider the possibility of list of structs/struct of lists
+  while (vt->id() == arrow::Type::LIST) {
+    types.push_back(vt);
+    // TODO: Error handling on dynamic_cast
+    vt = dynamic_cast<arrow::ListType&>(*vt).value_type();
+  }
+
+  // Now walk the list backwards and create the builders. Start with the
+  // current vt, which is the innermost one and not a list.
+  auto inner_builder = arrow_type_dispatcher(*vt, BuilderGenerator{}, vt);
+  std::shared_ptr<arrow::ListBuilder> builder;
+  for (auto it = types.rbegin(); it != types.rend(); ++it) {
+    builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(), inner_builder);
+    inner_builder = builder;
+  }
+
+  auto status = builder->AppendScalar(scalar);
+  if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
+
+  auto maybe_array = builder->Finish();
+  if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
+  return *maybe_array;
+}
+
 std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
                                          rmm::mr::device_memory_resource* mr)
 {
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 42ef880964b..d8cb21037a7 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -21,7 +21,9 @@ from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
 import cudf
+
 from cudf._lib cimport pylibcudf
+
 from cudf._lib.types import (
     LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
     datetime_unit_map,
@@ -70,6 +72,14 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
 from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
 
 
+def nestrepl(lst):
+    for i, item in enumerate(lst):
+        if item is NA or item is NaT:
+            lst[i] = None
+        elif isinstance(item, list):
+            nestrepl(item)
+
+
 cdef class DeviceScalar:
 
     # I think this should be removable, except that currently the way that
@@ -100,8 +110,14 @@ cdef class DeviceScalar:
             _set_decimal_from_scalar(
                 self.c_value.c_obj, value, self._dtype, valid)
         elif isinstance(self._dtype, cudf.ListDtype):
-            _set_list_from_pylist(
-                self.c_value.c_obj, value, self._dtype, valid)
+            if value is NA:
+                value = None
+            else:
+                nestrepl(value)
+
+            pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
+            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
+            set_dtype = False
         elif isinstance(self._dtype, cudf.StructDtype):
             _set_struct_from_pydict(self.c_value.c_obj, value, self._dtype, valid)
         elif pd.api.types.is_string_dtype(self._dtype):
@@ -365,25 +381,6 @@ cdef _get_py_dict_from_struct(unique_ptr[scalar]& s, dtype):
     python_dict = table.to_pydict()["None"][0]
     return {k: _nested_na_replace([python_dict[k]])[0] for k in python_dict}
 
-cdef _set_list_from_pylist(unique_ptr[scalar]& s,
-                           object value,
-                           object dtype,
-                           bool valid=True):
-
-    value = value if valid else [NA]
-    cdef Column col
-    if isinstance(dtype.element_type, ListDtype):
-        pa_type = dtype.element_type.to_arrow()
-    else:
-        pa_type = dtype.to_arrow().value_type
-    col = cudf.core.column.as_column(
-        pa.array(value, from_pandas=True, type=pa_type)
-    )
-    cdef column_view col_view = col.view()
-    s.reset(
-        new list_scalar(col_view, valid)
-    )
-
 
 cdef _get_py_list_from_list(unique_ptr[scalar]& s, dtype):
 

From 636242ba0d33aeefb618a7b948f40c5a1495ebca Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 17:21:15 -0700
Subject: [PATCH 23/54] Enable structs

---
 cpp/src/interop/from_arrow.cu    | 60 +++++++++++++++++++++++++-------
 python/cudf/cudf/_lib/scalar.pyx | 25 ++++++++++---
 2 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 2b6f803edf0..5e77d946a48 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include <arrow/array/array_base.h>
+#include <arrow/array/builder_nested.h>
 #include <arrow/type.h>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/column/column_view.hpp>
@@ -513,18 +514,17 @@ constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
       return f.template operator()<arrow::StringType>(std::forward<Ts>(args)...);
     case arrow::Type::LIST:
       return f.template operator()<arrow::ListType>(std::forward<Ts>(args)...);
-    // case arrow::Type::DECIMAL32:
-    //   return f.template operator()<arrow::Type::DECIMAL32>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::DECIMAL64:
-    //   return f.template operator()<arrow::Type::DECIMAL64>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::DECIMAL128:
-    //   return f.template operator()<arrow::Type::DECIMAL128>(
-    //     std::forward<Ts>(args)...);
-    // case arrow::Type::STRUCT:
-    //   return f.template operator()<arrow::Type::STRUCT>(
-    //     std::forward<Ts>(args)...);
+      // case arrow::Type::DECIMAL32:
+      //   return f.template operator()<arrow::Type::DECIMAL32>(
+      //     std::forward<Ts>(args)...);
+      // case arrow::Type::DECIMAL64:
+      //   return f.template operator()<arrow::Type::DECIMAL64>(
+      //     std::forward<Ts>(args)...);
+      // case arrow::Type::DECIMAL128:
+      //   return f.template operator()<arrow::Type::DECIMAL128>(
+      //     std::forward<Ts>(args)...);
+    case arrow::Type::STRUCT:
+      return f.template operator()<arrow::StructType>(std::forward<Ts>(args)...);
     default: {
       CUDF_FAIL("Invalid type.");
     }
@@ -563,6 +563,13 @@ std::shared_ptr<arrow::ArrayBuilder> BuilderGenerator::operator()<arrow::ListTyp
   CUDF_FAIL("Not implemented");
 }
 
+template <>
+std::shared_ptr<arrow::ArrayBuilder> BuilderGenerator::operator()<arrow::StructType>(
+  std::shared_ptr<arrow::DataType> const& type)
+{
+  CUDF_FAIL("Not implemented");
+}
+
 template <>
 auto ArrowArrayGenerator::operator()<arrow::ListType>(arrow::Scalar const& scalar)
 {
@@ -593,6 +600,35 @@ auto ArrowArrayGenerator::operator()<arrow::ListType>(arrow::Scalar const& scala
   return *maybe_array;
 }
 
+std::shared_ptr<arrow::ArrayBuilder> make_struct_builder(
+  std::shared_ptr<arrow::DataType> const& type)
+{
+  std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+
+  for (auto i = 0; i < type->num_fields(); ++i) {
+    auto const vt = type->field(i)->type();
+    if (vt->id() == arrow::Type::STRUCT) {
+      field_builders.push_back(make_struct_builder(vt));
+    } else {
+      field_builders.push_back(arrow_type_dispatcher(*vt, BuilderGenerator{}, vt));
+    }
+  }
+  return std::make_shared<arrow::StructBuilder>(type, arrow::default_memory_pool(), field_builders);
+}
+
+template <>
+auto ArrowArrayGenerator::operator()<arrow::StructType>(arrow::Scalar const& scalar)
+{
+  std::shared_ptr<arrow::ArrayBuilder> builder = make_struct_builder(scalar.type);
+
+  auto status = builder->AppendScalar(scalar);
+  if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
+
+  auto maybe_array = builder->Finish();
+  if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
+  return *maybe_array;
+}
+
 std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
                                          rmm::mr::device_memory_resource* mr)
 {
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index d8cb21037a7..9a0a8e6e719 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -72,12 +72,22 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
 from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
 
 
-def nestrepl(lst):
+def nestrepl_list(lst):
+    # TODO: Account for struct inside list
     for i, item in enumerate(lst):
         if item is NA or item is NaT:
             lst[i] = None
         elif isinstance(item, list):
-            nestrepl(item)
+            nestrepl_list(item)
+
+
+def nestrepl_dict(dct):
+    # TODO: Account for list inside struct
+    for k, v in dct.items():
+        if v is NA or v is NaT:
+            dct[k] = None
+        elif isinstance(v, dict):
+            nestrepl_dict(v)
 
 
 cdef class DeviceScalar:
@@ -113,13 +123,20 @@ cdef class DeviceScalar:
             if value is NA:
                 value = None
             else:
-                nestrepl(value)
+                nestrepl_list(value)
 
             pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
             self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
             set_dtype = False
         elif isinstance(self._dtype, cudf.StructDtype):
-            _set_struct_from_pydict(self.c_value.c_obj, value, self._dtype, valid)
+            if value is NA:
+                value = None
+            else:
+                nestrepl_dict(value)
+
+            pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
+            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
+            set_dtype = False
         elif pd.api.types.is_string_dtype(self._dtype):
             if value is NA:
                 value = None

From 4c08dc34c5bb2519718dff1660a488445e1be11c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 17:43:02 -0700
Subject: [PATCH 24/54] Make list and struct code paths more parallel

---
 cpp/src/interop/from_arrow.cu | 44 ++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 5e77d946a48..31c1ad8b158 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -531,22 +531,6 @@ constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
   }
 }
 
-struct ArrowArrayGenerator {
-  template <typename T>
-  auto operator()(arrow::Scalar const& scalar)
-  {
-    // Get a builder for the scalar type
-    typename arrow::TypeTraits<T>::BuilderType builder{scalar.type, arrow::default_memory_pool()};
-
-    auto status = builder.AppendScalar(scalar);
-    if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
-
-    auto maybe_array = builder.Finish();
-    if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
-    return *maybe_array;
-  }
-};
-
 struct BuilderGenerator {
   template <typename T>
   std::shared_ptr<arrow::ArrayBuilder> operator()(std::shared_ptr<arrow::DataType> const& type)
@@ -570,13 +554,28 @@ std::shared_ptr<arrow::ArrayBuilder> BuilderGenerator::operator()<arrow::StructT
   CUDF_FAIL("Not implemented");
 }
 
-template <>
-auto ArrowArrayGenerator::operator()<arrow::ListType>(arrow::Scalar const& scalar)
+struct ArrowArrayGenerator {
+  template <typename T>
+  auto operator()(arrow::Scalar const& scalar)
+  {
+    // Get a builder for the scalar type
+    auto builder = arrow_type_dispatcher(*scalar.type, BuilderGenerator{}, scalar.type);
+
+    auto status = builder->AppendScalar(scalar);
+    if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
+
+    auto maybe_array = builder->Finish();
+    if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
+    return *maybe_array;
+  }
+};
+
+std::shared_ptr<arrow::ArrayBuilder> make_list_builder(std::shared_ptr<arrow::DataType> const& type)
 {
-  std::shared_ptr<arrow::DataType> vt = scalar.type;
   std::vector<std::shared_ptr<arrow::DataType>> types;
 
   // TODO: Consider the possibility of list of structs/struct of lists
+  std::shared_ptr<arrow::DataType> vt = type;
   while (vt->id() == arrow::Type::LIST) {
     types.push_back(vt);
     // TODO: Error handling on dynamic_cast
@@ -591,6 +590,13 @@ auto ArrowArrayGenerator::operator()<arrow::ListType>(arrow::Scalar const& scala
     builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(), inner_builder);
     inner_builder = builder;
   }
+  return builder;
+}
+
+template <>
+auto ArrowArrayGenerator::operator()<arrow::ListType>(arrow::Scalar const& scalar)
+{
+  std::shared_ptr<arrow::ArrayBuilder> builder = make_list_builder(scalar.type);
 
   auto status = builder->AppendScalar(scalar);
   if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }

From 50bbdc00535b4795ef68fd4c30cfc87702498870 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 18:06:43 -0700
Subject: [PATCH 25/54] Unify builders and use recursion for all paths

---
 cpp/src/interop/from_arrow.cu | 103 ++++++++++------------------------
 1 file changed, 29 insertions(+), 74 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 31c1ad8b158..5d63046c60d 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -16,6 +16,7 @@
 #include <arrow/array/array_base.h>
 #include <arrow/array/builder_nested.h>
 #include <arrow/type.h>
+#include <arrow/type_fwd.h>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/column/column_view.hpp>
 #include <cudf/detail/concatenate.hpp>
@@ -554,91 +555,45 @@ std::shared_ptr<arrow::ArrayBuilder> BuilderGenerator::operator()<arrow::StructT
   CUDF_FAIL("Not implemented");
 }
 
-struct ArrowArrayGenerator {
-  template <typename T>
-  auto operator()(arrow::Scalar const& scalar)
-  {
-    // Get a builder for the scalar type
-    auto builder = arrow_type_dispatcher(*scalar.type, BuilderGenerator{}, scalar.type);
-
-    auto status = builder->AppendScalar(scalar);
-    if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
-
-    auto maybe_array = builder->Finish();
-    if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
-    return *maybe_array;
-  }
-};
-
-std::shared_ptr<arrow::ArrayBuilder> make_list_builder(std::shared_ptr<arrow::DataType> const& type)
-{
-  std::vector<std::shared_ptr<arrow::DataType>> types;
-
-  // TODO: Consider the possibility of list of structs/struct of lists
-  std::shared_ptr<arrow::DataType> vt = type;
-  while (vt->id() == arrow::Type::LIST) {
-    types.push_back(vt);
-    // TODO: Error handling on dynamic_cast
-    vt = dynamic_cast<arrow::ListType&>(*vt).value_type();
-  }
-
-  // Now walk the list backwards and create the builders. Start with the
-  // current vt, which is the innermost one and not a list.
-  auto inner_builder = arrow_type_dispatcher(*vt, BuilderGenerator{}, vt);
-  std::shared_ptr<arrow::ListBuilder> builder;
-  for (auto it = types.rbegin(); it != types.rend(); ++it) {
-    builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(), inner_builder);
-    inner_builder = builder;
-  }
-  return builder;
-}
-
-template <>
-auto ArrowArrayGenerator::operator()<arrow::ListType>(arrow::Scalar const& scalar)
+std::shared_ptr<arrow::ArrayBuilder> make_builder(std::shared_ptr<arrow::DataType> const& type)
 {
-  std::shared_ptr<arrow::ArrayBuilder> builder = make_list_builder(scalar.type);
-
-  auto status = builder->AppendScalar(scalar);
-  if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
-
-  auto maybe_array = builder->Finish();
-  if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
-  return *maybe_array;
-}
-
-std::shared_ptr<arrow::ArrayBuilder> make_struct_builder(
-  std::shared_ptr<arrow::DataType> const& type)
-{
-  std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
-
-  for (auto i = 0; i < type->num_fields(); ++i) {
-    auto const vt = type->field(i)->type();
-    if (vt->id() == arrow::Type::STRUCT) {
-      field_builders.push_back(make_struct_builder(vt));
-    } else {
-      field_builders.push_back(arrow_type_dispatcher(*vt, BuilderGenerator{}, vt));
+  switch (type->id()) {
+    case arrow::Type::STRUCT: {
+      std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+
+      for (auto i = 0; i < type->num_fields(); ++i) {
+        auto const vt = type->field(i)->type();
+        if (vt->id() == arrow::Type::STRUCT || vt->id() == arrow::Type::LIST) {
+          field_builders.push_back(make_builder(vt));
+        } else {
+          field_builders.push_back(arrow_type_dispatcher(*vt, BuilderGenerator{}, vt));
+        }
+      }
+      return std::make_shared<arrow::StructBuilder>(
+        type, arrow::default_memory_pool(), field_builders);
+    }
+    case arrow::Type::LIST: {
+      return std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                  make_builder(type->field(0)->type()));
+    }
+    default: {
+      return arrow_type_dispatcher(*type, BuilderGenerator{}, type);
     }
   }
-  return std::make_shared<arrow::StructBuilder>(type, arrow::default_memory_pool(), field_builders);
 }
 
-template <>
-auto ArrowArrayGenerator::operator()<arrow::StructType>(arrow::Scalar const& scalar)
+std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
+                                         rmm::mr::device_memory_resource* mr)
 {
-  std::shared_ptr<arrow::ArrayBuilder> builder = make_struct_builder(scalar.type);
+  // Get a builder for the scalar type
+  auto builder = make_builder(input.type);
 
-  auto status = builder->AppendScalar(scalar);
+  auto status = builder->AppendScalar(input);
   if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
 
   auto maybe_array = builder->Finish();
   if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
-  return *maybe_array;
-}
-
-std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
-                                         rmm::mr::device_memory_resource* mr)
-{
-  auto array = arrow_type_dispatcher(*input.type, ArrowArrayGenerator{}, input);
+  auto array = *maybe_array;
 
   auto field = arrow::field("", input.type);
 

From 7b34df464edb6261e2e2d0a97bc790e47a4c7db6 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 19:24:12 -0700
Subject: [PATCH 26/54] Enable decimals

---
 cpp/src/interop/from_arrow.cu              | 11 +----
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 55 ++++++++++++++++++++--
 python/cudf/cudf/_lib/scalar.pyx           | 19 ++++++--
 3 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 5d63046c60d..21f49a327b3 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -515,15 +515,8 @@ constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
       return f.template operator()<arrow::StringType>(std::forward<Ts>(args)...);
     case arrow::Type::LIST:
       return f.template operator()<arrow::ListType>(std::forward<Ts>(args)...);
-      // case arrow::Type::DECIMAL32:
-      //   return f.template operator()<arrow::Type::DECIMAL32>(
-      //     std::forward<Ts>(args)...);
-      // case arrow::Type::DECIMAL64:
-      //   return f.template operator()<arrow::Type::DECIMAL64>(
-      //     std::forward<Ts>(args)...);
-      // case arrow::Type::DECIMAL128:
-      //   return f.template operator()<arrow::Type::DECIMAL128>(
-      //     std::forward<Ts>(args)...);
+    case arrow::Type::DECIMAL128:
+      return f.template operator()<arrow::Decimal128Type>(std::forward<Ts>(args)...);
     case arrow::Type::STRUCT:
       return f.template operator()<arrow::StructType>(std::forward<Ts>(args)...);
     default: {
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 3a365d7ad80..0821258ac6c 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -2,14 +2,21 @@
 
 cimport pyarrow.lib
 from cython cimport no_gc_clear
+from libcpp.memory cimport unique_ptr
 
 import pyarrow.lib
 
 from rmm._lib.memory_resource cimport get_current_device_resource
 
-from cudf._lib.cpp.scalar.scalar cimport scalar
+from cudf._lib.cpp.scalar.scalar cimport fixed_point_scalar, scalar
+from cudf._lib.cpp.wrappers.decimals cimport (
+    decimal32,
+    decimal64,
+    decimal128,
+    scale_type,
+)
 
-from .types cimport DataType
+from .types cimport DataType, type_id
 
 
 # The DeviceMemoryResource attribute could be released prematurely
@@ -36,12 +43,52 @@ cdef class Scalar:
             raise ValueError("Scalar should be constructed with a factory")
 
     @staticmethod
-    def from_pyarrow_scalar(pyarrow.lib.Scalar value):
+    def from_pyarrow_scalar(pyarrow.lib.Scalar value, DataType data_type=None):
+        # Allow passing a dtype, but only for the purpose of decimals for now
+
         # Need a local import here to avoid a circular dependency because
         # from_arrow_scalar returns a Scalar.
         from .interop import from_arrow_scalar
 
-        return from_arrow_scalar(value)
+        cdef Scalar s = from_arrow_scalar(value)
+        if s.type().id() != type_id.DECIMAL128:
+            if data_type is not None:
+                raise ValueError(
+                    "dtype may not be passed for non-decimal types"
+                )
+            return s
+
+        if data_type is None:
+            raise ValueError(
+                "Decimal scalars must be constructed with a dtype"
+            )
+
+        cdef type_id tid = data_type.id()
+        if tid not in (type_id.DECIMAL32, type_id.DECIMAL64, type_id.DECIMAL128):
+            raise ValueError(
+                "Decimal scalars may only be cast to decimals"
+            )
+
+        if tid == type_id.DECIMAL128:
+            return s
+
+        if tid == type_id.DECIMAL32:
+            s.c_obj.reset(
+                new fixed_point_scalar[decimal32](
+                    (<fixed_point_scalar[decimal128]*> s.c_obj.get()).value(),
+                    scale_type(-value.type.scale),
+                    s.c_obj.get().is_valid()
+                )
+            )
+        elif tid == type_id.DECIMAL64:
+            s.c_obj.reset(
+                new fixed_point_scalar[decimal64](
+                    (<fixed_point_scalar[decimal128]*> s.c_obj.get()).value(),
+                    scale_type(-value.type.scale),
+                    s.c_obj.get().is_valid()
+                )
+            )
+        return s
 
     cdef const scalar* get(self) except *:
         return self.c_obj.get()
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 9a0a8e6e719..23fd455b623 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -22,7 +22,7 @@ from libcpp.utility cimport move
 
 import cudf
 
-from cudf._lib cimport pylibcudf
+from cudf._lib.pylibcudf.types cimport type_id
 
 from cudf._lib.types import (
     LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
@@ -113,12 +113,23 @@ cdef class DeviceScalar:
             A NumPy dtype.
         """
         self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
-        valid = not _is_null_host_scalar(value)
         set_dtype = True
 
+        cdef type_id tid
         if isinstance(self._dtype, cudf.core.dtypes.DecimalDtype):
-            _set_decimal_from_scalar(
-                self.c_value.c_obj, value, self._dtype, valid)
+            if value is NA:
+                value = None
+            pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
+
+            tid = type_id.DECIMAL128
+            if isinstance(self._dtype, cudf.core.dtypes.Decimal32Dtype):
+                tid = type_id.DECIMAL32
+            elif isinstance(self._dtype, cudf.core.dtypes.Decimal64Dtype):
+                tid = type_id.DECIMAL64
+            dt = pylibcudf.DataType(tid, -self._dtype.scale)
+
+            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar, dt)
+            set_dtype = False
         elif isinstance(self._dtype, cudf.ListDtype):
             if value is NA:
                 value = None

From 392407fb046de6d50287df1aff99e7c8f0cfba1c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 19:27:42 -0700
Subject: [PATCH 27/54] Some cleanup

---
 python/cudf/cudf/_lib/scalar.pyx | 67 ++------------------------------
 python/cudf/cudf/utils/dtypes.py | 18 ---------
 2 files changed, 3 insertions(+), 82 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 23fd455b623..44e1255e9c8 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -37,7 +37,7 @@ from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
 
-from cudf._lib.interop import from_arrow, to_arrow
+from cudf._lib.interop import to_arrow
 
 cimport cudf._lib.cpp.types as libcudf_types
 from cudf._lib cimport pylibcudf
@@ -51,12 +51,7 @@ from cudf._lib.cpp.scalar.scalar cimport (
     struct_scalar,
     timestamp_scalar,
 )
-from cudf._lib.cpp.wrappers.decimals cimport (
-    decimal32,
-    decimal64,
-    decimal128,
-    scale_type,
-)
+from cudf._lib.cpp.wrappers.decimals cimport decimal32, decimal64, decimal128
 from cudf._lib.cpp.wrappers.durations cimport (
     duration_ms,
     duration_ns,
@@ -69,7 +64,7 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
     timestamp_s,
     timestamp_us,
 )
-from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
+from cudf._lib.utils cimport columns_from_table_view
 
 
 def nestrepl_list(lst):
@@ -338,62 +333,6 @@ cdef _set_timedelta64_from_np_scalar(unique_ptr[scalar]& s,
     else:
         raise ValueError(f"dtype not supported: {dtype}")
 
-cdef _set_decimal_from_scalar(unique_ptr[scalar]& s,
-                              object value,
-                              object dtype,
-                              bool valid=True):
-    value = cudf.utils.dtypes._decimal_to_int64(value) if valid else 0
-    if isinstance(dtype, cudf.Decimal64Dtype):
-        s.reset(
-            new fixed_point_scalar[decimal64](
-                <int64_t>np.int64(value), scale_type(-dtype.scale), valid
-            )
-        )
-    elif isinstance(dtype, cudf.Decimal32Dtype):
-        s.reset(
-            new fixed_point_scalar[decimal32](
-                <int32_t>np.int32(value), scale_type(-dtype.scale), valid
-            )
-        )
-    elif isinstance(dtype, cudf.Decimal128Dtype):
-        s.reset(
-            new fixed_point_scalar[decimal128](
-                <libcudf_types.int128>value, scale_type(-dtype.scale), valid
-            )
-        )
-    else:
-        raise ValueError(f"dtype not supported: {dtype}")
-
-cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
-                             object value,
-                             object dtype,
-                             bool valid=True):
-    arrow_schema = dtype.to_arrow()
-    columns = [str(i) for i in range(len(arrow_schema))]
-    if valid:
-        pyarrow_table = pa.Table.from_arrays(
-            [
-                pa.array([value[f.name]], from_pandas=True, type=f.type)
-                for f in arrow_schema
-            ],
-            names=columns
-        )
-    else:
-        pyarrow_table = pa.Table.from_arrays(
-            [
-                pa.array([NA], from_pandas=True, type=f.type)
-                for f in arrow_schema
-            ],
-            names=columns
-        )
-
-    data = from_arrow(pyarrow_table)
-    cdef table_view struct_view = table_view_from_columns(data)
-
-    s.reset(
-        new struct_scalar(struct_view, valid)
-    )
-
 cdef _get_py_dict_from_struct(unique_ptr[scalar]& s, dtype):
     if not s.get()[0].is_valid():
         return NA
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 1b94db75340..73ea8e2cfc4 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -463,24 +463,6 @@ def _get_nan_for_dtype(dtype):
         return np.float64("nan")
 
 
-def _decimal_to_int64(decimal: Decimal) -> int:
-    """
-    Scale a Decimal such that the result is the integer
-    that would result from removing the decimal point.
-
-    Examples
-    --------
-    >>> _decimal_to_int64(Decimal('1.42'))
-    142
-    >>> _decimal_to_int64(Decimal('0.0042'))
-    42
-    >>> _decimal_to_int64(Decimal('-1.004201'))
-    -1004201
-
-    """
-    return int(f"{decimal:0f}".replace(".", ""))
-
-
 def get_allowed_combinations_for_operator(dtype_l, dtype_r, op):
     error = TypeError(
         f"{op} not supported between {dtype_l} and {dtype_r} scalars"

From f5def95789ea268f12d0c0653055b9e3f0720343 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 19:32:29 -0700
Subject: [PATCH 28/54] Combine nestrepl

---
 python/cudf/cudf/_lib/scalar.pyx | 33 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 44e1255e9c8..3b0ebbf93f2 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -67,22 +67,19 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
 from cudf._lib.utils cimport columns_from_table_view
 
 
-def nestrepl_list(lst):
-    # TODO: Account for struct inside list
-    for i, item in enumerate(lst):
-        if item is NA or item is NaT:
-            lst[i] = None
-        elif isinstance(item, list):
-            nestrepl_list(item)
-
-
-def nestrepl_dict(dct):
-    # TODO: Account for list inside struct
-    for k, v in dct.items():
-        if v is NA or v is NaT:
-            dct[k] = None
-        elif isinstance(v, dict):
-            nestrepl_dict(v)
+def nestrepl(obj):
+    if isinstance(obj, list):
+        for i, item in enumerate(obj):
+            if item is NA or item is NaT:
+                obj[i] = None
+            elif isinstance(item, (dict, list)):
+                nestrepl(item)
+    elif isinstance(obj, dict):
+        for k, v in obj.items():
+            if v is NA or v is NaT:
+                obj[k] = None
+            elif isinstance(v, (dict, list)):
+                nestrepl(v)
 
 
 cdef class DeviceScalar:
@@ -129,7 +126,7 @@ cdef class DeviceScalar:
             if value is NA:
                 value = None
             else:
-                nestrepl_list(value)
+                nestrepl(value)
 
             pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
             self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
@@ -138,7 +135,7 @@ cdef class DeviceScalar:
             if value is NA:
                 value = None
             else:
-                nestrepl_dict(value)
+                nestrepl(value)
 
             pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
             self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)

From 16cef1504fc71e7b624e734bafc0c1576616008d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 20:05:15 -0700
Subject: [PATCH 29/54] Unify constructor

---
 python/cudf/cudf/_lib/scalar.pyx | 99 ++++++++++----------------------
 1 file changed, 31 insertions(+), 68 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 3b0ebbf93f2..7211fe90317 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -67,19 +67,21 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
 from cudf._lib.utils cimport columns_from_table_view
 
 
-def nestrepl(obj):
+# TODO: Check if this could replace _nested_na_replace
+def _replace_nested_nulls(obj):
     if isinstance(obj, list):
         for i, item in enumerate(obj):
-            if item is NA or item is NaT:
+            # TODO: Check if this should use _is_null_host_scalar
+            if cudf.utils.utils.is_na_like(item):
                 obj[i] = None
             elif isinstance(item, (dict, list)):
-                nestrepl(item)
+                _replace_nested_nulls(item)
     elif isinstance(obj, dict):
         for k, v in obj.items():
-            if v is NA or v is NaT:
+            if cudf.utils.utils.is_na_like(v):
                 obj[k] = None
             elif isinstance(v, (dict, list)):
-                nestrepl(v)
+                _replace_nested_nulls(v)
 
 
 cdef class DeviceScalar:
@@ -104,75 +106,36 @@ cdef class DeviceScalar:
         dtype : dtype
             A NumPy dtype.
         """
-        self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
-        set_dtype = True
+        dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
 
-        cdef type_id tid
-        if isinstance(self._dtype, cudf.core.dtypes.DecimalDtype):
-            if value is NA:
-                value = None
-            pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
+        if cudf.utils.utils.is_na_like(value):
+            value = None
+        else:
+            _replace_nested_nulls(value)
+
+        if isinstance(dtype, cudf.core.dtypes._BaseDtype):
+            pa_type = dtype.to_arrow()
+        elif pd.api.types.is_string_dtype(dtype):
+            # Have to manually convert object types, which we use internally
+            # for strings but pyarrow only supports as unicode 'U'
+            pa_type = pa.string()
+        else:
+            pa_type = pa.from_numpy_dtype(dtype)
 
+        pa_scalar = pa.scalar(value, type=pa_type)
+
+        cdef type_id tid
+        data_type = None
+        if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
             tid = type_id.DECIMAL128
-            if isinstance(self._dtype, cudf.core.dtypes.Decimal32Dtype):
+            if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype):
                 tid = type_id.DECIMAL32
-            elif isinstance(self._dtype, cudf.core.dtypes.Decimal64Dtype):
+            elif isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
                 tid = type_id.DECIMAL64
-            dt = pylibcudf.DataType(tid, -self._dtype.scale)
-
-            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar, dt)
-            set_dtype = False
-        elif isinstance(self._dtype, cudf.ListDtype):
-            if value is NA:
-                value = None
-            else:
-                nestrepl(value)
-
-            pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
-            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
-            set_dtype = False
-        elif isinstance(self._dtype, cudf.StructDtype):
-            if value is NA:
-                value = None
-            else:
-                nestrepl(value)
-
-            pa_scalar = pa.scalar(value, type=self._dtype.to_arrow())
-            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
-            set_dtype = False
-        elif pd.api.types.is_string_dtype(self._dtype):
-            if value is NA:
-                value = None
-            pa_scalar = pa.scalar(value, type=pa.string())
-            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
-            set_dtype = False
-        elif pd.api.types.is_numeric_dtype(self._dtype):
-            if value is NA:
-                value = None
-            pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
-            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
-            set_dtype = False
-        elif pd.api.types.is_datetime64_dtype(self._dtype):
-            if value is NA or value is NaT:
-                value = None
-            pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
-            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
-            set_dtype = False
-        elif pd.api.types.is_timedelta64_dtype(self._dtype):
-            if value is NA or value is NaT:
-                value = None
-            pa_scalar = pa.scalar(value, type=pa.from_numpy_dtype(self._dtype))
-            self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar)
-            set_dtype = False
-        else:
-            raise ValueError(
-                f"Cannot convert value of type "
-                f"{type(value).__name__} to cudf scalar"
-            )
+            data_type = pylibcudf.DataType(tid, -dtype.scale)
 
-        cdef libcudf_types.data_type cdtype = self.get_raw_ptr()[0].type()
-        if set_dtype:
-            self.c_value._data_type = pylibcudf.DataType.from_libcudf(cdtype)
+        self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar, data_type)
+        self._dtype = dtype
 
     def _to_host_scalar(self):
         if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):

From edcca20f9a3f419aa89bf94abcb7eede04e8c2c4 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 8 Sep 2023 20:17:24 -0700
Subject: [PATCH 30/54] Copy to avoid overwriting inputs

---
 python/cudf/cudf/_lib/scalar.pyx | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 7211fe90317..b95a9e1fe6b 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -1,5 +1,6 @@
 # Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
+import copy
 import decimal
 
 import numpy as np
@@ -111,6 +112,11 @@ cdef class DeviceScalar:
         if cudf.utils.utils.is_na_like(value):
             value = None
         else:
+            # TODO: For now we always deepcopy the input value to avoid
+            # overwriting the input values when replacing nulls. Since it's
+            # just host values it's not that expensive, but we could consider
+            # alternatives.
+            value = copy.deepcopy(value)
             _replace_nested_nulls(value)
 
         if isinstance(dtype, cudf.core.dtypes._BaseDtype):

From c81e158748dd2837ba098cbbaaaf4c1f883347c0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Sun, 10 Sep 2023 19:49:06 -0700
Subject: [PATCH 31/54] Add error check for nested null in scalar and fix test

---
 cpp/src/interop/from_arrow.cu         | 10 +++++++-
 python/cudf/cudf/tests/test_struct.py | 35 +++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 21f49a327b3..d309b6dfc57 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -39,6 +39,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_buffer.hpp>
 
+#include <stdexcept>
 #include <thrust/gather.h>
 
 namespace cudf {
@@ -582,7 +583,14 @@ std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
   auto builder = make_builder(input.type);
 
   auto status = builder->AppendScalar(input);
-  if (status != arrow::Status::OK()) { CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed"); }
+  if (status != arrow::Status::OK()) {
+    if (status == arrow::Status::NotImplemented()) {
+      // The only known failure case here is for nulls
+      CUDF_FAIL("Cannot create untyped null scalars or nested types with untyped null leaf nodes",
+                std::invalid_argument);
+    }
+    CUDF_FAIL("Arrow ArrayBuilder::AppendScalar failed");
+  }
 
   auto maybe_array = builder->Finish();
   if (!maybe_array.ok()) { CUDF_FAIL("Arrow ArrayBuilder::Finish failed"); }
diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py
index a3593e55b97..ce6dc587320 100644
--- a/python/cudf/cudf/tests/test_struct.py
+++ b/python/cudf/cudf/tests/test_struct.py
@@ -150,9 +150,7 @@ def test_struct_setitem(data, item):
     "data",
     [
         {"a": 1, "b": "rapids", "c": [1, 2, 3, 4]},
-        {"a": 1, "b": "rapids", "c": [1, 2, 3, 4], "d": cudf.NA},
         {"a": "Hello"},
-        {"b": [], "c": [1, 2, 3]},
     ],
 )
 def test_struct_scalar_host_construction(data):
@@ -161,6 +159,39 @@ def test_struct_scalar_host_construction(data):
     assert list(slr.device_value.value.values()) == list(data.values())
 
 
+@pytest.mark.parametrize(
+    ("data", "dtype"),
+    [
+        (
+            {"a": 1, "b": "rapids", "c": [1, 2, 3, 4], "d": cudf.NA},
+            cudf.StructDtype(
+                {
+                    "a": np.dtype(np.int64),
+                    "b": np.dtype(np.str_),
+                    "c": cudf.ListDtype(np.dtype(np.int64)),
+                    "d": np.dtype(np.int64),
+                }
+            ),
+        ),
+        (
+            {"b": [], "c": [1, 2, 3]},
+            cudf.StructDtype(
+                {
+                    "b": cudf.ListDtype(np.dtype(np.int64)),
+                    "c": cudf.ListDtype(np.dtype(np.int64)),
+                }
+            ),
+        ),
+    ],
+)
+def test_struct_scalar_host_construction_no_dtype_inference(data, dtype):
+    # cudf cannot infer the dtype of the scalar when it contains only nulls or
+    # is empty.
+    slr = cudf.Scalar(data, dtype=dtype)
+    assert slr.value == data
+    assert list(slr.device_value.value.values()) == list(data.values())
+
+
 def test_struct_scalar_null():
     slr = cudf.Scalar(cudf.NA, dtype=StructDtype)
     assert slr.device_value.value is cudf.NA

From 14b2f86813c01cf862ff082e5662f0a42bf75ff5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Sun, 10 Sep 2023 20:40:05 -0700
Subject: [PATCH 32/54] Initial implementation of to_arrow

---
 cpp/include/cudf/interop.hpp  | 11 +++++++++++
 cpp/src/interop/from_arrow.cu |  2 +-
 cpp/src/interop/to_arrow.cu   | 19 +++++++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index da3380e1d44..77ee1a1ff1d 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -133,6 +133,17 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
                                        std::vector<column_metadata> const& metadata = {},
                                        arrow::MemoryPool* ar_mr = arrow::default_memory_pool());
 
+/**
+ * @brief Create `arrow::Scalar` from cudf scalar `input`
+ *
+ * Converts the `cudf::scalar` to `arrow::Scalar`.
+ *
+ * @param input scalar that needs to be converted to arrow Scalar
+ * @param ar_mr arrow memory pool to allocate memory for arrow Scalar
+ * @return arrow Scalar generated from `input`
+ */
+std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
+                                        arrow::MemoryPool* ar_mr = arrow::default_memory_pool());
 /**
  * @brief Create `cudf::table` from given arrow Table input
  *
diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index d309b6dfc57..8ff361d7e04 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -584,7 +584,7 @@ std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
 
   auto status = builder->AppendScalar(input);
   if (status != arrow::Status::OK()) {
-    if (status == arrow::Status::NotImplemented()) {
+    if (status.IsNotImplemented()) {
       // The only known failure case here is for nulls
       CUDF_FAIL("Cannot create untyped null scalars or nested types with untyped null leaf nodes",
                 std::invalid_argument);
diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 958a2fcb95f..6ab6c7cd3c7 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -16,9 +16,12 @@
 
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_view.hpp>
+#include <cudf/copying.hpp>
 #include <cudf/detail/interop.hpp>
 #include <cudf/detail/iterator.cuh>
+#include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/scatter.hpp>
 #include <cudf/detail/unary.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/interop.hpp>
@@ -37,6 +40,8 @@
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
 
+#include "cudf/filling.hpp"
+#include "cudf/utilities/error.hpp"
 #include "detail/arrow_allocator.hpp"
 
 namespace cudf {
@@ -413,4 +418,18 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
   return detail::to_arrow(input, metadata, cudf::get_default_stream(), ar_mr);
 }
 
+std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input, arrow::MemoryPool* ar_mr)
+{
+  auto stream = cudf::get_default_stream();
+  auto column = cudf::empty_like(input);
+
+  auto view = column->mutable_view();
+  cudf::fill_in_place(view, 0, 1, input);
+  cudf::table_view tv{{column->view()}};
+  auto arrow_table  = cudf::to_arrow(tv);
+  auto ac           = arrow_table->column(0);
+  auto maybe_scalar = ac->GetScalar(0);
+  if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); }
+  return maybe_scalar.ValueOrDie();
+}
 }  // namespace cudf

From cd80fe0d88f1332caacbbabab26c35667231b1e7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Sun, 10 Sep 2023 20:46:00 -0700
Subject: [PATCH 33/54] Add from_arrow_scalar to Cython API

---
 python/cudf/cudf/_lib/pylibcudf/interop.pxd | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pxd b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
index dd518aeea52..ab464d7e465 100644
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
@@ -1,10 +1,15 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-from pyarrow.lib cimport Table as pa_Table
+from pyarrow.lib cimport Scalar as pa_Scalar, Table as pa_Table
 
+from .scalar cimport Scalar
 from .table cimport Table
 
 
 cpdef Table from_arrow(
     pa_Table pyarrow_table,
 )
+
+cpdef Scalar from_arrow_scalar(
+    pa_Scalar pyarrow_scalar,
+)

From 5fffc4f9c440c2b53467361aa764c2a3bc0034c6 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 11 Sep 2023 10:59:07 -0700
Subject: [PATCH 34/54] Add Python wrappers for to_arrow

---
 python/cudf/cudf/_lib/cpp/interop.pxd       | 11 +++++++++
 python/cudf/cudf/_lib/pylibcudf/interop.pxd |  4 ++++
 python/cudf/cudf/_lib/pylibcudf/interop.pyx | 26 ++++++++++++++++++++-
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/cpp/interop.pxd b/python/cudf/cudf/_lib/cpp/interop.pxd
index bbfcd8c4a0a..d6891f31646 100644
--- a/python/cudf/cudf/_lib/cpp/interop.pxd
+++ b/python/cudf/cudf/_lib/cpp/interop.pxd
@@ -33,7 +33,18 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \
         string name
         vector[column_metadata] children_meta
 
+    # TODO: Adding this for pylibcudf because in pylibcudf we don't have column
+    # names. However we need to figure out how this will propagate up when cudf
+    # starts using pylibcudf for interop functionality.
+    cdef shared_ptr[CTable] to_arrow(
+        table_view input,
+    ) except +
+
     cdef shared_ptr[CTable] to_arrow(
         table_view input,
         vector[column_metadata] metadata,
     ) except +
+
+    cdef shared_ptr[CScalar] to_arrow(
+        const scalar& input,
+    ) except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pxd b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
index ab464d7e465..a547813a9d2 100644
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
@@ -13,3 +13,7 @@ cpdef Table from_arrow(
 cpdef Scalar from_arrow_scalar(
     pa_Scalar pyarrow_scalar,
 )
+
+cpdef pa_Table to_arrow(Table tbl)
+
+cpdef pa_Scalar to_arrow_scalar(Scalar slr)
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
index 63f4ed68f7e..426534a1fed 100644
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
@@ -1,5 +1,6 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
+from cython.operator cimport dereference
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.utility cimport move
 from pyarrow.lib cimport (
@@ -9,9 +10,14 @@ from pyarrow.lib cimport (
     Table as pa_Table,
     pyarrow_unwrap_scalar,
     pyarrow_unwrap_table,
+    pyarrow_wrap_scalar,
+    pyarrow_wrap_table,
 )
 
-from cudf._lib.cpp.interop cimport from_arrow as cpp_from_arrow
+from cudf._lib.cpp.interop cimport (
+    from_arrow as cpp_from_arrow,
+    to_arrow as cpp_to_arrow,
+)
 from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table
 
@@ -45,3 +51,21 @@ cpdef Scalar from_arrow_scalar(
         c_result = move(cpp_from_arrow(cscalar.get()[0]))
 
     return Scalar.from_libcudf(move(c_result))
+
+
+cpdef pa_Table to_arrow(Table tbl):
+    cdef shared_ptr[pa_CTable] c_result
+
+    with nogil:
+        c_result = move(cpp_to_arrow(tbl.view()))
+
+    return pyarrow_wrap_table(c_result)
+
+
+cpdef pa_Scalar to_arrow_scalar(Scalar slr):
+    cdef shared_ptr[pa_CScalar] c_result
+
+    with nogil:
+        c_result = move(cpp_to_arrow(dereference(slr.c_obj.get())))
+
+    return pyarrow_wrap_scalar(c_result)

From cb52d2b3d5b0229392115de34400b6f659c9fa71 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 11 Sep 2023 11:04:53 -0700
Subject: [PATCH 35/54] Add pylibcudf converter

---
 python/cudf/cudf/_lib/pylibcudf/scalar.pxd | 2 ++
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
index 85d57305622..87932a78f6e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
@@ -26,3 +26,5 @@ cdef class Scalar:
 
     @staticmethod
     cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*)
+
+    cpdef to_pyarrow_scalar(self)
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 0821258ac6c..05306126d2c 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -90,6 +90,10 @@ cdef class Scalar:
             )
         return s
 
+    cpdef to_pyarrow_scalar(self):
+        from interop import to_arrow_scalar
+        return to_arrow_scalar(self)
+
     cdef const scalar* get(self) except *:
         return self.c_obj.get()
 

From 643d992004793d7e06b03e92a0890f58e436c213 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 11 Sep 2023 12:24:31 -0700
Subject: [PATCH 36/54] Fix bugs in impl

---
 cpp/src/interop/to_arrow.cu                | 7 ++++---
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 2 +-
 python/cudf/cudf/_lib/scalar.pyx           | 4 ++++
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 6ab6c7cd3c7..ffe402504e7 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -420,13 +420,14 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
 
 std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input, arrow::MemoryPool* ar_mr)
 {
-  auto stream = cudf::get_default_stream();
-  auto column = cudf::empty_like(input);
+  auto stream     = cudf::get_default_stream();
+  auto tmp_column = cudf::empty_like(input);
+  auto column     = cudf::allocate_like(tmp_column->view(), 1);
 
   auto view = column->mutable_view();
   cudf::fill_in_place(view, 0, 1, input);
   cudf::table_view tv{{column->view()}};
-  auto arrow_table  = cudf::to_arrow(tv);
+  auto arrow_table  = cudf::to_arrow(tv, {column_metadata{""}});
   auto ac           = arrow_table->column(0);
   auto maybe_scalar = ac->GetScalar(0);
   if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); }
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 05306126d2c..0b3e429c9c8 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -91,7 +91,7 @@ cdef class Scalar:
         return s
 
     cpdef to_pyarrow_scalar(self):
-        from interop import to_arrow_scalar
+        from .interop import to_arrow_scalar
         return to_arrow_scalar(self)
 
     cdef const scalar* get(self) except *:
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index b95a9e1fe6b..140f5c68171 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -164,6 +164,10 @@ cdef class DeviceScalar:
             )
         return result
 
+    # TODO: This is just here for testing and should be removed.
+    def get(self):
+        return self.c_value
+
     @property
     def dtype(self):
         """

From 309cdfa4809676c41043d85d3a8ab7421fe901f8 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 11 Sep 2023 15:56:44 -0700
Subject: [PATCH 37/54] Make to_arrow work for all column types

---
 cpp/src/interop/to_arrow.cu | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index ffe402504e7..b850e767b4d 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -15,6 +15,7 @@
  */
 
 #include <cudf/column/column.hpp>
+#include <cudf/column/column_factories.hpp>
 #include <cudf/column/column_view.hpp>
 #include <cudf/copying.hpp>
 #include <cudf/detail/interop.hpp>
@@ -40,8 +41,6 @@
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
 
-#include "cudf/filling.hpp"
-#include "cudf/utilities/error.hpp"
 #include "detail/arrow_allocator.hpp"
 
 namespace cudf {
@@ -420,12 +419,8 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
 
 std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input, arrow::MemoryPool* ar_mr)
 {
-  auto stream     = cudf::get_default_stream();
-  auto tmp_column = cudf::empty_like(input);
-  auto column     = cudf::allocate_like(tmp_column->view(), 1);
-
-  auto view = column->mutable_view();
-  cudf::fill_in_place(view, 0, 1, input);
+  auto stream = cudf::get_default_stream();
+  auto column = cudf::make_column_from_scalar(input, 1);
   cudf::table_view tv{{column->view()}};
   auto arrow_table  = cudf::to_arrow(tv, {column_metadata{""}});
   auto ac           = arrow_table->column(0);

From 4e4bc6441cc3fe2130170ecdfd7213059b52117b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 11 Sep 2023 18:15:35 -0700
Subject: [PATCH 38/54] Add to_arrow overload for 32-bit decimal

---
 cpp/src/interop/to_arrow.cu | 40 +++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index b850e767b4d..2d7f9ca4d55 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -143,6 +143,46 @@ struct dispatch_to_arrow {
   }
 };
 
+template <>
+std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal32>(
+  column_view input,
+  cudf::type_id,
+  column_metadata const&,
+  arrow::MemoryPool* ar_mr,
+  rmm::cuda_stream_view stream)
+{
+  using DeviceType                = int32_t;
+  size_type const BIT_WIDTH_RATIO = 4;  // Array::Type:type::DECIMAL (128) / int32_t
+
+  rmm::device_uvector<__int128_t> buf(input.size() * BIT_WIDTH_RATIO, stream);
+
+  auto count = thrust::make_counting_iterator(0);
+
+  thrust::for_each(rmm::exec_policy(cudf::get_default_stream()),
+                   count,
+                   count + input.size(),
+                   [in = input.begin<DeviceType>(), out = buf.data()] __device__(auto in_idx) {
+                     auto const out_idx   = in_idx;
+                     auto unsigned_value  = in[in_idx] < 0 ? -in[in_idx] : in[in_idx];
+                     auto unsigned_128bit = static_cast<__int128_t>(unsigned_value);
+                     auto signed_128bit   = in[in_idx] < 0 ? -unsigned_128bit : unsigned_128bit;
+                     out[out_idx]         = signed_128bit;
+                   });
+
+  auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType);
+  auto data_buffer             = allocate_arrow_buffer(buf_size_in_bytes, ar_mr);
+
+  CUDF_CUDA_TRY(cudaMemcpyAsync(
+    data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value()));
+
+  auto type    = arrow::decimal(9, -input.type().scale());
+  auto mask    = fetch_mask_buffer(input, ar_mr, stream);
+  auto buffers = std::vector<std::shared_ptr<arrow::Buffer>>{mask, std::move(data_buffer)};
+  auto data    = std::make_shared<arrow::ArrayData>(type, input.size(), buffers);
+
+  return std::make_shared<arrow::Decimal128Array>(data);
+}
+
 template <>
 std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal64>(
   column_view input,

From c62c71bf81318fc3c8d7467c8a28298edf6ba911 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 13 Sep 2023 18:02:06 -0700
Subject: [PATCH 39/54] Add support for list/struct up to output field naming

---
 python/cudf/cudf/_lib/scalar.pyx | 63 ++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 140f5c68171..9bf373b544c 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -85,6 +85,21 @@ def _replace_nested_nulls(obj):
                 _replace_nested_nulls(v)
 
 
+def _replace_nested_none(obj):
+    if isinstance(obj, list):
+        for i, item in enumerate(obj):
+            if item is None:
+                obj[i] = NA
+            elif isinstance(item, (dict, list)):
+                _replace_nested_none(item)
+    elif isinstance(obj, dict):
+        for k, v in obj.items():
+            if v is None:
+                obj[k] = NA
+            elif isinstance(v, (dict, list)):
+                _replace_nested_none(v)
+
+
 cdef class DeviceScalar:
 
     # I think this should be removable, except that currently the way that
@@ -145,19 +160,53 @@ cdef class DeviceScalar:
 
     def _to_host_scalar(self):
         if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):
-            result = _get_py_decimal_from_fixed_point(self.c_value.c_obj)
+            ps = self.c_value.to_pyarrow_scalar()
+            if not ps.is_valid:
+                return NA
+            return ps.as_py()
+        # For nested types we should eventually account for the special cases
+        # that we handle for other types, e.g. numerics being casted to numpy
+        # types or datetime/timedelta needing to be cast to int64 to handle
+        # overflow. However, the old implementation didn't handle these cases
+        # either, so we can leave that for a follow-up PR.
         elif cudf.api.types.is_struct_dtype(self.dtype):
-            result = _get_py_dict_from_struct(self.c_value.c_obj, self.dtype)
+            ps = self.c_value.to_pyarrow_scalar()
+            if not ps.is_valid:
+                return NA
+            ret = ps.as_py()
+            _replace_nested_none(ret)
+            return ret
         elif cudf.api.types.is_list_dtype(self.dtype):
-            result = _get_py_list_from_list(self.c_value.c_obj, self.dtype)
+            ps = self.c_value.to_pyarrow_scalar()
+            if not ps.is_valid:
+                return NA
+            ret = ps.as_py()
+            _replace_nested_none(ret)
+            return ret
         elif pd.api.types.is_string_dtype(self.dtype):
-            result = _get_py_string_from_string(self.c_value.c_obj)
+            ps = self.c_value.to_pyarrow_scalar()
+            if not ps.is_valid:
+                return NA
+            return ps.as_py()
         elif pd.api.types.is_numeric_dtype(self.dtype):
-            result = _get_np_scalar_from_numeric(self.c_value.c_obj)
+            ps = self.c_value.to_pyarrow_scalar()
+            if not ps.is_valid:
+                return NA
+            return ps.type.to_pandas_dtype()(ps.as_py())
         elif pd.api.types.is_datetime64_dtype(self.dtype):
-            result = _get_np_scalar_from_timestamp64(self.c_value.c_obj)
+            ps = self.c_value.to_pyarrow_scalar()
+            if not ps.is_valid:
+                return NaT
+            time_unit, _ = np.datetime_data(self.dtype)
+            # Cast to int64 to avoid overflow
+            return np.datetime64(ps.cast('int64').as_py(), time_unit)
         elif pd.api.types.is_timedelta64_dtype(self.dtype):
-            result = _get_np_scalar_from_timedelta64(self.c_value.c_obj)
+            ps = self.c_value.to_pyarrow_scalar()
+            if not ps.is_valid:
+                return NaT
+            time_unit, _ = np.datetime_data(self.dtype)
+            # Cast to int64 to avoid overflow
+            return np.timedelta64(ps.cast('int64').as_py(), time_unit)
         else:
             raise ValueError(
                 "Could not convert cudf::scalar to a Python value"

From d6eee4b2ec7942f6656e7e7bd6452b4cdc2d7c83 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Sep 2023 10:01:10 -0700
Subject: [PATCH 40/54] Fully implement metadata

---
 cpp/include/cudf/interop.hpp                |  2 ++
 cpp/src/interop/to_arrow.cu                 |  7 ++--
 python/cudf/cudf/_lib/cpp/interop.pxd       |  8 +----
 python/cudf/cudf/_lib/pylibcudf/interop.pxd | 11 +++++--
 python/cudf/cudf/_lib/pylibcudf/interop.pyx | 35 +++++++++++++++++---
 python/cudf/cudf/_lib/pylibcudf/scalar.pxd  |  4 ++-
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx  |  4 +--
 python/cudf/cudf/_lib/scalar.pyx            | 36 +++++++++++++++++----
 python/cudf/cudf/tests/test_list.py         |  4 +--
 9 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index 77ee1a1ff1d..3ad99cbcc99 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -139,10 +139,12 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
  * Converts the `cudf::scalar` to `arrow::Scalar`.
  *
  * @param input scalar that needs to be converted to arrow Scalar
+ * @param metadata Contains hierarchy of names of columns and children
  * @param ar_mr arrow memory pool to allocate memory for arrow Scalar
  * @return arrow Scalar generated from `input`
  */
 std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
+                                        column_metadata const& metadata = {},
                                         arrow::MemoryPool* ar_mr = arrow::default_memory_pool());
 /**
  * @brief Create `cudf::table` from given arrow Table input
diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 2d7f9ca4d55..b501c1e1d7e 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -457,12 +457,15 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
   return detail::to_arrow(input, metadata, cudf::get_default_stream(), ar_mr);
 }
 
-std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input, arrow::MemoryPool* ar_mr)
+std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
+                                        column_metadata const& metadata,
+
+                                        arrow::MemoryPool* ar_mr)
 {
   auto stream = cudf::get_default_stream();
   auto column = cudf::make_column_from_scalar(input, 1);
   cudf::table_view tv{{column->view()}};
-  auto arrow_table  = cudf::to_arrow(tv, {column_metadata{""}});
+  auto arrow_table  = cudf::to_arrow(tv, {metadata});
   auto ac           = arrow_table->column(0);
   auto maybe_scalar = ac->GetScalar(0);
   if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); }
diff --git a/python/cudf/cudf/_lib/cpp/interop.pxd b/python/cudf/cudf/_lib/cpp/interop.pxd
index d6891f31646..88e9d83ee98 100644
--- a/python/cudf/cudf/_lib/cpp/interop.pxd
+++ b/python/cudf/cudf/_lib/cpp/interop.pxd
@@ -33,13 +33,6 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \
         string name
         vector[column_metadata] children_meta
 
-    # TODO: Adding this for pylibcudf because in pylibcudf we don't have column
-    # names. However we need to figure out how this will propagate up when cudf
-    # starts using pylibcudf for interop functionality.
-    cdef shared_ptr[CTable] to_arrow(
-        table_view input,
-    ) except +
-
     cdef shared_ptr[CTable] to_arrow(
         table_view input,
         vector[column_metadata] metadata,
@@ -47,4 +40,5 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \
 
     cdef shared_ptr[CScalar] to_arrow(
         const scalar& input,
+        column_metadata metadata,
     ) except +
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pxd b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
index a547813a9d2..c1268b8ca1a 100644
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
@@ -2,10 +2,17 @@
 
 from pyarrow.lib cimport Scalar as pa_Scalar, Table as pa_Table
 
+from cudf._lib.cpp.interop cimport column_metadata
+
 from .scalar cimport Scalar
 from .table cimport Table
 
 
+cdef class ColumnMetadata:
+    cdef public object name
+    cdef public object children_meta
+    cdef column_metadata to_c_metadata(self)
+
 cpdef Table from_arrow(
     pa_Table pyarrow_table,
 )
@@ -14,6 +21,6 @@ cpdef Scalar from_arrow_scalar(
     pa_Scalar pyarrow_scalar,
 )
 
-cpdef pa_Table to_arrow(Table tbl)
+cpdef pa_Table to_arrow(Table tbl, list metadata)
 
-cpdef pa_Scalar to_arrow_scalar(Scalar slr)
+cpdef pa_Scalar to_arrow_scalar(Scalar slr, ColumnMetadata metadata)
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
index 426534a1fed..9e6b44b117d 100644
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
@@ -3,6 +3,7 @@
 from cython.operator cimport dereference
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.utility cimport move
+from libcpp.vector cimport vector
 from pyarrow.lib cimport (
     CScalar as pa_CScalar,
     CTable as pa_CTable,
@@ -15,6 +16,7 @@ from pyarrow.lib cimport (
 )
 
 from cudf._lib.cpp.interop cimport (
+    column_metadata,
     from_arrow as cpp_from_arrow,
     to_arrow as cpp_to_arrow,
 )
@@ -25,6 +27,26 @@ from .scalar cimport Scalar
 from .table cimport Table
 
 
+cdef class ColumnMetadata:
+    def __init__(self, name):
+        self.name = name
+        self.children_meta = []
+
+    cdef column_metadata to_c_metadata(self):
+        """Convert to C++ column_metadata.
+
+        Since this class is mutable and cheap, it is easier to create the C++
+        object on the fly rather than have it directly backing the storage for
+        the Cython class.
+        """
+        cdef column_metadata c_metadata
+        cdef ColumnMetadata child_meta
+        c_metadata.name = self.name.encode()
+        for child_meta in self.children_meta:
+            c_metadata.children_meta.push_back(child_meta.to_c_metadata())
+        return c_metadata
+
+
 cpdef Table from_arrow(
     pa_Table pyarrow_table,
 ):
@@ -53,19 +75,24 @@ cpdef Scalar from_arrow_scalar(
     return Scalar.from_libcudf(move(c_result))
 
 
-cpdef pa_Table to_arrow(Table tbl):
+cpdef pa_Table to_arrow(Table tbl, list metadata):
     cdef shared_ptr[pa_CTable] c_result
+    cdef vector[column_metadata] c_metadata
+    cdef ColumnMetadata meta
+    for meta in metadata:
+        c_metadata.push_back(meta.to_c_metadata())
 
     with nogil:
-        c_result = move(cpp_to_arrow(tbl.view()))
+        c_result = move(cpp_to_arrow(tbl.view(), c_metadata))
 
     return pyarrow_wrap_table(c_result)
 
 
-cpdef pa_Scalar to_arrow_scalar(Scalar slr):
+cpdef pa_Scalar to_arrow_scalar(Scalar slr, ColumnMetadata metadata):
     cdef shared_ptr[pa_CScalar] c_result
+    cdef column_metadata c_metadata = metadata.to_c_metadata()
 
     with nogil:
-        c_result = move(cpp_to_arrow(dereference(slr.c_obj.get())))
+        c_result = move(cpp_to_arrow(dereference(slr.c_obj.get()), c_metadata))
 
     return pyarrow_wrap_scalar(c_result)
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
index 87932a78f6e..d20c65f0be0 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
@@ -27,4 +27,6 @@ cdef class Scalar:
     @staticmethod
     cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*)
 
-    cpdef to_pyarrow_scalar(self)
+    # TODO: Make sure I'm correct to avoid typing the metadata as
+    # ColumnMetadata, I assume that will cause circular cimport problems
+    cpdef to_pyarrow_scalar(self, metadata)
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
index 0b3e429c9c8..d06bb9adeb9 100644
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
@@ -90,9 +90,9 @@ cdef class Scalar:
             )
         return s
 
-    cpdef to_pyarrow_scalar(self):
+    cpdef to_pyarrow_scalar(self, metadata):
         from .interop import to_arrow_scalar
-        return to_arrow_scalar(self)
+        return to_arrow_scalar(self, metadata)
 
     cdef const scalar* get(self) except *:
         return self.c_obj.get()
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 9bf373b544c..30d46bd3fe2 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -22,6 +22,7 @@ from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
 import cudf
+from cudf._lib.pylibcudf.interop import ColumnMetadata
 
 from cudf._lib.pylibcudf.types cimport type_id
 
@@ -30,6 +31,7 @@ from cudf._lib.types import (
     datetime_unit_map,
     duration_unit_map,
 )
+from cudf.api.types import is_list_dtype, is_struct_dtype
 from cudf.core.dtypes import ListDtype, StructDtype
 from cudf.core.missing import NA, NaT
 
@@ -100,6 +102,25 @@ def _replace_nested_none(obj):
                 _replace_nested_none(v)
 
 
+def gather_metadata(dtypes):
+    # dtypes is a dict mapping names to column dtypes
+    # This interface is a bit clunky, but it matches libcudf. May want to
+    # consider better approaches to building up the metadata eventually.
+    out = []
+    for name, dtype in dtypes.items():
+        v = ColumnMetadata(name)
+        if is_struct_dtype(dtype):
+            v.children_meta = gather_metadata(dtype.fields)
+        elif is_list_dtype(dtype):
+            # Offsets column is unnamed and has no children
+            v.children_meta.append(ColumnMetadata(""))
+            v.children_meta.extend(
+                gather_metadata({"": dtype.element_type})
+            )
+        out.append(v)
+    return out
+
+
 cdef class DeviceScalar:
 
     # I think this should be removable, except that currently the way that
@@ -159,8 +180,9 @@ cdef class DeviceScalar:
         self._dtype = dtype
 
     def _to_host_scalar(self):
+        metadata = gather_metadata({"": self.dtype})[0]
         if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):
-            ps = self.c_value.to_pyarrow_scalar()
+            ps = self.c_value.to_pyarrow_scalar(metadata)
             if not ps.is_valid:
                 return NA
             return ps.as_py()
@@ -170,38 +192,38 @@ cdef class DeviceScalar:
         # overflow. However, the old implementation didn't handle these cases
         # either, so we can leave that for a follow-up PR.
         elif cudf.api.types.is_struct_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar()
+            ps = self.c_value.to_pyarrow_scalar(metadata)
             if not ps.is_valid:
                 return NA
             ret = ps.as_py()
             _replace_nested_none(ret)
             return ret
         elif cudf.api.types.is_list_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar()
+            ps = self.c_value.to_pyarrow_scalar(metadata)
             if not ps.is_valid:
                 return NA
             ret = ps.as_py()
             _replace_nested_none(ret)
             return ret
         elif pd.api.types.is_string_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar()
+            ps = self.c_value.to_pyarrow_scalar(metadata)
             if not ps.is_valid:
                 return NA
             return ps.as_py()
         elif pd.api.types.is_numeric_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar()
+            ps = self.c_value.to_pyarrow_scalar(metadata)
             if not ps.is_valid:
                 return NA
             return ps.type.to_pandas_dtype()(ps.as_py())
         elif pd.api.types.is_datetime64_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar()
+            ps = self.c_value.to_pyarrow_scalar(metadata)
             if not ps.is_valid:
                 return NaT
             time_unit, _ = np.datetime_data(self.dtype)
             # Cast to int64 to avoid overflow
             return np.datetime64(ps.cast('int64').as_py(), time_unit)
         elif pd.api.types.is_timedelta64_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar()
+            ps = self.c_value.to_pyarrow_scalar(metadata)
             if not ps.is_valid:
                 return NaT
             time_unit, _ = np.datetime_data(self.dtype)
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 5dd58d8a875..ac10dd97c56 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -895,14 +895,14 @@ def test_memory_usage():
     "data, idx",
     [
         (
-            [[{"f2": {"a": 100}, "f1": "a"}, {"f1": "sf12", "f2": None}]],
+            [[{"f2": {"a": 100}, "f1": "a"}, {"f1": "sf12", "f2": NA}]],
             0,
         ),
         (
             [
                 [
                     {"f2": {"a": 100, "c": 90, "f2": 10}, "f1": "a"},
-                    {"f1": "sf12", "f2": None},
+                    {"f1": "sf12", "f2": NA},
                 ]
             ],
             0,

From 1591bdba3ecdaab33399f7d969e5c30eaf337c12 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Sep 2023 10:56:55 -0700
Subject: [PATCH 41/54] Split up detail and public APIs

---
 cpp/include/cudf/detail/interop.hpp |  79 ++++++++++++-
 cpp/include/cudf/interop.hpp        |   2 +-
 cpp/src/interop/from_arrow.cu       | 177 +++++++++++-----------------
 cpp/src/interop/to_arrow.cu         |  25 ++--
 4 files changed, 161 insertions(+), 122 deletions(-)

diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp
index 3d4832c8d17..6ddb4fbd1ed 100644
--- a/cpp/include/cudf/detail/interop.hpp
+++ b/cpp/include/cudf/detail/interop.hpp
@@ -104,11 +104,67 @@ std::shared_ptr<arrow::Array> to_arrow_array(cudf::type_id id, Ts&&... args)
   }
 }
 
+/**
+ * @brief Invokes an `operator()` template with the type instantiation based on
+ * the specified `arrow::DataType`'s `id()`.
+ *
+ * This function is analogous to libcudf's type_dispatcher, but instead applies
+ * to Arrow functions. It's primary use case is to leverage Arrow's
+ * metaprogramming facilities like arrow::TypeTraits that require translating
+ * the runtime dtype information into compile-time types.
+ */
+template <typename Functor, typename... Ts>
+constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
+                                               Functor f,
+                                               Ts&&... args)
+{
+  switch (dtype.id()) {
+    case arrow::Type::INT8:
+      return f.template operator()<arrow::Int8Type>(std::forward<Ts>(args)...);
+    case arrow::Type::INT16:
+      return f.template operator()<arrow::Int16Type>(std::forward<Ts>(args)...);
+    case arrow::Type::INT32:
+      return f.template operator()<arrow::Int32Type>(std::forward<Ts>(args)...);
+    case arrow::Type::INT64:
+      return f.template operator()<arrow::Int64Type>(std::forward<Ts>(args)...);
+    case arrow::Type::UINT8:
+      return f.template operator()<arrow::UInt8Type>(std::forward<Ts>(args)...);
+    case arrow::Type::UINT16:
+      return f.template operator()<arrow::UInt16Type>(std::forward<Ts>(args)...);
+    case arrow::Type::UINT32:
+      return f.template operator()<arrow::UInt32Type>(std::forward<Ts>(args)...);
+    case arrow::Type::UINT64:
+      return f.template operator()<arrow::UInt64Type>(std::forward<Ts>(args)...);
+    case arrow::Type::FLOAT:
+      return f.template operator()<arrow::FloatType>(std::forward<Ts>(args)...);
+    case arrow::Type::DOUBLE:
+      return f.template operator()<arrow::DoubleType>(std::forward<Ts>(args)...);
+    case arrow::Type::BOOL:
+      return f.template operator()<arrow::BooleanType>(std::forward<Ts>(args)...);
+    case arrow::Type::TIMESTAMP:
+      return f.template operator()<arrow::TimestampType>(std::forward<Ts>(args)...);
+    case arrow::Type::DURATION:
+      return f.template operator()<arrow::DurationType>(std::forward<Ts>(args)...);
+    case arrow::Type::STRING:
+      return f.template operator()<arrow::StringType>(std::forward<Ts>(args)...);
+    case arrow::Type::LIST:
+      return f.template operator()<arrow::ListType>(std::forward<Ts>(args)...);
+    case arrow::Type::DECIMAL128:
+      return f.template operator()<arrow::Decimal128Type>(std::forward<Ts>(args)...);
+    case arrow::Type::STRUCT:
+      return f.template operator()<arrow::StructType>(std::forward<Ts>(args)...);
+    default: {
+      CUDF_FAIL("Invalid type.");
+    }
+  }
+}
+
 // Converting arrow type to cudf type
 data_type arrow_to_cudf_type(arrow::DataType const& arrow_type);
 
 /**
- * @copydoc cudf::to_arrow
+ * @copydoc cudf::to_arrow(table_view input, std::vector<column_metadata> const& metadata,
+ * arrow::MemoryPool* ar_mr)
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
@@ -118,7 +174,18 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
                                        arrow::MemoryPool* ar_mr);
 
 /**
- * @copydoc cudf::arrow_to_cudf
+ * @copydoc cudf::to_arrow(cudf::scalar const& input, column_metadata const& metadata,
+ * arrow::MemoryPool* ar_mr)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
+                                        column_metadata const& metadata,
+
+                                        rmm::cuda_stream_view stream,
+                                        arrow::MemoryPool* ar_mr);
+/**
+ * @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::mr::device_memory_resource* mr)
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
@@ -126,5 +193,13 @@ std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
                                   rmm::cuda_stream_view stream,
                                   rmm::mr::device_memory_resource* mr);
 
+/**
+ * @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::mr::device_memory_resource* mr)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index 3ad99cbcc99..34232a90cbc 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -159,7 +159,7 @@ std::unique_ptr<table> from_arrow(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Create `cudf::table` from given arrow Scalar input
+ * @brief Create `cudf::scalar` from given arrow Scalar input
  *
  * @param input arrow:Scalar that needs to be converted to `cudf::scalar`
  * @param mr    Device memory resource used to allocate `cudf::scalar`
diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 8ff361d7e04..67b32f0e831 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -424,6 +424,52 @@ std::unique_ptr<column> get_column(arrow::Array const& array,
            : get_empty_type_column(array.length());
 }
 
+struct BuilderGenerator {
+  template <typename T,
+            CUDF_ENABLE_IF(!std::is_same_v<T, arrow::ListType> &&
+                           !std::is_same_v<T, arrow::StructType>)>
+  std::shared_ptr<arrow::ArrayBuilder> operator()(std::shared_ptr<arrow::DataType> const& type)
+  {
+    return std::make_shared<typename arrow::TypeTraits<T>::BuilderType>(
+      type, arrow::default_memory_pool());
+  }
+
+  template <typename T,
+            CUDF_ENABLE_IF(std::is_same_v<T, arrow::ListType> ||
+                           std::is_same_v<T, arrow::StructType>)>
+  std::shared_ptr<arrow::ArrayBuilder> operator()(std::shared_ptr<arrow::DataType> const& type)
+  {
+    CUDF_FAIL("Type not support for BuilderGenerator");
+  }
+};
+
+std::shared_ptr<arrow::ArrayBuilder> make_builder(std::shared_ptr<arrow::DataType> const& type)
+{
+  switch (type->id()) {
+    case arrow::Type::STRUCT: {
+      std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+
+      for (auto i = 0; i < type->num_fields(); ++i) {
+        auto const vt = type->field(i)->type();
+        if (vt->id() == arrow::Type::STRUCT || vt->id() == arrow::Type::LIST) {
+          field_builders.push_back(make_builder(vt));
+        } else {
+          field_builders.push_back(arrow_type_dispatcher(*vt, BuilderGenerator{}, vt));
+        }
+      }
+      return std::make_shared<arrow::StructBuilder>(
+        type, arrow::default_memory_pool(), field_builders);
+    }
+    case arrow::Type::LIST: {
+      return std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                  make_builder(type->field(0)->type()));
+    }
+    default: {
+      return arrow_type_dispatcher(*type, BuilderGenerator{}, type);
+    }
+  }
+}
+
 }  // namespace
 
 std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
@@ -467,120 +513,12 @@ std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
   return std::make_unique<table>(std::move(columns));
 }
 
-}  // namespace detail
-
-std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
-                                  rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-
-  return detail::from_arrow(input_table, cudf::get_default_stream(), mr);
-}
-
-template <typename Functor, typename... Ts>
-constexpr decltype(auto) arrow_type_dispatcher(arrow::DataType const& dtype,
-                                               Functor f,
-                                               Ts&&... args)
-{
-  switch (dtype.id()) {
-    case arrow::Type::INT8:
-      return f.template operator()<arrow::Int8Type>(std::forward<Ts>(args)...);
-    case arrow::Type::INT16:
-      return f.template operator()<arrow::Int16Type>(std::forward<Ts>(args)...);
-    case arrow::Type::INT32:
-      return f.template operator()<arrow::Int32Type>(std::forward<Ts>(args)...);
-    case arrow::Type::INT64:
-      return f.template operator()<arrow::Int64Type>(std::forward<Ts>(args)...);
-    case arrow::Type::UINT8:
-      return f.template operator()<arrow::UInt8Type>(std::forward<Ts>(args)...);
-    case arrow::Type::UINT16:
-      return f.template operator()<arrow::UInt16Type>(std::forward<Ts>(args)...);
-    case arrow::Type::UINT32:
-      return f.template operator()<arrow::UInt32Type>(std::forward<Ts>(args)...);
-    case arrow::Type::UINT64:
-      return f.template operator()<arrow::UInt64Type>(std::forward<Ts>(args)...);
-    case arrow::Type::FLOAT:
-      return f.template operator()<arrow::FloatType>(std::forward<Ts>(args)...);
-    case arrow::Type::DOUBLE:
-      return f.template operator()<arrow::DoubleType>(std::forward<Ts>(args)...);
-    case arrow::Type::BOOL:
-      return f.template operator()<arrow::BooleanType>(std::forward<Ts>(args)...);
-    case arrow::Type::TIMESTAMP:
-      return f.template operator()<arrow::TimestampType>(std::forward<Ts>(args)...);
-    case arrow::Type::DURATION:
-      return f.template operator()<arrow::DurationType>(std::forward<Ts>(args)...);
-    // case arrow::Type::DICTIONARY32:
-    //   return f.template operator()<arrow::Type::DICTIONARY32>(
-    //     std::forward<Ts>(args)...);
-    case arrow::Type::STRING:
-      return f.template operator()<arrow::StringType>(std::forward<Ts>(args)...);
-    case arrow::Type::LIST:
-      return f.template operator()<arrow::ListType>(std::forward<Ts>(args)...);
-    case arrow::Type::DECIMAL128:
-      return f.template operator()<arrow::Decimal128Type>(std::forward<Ts>(args)...);
-    case arrow::Type::STRUCT:
-      return f.template operator()<arrow::StructType>(std::forward<Ts>(args)...);
-    default: {
-      CUDF_FAIL("Invalid type.");
-    }
-  }
-}
-
-struct BuilderGenerator {
-  template <typename T>
-  std::shared_ptr<arrow::ArrayBuilder> operator()(std::shared_ptr<arrow::DataType> const& type)
-  {
-    return std::make_shared<typename arrow::TypeTraits<T>::BuilderType>(
-      type, arrow::default_memory_pool());
-  }
-};
-
-template <>
-std::shared_ptr<arrow::ArrayBuilder> BuilderGenerator::operator()<arrow::ListType>(
-  std::shared_ptr<arrow::DataType> const& type)
-{
-  CUDF_FAIL("Not implemented");
-}
-
-template <>
-std::shared_ptr<arrow::ArrayBuilder> BuilderGenerator::operator()<arrow::StructType>(
-  std::shared_ptr<arrow::DataType> const& type)
-{
-  CUDF_FAIL("Not implemented");
-}
-
-std::shared_ptr<arrow::ArrayBuilder> make_builder(std::shared_ptr<arrow::DataType> const& type)
-{
-  switch (type->id()) {
-    case arrow::Type::STRUCT: {
-      std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
-
-      for (auto i = 0; i < type->num_fields(); ++i) {
-        auto const vt = type->field(i)->type();
-        if (vt->id() == arrow::Type::STRUCT || vt->id() == arrow::Type::LIST) {
-          field_builders.push_back(make_builder(vt));
-        } else {
-          field_builders.push_back(arrow_type_dispatcher(*vt, BuilderGenerator{}, vt));
-        }
-      }
-      return std::make_shared<arrow::StructBuilder>(
-        type, arrow::default_memory_pool(), field_builders);
-    }
-    case arrow::Type::LIST: {
-      return std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
-                                                  make_builder(type->field(0)->type()));
-    }
-    default: {
-      return arrow_type_dispatcher(*type, BuilderGenerator{}, type);
-    }
-  }
-}
-
 std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
+                                         rmm::cuda_stream_view stream,
                                          rmm::mr::device_memory_resource* mr)
 {
   // Get a builder for the scalar type
-  auto builder = make_builder(input.type);
+  auto builder = detail::make_builder(input.type);
 
   auto status = builder->AppendScalar(input);
   if (status != arrow::Status::OK()) {
@@ -600,7 +538,7 @@ std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
 
   auto table = arrow::Table::Make(arrow::schema({field}), {array});
 
-  auto cudf_table = from_arrow(*table);
+  auto cudf_table = detail::from_arrow(*table, stream, mr);
 
   auto col = cudf_table->get_column(0);
 
@@ -608,4 +546,21 @@ std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
   return get_element(cv, 0);
 }
 
+}  // namespace detail
+
+std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
+                                  rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+
+  return detail::from_arrow(input_table, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+
+  return detail::from_arrow(input, cudf::get_default_stream(), mr);
+}
 }  // namespace cudf
diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index b501c1e1d7e..5874867fc6f 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -447,6 +447,21 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
 
   return result;
 }
+
+std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
+                                        column_metadata const& metadata,
+
+                                        rmm::cuda_stream_view stream,
+                                        arrow::MemoryPool* ar_mr)
+{
+  auto column = cudf::make_column_from_scalar(input, 1);
+  cudf::table_view tv{{column->view()}};
+  auto arrow_table  = cudf::to_arrow(tv, {metadata});
+  auto ac           = arrow_table->column(0);
+  auto maybe_scalar = ac->GetScalar(0);
+  if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); }
+  return maybe_scalar.ValueOrDie();
+}
 }  // namespace detail
 
 std::shared_ptr<arrow::Table> to_arrow(table_view input,
@@ -462,13 +477,7 @@ std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
 
                                         arrow::MemoryPool* ar_mr)
 {
-  auto stream = cudf::get_default_stream();
-  auto column = cudf::make_column_from_scalar(input, 1);
-  cudf::table_view tv{{column->view()}};
-  auto arrow_table  = cudf::to_arrow(tv, {metadata});
-  auto ac           = arrow_table->column(0);
-  auto maybe_scalar = ac->GetScalar(0);
-  if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); }
-  return maybe_scalar.ValueOrDie();
+  CUDF_FUNC_RANGE();
+  return detail::to_arrow(input, metadata, cudf::get_default_stream(), ar_mr);
 }
 }  // namespace cudf

From 7fff56db10dc713ddf60acf82703f63742be4357 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Sep 2023 11:00:40 -0700
Subject: [PATCH 42/54] Add streams to public APIs

---
 cpp/include/cudf/detail/interop.hpp | 18 ++++++------------
 cpp/include/cudf/interop.hpp        | 10 +++++++++-
 cpp/src/interop/from_arrow.cu       |  6 ++++--
 cpp/src/interop/to_arrow.cu         |  7 ++++---
 4 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp
index 6ddb4fbd1ed..6a294b1d360 100644
--- a/cpp/include/cudf/detail/interop.hpp
+++ b/cpp/include/cudf/detail/interop.hpp
@@ -164,9 +164,7 @@ data_type arrow_to_cudf_type(arrow::DataType const& arrow_type);
 
 /**
  * @copydoc cudf::to_arrow(table_view input, std::vector<column_metadata> const& metadata,
- * arrow::MemoryPool* ar_mr)
- *
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr)
  */
 std::shared_ptr<arrow::Table> to_arrow(table_view input,
                                        std::vector<column_metadata> const& metadata,
@@ -175,9 +173,7 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
 
 /**
  * @copydoc cudf::to_arrow(cudf::scalar const& input, column_metadata const& metadata,
- * arrow::MemoryPool* ar_mr)
- *
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr)
  */
 std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
                                         column_metadata const& metadata,
@@ -185,18 +181,16 @@ std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
                                         rmm::cuda_stream_view stream,
                                         arrow::MemoryPool* ar_mr);
 /**
- * @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::mr::device_memory_resource* mr)
- *
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream,
+ * rmm::mr::device_memory_resource* mr)
  */
 std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
                                   rmm::cuda_stream_view stream,
                                   rmm::mr::device_memory_resource* mr);
 
 /**
- * @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::mr::device_memory_resource* mr)
- *
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream,
+ * rmm::mr::device_memory_resource* mr)
  */
 std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
                                          rmm::cuda_stream_view stream,
diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index 34232a90cbc..b1514699e21 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -126,12 +126,14 @@ struct column_metadata {
  *
  * @param input table_view that needs to be converted to arrow Table
  * @param metadata Contains hierarchy of names of columns and children
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param ar_mr arrow memory pool to allocate memory for arrow Table
  * @return arrow Table generated from `input`
  */
 std::shared_ptr<arrow::Table> to_arrow(table_view input,
                                        std::vector<column_metadata> const& metadata = {},
-                                       arrow::MemoryPool* ar_mr = arrow::default_memory_pool());
+                                       rmm::cuda_stream_view stream = cudf::get_default_stream(),
+                                       arrow::MemoryPool* ar_mr     = arrow::default_memory_pool());
 
 /**
  * @brief Create `arrow::Scalar` from cudf scalar `input`
@@ -140,34 +142,40 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
  *
  * @param input scalar that needs to be converted to arrow Scalar
  * @param metadata Contains hierarchy of names of columns and children
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param ar_mr arrow memory pool to allocate memory for arrow Scalar
  * @return arrow Scalar generated from `input`
  */
 std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
                                         column_metadata const& metadata = {},
+                                        rmm::cuda_stream_view stream = cudf::get_default_stream(),
                                         arrow::MemoryPool* ar_mr = arrow::default_memory_pool());
 /**
  * @brief Create `cudf::table` from given arrow Table input
  *
  * @param input arrow:Table that needs to be converted to `cudf::table`
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr    Device memory resource used to allocate `cudf::table`
  * @return cudf table generated from given arrow Table
  */
 
 std::unique_ptr<table> from_arrow(
   arrow::Table const& input,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @brief Create `cudf::scalar` from given arrow Scalar input
  *
  * @param input arrow:Scalar that needs to be converted to `cudf::scalar`
+ * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr    Device memory resource used to allocate `cudf::scalar`
  * @return cudf scalar generated from given arrow Scalar
  */
 
 std::unique_ptr<cudf::scalar> from_arrow(
   arrow::Scalar const& input,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of group
diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 67b32f0e831..d3e781db986 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -549,18 +549,20 @@ std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
 }  // namespace detail
 
 std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
+                                  rmm::cuda_stream_view stream,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
 
-  return detail::from_arrow(input_table, cudf::get_default_stream(), mr);
+  return detail::from_arrow(input_table, stream, mr);
 }
 
 std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
+                                         rmm::cuda_stream_view stream,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
 
-  return detail::from_arrow(input, cudf::get_default_stream(), mr);
+  return detail::from_arrow(input, stream, mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 5874867fc6f..6a6dc3fa0e2 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -466,18 +466,19 @@ std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
 
 std::shared_ptr<arrow::Table> to_arrow(table_view input,
                                        std::vector<column_metadata> const& metadata,
+                                       rmm::cuda_stream_view stream,
                                        arrow::MemoryPool* ar_mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_arrow(input, metadata, cudf::get_default_stream(), ar_mr);
+  return detail::to_arrow(input, metadata, stream, ar_mr);
 }
 
 std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
                                         column_metadata const& metadata,
-
+                                        rmm::cuda_stream_view stream,
                                         arrow::MemoryPool* ar_mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_arrow(input, metadata, cudf::get_default_stream(), ar_mr);
+  return detail::to_arrow(input, metadata, stream, ar_mr);
 }
 }  // namespace cudf

From 00050f6fc74dc86c0f4f2af55a8c35061d2331af Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Sep 2023 13:35:38 -0700
Subject: [PATCH 43/54] Add tests of from_arrow

---
 cpp/tests/interop/from_arrow_test.cpp | 92 +++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
index 9a5cc3733af..6716bac0b96 100644
--- a/cpp/tests/interop/from_arrow_test.cpp
+++ b/cpp/tests/interop/from_arrow_test.cpp
@@ -456,3 +456,95 @@ INSTANTIATE_TEST_CASE_P(FromArrowTest,
                                           std::make_tuple(0, 0),
                                           std::make_tuple(0, 3000),
                                           std::make_tuple(10000, 10000)));
+
+template <typename T>
+struct FromArrowNumericScalarTest : public cudf::test::BaseFixture {};
+
+TYPED_TEST_SUITE(FromArrowNumericScalarTest, cudf::test::NumericTypes);
+
+TYPED_TEST(FromArrowNumericScalarTest, Basic)
+{
+  auto const value         = TypeParam(42);
+  auto arrow_scalar        = arrow::MakeScalar(value);
+  auto cudf_scalar         = cudf::from_arrow(*arrow_scalar.get());
+  auto cudf_numeric_scalar = dynamic_cast<cudf::numeric_scalar<TypeParam>*>(cudf_scalar.get());
+  if (cudf_numeric_scalar == nullptr) { CUDF_FAIL("Attempted to test with a non-numeric type."); }
+  EXPECT_EQ(cudf_numeric_scalar->type(), cudf::data_type(cudf::type_to_id<TypeParam>()));
+  EXPECT_EQ(cudf_numeric_scalar->value(), value);
+}
+
+struct FromArrowDecimalScalarTest : public cudf::test::BaseFixture {};
+
+// Only testing Decimal128 because that's the only size cudf and arrow have in common.
+TEST_F(FromArrowDecimalScalarTest, Basic)
+{
+  auto const value     = 42;
+  auto const precision = 8;
+  auto const scale     = 4;
+  auto arrow_scalar    = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale));
+  auto cudf_scalar     = cudf::from_arrow(arrow_scalar);
+
+  // Arrow offers a minimum of 128 bits for the Decimal type.
+  auto cudf_decimal_scalar =
+    dynamic_cast<cudf::fixed_point_scalar<numeric::decimal128>*>(cudf_scalar.get());
+  EXPECT_EQ(cudf_decimal_scalar->type(),
+            cudf::data_type(cudf::type_to_id<numeric::decimal128>(), scale));
+  EXPECT_EQ(cudf_decimal_scalar->value(), value);
+}
+
+struct FromArrowStringScalarTest : public cudf::test::BaseFixture {};
+
+TEST_F(FromArrowStringScalarTest, Basic)
+{
+  auto value        = std::string("hello world");
+  auto arrow_scalar = arrow::StringScalar(value);
+  auto cudf_scalar  = cudf::from_arrow(arrow_scalar);
+
+  auto cudf_string_scalar = dynamic_cast<cudf::string_scalar*>(cudf_scalar.get());
+  EXPECT_EQ(cudf_string_scalar->type(), cudf::data_type(cudf::type_id::STRING));
+  EXPECT_EQ(cudf_string_scalar->to_string(), value);
+}
+
+struct FromArrowListScalarTest : public cudf::test::BaseFixture {};
+
+TEST_F(FromArrowListScalarTest, Basic)
+{
+  std::vector<int64_t> host_values = {1, 2, 3, 5, 6, 7, 8};
+  std::vector<bool> host_validity  = {true, true, true, false, true, true, true};
+
+  arrow::Int64Builder builder;
+  auto status      = builder.AppendValues(host_values, host_validity);
+  auto maybe_array = builder.Finish();
+  auto array       = *maybe_array;
+
+  auto arrow_scalar = arrow::ListScalar(array);
+  auto cudf_scalar  = cudf::from_arrow(arrow_scalar);
+
+  auto cudf_list_scalar = dynamic_cast<cudf::list_scalar*>(cudf_scalar.get());
+  EXPECT_EQ(cudf_list_scalar->type(), cudf::data_type(cudf::type_id::LIST));
+
+  cudf::test::fixed_width_column_wrapper<int64_t> lhs(
+    host_values.begin(), host_values.end(), host_validity.begin());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lhs, cudf_list_scalar->view());
+}
+
+struct FromArrowStructScalarTest : public cudf::test::BaseFixture {};
+
+TEST_F(FromArrowStructScalarTest, Basic)
+{
+  int64_t const value          = 42;
+  auto underlying_arrow_scalar = arrow::MakeScalar(value);
+
+  auto field        = arrow::field("", underlying_arrow_scalar->type);
+  auto arrow_type   = arrow::struct_({field});
+  auto arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type);
+  auto cudf_scalar  = cudf::from_arrow(arrow_scalar);
+
+  auto cudf_struct_scalar = dynamic_cast<cudf::struct_scalar*>(cudf_scalar.get());
+  EXPECT_EQ(cudf_struct_scalar->type(), cudf::data_type(cudf::type_id::STRUCT));
+
+  cudf::test::fixed_width_column_wrapper<int64_t> col({value});
+  cudf::table_view lhs({col});
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(lhs, cudf_struct_scalar->view());
+}

From 1c2570b76a1b59fd45b77da00750d1ed3300b970 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Sep 2023 15:53:36 -0700
Subject: [PATCH 44/54] Add tests of to_arrow

---
 cpp/tests/interop/from_arrow_test.cpp |  20 ++---
 cpp/tests/interop/to_arrow_test.cpp   | 102 ++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 9 deletions(-)

diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
index 6716bac0b96..5305b0103d8 100644
--- a/cpp/tests/interop/from_arrow_test.cpp
+++ b/cpp/tests/interop/from_arrow_test.cpp
@@ -460,13 +460,15 @@ INSTANTIATE_TEST_CASE_P(FromArrowTest,
 template <typename T>
 struct FromArrowNumericScalarTest : public cudf::test::BaseFixture {};
 
-TYPED_TEST_SUITE(FromArrowNumericScalarTest, cudf::test::NumericTypes);
+using NumericTypesNotBool =
+  cudf::test::Concat<cudf::test::IntegralTypesNotBool, cudf::test::FloatingPointTypes>;
+TYPED_TEST_SUITE(FromArrowNumericScalarTest, NumericTypesNotBool);
 
 TYPED_TEST(FromArrowNumericScalarTest, Basic)
 {
-  auto const value         = TypeParam(42);
+  TypeParam const value{42};
   auto arrow_scalar        = arrow::MakeScalar(value);
-  auto cudf_scalar         = cudf::from_arrow(*arrow_scalar.get());
+  auto cudf_scalar         = cudf::from_arrow(*arrow_scalar);
   auto cudf_numeric_scalar = dynamic_cast<cudf::numeric_scalar<TypeParam>*>(cudf_scalar.get());
   if (cudf_numeric_scalar == nullptr) { CUDF_FAIL("Attempted to test with a non-numeric type."); }
   EXPECT_EQ(cudf_numeric_scalar->type(), cudf::data_type(cudf::type_to_id<TypeParam>()));
@@ -478,11 +480,11 @@ struct FromArrowDecimalScalarTest : public cudf::test::BaseFixture {};
 // Only testing Decimal128 because that's the only size cudf and arrow have in common.
 TEST_F(FromArrowDecimalScalarTest, Basic)
 {
-  auto const value     = 42;
-  auto const precision = 8;
-  auto const scale     = 4;
-  auto arrow_scalar    = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale));
-  auto cudf_scalar     = cudf::from_arrow(arrow_scalar);
+  auto const value{42};
+  auto const precision{8};
+  auto const scale{4};
+  auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale));
+  auto cudf_scalar  = cudf::from_arrow(arrow_scalar);
 
   // Arrow offers a minimum of 128 bits for the Decimal type.
   auto cudf_decimal_scalar =
@@ -532,7 +534,7 @@ struct FromArrowStructScalarTest : public cudf::test::BaseFixture {};
 
 TEST_F(FromArrowStructScalarTest, Basic)
 {
-  int64_t const value          = 42;
+  int64_t const value{42};
   auto underlying_arrow_scalar = arrow::MakeScalar(value);
 
   auto field        = arrow::field("", underlying_arrow_scalar->type);
diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp
index 97d80984272..2982cdb2760 100644
--- a/cpp/tests/interop/to_arrow_test.cpp
+++ b/cpp/tests/interop/to_arrow_test.cpp
@@ -22,6 +22,7 @@
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/dictionary/encode.hpp>
 #include <cudf/interop.hpp>
+#include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
@@ -578,4 +579,105 @@ INSTANTIATE_TEST_CASE_P(ToArrowTest,
                                           std::make_tuple(0, 0),
                                           std::make_tuple(0, 3000)));
 
+template <typename T>
+struct ToArrowNumericScalarTest : public cudf::test::BaseFixture {};
+
+using NumericTypesNotBool =
+  cudf::test::Concat<cudf::test::IntegralTypesNotBool, cudf::test::FloatingPointTypes>;
+TYPED_TEST_SUITE(ToArrowNumericScalarTest, NumericTypesNotBool);
+
+TYPED_TEST(ToArrowNumericScalarTest, Basic)
+{
+  TypeParam const value{42};
+  auto cudf_scalar = cudf::make_fixed_width_scalar<TypeParam>(value);
+
+  cudf::column_metadata metadata{""};
+  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+
+  auto ref_arrow_scalar = arrow::MakeScalar(value);
+  EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar));
+}
+
+struct ToArrowDecimalScalarTest : public cudf::test::BaseFixture {};
+
+// Only testing Decimal128 because that's the only size cudf and arrow have in common.
+TEST_F(ToArrowDecimalScalarTest, Basic)
+{
+  auto const value{42};
+  auto const precision{18};  // cudf will convert to the widest-precision Arrow scalar of the type
+  int32_t const scale{4};
+
+  auto cudf_scalar =
+    cudf::make_fixed_point_scalar<numeric::decimal128>(value, numeric::scale_type{scale});
+
+  cudf::column_metadata metadata{""};
+  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+
+  auto maybe_ref_arrow_scalar = arrow::MakeScalar(arrow::decimal128(precision, -scale), value);
+  if (!maybe_ref_arrow_scalar.ok()) { CUDF_FAIL("Failed to construct reference scalar"); }
+  auto ref_arrow_scalar = *maybe_ref_arrow_scalar;
+  EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar));
+}
+
+struct ToArrowStringScalarTest : public cudf::test::BaseFixture {};
+
+TEST_F(ToArrowStringScalarTest, Basic)
+{
+  std::string const value{"hello world"};
+  auto cudf_scalar = cudf::make_string_scalar(value);
+  cudf::column_metadata metadata{""};
+  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+
+  auto ref_arrow_scalar = arrow::MakeScalar(value);
+  EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar));
+}
+
+struct ToArrowListScalarTest : public cudf::test::BaseFixture {};
+
+TEST_F(ToArrowListScalarTest, Basic)
+{
+  std::vector<int64_t> host_values = {1, 2, 3, 5, 6, 7, 8};
+  std::vector<bool> host_validity  = {true, true, true, false, true, true, true};
+
+  cudf::test::fixed_width_column_wrapper<int64_t> col(
+    host_values.begin(), host_values.end(), host_validity.begin());
+
+  auto cudf_scalar = cudf::make_list_scalar(col);
+
+  cudf::column_metadata metadata{""};
+  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+
+  arrow::Int64Builder builder;
+  auto status      = builder.AppendValues(host_values, host_validity);
+  auto maybe_array = builder.Finish();
+  auto array       = *maybe_array;
+
+  auto ref_arrow_scalar = arrow::ListScalar(array);
+
+  EXPECT_TRUE(arrow_scalar->Equals(ref_arrow_scalar));
+}
+
+struct ToArrowStructScalarTest : public cudf::test::BaseFixture {};
+
+TEST_F(ToArrowStructScalarTest, Basic)
+{
+  int64_t const value{42};
+  auto const field_name{"a"};
+
+  cudf::test::fixed_width_column_wrapper<int64_t> col{value};
+  cudf::table_view tbl({col});
+  auto cudf_scalar = cudf::make_struct_scalar(tbl);
+
+  cudf::column_metadata metadata{""};
+  metadata.children_meta.emplace_back(field_name);
+  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+
+  auto underlying_arrow_scalar = arrow::MakeScalar(value);
+  auto field                   = arrow::field(field_name, underlying_arrow_scalar->type, false);
+  auto arrow_type              = arrow::struct_({field});
+  auto ref_arrow_scalar        = arrow::StructScalar({underlying_arrow_scalar}, arrow_type);
+
+  EXPECT_TRUE(arrow_scalar->Equals(ref_arrow_scalar));
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From 663fde08e081003545ad38166afbe1fcd1467904 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Sep 2023 16:56:23 -0700
Subject: [PATCH 45/54] Add stream tests and fix missing stream usage

---
 cpp/src/interop/from_arrow.cu      |  4 +--
 cpp/src/interop/to_arrow.cu        |  5 ++--
 cpp/tests/CMakeLists.txt           |  1 +
 cpp/tests/streams/interop_test.cpp | 41 ++++++++++++++++++++++++++++++
 4 files changed, 46 insertions(+), 5 deletions(-)
 create mode 100644 cpp/tests/streams/interop_test.cpp

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index d3e781db986..6e0ef462ac9 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -540,10 +540,10 @@ std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
 
   auto cudf_table = detail::from_arrow(*table, stream, mr);
 
-  auto col = cudf_table->get_column(0);
+  auto& col = cudf_table->get_column(0);
 
   auto cv = col.view();
-  return get_element(cv, 0);
+  return get_element(cv, 0, stream);
 }
 
 }  // namespace detail
diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 6a6dc3fa0e2..186f3798154 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -450,13 +450,12 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
 
 std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
                                         column_metadata const& metadata,
-
                                         rmm::cuda_stream_view stream,
                                         arrow::MemoryPool* ar_mr)
 {
-  auto column = cudf::make_column_from_scalar(input, 1);
+  auto column = cudf::make_column_from_scalar(input, 1, stream);
   cudf::table_view tv{{column->view()}};
-  auto arrow_table  = cudf::to_arrow(tv, {metadata});
+  auto arrow_table  = cudf::to_arrow(tv, {metadata}, stream);
   auto ac           = arrow_table->column(0);
   auto maybe_scalar = ac->GetScalar(0);
   if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); }
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index a69dc9bf2f8..af241996e27 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -627,6 +627,7 @@ ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE t
 ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing)
 ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
 ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
+ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing)
 
 # ##################################################################################################
 # Install tests ####################################################################################
diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp
new file mode 100644
index 00000000000..e16b3c9cf0d
--- /dev/null
+++ b/cpp/tests/streams/interop_test.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/interop.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/scalar/scalar_factories.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/default_stream.hpp>
+
+struct ArrowTest : public cudf::test::BaseFixture {};
+
+TEST_F(ArrowTest, ToArrow)
+{
+  int32_t const value{42};
+  auto cudf_scalar =
+    cudf::make_fixed_width_scalar<int32_t>(value, cudf::test::get_default_stream());
+
+  cudf::column_metadata metadata{""};
+  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata, cudf::test::get_default_stream());
+}
+
+TEST_F(ArrowTest, FromArrow)
+{
+  int32_t const value{42};
+  auto arrow_scalar = arrow::MakeScalar(value);
+  cudf::from_arrow(*arrow_scalar, cudf::test::get_default_stream());
+}

From cf30fef29071d3674135d114968dd7db6e2266e4 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Sep 2023 17:07:12 -0700
Subject: [PATCH 46/54] Also add stream tests for the table APIs

---
 cpp/tests/streams/interop_test.cpp | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp
index e16b3c9cf0d..5235714c342 100644
--- a/cpp/tests/streams/interop_test.cpp
+++ b/cpp/tests/streams/interop_test.cpp
@@ -19,11 +19,38 @@
 #include <cudf/scalar/scalar_factories.hpp>
 
 #include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/default_stream.hpp>
 
 struct ArrowTest : public cudf::test::BaseFixture {};
 
 TEST_F(ArrowTest, ToArrow)
+{
+  int32_t const value{42};
+  auto col = cudf::test::fixed_width_column_wrapper<int32_t>{{value}};
+  cudf::table_view tbl{{col}};
+
+  std::vector<cudf::column_metadata> metadata{{""}};
+  auto arrow_scalar = cudf::to_arrow(tbl, metadata, cudf::test::get_default_stream());
+}
+
+TEST_F(ArrowTest, FromArrow)
+{
+  std::vector<int64_t> host_values = {1, 2, 3, 5, 6, 7, 8};
+  std::vector<bool> host_validity  = {true, true, true, false, true, true, true};
+
+  arrow::Int64Builder builder;
+  auto status      = builder.AppendValues(host_values, host_validity);
+  auto maybe_array = builder.Finish();
+  auto array       = *maybe_array;
+
+  auto field  = arrow::field("", arrow::int32());
+  auto schema = arrow::schema({field});
+  auto table  = arrow::Table::Make(schema, {array});
+  cudf::from_arrow(*table, cudf::test::get_default_stream());
+}
+
+TEST_F(ArrowTest, ToArrowScalar)
 {
   int32_t const value{42};
   auto cudf_scalar =
@@ -33,7 +60,7 @@ TEST_F(ArrowTest, ToArrow)
   auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata, cudf::test::get_default_stream());
 }
 
-TEST_F(ArrowTest, FromArrow)
+TEST_F(ArrowTest, FromArrowScalar)
 {
   int32_t const value{42};
   auto arrow_scalar = arrow::MakeScalar(value);

From 49b26935d97b39bcfdcda3ada648fb5486843a21 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Sep 2023 17:07:33 -0700
Subject: [PATCH 47/54] Revert Python changes

---
 python/cudf/cudf/_lib/cpp/interop.pxd         |  11 +-
 .../cudf/cudf/_lib/cpp/libcpp/functional.pxd  |   4 +-
 python/cudf/cudf/_lib/cpp/reduce.pxd          |   5 +-
 python/cudf/cudf/_lib/datetime.pyx            |   6 +-
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   |  27 +-
 python/cudf/cudf/_lib/pylibcudf/__init__.pxd  |   8 +-
 python/cudf/cudf/_lib/pylibcudf/__init__.py   |   5 +-
 python/cudf/cudf/_lib/pylibcudf/copying.pxd   |   6 -
 python/cudf/cudf/_lib/pylibcudf/copying.pyx   |  53 ---
 .../cudf/_lib/pylibcudf/gpumemoryview.pxd     |   7 +-
 .../cudf/_lib/pylibcudf/gpumemoryview.pyx     |  14 +-
 python/cudf/cudf/_lib/pylibcudf/interop.pxd   |  26 --
 python/cudf/cudf/_lib/pylibcudf/interop.pyx   |  98 ------
 python/cudf/cudf/_lib/pylibcudf/scalar.pxd    |  32 --
 python/cudf/cudf/_lib/pylibcudf/scalar.pyx    | 119 -------
 python/cudf/cudf/_lib/scalar.pxd              |  10 +-
 python/cudf/cudf/_lib/scalar.pyx              | 321 ++++++++++--------
 python/cudf/cudf/tests/test_list.py           |   4 +-
 python/cudf/cudf/tests/test_struct.py         |  35 +-
 python/cudf/cudf/utils/dtypes.py              |  18 +
 20 files changed, 215 insertions(+), 594 deletions(-)
 delete mode 100644 python/cudf/cudf/_lib/pylibcudf/interop.pxd
 delete mode 100644 python/cudf/cudf/_lib/pylibcudf/interop.pyx
 delete mode 100644 python/cudf/cudf/_lib/pylibcudf/scalar.pxd
 delete mode 100644 python/cudf/cudf/_lib/pylibcudf/scalar.pyx

diff --git a/python/cudf/cudf/_lib/cpp/interop.pxd b/python/cudf/cudf/_lib/cpp/interop.pxd
index 88e9d83ee98..e81f0d617fb 100644
--- a/python/cudf/cudf/_lib/cpp/interop.pxd
+++ b/python/cudf/cudf/_lib/cpp/interop.pxd
@@ -1,13 +1,12 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020, NVIDIA CORPORATION.
 
 from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
-from pyarrow.lib cimport CScalar, CTable
+from pyarrow.lib cimport CTable
 
 from cudf._lib.types import cudf_to_np_types, np_to_cudf_types
 
-from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 
@@ -25,7 +24,6 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \
                                ) except +
 
     cdef unique_ptr[table] from_arrow(CTable input) except +
-    cdef unique_ptr[scalar] from_arrow(CScalar input) except +
 
     cdef cppclass column_metadata:
         column_metadata() except +
@@ -37,8 +35,3 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \
         table_view input,
         vector[column_metadata] metadata,
     ) except +
-
-    cdef shared_ptr[CScalar] to_arrow(
-        const scalar& input,
-        column_metadata metadata,
-    ) except +
diff --git a/python/cudf/cudf/_lib/cpp/libcpp/functional.pxd b/python/cudf/cudf/_lib/cpp/libcpp/functional.pxd
index c38db036119..f3e2d6d0878 100644
--- a/python/cudf/cudf/_lib/cpp/libcpp/functional.pxd
+++ b/python/cudf/cudf/_lib/cpp/libcpp/functional.pxd
@@ -1,8 +1,6 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020, NVIDIA CORPORATION.
 
 
-# TODO: Can be replaced once https://github.com/cython/cython/pull/5671 is
-# merged and released
 cdef extern from "<functional>" namespace "std" nogil:
     cdef cppclass reference_wrapper[T]:
         reference_wrapper()
diff --git a/python/cudf/cudf/_lib/cpp/reduce.pxd b/python/cudf/cudf/_lib/cpp/reduce.pxd
index 997782dec6c..7952c717916 100644
--- a/python/cudf/cudf/_lib/cpp/reduce.pxd
+++ b/python/cudf/cudf/_lib/cpp/reduce.pxd
@@ -1,13 +1,14 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport pair
 
-from cudf._lib.cpp.aggregation cimport reduce_aggregation, scan_aggregation
+from cudf._lib.aggregation cimport reduce_aggregation, scan_aggregation
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.types cimport data_type
+from cudf._lib.scalar cimport DeviceScalar
 
 
 cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil:
diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
index 3d96f59c4d6..81949dbaa20 100644
--- a/python/cudf/cudf/_lib/datetime.pyx
+++ b/python/cudf/cudf/_lib/datetime.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from cudf.core.buffer import acquire_spill_lock
 
@@ -10,7 +10,6 @@ from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.filling cimport calendrical_month_sequence
-from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
@@ -167,11 +166,10 @@ def date_range(DeviceScalar start, size_type n, offset):
         + offset.kwds.get("months", 0)
     )
 
-    cdef const scalar* c_start = start.c_value.get()
     with nogil:
         c_result = move(calendrical_month_sequence(
             n,
-            c_start[0],
+            start.c_value.get()[0],
             months
         ))
     return Column.from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index 64adb38ace3..0ce42dc43ff 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -12,35 +12,10 @@
 # the License.
 # =============================================================================
 
-set(cython_sources column.pyx copying.pyx gpumemoryview.pyx interop.pyx scalar.pyx table.pyx
-                   types.pyx utils.pyx
-)
+set(cython_sources column.pyx copying.pyx gpumemoryview.pyx table.pyx types.pyx utils.pyx)
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf
 )
-
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
-
-execute_process(
-  COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
-  OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
-  OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-set(targets_using_arrow_headers pylibcudf_interop pylibcudf_scalar)
-foreach(target IN LISTS targets_using_arrow_headers)
-  target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}")
-endforeach()
-
-# TODO: Clean up this include when switching to scikit-build-core. See cudf/_lib/CMakeLists.txt for
-# more info
-find_package(NumPy REQUIRED)
-set(targets_using_numpy pylibcudf_interop pylibcudf_scalar)
-foreach(target IN LISTS targets_using_numpy)
-  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
-  # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24.
-  # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
-endforeach()
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
index cf0007b9303..ba7822b0a54 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
@@ -1,13 +1,9 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
 # TODO: Verify consistent usage of relative/absolute imports in pylibcudf.
-# TODO: Cannot import interop because it introduces a build-time pyarrow header
-# dependency for everything that cimports pylibcudf. See if there's a way to
-# avoid that before polluting the whole package.
-from . cimport copying  # , interop
+from . cimport copying
 from .column cimport Column
 from .gpumemoryview cimport gpumemoryview
-from .scalar cimport Scalar
 from .table cimport Table
 # TODO: cimport type_id once
 # https://github.com/cython/cython/issues/5609 is resolved
@@ -16,9 +12,7 @@ from .types cimport DataType
 __all__ = [
     "Column",
     "DataType",
-    "Scalar",
     "Table",
     "copying",
     "gpumemoryview",
-    # "interop",
 ]
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py
index 72b74a57b87..3edff9a53e8 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.py
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py
@@ -1,19 +1,16 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-from . import copying, interop
+from . import copying
 from .column import Column
 from .gpumemoryview import gpumemoryview
-from .scalar import Scalar
 from .table import Table
 from .types import DataType, TypeId
 
 __all__ = [
     "Column",
     "DataType",
-    "Scalar",
     "Table",
     "TypeId",
     "copying",
     "gpumemoryview",
-    "interop",
 ]
diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd
index a2232fc5d81..d57be650710 100644
--- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd
@@ -13,9 +13,3 @@ cpdef Table gather(
     Column gather_map,
     out_of_bounds_policy bounds_policy
 )
-
-cpdef Table scatter (
-    list source_scalars,
-    Column indices,
-    Table target,
-)
diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx
index dff3e95328d..a27b44b3107 100644
--- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx
@@ -2,7 +2,6 @@
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
-from libcpp.vector cimport vector
 
 # TODO: We want to make cpp a more full-featured package so that we can access
 # directly from that. It will make namespacing much cleaner in pylibcudf. What
@@ -10,16 +9,13 @@ from libcpp.vector cimport vector
 # cimport libcudf... libcudf.copying.algo(...)
 from cudf._lib.cpp cimport copying as cpp_copying
 from cudf._lib.cpp.copying cimport out_of_bounds_policy
-from cudf._lib.cpp.libcpp.functional cimport reference_wrapper
 
 from cudf._lib.cpp.copying import \
     out_of_bounds_policy as OutOfBoundsPolicy  # no-cython-lint
 
-from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table
 
 from .column cimport Column
-from .scalar cimport Scalar
 from .table cimport Table
 
 
@@ -59,52 +55,3 @@ cpdef Table gather(
             )
         )
     return Table.from_libcudf(move(c_result))
-
-
-ctypedef const scalar constscalar
-
-cpdef Table scatter (
-    list source_scalars,
-    Column indices,
-    Table target,
-):
-    """Scatter source_scalars into target according to the indices.
-
-    For details on the implementation, see cudf::scatter in libcudf.
-
-    Parameters
-    ----------
-    source_scalars : List[Scalar]
-        A list containing one scalar for each column in target.
-    indices : Column
-        The rows of the target into which the source_scalars should be written.
-    target : Table
-        The table into which data should be written.
-
-    Returns
-    -------
-    pylibcudf.Table
-        The result of the scatter
-    """
-    cdef unique_ptr[table] c_result
-    # TODO: This doesn't require the constscalar ctypedef
-    cdef vector[reference_wrapper[const scalar]] c_scalars
-    c_scalars.reserve(len(source_scalars))
-    cdef Scalar d_slr
-    for d_slr in source_scalars:
-        c_scalars.push_back(
-            # TODO: This requires the constscalar ctypedef
-            # Possibly the same as https://github.com/cython/cython/issues/4180
-            reference_wrapper[constscalar](d_slr.get()[0])
-        )
-
-    with nogil:
-        c_result = move(
-            cpp_copying.scatter(
-                c_scalars,
-                indices.view(),
-                target.view(),
-            )
-        )
-
-    return Table.from_libcudf(move(c_result))
diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
index 93449fd02d1..713697bd139 100644
--- a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd
@@ -1,14 +1,9 @@
 # Copyright (c) 2023, NVIDIA CORPORATION.
 
-from libcpp cimport bool
-
 
 cdef class gpumemoryview:
     # TODO: Eventually probably want to make this opaque, but for now it's fine
     # to treat this object as something like a POD struct
     cdef readonly:
         Py_ssize_t ptr
-        object _obj
-        bool _released
-
-    cpdef release(self)
+        object obj
diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
index 9121092ecdd..fc98f087a1b 100644
--- a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
@@ -19,21 +19,9 @@ cdef class gpumemoryview:
                 "gpumemoryview must be constructed from an object supporting "
                 "the CUDA array interface"
             )
-        self._obj = obj
-        self._released = False
+        self.obj = obj
         # TODO: Need to respect readonly
         self.ptr = cai["data"][0]
 
     def __cuda_array_interface__(self):
         return self.obj.__cuda_array_interface__
-
-    @property
-    def obj(self):
-        if not self._released:
-            return self._obj
-        else:
-            raise ValueError("operation forbidden on released gpumemoryview object")
-
-    cpdef release(self):
-        self._obj = None
-        self._released = True
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pxd b/python/cudf/cudf/_lib/pylibcudf/interop.pxd
deleted file mode 100644
index c1268b8ca1a..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pxd
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
-
-from pyarrow.lib cimport Scalar as pa_Scalar, Table as pa_Table
-
-from cudf._lib.cpp.interop cimport column_metadata
-
-from .scalar cimport Scalar
-from .table cimport Table
-
-
-cdef class ColumnMetadata:
-    cdef public object name
-    cdef public object children_meta
-    cdef column_metadata to_c_metadata(self)
-
-cpdef Table from_arrow(
-    pa_Table pyarrow_table,
-)
-
-cpdef Scalar from_arrow_scalar(
-    pa_Scalar pyarrow_scalar,
-)
-
-cpdef pa_Table to_arrow(Table tbl, list metadata)
-
-cpdef pa_Scalar to_arrow_scalar(Scalar slr, ColumnMetadata metadata)
diff --git a/python/cudf/cudf/_lib/pylibcudf/interop.pyx b/python/cudf/cudf/_lib/pylibcudf/interop.pyx
deleted file mode 100644
index 9e6b44b117d..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/interop.pyx
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
-
-from cython.operator cimport dereference
-from libcpp.memory cimport shared_ptr, unique_ptr
-from libcpp.utility cimport move
-from libcpp.vector cimport vector
-from pyarrow.lib cimport (
-    CScalar as pa_CScalar,
-    CTable as pa_CTable,
-    Scalar as pa_Scalar,
-    Table as pa_Table,
-    pyarrow_unwrap_scalar,
-    pyarrow_unwrap_table,
-    pyarrow_wrap_scalar,
-    pyarrow_wrap_table,
-)
-
-from cudf._lib.cpp.interop cimport (
-    column_metadata,
-    from_arrow as cpp_from_arrow,
-    to_arrow as cpp_to_arrow,
-)
-from cudf._lib.cpp.scalar.scalar cimport scalar
-from cudf._lib.cpp.table.table cimport table
-
-from .scalar cimport Scalar
-from .table cimport Table
-
-
-cdef class ColumnMetadata:
-    def __init__(self, name):
-        self.name = name
-        self.children_meta = []
-
-    cdef column_metadata to_c_metadata(self):
-        """Convert to C++ column_metadata.
-
-        Since this class is mutable and cheap, it is easier to create the C++
-        object on the fly rather than have it directly backing the storage for
-        the Cython class.
-        """
-        cdef column_metadata c_metadata
-        cdef ColumnMetadata child_meta
-        c_metadata.name = self.name.encode()
-        for child_meta in self.children_meta:
-            c_metadata.children_meta.push_back(child_meta.to_c_metadata())
-        return c_metadata
-
-
-cpdef Table from_arrow(
-    pa_Table pyarrow_table,
-):
-    cdef shared_ptr[pa_CTable] ctable = (
-        pyarrow_unwrap_table(pyarrow_table)
-    )
-    cdef unique_ptr[table] c_result
-
-    with nogil:
-        c_result = move(cpp_from_arrow(ctable.get()[0]))
-
-    return Table.from_libcudf(move(c_result))
-
-
-cpdef Scalar from_arrow_scalar(
-    pa_Scalar pyarrow_scalar,
-):
-    cdef shared_ptr[pa_CScalar] cscalar = (
-        pyarrow_unwrap_scalar(pyarrow_scalar)
-    )
-    cdef unique_ptr[scalar] c_result
-
-    with nogil:
-        c_result = move(cpp_from_arrow(cscalar.get()[0]))
-
-    return Scalar.from_libcudf(move(c_result))
-
-
-cpdef pa_Table to_arrow(Table tbl, list metadata):
-    cdef shared_ptr[pa_CTable] c_result
-    cdef vector[column_metadata] c_metadata
-    cdef ColumnMetadata meta
-    for meta in metadata:
-        c_metadata.push_back(meta.to_c_metadata())
-
-    with nogil:
-        c_result = move(cpp_to_arrow(tbl.view(), c_metadata))
-
-    return pyarrow_wrap_table(c_result)
-
-
-cpdef pa_Scalar to_arrow_scalar(Scalar slr, ColumnMetadata metadata):
-    cdef shared_ptr[pa_CScalar] c_result
-    cdef column_metadata c_metadata = metadata.to_c_metadata()
-
-    with nogil:
-        c_result = move(cpp_to_arrow(dereference(slr.c_obj.get()), c_metadata))
-
-    return pyarrow_wrap_scalar(c_result)
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
deleted file mode 100644
index d20c65f0be0..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
-
-from libcpp cimport bool
-from libcpp.memory cimport unique_ptr
-
-from rmm._lib.memory_resource cimport DeviceMemoryResource
-
-from cudf._lib.cpp.scalar.scalar cimport scalar
-
-from .types cimport DataType
-
-
-cdef class Scalar:
-    cdef unique_ptr[scalar] c_obj
-    cdef DataType _data_type
-
-    # Holds a reference to the DeviceMemoryResource used for allocation.
-    # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is
-    # needed for deallocation
-    cdef DeviceMemoryResource mr
-
-    cdef const scalar* get(self) except *
-
-    cpdef DataType type(self)
-    cpdef bool is_valid(self)
-
-    @staticmethod
-    cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*)
-
-    # TODO: Make sure I'm correct to avoid typing the metadata as
-    # ColumnMetadata, I assume that will cause circular cimport problems
-    cpdef to_pyarrow_scalar(self, metadata)
diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
deleted file mode 100644
index d06bb9adeb9..00000000000
--- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx
+++ /dev/null
@@ -1,119 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
-
-cimport pyarrow.lib
-from cython cimport no_gc_clear
-from libcpp.memory cimport unique_ptr
-
-import pyarrow.lib
-
-from rmm._lib.memory_resource cimport get_current_device_resource
-
-from cudf._lib.cpp.scalar.scalar cimport fixed_point_scalar, scalar
-from cudf._lib.cpp.wrappers.decimals cimport (
-    decimal32,
-    decimal64,
-    decimal128,
-    scale_type,
-)
-
-from .types cimport DataType, type_id
-
-
-# The DeviceMemoryResource attribute could be released prematurely
-# by the gc if the DeviceScalar is in a reference cycle. Removing
-# the tp_clear function with the no_gc_clear decoration prevents that.
-# See https://github.com/rapidsai/rmm/pull/931 for details.
-@no_gc_clear
-cdef class Scalar:
-    """A scalar value in device memory."""
-    # Unlike for columns, libcudf does not support scalar views. All APIs that
-    # accept scalar values accept references to the owning object rather than a
-    # special view type. As a result, pylibcudf.Scalar has a simpler structure
-    # than pylibcudf.Column because it can be a true wrapper around a libcudf
-    # column
-
-    def __cinit__(self, *args, **kwargs):
-        self.mr = get_current_device_resource()
-
-    def __init__(self, pyarrow.lib.Scalar value=None):
-        # TODO: This case is not something we really want to
-        # support, but it here for now to ease the transition of
-        # DeviceScalar.
-        if value is not None:
-            raise ValueError("Scalar should be constructed with a factory")
-
-    @staticmethod
-    def from_pyarrow_scalar(pyarrow.lib.Scalar value, DataType data_type=None):
-        # Allow passing a dtype, but only for the purpose of decimals for now
-
-        # Need a local import here to avoid a circular dependency because
-        # from_arrow_scalar returns a Scalar.
-        from .interop import from_arrow_scalar
-
-        cdef Scalar s = from_arrow_scalar(value)
-        if s.type().id() != type_id.DECIMAL128:
-            if data_type is not None:
-                raise ValueError(
-                    "dtype may not be passed for non-decimal types"
-                )
-            return s
-
-        if data_type is None:
-            raise ValueError(
-                "Decimal scalars must be constructed with a dtype"
-            )
-
-        cdef type_id tid = data_type.id()
-        if tid not in (type_id.DECIMAL32, type_id.DECIMAL64, type_id.DECIMAL128):
-            raise ValueError(
-                "Decimal scalars may only be cast to decimals"
-            )
-
-        if tid == type_id.DECIMAL128:
-            return s
-
-        if tid == type_id.DECIMAL32:
-            s.c_obj.reset(
-                new fixed_point_scalar[decimal32](
-                    (<fixed_point_scalar[decimal128]*> s.c_obj.get()).value(),
-                    scale_type(-value.type.scale),
-                    s.c_obj.get().is_valid()
-                )
-            )
-        elif tid == type_id.DECIMAL64:
-            s.c_obj.reset(
-                new fixed_point_scalar[decimal64](
-                    (<fixed_point_scalar[decimal128]*> s.c_obj.get()).value(),
-                    scale_type(-value.type.scale),
-                    s.c_obj.get().is_valid()
-                )
-            )
-        return s
-
-    cpdef to_pyarrow_scalar(self, metadata):
-        from .interop import to_arrow_scalar
-        return to_arrow_scalar(self, metadata)
-
-    cdef const scalar* get(self) except *:
-        return self.c_obj.get()
-
-    cpdef DataType type(self):
-        """The type of data in the column."""
-        return self._data_type
-
-    cpdef bool is_valid(self):
-        """True if the scalar is valid, false if not"""
-        return self.get().is_valid()
-
-    @staticmethod
-    cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=None):
-        """Construct a Scalar object from a libcudf scalar.
-
-        This method is for pylibcudf's functions to use to ingest outputs of
-        calling libcudf algorithms, and should generally not be needed by users
-        (even direct pylibcudf Cython users).
-        """
-        cdef Scalar s = Scalar.__new__(Scalar)
-        s.c_obj.swap(libcudf_scalar)
-        s._data_type = DataType.from_libcudf(s.get().type())
-        return s
diff --git a/python/cudf/cudf/_lib/scalar.pxd b/python/cudf/cudf/_lib/scalar.pxd
index ae1d350edc6..1deed60d67d 100644
--- a/python/cudf/cudf/_lib/scalar.pxd
+++ b/python/cudf/cudf/_lib/scalar.pxd
@@ -1,16 +1,20 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 
 from rmm._lib.memory_resource cimport DeviceMemoryResource
 
-from cudf._lib cimport pylibcudf
 from cudf._lib.cpp.scalar.scalar cimport scalar
 
 
 cdef class DeviceScalar:
-    cdef pylibcudf.Scalar c_value
+    cdef unique_ptr[scalar] c_value
+
+    # Holds a reference to the DeviceMemoryResource used for allocation.
+    # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is
+    # needed for deallocation
+    cdef DeviceMemoryResource mr
 
     cdef object _dtype
 
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 30d46bd3fe2..0407785b2d8 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -1,6 +1,7 @@
 # Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
-import copy
+cimport cython
+
 import decimal
 
 import numpy as np
@@ -21,17 +22,14 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-import cudf
-from cudf._lib.pylibcudf.interop import ColumnMetadata
-
-from cudf._lib.pylibcudf.types cimport type_id
+from rmm._lib.memory_resource cimport get_current_device_resource
 
+import cudf
 from cudf._lib.types import (
     LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
     datetime_unit_map,
     duration_unit_map,
 )
-from cudf.api.types import is_list_dtype, is_struct_dtype
 from cudf.core.dtypes import ListDtype, StructDtype
 from cudf.core.missing import NA, NaT
 
@@ -40,10 +38,9 @@ from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
 
-from cudf._lib.interop import to_arrow
+from cudf._lib.interop import from_arrow, to_arrow
 
 cimport cudf._lib.cpp.types as libcudf_types
-from cudf._lib cimport pylibcudf
 from cudf._lib.cpp.scalar.scalar cimport (
     duration_scalar,
     fixed_point_scalar,
@@ -54,7 +51,12 @@ from cudf._lib.cpp.scalar.scalar cimport (
     struct_scalar,
     timestamp_scalar,
 )
-from cudf._lib.cpp.wrappers.decimals cimport decimal32, decimal64, decimal128
+from cudf._lib.cpp.wrappers.decimals cimport (
+    decimal32,
+    decimal64,
+    decimal128,
+    scale_type,
+)
 from cudf._lib.cpp.wrappers.durations cimport (
     duration_ms,
     duration_ns,
@@ -67,67 +69,18 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
     timestamp_s,
     timestamp_us,
 )
-from cudf._lib.utils cimport columns_from_table_view
-
-
-# TODO: Check if this could replace _nested_na_replace
-def _replace_nested_nulls(obj):
-    if isinstance(obj, list):
-        for i, item in enumerate(obj):
-            # TODO: Check if this should use _is_null_host_scalar
-            if cudf.utils.utils.is_na_like(item):
-                obj[i] = None
-            elif isinstance(item, (dict, list)):
-                _replace_nested_nulls(item)
-    elif isinstance(obj, dict):
-        for k, v in obj.items():
-            if cudf.utils.utils.is_na_like(v):
-                obj[k] = None
-            elif isinstance(v, (dict, list)):
-                _replace_nested_nulls(v)
-
-
-def _replace_nested_none(obj):
-    if isinstance(obj, list):
-        for i, item in enumerate(obj):
-            if item is None:
-                obj[i] = NA
-            elif isinstance(item, (dict, list)):
-                _replace_nested_none(item)
-    elif isinstance(obj, dict):
-        for k, v in obj.items():
-            if v is None:
-                obj[k] = NA
-            elif isinstance(v, (dict, list)):
-                _replace_nested_none(v)
-
-
-def gather_metadata(dtypes):
-    # dtypes is a dict mapping names to column dtypes
-    # This interface is a bit clunky, but it matches libcudf. May want to
-    # consider better approaches to building up the metadata eventually.
-    out = []
-    for name, dtype in dtypes.items():
-        v = ColumnMetadata(name)
-        if is_struct_dtype(dtype):
-            v.children_meta = gather_metadata(dtype.fields)
-        elif is_list_dtype(dtype):
-            # Offsets column is unnamed and has no children
-            v.children_meta.append(ColumnMetadata(""))
-            v.children_meta.extend(
-                gather_metadata({"": dtype.element_type})
-            )
-        out.append(v)
-    return out
+from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
 
 
+# The DeviceMemoryResource attribute could be released prematurely
+# by the gc if the DeviceScalar is in a reference cycle. Removing
+# the tp_clear function with the no_gc_clear decoration prevents that.
+# See https://github.com/rapidsai/rmm/pull/931 for details.
+@cython.no_gc_clear
 cdef class DeviceScalar:
 
-    # I think this should be removable, except that currently the way that
-    # from_unique_ptr is implemented is probably dereferencing this in an
-    # invalid state. See what the best way to fix that is.
     def __cinit__(self, *args, **kwargs):
-        self.c_value = pylibcudf.Scalar()
+        self.mr = get_current_device_resource()
 
     def __init__(self, value, dtype):
         """
@@ -143,102 +96,63 @@ cdef class DeviceScalar:
         dtype : dtype
             A NumPy dtype.
         """
-        dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
+        self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
+        self._set_value(value, self._dtype)
 
-        if cudf.utils.utils.is_na_like(value):
-            value = None
-        else:
-            # TODO: For now we always deepcopy the input value to avoid
-            # overwriting the input values when replacing nulls. Since it's
-            # just host values it's not that expensive, but we could consider
-            # alternatives.
-            value = copy.deepcopy(value)
-            _replace_nested_nulls(value)
-
-        if isinstance(dtype, cudf.core.dtypes._BaseDtype):
-            pa_type = dtype.to_arrow()
-        elif pd.api.types.is_string_dtype(dtype):
-            # Have to manually convert object types, which we use internally
-            # for strings but pyarrow only supports as unicode 'U'
-            pa_type = pa.string()
-        else:
-            pa_type = pa.from_numpy_dtype(dtype)
-
-        pa_scalar = pa.scalar(value, type=pa_type)
+    def _set_value(self, value, dtype):
+        # IMPORTANT: this should only ever be called from __init__
+        valid = not _is_null_host_scalar(value)
 
-        cdef type_id tid
-        data_type = None
         if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
-            tid = type_id.DECIMAL128
-            if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype):
-                tid = type_id.DECIMAL32
-            elif isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
-                tid = type_id.DECIMAL64
-            data_type = pylibcudf.DataType(tid, -dtype.scale)
-
-        self.c_value = pylibcudf.Scalar.from_pyarrow_scalar(pa_scalar, data_type)
-        self._dtype = dtype
+            _set_decimal_from_scalar(
+                self.c_value, value, dtype, valid)
+        elif isinstance(dtype, cudf.ListDtype):
+            _set_list_from_pylist(
+                self.c_value, value, dtype, valid)
+        elif isinstance(dtype, cudf.StructDtype):
+            _set_struct_from_pydict(self.c_value, value, dtype, valid)
+        elif pd.api.types.is_string_dtype(dtype):
+            _set_string_from_np_string(self.c_value, value, valid)
+        elif pd.api.types.is_numeric_dtype(dtype):
+            _set_numeric_from_np_scalar(self.c_value,
+                                        value,
+                                        dtype,
+                                        valid)
+        elif pd.api.types.is_datetime64_dtype(dtype):
+            _set_datetime64_from_np_scalar(
+                self.c_value, value, dtype, valid
+            )
+        elif pd.api.types.is_timedelta64_dtype(dtype):
+            _set_timedelta64_from_np_scalar(
+                self.c_value, value, dtype, valid
+            )
+        else:
+            raise ValueError(
+                f"Cannot convert value of type "
+                f"{type(value).__name__} to cudf scalar"
+            )
 
     def _to_host_scalar(self):
-        metadata = gather_metadata({"": self.dtype})[0]
         if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):
-            ps = self.c_value.to_pyarrow_scalar(metadata)
-            if not ps.is_valid:
-                return NA
-            return ps.as_py()
-        # For nested types we should eventually account for the special cases
-        # that we handle for other types, e.g. numerics being casted to numpy
-        # types or datetime/timedelta needing to be cast to int64 to handle
-        # overflow. However, the old implementation didn't handle these cases
-        # either, so we can leave that for a follow-up PR.
+            result = _get_py_decimal_from_fixed_point(self.c_value)
         elif cudf.api.types.is_struct_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar(metadata)
-            if not ps.is_valid:
-                return NA
-            ret = ps.as_py()
-            _replace_nested_none(ret)
-            return ret
+            result = _get_py_dict_from_struct(self.c_value, self.dtype)
         elif cudf.api.types.is_list_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar(metadata)
-            if not ps.is_valid:
-                return NA
-            ret = ps.as_py()
-            _replace_nested_none(ret)
-            return ret
+            result = _get_py_list_from_list(self.c_value, self.dtype)
         elif pd.api.types.is_string_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar(metadata)
-            if not ps.is_valid:
-                return NA
-            return ps.as_py()
+            result = _get_py_string_from_string(self.c_value)
         elif pd.api.types.is_numeric_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar(metadata)
-            if not ps.is_valid:
-                return NA
-            return ps.type.to_pandas_dtype()(ps.as_py())
+            result = _get_np_scalar_from_numeric(self.c_value)
         elif pd.api.types.is_datetime64_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar(metadata)
-            if not ps.is_valid:
-                return NaT
-            time_unit, _ = np.datetime_data(self.dtype)
-            # Cast to int64 to avoid overflow
-            return np.datetime64(ps.cast('int64').as_py(), time_unit)
+            result = _get_np_scalar_from_timestamp64(self.c_value)
         elif pd.api.types.is_timedelta64_dtype(self.dtype):
-            ps = self.c_value.to_pyarrow_scalar(metadata)
-            if not ps.is_valid:
-                return NaT
-            time_unit, _ = np.datetime_data(self.dtype)
-            # Cast to int64 to avoid overflow
-            return np.timedelta64(ps.cast('int64').as_py(), time_unit)
+            result = _get_np_scalar_from_timedelta64(self.c_value)
         else:
             raise ValueError(
                 "Could not convert cudf::scalar to a Python value"
             )
         return result
 
-    # TODO: This is just here for testing and should be removed.
-    def get(self):
-        return self.c_value
-
     @property
     def dtype(self):
         """
@@ -255,13 +169,13 @@ cdef class DeviceScalar:
         return self._to_host_scalar()
 
     cdef const scalar* get_raw_ptr(self) except *:
-        return self.c_value.c_obj.get()
+        return self.c_value.get()
 
     cpdef bool is_valid(self):
         """
         Returns if the Scalar is valid or not(i.e., <NA>).
         """
-        return self.c_value.is_valid()
+        return self.get_raw_ptr()[0].is_valid()
 
     def __repr__(self):
         if cudf.utils.utils.is_na_like(self.value):
@@ -280,7 +194,7 @@ cdef class DeviceScalar:
         cdef DeviceScalar s = DeviceScalar.__new__(DeviceScalar)
         cdef libcudf_types.data_type cdtype
 
-        s.c_value = pylibcudf.Scalar.from_libcudf(move(ptr))
+        s.c_value = move(ptr)
         cdtype = s.get_raw_ptr()[0].type()
 
         if dtype is not None:
@@ -322,6 +236,42 @@ cdef class DeviceScalar:
         return s
 
 
+cdef _set_string_from_np_string(unique_ptr[scalar]& s, value, bool valid=True):
+    value = value if valid else ""
+    s.reset(new string_scalar(value.encode(), valid))
+
+
+cdef _set_numeric_from_np_scalar(unique_ptr[scalar]& s,
+                                 object value,
+                                 object dtype,
+                                 bool valid=True):
+    value = value if valid else 0
+    if dtype == "int8":
+        s.reset(new numeric_scalar[int8_t](value, valid))
+    elif dtype == "int16":
+        s.reset(new numeric_scalar[int16_t](value, valid))
+    elif dtype == "int32":
+        s.reset(new numeric_scalar[int32_t](value, valid))
+    elif dtype == "int64":
+        s.reset(new numeric_scalar[int64_t](value, valid))
+    elif dtype == "uint8":
+        s.reset(new numeric_scalar[uint8_t](value, valid))
+    elif dtype == "uint16":
+        s.reset(new numeric_scalar[uint16_t](value, valid))
+    elif dtype == "uint32":
+        s.reset(new numeric_scalar[uint32_t](value, valid))
+    elif dtype == "uint64":
+        s.reset(new numeric_scalar[uint64_t](value, valid))
+    elif dtype == "float32":
+        s.reset(new numeric_scalar[float](value, valid))
+    elif dtype == "float64":
+        s.reset(new numeric_scalar[double](value, valid))
+    elif dtype == "bool":
+        s.reset(new numeric_scalar[bool](<bool>value, valid))
+    else:
+        raise ValueError(f"dtype not supported: {dtype}")
+
+
 cdef _set_datetime64_from_np_scalar(unique_ptr[scalar]& s,
                                     object value,
                                     object dtype,
@@ -374,6 +324,62 @@ cdef _set_timedelta64_from_np_scalar(unique_ptr[scalar]& s,
     else:
         raise ValueError(f"dtype not supported: {dtype}")
 
+cdef _set_decimal_from_scalar(unique_ptr[scalar]& s,
+                              object value,
+                              object dtype,
+                              bool valid=True):
+    value = cudf.utils.dtypes._decimal_to_int64(value) if valid else 0
+    if isinstance(dtype, cudf.Decimal64Dtype):
+        s.reset(
+            new fixed_point_scalar[decimal64](
+                <int64_t>np.int64(value), scale_type(-dtype.scale), valid
+            )
+        )
+    elif isinstance(dtype, cudf.Decimal32Dtype):
+        s.reset(
+            new fixed_point_scalar[decimal32](
+                <int32_t>np.int32(value), scale_type(-dtype.scale), valid
+            )
+        )
+    elif isinstance(dtype, cudf.Decimal128Dtype):
+        s.reset(
+            new fixed_point_scalar[decimal128](
+                <libcudf_types.int128>value, scale_type(-dtype.scale), valid
+            )
+        )
+    else:
+        raise ValueError(f"dtype not supported: {dtype}")
+
+cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
+                             object value,
+                             object dtype,
+                             bool valid=True):
+    arrow_schema = dtype.to_arrow()
+    columns = [str(i) for i in range(len(arrow_schema))]
+    if valid:
+        pyarrow_table = pa.Table.from_arrays(
+            [
+                pa.array([value[f.name]], from_pandas=True, type=f.type)
+                for f in arrow_schema
+            ],
+            names=columns
+        )
+    else:
+        pyarrow_table = pa.Table.from_arrays(
+            [
+                pa.array([NA], from_pandas=True, type=f.type)
+                for f in arrow_schema
+            ],
+            names=columns
+        )
+
+    data = from_arrow(pyarrow_table)
+    cdef table_view struct_view = table_view_from_columns(data)
+
+    s.reset(
+        new struct_scalar(struct_view, valid)
+    )
+
 cdef _get_py_dict_from_struct(unique_ptr[scalar]& s, dtype):
     if not s.get()[0].is_valid():
         return NA
@@ -389,6 +395,25 @@ cdef _get_py_dict_from_struct(unique_ptr[scalar]& s, dtype):
     python_dict = table.to_pydict()["None"][0]
     return {k: _nested_na_replace([python_dict[k]])[0] for k in python_dict}
 
+cdef _set_list_from_pylist(unique_ptr[scalar]& s,
+                           object value,
+                           object dtype,
+                           bool valid=True):
+
+    value = value if valid else [NA]
+    cdef Column col
+    if isinstance(dtype.element_type, ListDtype):
+        pa_type = dtype.element_type.to_arrow()
+    else:
+        pa_type = dtype.to_arrow().value_type
+    col = cudf.core.column.as_column(
+        pa.array(value, from_pandas=True, type=pa_type)
+    )
+    cdef column_view col_view = col.view()
+    s.reset(
+        new list_scalar(col_view, valid)
+    )
+
 
 cdef _get_py_list_from_list(unique_ptr[scalar]& s, dtype):
 
@@ -576,9 +601,9 @@ def _create_proxy_nat_scalar(dtype):
     if dtype.char in 'mM':
         nat = dtype.type('NaT').astype(dtype)
         if dtype.type == np.datetime64:
-            _set_datetime64_from_np_scalar(result.c_value.c_obj, nat, dtype, True)
+            _set_datetime64_from_np_scalar(result.c_value, nat, dtype, True)
         elif dtype.type == np.timedelta64:
-            _set_timedelta64_from_np_scalar(result.c_value.c_obj, nat, dtype, True)
+            _set_timedelta64_from_np_scalar(result.c_value, nat, dtype, True)
         return result
     else:
         raise TypeError('NAT only valid for datetime and timedelta')
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index ac10dd97c56..5dd58d8a875 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -895,14 +895,14 @@ def test_memory_usage():
     "data, idx",
     [
         (
-            [[{"f2": {"a": 100}, "f1": "a"}, {"f1": "sf12", "f2": NA}]],
+            [[{"f2": {"a": 100}, "f1": "a"}, {"f1": "sf12", "f2": None}]],
             0,
         ),
         (
             [
                 [
                     {"f2": {"a": 100, "c": 90, "f2": 10}, "f1": "a"},
-                    {"f1": "sf12", "f2": NA},
+                    {"f1": "sf12", "f2": None},
                 ]
             ],
             0,
diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py
index ce6dc587320..a3593e55b97 100644
--- a/python/cudf/cudf/tests/test_struct.py
+++ b/python/cudf/cudf/tests/test_struct.py
@@ -150,7 +150,9 @@ def test_struct_setitem(data, item):
     "data",
     [
         {"a": 1, "b": "rapids", "c": [1, 2, 3, 4]},
+        {"a": 1, "b": "rapids", "c": [1, 2, 3, 4], "d": cudf.NA},
         {"a": "Hello"},
+        {"b": [], "c": [1, 2, 3]},
     ],
 )
 def test_struct_scalar_host_construction(data):
@@ -159,39 +161,6 @@ def test_struct_scalar_host_construction(data):
     assert list(slr.device_value.value.values()) == list(data.values())
 
 
-@pytest.mark.parametrize(
-    ("data", "dtype"),
-    [
-        (
-            {"a": 1, "b": "rapids", "c": [1, 2, 3, 4], "d": cudf.NA},
-            cudf.StructDtype(
-                {
-                    "a": np.dtype(np.int64),
-                    "b": np.dtype(np.str_),
-                    "c": cudf.ListDtype(np.dtype(np.int64)),
-                    "d": np.dtype(np.int64),
-                }
-            ),
-        ),
-        (
-            {"b": [], "c": [1, 2, 3]},
-            cudf.StructDtype(
-                {
-                    "b": cudf.ListDtype(np.dtype(np.int64)),
-                    "c": cudf.ListDtype(np.dtype(np.int64)),
-                }
-            ),
-        ),
-    ],
-)
-def test_struct_scalar_host_construction_no_dtype_inference(data, dtype):
-    # cudf cannot infer the dtype of the scalar when it contains only nulls or
-    # is empty.
-    slr = cudf.Scalar(data, dtype=dtype)
-    assert slr.value == data
-    assert list(slr.device_value.value.values()) == list(data.values())
-
-
 def test_struct_scalar_null():
     slr = cudf.Scalar(cudf.NA, dtype=StructDtype)
     assert slr.device_value.value is cudf.NA
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 73ea8e2cfc4..1b94db75340 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -463,6 +463,24 @@ def _get_nan_for_dtype(dtype):
         return np.float64("nan")
 
 
+def _decimal_to_int64(decimal: Decimal) -> int:
+    """
+    Scale a Decimal such that the result is the integer
+    that would result from removing the decimal point.
+
+    Examples
+    --------
+    >>> _decimal_to_int64(Decimal('1.42'))
+    142
+    >>> _decimal_to_int64(Decimal('0.0042'))
+    42
+    >>> _decimal_to_int64(Decimal('-1.004201'))
+    -1004201
+
+    """
+    return int(f"{decimal:0f}".replace(".", ""))
+
+
 def get_allowed_combinations_for_operator(dtype_l, dtype_r, op):
     error = TypeError(
         f"{op} not supported between {dtype_l} and {dtype_r} scalars"

From 9b7f2402a3ca3db193a99cec7a92a0474c4b9b8c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Sun, 17 Sep 2023 18:06:35 -0700
Subject: [PATCH 48/54] Switch back to older implementation of writing decimal
 bytes directly without needing intermediates

---
 cpp/src/interop/to_arrow.cu | 80 +++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 44 deletions(-)

diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 186f3798154..0673d8e8ec2 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -143,18 +143,22 @@ struct dispatch_to_arrow {
   }
 };
 
-template <>
-std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal32>(
-  column_view input,
-  cudf::type_id,
-  column_metadata const&,
-  arrow::MemoryPool* ar_mr,
-  rmm::cuda_stream_view stream)
+// Convert decimal types from libcudf to arrow where those types are not
+// directly supported by Arrow. These types must be fit into 128 bits, the
+// smallest decimal resolution supported by Arrow.
+template <typename DeviceType>
+std::shared_ptr<arrow::Array> unsupported_decimals_to_arrow(column_view input,
+                                                            arrow::MemoryPool* ar_mr,
+                                                            rmm::cuda_stream_view stream)
 {
-  using DeviceType                = int32_t;
-  size_type const BIT_WIDTH_RATIO = 4;  // Array::Type:type::DECIMAL (128) / int32_t
+  constexpr size_type BIT_WIDTH_RATIO = size_type{128} / sizeof(DeviceType);
+
+  // libcudf decimals are always converted to the highest Arrow precision
+  // because libcudf does not support precision.
+  constexpr auto decimal128_max_precision = int32_t{38};
+  constexpr auto precision                = decimal128_max_precision / BIT_WIDTH_RATIO;
 
-  rmm::device_uvector<__int128_t> buf(input.size() * BIT_WIDTH_RATIO, stream);
+  rmm::device_uvector<DeviceType> buf(input.size() * BIT_WIDTH_RATIO, stream);
 
   auto count = thrust::make_counting_iterator(0);
 
@@ -162,11 +166,15 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal32>(
                    count,
                    count + input.size(),
                    [in = input.begin<DeviceType>(), out = buf.data()] __device__(auto in_idx) {
-                     auto const out_idx   = in_idx;
-                     auto unsigned_value  = in[in_idx] < 0 ? -in[in_idx] : in[in_idx];
-                     auto unsigned_128bit = static_cast<__int128_t>(unsigned_value);
-                     auto signed_128bit   = in[in_idx] < 0 ? -unsigned_128bit : unsigned_128bit;
-                     out[out_idx]         = signed_128bit;
+                     auto const out_idx = in_idx * BIT_WIDTH_RATIO;
+                     // The lowest order bits are the value, the remainder
+                     // simply match the sign bit to satisfy the two's
+                     // complement integer representation of negative numbers.
+                     out[out_idx] = in[in_idx];
+#pragma unroll
+                     for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) {
+                       out[out_idx + i] = in[in_idx] < 0 ? -1 : 0;
+                     }
                    });
 
   auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType);
@@ -175,7 +183,7 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal32>(
   CUDF_CUDA_TRY(cudaMemcpyAsync(
     data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value()));
 
-  auto type    = arrow::decimal(9, -input.type().scale());
+  auto type    = arrow::decimal(precision, -input.type().scale());
   auto mask    = fetch_mask_buffer(input, ar_mr, stream);
   auto buffers = std::vector<std::shared_ptr<arrow::Buffer>>{mask, std::move(data_buffer)};
   auto data    = std::make_shared<arrow::ArrayData>(type, input.size(), buffers);
@@ -184,41 +192,25 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal32>(
 }
 
 template <>
-std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal64>(
+std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal32>(
   column_view input,
   cudf::type_id,
   column_metadata const&,
   arrow::MemoryPool* ar_mr,
   rmm::cuda_stream_view stream)
 {
-  using DeviceType                = int64_t;
-  size_type const BIT_WIDTH_RATIO = 2;  // Array::Type:type::DECIMAL (128) / int64_t
-
-  rmm::device_uvector<DeviceType> buf(input.size() * BIT_WIDTH_RATIO, stream);
-
-  auto count = thrust::make_counting_iterator(0);
-
-  thrust::for_each(rmm::exec_policy(cudf::get_default_stream()),
-                   count,
-                   count + input.size(),
-                   [in = input.begin<DeviceType>(), out = buf.data()] __device__(auto in_idx) {
-                     auto const out_idx = in_idx * 2;
-                     out[out_idx]       = in[in_idx];
-                     out[out_idx + 1]   = in[in_idx] < 0 ? -1 : 0;
-                   });
-
-  auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType);
-  auto data_buffer             = allocate_arrow_buffer(buf_size_in_bytes, ar_mr);
-
-  CUDF_CUDA_TRY(cudaMemcpyAsync(
-    data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value()));
-
-  auto type    = arrow::decimal(18, -input.type().scale());
-  auto mask    = fetch_mask_buffer(input, ar_mr, stream);
-  auto buffers = std::vector<std::shared_ptr<arrow::Buffer>>{mask, std::move(data_buffer)};
-  auto data    = std::make_shared<arrow::ArrayData>(type, input.size(), buffers);
+  return unsupported_decimals_to_arrow<int32_t>(input, ar_mr, stream);
+}
 
-  return std::make_shared<arrow::Decimal128Array>(data);
+template <>
+std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal64>(
+  column_view input,
+  cudf::type_id,
+  column_metadata const&,
+  arrow::MemoryPool* ar_mr,
+  rmm::cuda_stream_view stream)
+{
+  return unsupported_decimals_to_arrow<int64_t>(input, ar_mr, stream);
 }
 
 template <>

From 3d4ac776296ff70f7b2ff220e4359e8e57417a0b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 18 Sep 2023 08:39:38 -0700
Subject: [PATCH 49/54] Specify unroll

---
 cpp/src/interop/to_arrow.cu | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 0673d8e8ec2..6524d58815d 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -162,20 +162,21 @@ std::shared_ptr<arrow::Array> unsupported_decimals_to_arrow(column_view input,
 
   auto count = thrust::make_counting_iterator(0);
 
-  thrust::for_each(rmm::exec_policy(cudf::get_default_stream()),
-                   count,
-                   count + input.size(),
-                   [in = input.begin<DeviceType>(), out = buf.data()] __device__(auto in_idx) {
-                     auto const out_idx = in_idx * BIT_WIDTH_RATIO;
-                     // The lowest order bits are the value, the remainder
-                     // simply match the sign bit to satisfy the two's
-                     // complement integer representation of negative numbers.
-                     out[out_idx] = in[in_idx];
-#pragma unroll
-                     for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) {
-                       out[out_idx + i] = in[in_idx] < 0 ? -1 : 0;
-                     }
-                   });
+  thrust::for_each(
+    rmm::exec_policy(cudf::get_default_stream()),
+    count,
+    count + input.size(),
+    [in = input.begin<DeviceType>(), out = buf.data(), BIT_WIDTH_RATIO] __device__(auto in_idx) {
+      auto const out_idx = in_idx * BIT_WIDTH_RATIO;
+      // The lowest order bits are the value, the remainder
+      // simply match the sign bit to satisfy the two's
+      // complement integer representation of negative numbers.
+      out[out_idx] = in[in_idx];
+#pragma unroll BIT_WIDTH_RATIO - 1
+      for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) {
+        out[out_idx + i] = in[in_idx] < 0 ? -1 : 0;
+      }
+    });
 
   auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType);
   auto data_buffer             = allocate_arrow_buffer(buf_size_in_bytes, ar_mr);

From 93116633a15de46e74c7ee060896bea96334b614 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 18 Sep 2023 08:45:10 -0700
Subject: [PATCH 50/54] Remove unnecessary includes

---
 cpp/src/interop/from_arrow.cu | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 6e0ef462ac9..ecdb0933b2b 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -13,10 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <arrow/array/array_base.h>
-#include <arrow/array/builder_nested.h>
-#include <arrow/type.h>
-#include <arrow/type_fwd.h>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/column/column_view.hpp>
 #include <cudf/detail/concatenate.hpp>
@@ -39,7 +35,6 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_buffer.hpp>
 
-#include <stdexcept>
 #include <thrust/gather.h>
 
 namespace cudf {

From 431321298f53bcc8882c8b425b3d864c26430e0d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 18 Sep 2023 10:26:21 -0700
Subject: [PATCH 51/54] Pass in precision instead of computing

---
 cpp/src/interop/to_arrow.cu | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 6524d58815d..7beec1b9879 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -148,15 +148,11 @@ struct dispatch_to_arrow {
 // smallest decimal resolution supported by Arrow.
 template <typename DeviceType>
 std::shared_ptr<arrow::Array> unsupported_decimals_to_arrow(column_view input,
+                                                            int32_t precision,
                                                             arrow::MemoryPool* ar_mr,
                                                             rmm::cuda_stream_view stream)
 {
-  constexpr size_type BIT_WIDTH_RATIO = size_type{128} / sizeof(DeviceType);
-
-  // libcudf decimals are always converted to the highest Arrow precision
-  // because libcudf does not support precision.
-  constexpr auto decimal128_max_precision = int32_t{38};
-  constexpr auto precision                = decimal128_max_precision / BIT_WIDTH_RATIO;
+  constexpr size_type BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DeviceType);
 
   rmm::device_uvector<DeviceType> buf(input.size() * BIT_WIDTH_RATIO, stream);
 
@@ -200,7 +196,7 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal32>(
   arrow::MemoryPool* ar_mr,
   rmm::cuda_stream_view stream)
 {
-  return unsupported_decimals_to_arrow<int32_t>(input, ar_mr, stream);
+  return unsupported_decimals_to_arrow<int32_t>(input, 9, ar_mr, stream);
 }
 
 template <>
@@ -211,7 +207,7 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal64>(
   arrow::MemoryPool* ar_mr,
   rmm::cuda_stream_view stream)
 {
-  return unsupported_decimals_to_arrow<int64_t>(input, ar_mr, stream);
+  return unsupported_decimals_to_arrow<int64_t>(input, 18, ar_mr, stream);
 }
 
 template <>

From 2aade20da1f0b6ecd1d4beabd04baf1bc737423d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 18 Sep 2023 17:31:43 -0700
Subject: [PATCH 52/54] Address reviews

---
 cpp/include/cudf/detail/interop.hpp   |  3 +-
 cpp/src/interop/from_arrow.cu         |  2 +-
 cpp/src/interop/to_arrow.cu           | 12 ++---
 cpp/tests/interop/from_arrow_test.cpp | 47 ++++++++++----------
 cpp/tests/interop/to_arrow_test.cpp   | 63 ++++++++++++++-------------
 5 files changed, 64 insertions(+), 63 deletions(-)

diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp
index 6a294b1d360..44024333239 100644
--- a/cpp/include/cudf/detail/interop.hpp
+++ b/cpp/include/cudf/detail/interop.hpp
@@ -109,7 +109,7 @@ std::shared_ptr<arrow::Array> to_arrow_array(cudf::type_id id, Ts&&... args)
  * the specified `arrow::DataType`'s `id()`.
  *
  * This function is analogous to libcudf's type_dispatcher, but instead applies
- * to Arrow functions. It's primary use case is to leverage Arrow's
+ * to Arrow functions. Its primary use case is to leverage Arrow's
  * metaprogramming facilities like arrow::TypeTraits that require translating
  * the runtime dtype information into compile-time types.
  */
@@ -177,7 +177,6 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
  */
 std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
                                         column_metadata const& metadata,
-
                                         rmm::cuda_stream_view stream,
                                         arrow::MemoryPool* ar_mr);
 /**
diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index ecdb0933b2b..aa7a70719a6 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -434,7 +434,7 @@ struct BuilderGenerator {
                            std::is_same_v<T, arrow::StructType>)>
   std::shared_ptr<arrow::ArrayBuilder> operator()(std::shared_ptr<arrow::DataType> const& type)
   {
-    CUDF_FAIL("Type not support for BuilderGenerator");
+    CUDF_FAIL("Type not supported by BuilderGenerator");
   }
 };
 
diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 7beec1b9879..66c4bd2cb6c 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -165,7 +165,7 @@ std::shared_ptr<arrow::Array> unsupported_decimals_to_arrow(column_view input,
     [in = input.begin<DeviceType>(), out = buf.data(), BIT_WIDTH_RATIO] __device__(auto in_idx) {
       auto const out_idx = in_idx * BIT_WIDTH_RATIO;
       // The lowest order bits are the value, the remainder
-      // simply match the sign bit to satisfy the two's
+      // simply matches the sign bit to satisfy the two's
       // complement integer representation of negative numbers.
       out[out_idx] = in[in_idx];
 #pragma unroll BIT_WIDTH_RATIO - 1
@@ -442,11 +442,11 @@ std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
                                         rmm::cuda_stream_view stream,
                                         arrow::MemoryPool* ar_mr)
 {
-  auto column = cudf::make_column_from_scalar(input, 1, stream);
-  cudf::table_view tv{{column->view()}};
-  auto arrow_table  = cudf::to_arrow(tv, {metadata}, stream);
-  auto ac           = arrow_table->column(0);
-  auto maybe_scalar = ac->GetScalar(0);
+  auto const column = cudf::make_column_from_scalar(input, 1, stream);
+  cudf::table_view const tv{{column->view()}};
+  auto const arrow_table  = cudf::to_arrow(tv, {metadata}, stream);
+  auto const ac           = arrow_table->column(0);
+  auto const maybe_scalar = ac->GetScalar(0);
   if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); }
   return maybe_scalar.ValueOrDie();
 }
diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
index 5305b0103d8..a898106a5b2 100644
--- a/cpp/tests/interop/from_arrow_test.cpp
+++ b/cpp/tests/interop/from_arrow_test.cpp
@@ -467,9 +467,10 @@ TYPED_TEST_SUITE(FromArrowNumericScalarTest, NumericTypesNotBool);
 TYPED_TEST(FromArrowNumericScalarTest, Basic)
 {
   TypeParam const value{42};
-  auto arrow_scalar        = arrow::MakeScalar(value);
-  auto cudf_scalar         = cudf::from_arrow(*arrow_scalar);
-  auto cudf_numeric_scalar = dynamic_cast<cudf::numeric_scalar<TypeParam>*>(cudf_scalar.get());
+  auto const arrow_scalar = arrow::MakeScalar(value);
+  auto const cudf_scalar  = cudf::from_arrow(*arrow_scalar);
+  auto const cudf_numeric_scalar =
+    dynamic_cast<cudf::numeric_scalar<TypeParam>*>(cudf_scalar.get());
   if (cudf_numeric_scalar == nullptr) { CUDF_FAIL("Attempted to test with a non-numeric type."); }
   EXPECT_EQ(cudf_numeric_scalar->type(), cudf::data_type(cudf::type_to_id<TypeParam>()));
   EXPECT_EQ(cudf_numeric_scalar->value(), value);
@@ -487,7 +488,7 @@ TEST_F(FromArrowDecimalScalarTest, Basic)
   auto cudf_scalar  = cudf::from_arrow(arrow_scalar);
 
   // Arrow offers a minimum of 128 bits for the Decimal type.
-  auto cudf_decimal_scalar =
+  auto const cudf_decimal_scalar =
     dynamic_cast<cudf::fixed_point_scalar<numeric::decimal128>*>(cudf_scalar.get());
   EXPECT_EQ(cudf_decimal_scalar->type(),
             cudf::data_type(cudf::type_to_id<numeric::decimal128>(), scale));
@@ -498,11 +499,11 @@ struct FromArrowStringScalarTest : public cudf::test::BaseFixture {};
 
 TEST_F(FromArrowStringScalarTest, Basic)
 {
-  auto value        = std::string("hello world");
-  auto arrow_scalar = arrow::StringScalar(value);
-  auto cudf_scalar  = cudf::from_arrow(arrow_scalar);
+  auto const value        = std::string("hello world");
+  auto const arrow_scalar = arrow::StringScalar(value);
+  auto const cudf_scalar  = cudf::from_arrow(arrow_scalar);
 
-  auto cudf_string_scalar = dynamic_cast<cudf::string_scalar*>(cudf_scalar.get());
+  auto const cudf_string_scalar = dynamic_cast<cudf::string_scalar*>(cudf_scalar.get());
   EXPECT_EQ(cudf_string_scalar->type(), cudf::data_type(cudf::type_id::STRING));
   EXPECT_EQ(cudf_string_scalar->to_string(), value);
 }
@@ -515,17 +516,17 @@ TEST_F(FromArrowListScalarTest, Basic)
   std::vector<bool> host_validity  = {true, true, true, false, true, true, true};
 
   arrow::Int64Builder builder;
-  auto status      = builder.AppendValues(host_values, host_validity);
-  auto maybe_array = builder.Finish();
-  auto array       = *maybe_array;
+  auto const status      = builder.AppendValues(host_values, host_validity);
+  auto const maybe_array = builder.Finish();
+  auto const array       = *maybe_array;
 
-  auto arrow_scalar = arrow::ListScalar(array);
-  auto cudf_scalar  = cudf::from_arrow(arrow_scalar);
+  auto const arrow_scalar = arrow::ListScalar(array);
+  auto const cudf_scalar  = cudf::from_arrow(arrow_scalar);
 
-  auto cudf_list_scalar = dynamic_cast<cudf::list_scalar*>(cudf_scalar.get());
+  auto const cudf_list_scalar = dynamic_cast<cudf::list_scalar*>(cudf_scalar.get());
   EXPECT_EQ(cudf_list_scalar->type(), cudf::data_type(cudf::type_id::LIST));
 
-  cudf::test::fixed_width_column_wrapper<int64_t> lhs(
+  cudf::test::fixed_width_column_wrapper<int64_t> const lhs(
     host_values.begin(), host_values.end(), host_validity.begin());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(lhs, cudf_list_scalar->view());
 }
@@ -535,18 +536,18 @@ struct FromArrowStructScalarTest : public cudf::test::BaseFixture {};
 TEST_F(FromArrowStructScalarTest, Basic)
 {
   int64_t const value{42};
-  auto underlying_arrow_scalar = arrow::MakeScalar(value);
+  auto const underlying_arrow_scalar = arrow::MakeScalar(value);
 
-  auto field        = arrow::field("", underlying_arrow_scalar->type);
-  auto arrow_type   = arrow::struct_({field});
-  auto arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type);
-  auto cudf_scalar  = cudf::from_arrow(arrow_scalar);
+  auto const field        = arrow::field("", underlying_arrow_scalar->type);
+  auto const arrow_type   = arrow::struct_({field});
+  auto const arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type);
+  auto const cudf_scalar  = cudf::from_arrow(arrow_scalar);
 
-  auto cudf_struct_scalar = dynamic_cast<cudf::struct_scalar*>(cudf_scalar.get());
+  auto const cudf_struct_scalar = dynamic_cast<cudf::struct_scalar*>(cudf_scalar.get());
   EXPECT_EQ(cudf_struct_scalar->type(), cudf::data_type(cudf::type_id::STRUCT));
 
-  cudf::test::fixed_width_column_wrapper<int64_t> col({value});
-  cudf::table_view lhs({col});
+  cudf::test::fixed_width_column_wrapper<int64_t> const col({value});
+  cudf::table_view const lhs({col});
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(lhs, cudf_struct_scalar->view());
 }
diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp
index 2982cdb2760..6bb4cdfd747 100644
--- a/cpp/tests/interop/to_arrow_test.cpp
+++ b/cpp/tests/interop/to_arrow_test.cpp
@@ -589,12 +589,12 @@ TYPED_TEST_SUITE(ToArrowNumericScalarTest, NumericTypesNotBool);
 TYPED_TEST(ToArrowNumericScalarTest, Basic)
 {
   TypeParam const value{42};
-  auto cudf_scalar = cudf::make_fixed_width_scalar<TypeParam>(value);
+  auto const cudf_scalar = cudf::make_fixed_width_scalar<TypeParam>(value);
 
-  cudf::column_metadata metadata{""};
-  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+  cudf::column_metadata const metadata{""};
+  auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
 
-  auto ref_arrow_scalar = arrow::MakeScalar(value);
+  auto const ref_arrow_scalar = arrow::MakeScalar(value);
   EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar));
 }
 
@@ -607,15 +607,16 @@ TEST_F(ToArrowDecimalScalarTest, Basic)
   auto const precision{18};  // cudf will convert to the widest-precision Arrow scalar of the type
   int32_t const scale{4};
 
-  auto cudf_scalar =
+  auto const cudf_scalar =
     cudf::make_fixed_point_scalar<numeric::decimal128>(value, numeric::scale_type{scale});
 
-  cudf::column_metadata metadata{""};
-  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+  cudf::column_metadata const metadata{""};
+  auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
 
-  auto maybe_ref_arrow_scalar = arrow::MakeScalar(arrow::decimal128(precision, -scale), value);
+  auto const maybe_ref_arrow_scalar =
+    arrow::MakeScalar(arrow::decimal128(precision, -scale), value);
   if (!maybe_ref_arrow_scalar.ok()) { CUDF_FAIL("Failed to construct reference scalar"); }
-  auto ref_arrow_scalar = *maybe_ref_arrow_scalar;
+  auto const ref_arrow_scalar = *maybe_ref_arrow_scalar;
   EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar));
 }
 
@@ -624,11 +625,11 @@ struct ToArrowStringScalarTest : public cudf::test::BaseFixture {};
 TEST_F(ToArrowStringScalarTest, Basic)
 {
   std::string const value{"hello world"};
-  auto cudf_scalar = cudf::make_string_scalar(value);
-  cudf::column_metadata metadata{""};
-  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+  auto const cudf_scalar = cudf::make_string_scalar(value);
+  cudf::column_metadata const metadata{""};
+  auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
 
-  auto ref_arrow_scalar = arrow::MakeScalar(value);
+  auto const ref_arrow_scalar = arrow::MakeScalar(value);
   EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar));
 }
 
@@ -636,23 +637,23 @@ struct ToArrowListScalarTest : public cudf::test::BaseFixture {};
 
 TEST_F(ToArrowListScalarTest, Basic)
 {
-  std::vector<int64_t> host_values = {1, 2, 3, 5, 6, 7, 8};
-  std::vector<bool> host_validity  = {true, true, true, false, true, true, true};
+  std::vector<int64_t> const host_values = {1, 2, 3, 5, 6, 7, 8};
+  std::vector<bool> const host_validity  = {true, true, true, false, true, true, true};
 
-  cudf::test::fixed_width_column_wrapper<int64_t> col(
+  cudf::test::fixed_width_column_wrapper<int64_t> const col(
     host_values.begin(), host_values.end(), host_validity.begin());
 
-  auto cudf_scalar = cudf::make_list_scalar(col);
+  auto const cudf_scalar = cudf::make_list_scalar(col);
 
-  cudf::column_metadata metadata{""};
-  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+  cudf::column_metadata const metadata{""};
+  auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
 
   arrow::Int64Builder builder;
-  auto status      = builder.AppendValues(host_values, host_validity);
-  auto maybe_array = builder.Finish();
-  auto array       = *maybe_array;
+  auto const status      = builder.AppendValues(host_values, host_validity);
+  auto const maybe_array = builder.Finish();
+  auto const array       = *maybe_array;
 
-  auto ref_arrow_scalar = arrow::ListScalar(array);
+  auto const ref_arrow_scalar = arrow::ListScalar(array);
 
   EXPECT_TRUE(arrow_scalar->Equals(ref_arrow_scalar));
 }
@@ -664,18 +665,18 @@ TEST_F(ToArrowStructScalarTest, Basic)
   int64_t const value{42};
   auto const field_name{"a"};
 
-  cudf::test::fixed_width_column_wrapper<int64_t> col{value};
-  cudf::table_view tbl({col});
-  auto cudf_scalar = cudf::make_struct_scalar(tbl);
+  cudf::test::fixed_width_column_wrapper<int64_t> const col{value};
+  cudf::table_view const tbl({col});
+  auto const cudf_scalar = cudf::make_struct_scalar(tbl);
 
   cudf::column_metadata metadata{""};
   metadata.children_meta.emplace_back(field_name);
-  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
+  auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata);
 
-  auto underlying_arrow_scalar = arrow::MakeScalar(value);
-  auto field                   = arrow::field(field_name, underlying_arrow_scalar->type, false);
-  auto arrow_type              = arrow::struct_({field});
-  auto ref_arrow_scalar        = arrow::StructScalar({underlying_arrow_scalar}, arrow_type);
+  auto const underlying_arrow_scalar = arrow::MakeScalar(value);
+  auto const field            = arrow::field(field_name, underlying_arrow_scalar->type, false);
+  auto const arrow_type       = arrow::struct_({field});
+  auto const ref_arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type);
 
   EXPECT_TRUE(arrow_scalar->Equals(ref_arrow_scalar));
 }

From 37376e0cbc05afbccdd64721c95b76f39ea8d6c3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 21 Sep 2023 09:44:48 -0700
Subject: [PATCH 53/54] PR reviews

---
 cpp/include/cudf/interop.hpp       | 8 ++++----
 cpp/src/interop/from_arrow.cu      | 8 +++-----
 cpp/src/interop/to_arrow.cu        | 7 ++++---
 cpp/tests/streams/interop_test.cpp | 4 ++--
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index b1514699e21..50e1a2dbf5c 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -126,7 +126,7 @@ struct column_metadata {
  *
  * @param input table_view that needs to be converted to arrow Table
  * @param metadata Contains hierarchy of names of columns and children
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param ar_mr arrow memory pool to allocate memory for arrow Table
  * @return arrow Table generated from `input`
  */
@@ -142,7 +142,7 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
  *
  * @param input scalar that needs to be converted to arrow Scalar
  * @param metadata Contains hierarchy of names of columns and children
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param ar_mr arrow memory pool to allocate memory for arrow Scalar
  * @return arrow Scalar generated from `input`
  */
@@ -154,7 +154,7 @@ std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
  * @brief Create `cudf::table` from given arrow Table input
  *
  * @param input arrow:Table that needs to be converted to `cudf::table`
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr    Device memory resource used to allocate `cudf::table`
  * @return cudf table generated from given arrow Table
  */
@@ -168,7 +168,7 @@ std::unique_ptr<table> from_arrow(
  * @brief Create `cudf::scalar` from given arrow Scalar input
  *
  * @param input arrow:Scalar that needs to be converted to `cudf::scalar`
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr    Device memory resource used to allocate `cudf::scalar`
  * @return cudf scalar generated from given arrow Scalar
  */
diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index aa7a70719a6..e39625c92e7 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -444,8 +444,8 @@ std::shared_ptr<arrow::ArrayBuilder> make_builder(std::shared_ptr<arrow::DataTyp
     case arrow::Type::STRUCT: {
       std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
 
-      for (auto i = 0; i < type->num_fields(); ++i) {
-        auto const vt = type->field(i)->type();
+      for (auto field : type->fields()) {
+        auto const vt = field->type();
         if (vt->id() == arrow::Type::STRUCT || vt->id() == arrow::Type::LIST) {
           field_builders.push_back(make_builder(vt));
         } else {
@@ -535,9 +535,7 @@ std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
 
   auto cudf_table = detail::from_arrow(*table, stream, mr);
 
-  auto& col = cudf_table->get_column(0);
-
-  auto cv = col.view();
+  auto cv = cudf_table->view().column(0);
   return get_element(cv, 0, stream);
 }
 
diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 66c4bd2cb6c..0cd750bc947 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -22,11 +22,9 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/scatter.hpp>
 #include <cudf/detail/unary.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/interop.hpp>
-#include <cudf/null_mask.hpp>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>
@@ -81,7 +79,10 @@ std::shared_ptr<arrow::Buffer> fetch_mask_buffer(column_view input_view,
     auto mask_buffer = allocate_arrow_bitmap(static_cast<int64_t>(input_view.size()), ar_mr);
     CUDF_CUDA_TRY(cudaMemcpyAsync(
       mask_buffer->mutable_data(),
-      (input_view.offset() > 0) ? cudf::copy_bitmask(input_view).data() : input_view.null_mask(),
+      (input_view.offset() > 0)
+        ? cudf::detail::copy_bitmask(input_view, stream, rmm::mr::get_current_device_resource())
+            .data()
+        : input_view.null_mask(),
       mask_size_in_bytes,
       cudaMemcpyDefault,
       stream.value()));
diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp
index 5235714c342..7eac9e016eb 100644
--- a/cpp/tests/streams/interop_test.cpp
+++ b/cpp/tests/streams/interop_test.cpp
@@ -31,7 +31,7 @@ TEST_F(ArrowTest, ToArrow)
   cudf::table_view tbl{{col}};
 
   std::vector<cudf::column_metadata> metadata{{""}};
-  auto arrow_scalar = cudf::to_arrow(tbl, metadata, cudf::test::get_default_stream());
+  cudf::to_arrow(tbl, metadata, cudf::test::get_default_stream());
 }
 
 TEST_F(ArrowTest, FromArrow)
@@ -57,7 +57,7 @@ TEST_F(ArrowTest, ToArrowScalar)
     cudf::make_fixed_width_scalar<int32_t>(value, cudf::test::get_default_stream());
 
   cudf::column_metadata metadata{""};
-  auto arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata, cudf::test::get_default_stream());
+  cudf::to_arrow(*cudf_scalar, metadata, cudf::test::get_default_stream());
 }
 
 TEST_F(ArrowTest, FromArrowScalar)

From 637e0a43d2d311ae96ac174a3e2d35099011fefb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
Date: Fri, 22 Sep 2023 16:17:35 -0700
Subject: [PATCH 54/54] Update cpp/include/cudf/interop.hpp

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 cpp/include/cudf/interop.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index 50e1a2dbf5c..865cc004107 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -167,7 +167,7 @@ std::unique_ptr<table> from_arrow(
 /**
  * @brief Create `cudf::scalar` from given arrow Scalar input
  *
- * @param input arrow:Scalar that needs to be converted to `cudf::scalar`
+ * @param input `arrow::Scalar` that needs to be converted to `cudf::scalar`
  * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr    Device memory resource used to allocate `cudf::scalar`
  * @return cudf scalar generated from given arrow Scalar