From f46cb31b602c7e23a9c08a64add28526f4ac7bf2 Mon Sep 17 00:00:00 2001
From: Divye Gala <divyegala@gmail.com>
Date: Wed, 2 Aug 2023 20:00:35 -0400
Subject: [PATCH 1/5] Reduce `lists::contains` dispatches for scalars (#13805)

This PR is to prepare for https://github.com/rapidsai/cudf/pull/13672 which uses experimental comparators for both nested and non-nested types.

`lists::contains` currently has two APIs that support:

1. `cudf::scalar`
2. `cudf::column_view`

Both APIs currently need to call `cudf::type_dispatcher`. However, by converting the `cudf::scalar` into a column (materializing the scalar once per row) and passing its `cudf::column_view` to the column API, we cut the number of dispatches in half.

Compile times:
Before this PR: [12:07](https://downloads.rapids.ai/ci/cudf/pull-request/13788/0498f7d/cuda11_x86_64.ninja_log.html)
After this PR: [5:46](https://downloads.rapids.ai/ci/cudf/pull-request/13805/7664a98/cuda11_x86_64.ninja_log.html)

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/13805
---
 cpp/src/lists/contains.cu | 142 ++++++++++++--------------------------
 1 file changed, 46 insertions(+), 96 deletions(-)
diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu
index a3293e36825..9d39f2f9a90 100644
--- a/cpp/src/lists/contains.cu
+++ b/cpp/src/lists/contains.cu
@@ -18,6 +18,7 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/valid_if.cuh>
 #include <cudf/lists/detail/contains.hpp>
+#include <cudf/lists/detail/lists_column_factories.hpp>
 #include <cudf/lists/list_device_view.cuh>
 #include <cudf/lists/lists_column_device_view.cuh>
 #include <cudf/lists/lists_column_view.hpp>
@@ -154,16 +155,11 @@ struct search_list_nested_types_fn {
   duplicate_find_option const find_option;
   KeyValidityIter const key_validity_iter;
   EqComparator const d_comp;
-  bool const search_key_is_scalar;
 
   search_list_nested_types_fn(duplicate_find_option const find_option,
                               KeyValidityIter const key_validity_iter,
-                              EqComparator const& d_comp,
-                              bool search_key_is_scalar)
-    : find_option(find_option),
-      key_validity_iter(key_validity_iter),
-      d_comp(d_comp),
-      search_key_is_scalar(search_key_is_scalar)
+                              EqComparator const& d_comp)
+    : find_option(find_option), key_validity_iter(key_validity_iter), d_comp(d_comp)
   {
   }
 
@@ -186,9 +182,8 @@ struct search_list_nested_types_fn {
     auto const [begin, end] = element_index_pair_iter<forward>(list.size());
     auto const found_iter =
       thrust::find_if(thrust::seq, begin, end, [=] __device__(auto const idx) {
-        return !list.is_null(idx) &&
-               d_comp(static_cast<lhs_index_type>(list.element_offset(idx)),
-                      static_cast<rhs_index_type>(search_key_is_scalar ? 0 : list.row_index()));
+        return !list.is_null(idx) && d_comp(static_cast<lhs_index_type>(list.element_offset(idx)),
+                                            static_cast<rhs_index_type>(list.row_index()));
       });
     // If the key is found, return its found position in the list from `found_iter`.
     return found_iter == end ? NOT_FOUND_SENTINEL : *found_iter;
@@ -199,93 +194,53 @@ struct search_list_nested_types_fn {
  * @brief Function to search for key element(s) in the corresponding rows of a lists column,
  * specialized for non-nested types.
  */
-template <bool search_key_is_scalar,
-          typename Element,
-          typename InputIterator,
-          typename OutputIterator,
-          typename SearchKeyType>
+template <typename Element, typename InputIterator, typename OutputIterator>
 void index_of_non_nested_types(InputIterator input_it,
                                size_type num_rows,
                                OutputIterator output_it,
-                               SearchKeyType const& search_keys,
+                               column_view const& search_keys,
                                bool search_keys_have_nulls,
                                duplicate_find_option find_option,
                                rmm::cuda_stream_view stream)
 {
-  auto const do_search = [=](auto const keys_iter) {
-    thrust::transform(rmm::exec_policy(stream),
-                      input_it,
-                      input_it + num_rows,
-                      keys_iter,
-                      output_it,
-                      search_list_non_nested_types_fn{find_option});
-  };
-
-  if constexpr (search_key_is_scalar) {
-    auto const keys_iter = cudf::detail::make_optional_iterator<Element>(
-      search_keys, nullate::DYNAMIC{search_keys_have_nulls});
-    do_search(keys_iter);
-  } else {
-    auto const keys_cdv_ptr = column_device_view::create(search_keys, stream);
-    auto const keys_iter    = cudf::detail::make_optional_iterator<Element>(
-      *keys_cdv_ptr, nullate::DYNAMIC{search_keys_have_nulls});
-    do_search(keys_iter);
-  }
+  auto const keys_cdv_ptr = column_device_view::create(search_keys, stream);
+  auto const keys_iter    = cudf::detail::make_optional_iterator<Element>(
+    *keys_cdv_ptr, nullate::DYNAMIC{search_keys_have_nulls});
+  thrust::transform(rmm::exec_policy(stream),
+                    input_it,
+                    input_it + num_rows,
+                    keys_iter,
+                    output_it,
+                    search_list_non_nested_types_fn{find_option});
 }
 
 /**
  * @brief Function to search for index of key element(s) in the corresponding rows of a lists
  * column, specialized for nested types.
  */
-template <bool search_key_is_scalar,
-          typename InputIterator,
-          typename OutputIterator,
-          typename SearchKeyType>
+template <typename InputIterator, typename OutputIterator>
 void index_of_nested_types(InputIterator input_it,
                            size_type num_rows,
                            OutputIterator output_it,
                            column_view const& child,
-                           SearchKeyType const& search_keys,
+                           column_view const& search_keys,
                            duplicate_find_option find_option,
                            rmm::cuda_stream_view stream)
 {
-  // Create a `table_view` from the search key(s).
-  // If the input search key is a (nested type) scalar, a new column is materialized from that
-  // scalar before a `table_view` is generated from it. As such, the new created column will also be
-  // returned to keep the result `table_view` valid.
-  [[maybe_unused]] auto const [keys_tview, unused_column] =
-    [&]() -> std::pair<table_view, std::unique_ptr<column>> {
-    if constexpr (search_key_is_scalar) {
-      auto tmp_column = make_column_from_scalar(search_keys, 1, stream);
-      return {table_view{{tmp_column->view()}}, std::move(tmp_column)};
-    } else {
-      return {table_view{{search_keys}}, nullptr};
-    }
-  }();
-
+  auto const keys_tview  = cudf::table_view{{search_keys}};
   auto const child_tview = table_view{{child}};
   auto const has_nulls   = has_nested_nulls(child_tview) || has_nested_nulls(keys_tview);
   auto const comparator =
     cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream);
   auto const d_comp = comparator.equal_to<true>(nullate::DYNAMIC{has_nulls});
 
-  auto const do_search = [=](auto const key_validity_iter) {
-    thrust::transform(
-      rmm::exec_policy(stream),
-      input_it,
-      input_it + num_rows,
-      output_it,
-      search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar});
-  };
-
-  if constexpr (search_key_is_scalar) {
-    auto const key_validity_iter = cudf::detail::make_validity_iterator<true>(search_keys);
-    do_search(key_validity_iter);
-  } else {
-    auto const keys_dv_ptr       = column_device_view::create(search_keys, stream);
-    auto const key_validity_iter = cudf::detail::make_validity_iterator<true>(*keys_dv_ptr);
-    do_search(key_validity_iter);
-  }
+  auto const keys_dv_ptr       = column_device_view::create(search_keys, stream);
+  auto const key_validity_iter = cudf::detail::make_validity_iterator<true>(*keys_dv_ptr);
+  thrust::transform(rmm::exec_policy(stream),
+                    input_it,
+                    input_it + num_rows,
+                    output_it,
+                    search_list_nested_types_fn{find_option, key_validity_iter, d_comp});
 }
 
 /**
@@ -295,10 +250,10 @@ void index_of_nested_types(InputIterator input_it,
 struct dispatch_index_of {
   // SFINAE with conditional return type because we need to support device lambda in this function.
   // This is required due to a limitation of nvcc.
-  template <typename Element, typename SearchKeyType>
+  template <typename Element>
   std::enable_if_t<is_supported_type<Element>(), std::unique_ptr<column>> operator()(
     lists_column_view const& lists,
-    SearchKeyType const& search_keys,
+    column_view const& search_keys,
     duplicate_find_option find_option,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const
@@ -313,27 +268,10 @@ struct dispatch_index_of {
                  cudf::data_type_error);
     CUDF_EXPECTS(search_keys.type().id() != type_id::EMPTY, "Type cannot be empty.");
 
-    auto constexpr search_key_is_scalar = std::is_same_v<SearchKeyType, cudf::scalar>;
-    auto const search_keys_have_nulls   = [&search_keys, stream] {
-      if constexpr (search_key_is_scalar) {
-        return !search_keys.is_valid(stream);
-      } else {
-        return search_keys.has_nulls();
-      }
-    }();
+    auto const search_keys_have_nulls = search_keys.has_nulls();
 
     auto const num_rows = lists.size();
 
-    if (search_key_is_scalar && search_keys_have_nulls) {
-      // If the scalar key is invalid/null, the entire output column will be all nulls.
-      return make_numeric_column(data_type{cudf::type_to_id<size_type>()},
-                                 num_rows,
-                                 cudf::create_null_mask(num_rows, mask_state::ALL_NULL, mr),
-                                 num_rows,
-                                 stream,
-                                 mr);
-    }
-
     auto const lists_cdv_ptr = column_device_view::create(lists.parent(), stream);
     auto const input_it      = cudf::detail::make_counting_transform_iterator(
       size_type{0},
@@ -346,11 +284,10 @@ struct dispatch_index_of {
     auto const output_it = out_positions->mutable_view().template begin<size_type>();
 
     if constexpr (not cudf::is_nested<Element>()) {
-      index_of_non_nested_types<search_key_is_scalar, Element>(
+      index_of_non_nested_types<Element>(
         input_it, num_rows, output_it, search_keys, search_keys_have_nulls, find_option, stream);
     } else {  // list + struct
-      index_of_nested_types<search_key_is_scalar>(
-        input_it, num_rows, output_it, child, search_keys, find_option, stream);
+      index_of_nested_types(input_it, num_rows, output_it, child, search_keys, find_option, stream);
     }
 
     if (search_keys_have_nulls || lists.has_nulls()) {
@@ -414,8 +351,21 @@ std::unique_ptr<column> index_of(lists_column_view const& lists,
                                  rmm::cuda_stream_view stream,
                                  rmm::mr::device_memory_resource* mr)
 {
-  return cudf::type_dispatcher(
-    search_key.type(), dispatch_index_of{}, lists, search_key, find_option, stream, mr);
+  if (!search_key.is_valid(stream)) {
+    return make_numeric_column(data_type{cudf::type_to_id<size_type>()},
+                               lists.size(),
+                               cudf::create_null_mask(lists.size(), mask_state::ALL_NULL, mr),
+                               lists.size(),
+                               stream,
+                               mr);
+  }
+  if (lists.size() == 0) {
+    return make_numeric_column(
+      data_type{type_to_id<size_type>()}, 0, cudf::mask_state::UNALLOCATED, stream, mr);
+  }
+
+  auto search_key_col = cudf::make_column_from_scalar(search_key, lists.size(), stream, mr);
+  return index_of(lists, search_key_col->view(), find_option, stream, mr);
 }
 
 std::unique_ptr<column> index_of(lists_column_view const& lists,

From 11fd25c25e8d593ddfb4e0d29281aaf63898d9ba Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 2 Aug 2023 19:59:04 -0500
Subject: [PATCH 2/5] Fix unbounded sequence issue in `DataFrame` constructor
 (#13811)

In `cudf`, we currently have a hang in this scenario:
```python
In [1]: import cudf

In [2]:     class A:
   ...:         def __getitem__(self, key):
   ...:             return 1
   ...:

In [3]: cudf.DataFrame([A()])
```

This PR introduces additional checks before the list-like inputs are passed on to `itertools` for transposing.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/13811
---
 python/cudf/cudf/core/dataframe.py       |  6 ++++++
 python/cudf/cudf/tests/test_dataframe.py | 12 ++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a510e6829d1..d421258b06b 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -843,6 +843,12 @@ def _init_from_list_like(self, data, index=None, columns=None):
             data = DataFrame.from_pandas(pd.DataFrame(data))
             self._data = data._data
         else:
+            if any(
+                not isinstance(col, (abc.Iterable, abc.Sequence))
+                for col in data
+            ):
+                raise TypeError("Inputs should be an iterable or sequence.")
+
             data = list(itertools.zip_longest(*data))
 
             if columns is not None and len(data) == 0:
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index aad0b757177..e35ab147bf4 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -10243,3 +10243,15 @@ def test_dataframe_init_columns_named_index():
     pdf = pd.DataFrame(data, columns=columns)
 
     assert_eq(gdf, pdf)
+
+
+def test_dataframe_constructor_unbounded_sequence():
+    class A:
+        def __getitem__(self, key):
+            return 1
+
+    with pytest.raises(TypeError):
+        cudf.DataFrame([A()])
+
+    with pytest.raises(TypeError):
+        cudf.DataFrame({"a": A()})

From 399efb960f689085bf671f6fa62916b1020e3b30 Mon Sep 17 00:00:00 2001
From: Ed Seidl <etseidl@users.noreply.github.com>
Date: Wed, 2 Aug 2023 23:50:47 -0700
Subject: [PATCH 3/5] Fix for Parquet writer when requested rows per page is
 smaller than fragment size (#13806)

#12685 introduced a bug in page calculation. If the `max_page_size_rows` parameter is set smaller than the page fragment size, the writer will produce a spurious empty page. This PR fixes this by only checking the fragment size if there are already rows in the page, and then returns to the old check for the number of rows exceeding the page limit.

Interestingly, libcudf can read these files with empty pages just fine, but parquet-mr cannot.

Authors:
  - Ed Seidl (https://github.com/etseidl)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/13806
---
 cpp/src/io/parquet/page_enc.cu | 12 ++++++++---
 cpp/tests/io/parquet_test.cpp  | 38 ++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu
index 05f8bba7477..190f70d0747 100644
--- a/cpp/src/io/parquet/page_enc.cu
+++ b/cpp/src/io/parquet/page_enc.cu
@@ -432,9 +432,15 @@ __global__ void __launch_bounds__(128)
                                 max_RLE_page_size(col_g.num_def_level_bits(), num_vals) +
                                   max_RLE_page_size(col_g.num_rep_level_bits(), num_vals));
 
-      if (num_rows >= ck_g.num_rows ||
-          (values_in_page > 0 && (page_size + fragment_data_size > this_max_page_size)) ||
-          rows_in_page + frag_g.num_rows > max_page_size_rows) {
+      // checks to see when we need to close the current page and start a new one
+      auto const is_last_chunk          = num_rows >= ck_g.num_rows;
+      auto const is_page_bytes_exceeded = page_size + fragment_data_size > this_max_page_size;
+      auto const is_page_rows_exceeded  = rows_in_page + frag_g.num_rows > max_page_size_rows;
+      // only check for limit overflow if there's already at least one fragment for this page
+      auto const is_page_too_big =
+        values_in_page > 0 && (is_page_bytes_exceeded || is_page_rows_exceeded);
+
+      if (is_last_chunk || is_page_too_big) {
         if (ck_g.use_dictionary) {
           // Additional byte to store entry bit width
           page_size = 1 + max_RLE_page_size(ck_g.dict_rle_bits, values_in_page);
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index 4e28f536728..a5054daed19 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -3709,6 +3709,44 @@ TEST_F(ParquetWriterTest, CheckPageRowsAdjusted)
   EXPECT_LE(ph.data_page_header.num_values, rows_per_page);
 }
 
+TEST_F(ParquetWriterTest, CheckPageRowsTooSmall)
+{
+  constexpr auto rows_per_page = 1'000;
+  constexpr auto fragment_size = 5'000;
+  constexpr auto num_rows      = 3 * rows_per_page;
+  const std::string s1(32, 'a');
+  auto col0_elements =
+    cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s1; });
+  auto col0 = cudf::test::strings_column_wrapper(col0_elements, col0_elements + num_rows);
+
+  auto const expected = table_view{{col0}};
+
+  auto const filepath = temp_env->get_temp_filepath("CheckPageRowsTooSmall.parquet");
+  const cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
+      .max_page_fragment_size(fragment_size)
+      .max_page_size_rows(rows_per_page);
+  cudf::io::write_parquet(out_opts);
+
+  // check that file is written correctly when rows/page < fragment size
+  auto const source = cudf::io::datasource::create(filepath);
+  cudf::io::parquet::FileMetaData fmd;
+
+  read_footer(source, &fmd);
+  ASSERT_TRUE(fmd.row_groups.size() > 0);
+  ASSERT_TRUE(fmd.row_groups[0].columns.size() == 1);
+  auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data;
+  ASSERT_TRUE(first_chunk.data_page_offset > 0);
+
+  // read first data page header.  sizeof(PageHeader) is not exact, but the thrift encoded
+  // version should be smaller than size of the struct.
+  auto const ph = read_page_header(
+    source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::PageHeader), 0});
+
+  // there should be only one page since the fragment size is larger than rows_per_page
+  EXPECT_EQ(ph.data_page_header.num_values, num_rows);
+}
+
 TEST_F(ParquetWriterTest, Decimal128Stats)
 {
   // check that decimal128 min and max statistics are written in network byte order

From 9c559c94fcaa1525d7c95faf94e5486fcd992ef1 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 3 Aug 2023 08:22:51 -0400
Subject: [PATCH 4/5] Remove the libcudf cudf::offset_type type (#13788)

Replace all occurrences of `cudf::offset_type` with `cudf::size_type`.
This helps clear up code where sizes are computed and then converted to offsets in-place.

Also, reference #13733

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - https://github.com/brandon-b-miller
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)
  - MithunR (https://github.com/mythrocks)

URL: https://github.com/rapidsai/cudf/pull/13788
---
 cpp/benchmarks/copying/contiguous_split.cu    |  2 +-
 cpp/benchmarks/lists/copying/scatter_lists.cu | 12 ++---
 cpp/examples/strings/custom_prealloc.cu       |  2 +-
 .../cudf/detail/sizes_to_offsets_iterator.cuh |  4 +-
 cpp/include/cudf/lists/detail/scatter.cuh     |  8 ++--
 cpp/include/cudf/lists/lists_column_view.hpp  |  6 +--
 .../cudf/strings/strings_column_view.hpp      |  6 +--
 .../cudf/tdigest/tdigest_column_view.hpp      |  4 +-
 cpp/include/cudf/types.hpp                    |  1 -
 cpp/include/cudf_test/column_utilities.hpp    |  4 +-
 cpp/include/cudf_test/column_wrapper.hpp      |  2 +-
 cpp/src/copying/concatenate.cu                |  6 +--
 cpp/src/copying/contiguous_split.cu           | 15 +++----
 cpp/src/groupby/groupby.cu                    |  2 +-
 cpp/src/groupby/sort/group_collect.cu         |  4 +-
 cpp/src/groupby/sort/group_merge_lists.cu     |  6 +--
 cpp/src/io/csv/writer_impl.cu                 |  8 ++--
 cpp/src/io/json/json_column.cu                |  4 +-
 cpp/src/io/json/write_json.cu                 | 12 ++---
 cpp/src/io/parquet/reader_impl_preprocess.cu  |  4 +-
 cpp/src/io/statistics/statistics.cuh          |  2 +-
 .../combine/concatenate_list_elements.cu      | 12 +++--
 cpp/src/lists/combine/concatenate_rows.cu     | 19 ++++----
 cpp/src/lists/copying/scatter_helper.cu       | 12 ++---
 cpp/src/lists/interleave_columns.cu           | 24 +++++-----
 cpp/src/lists/lists_column_factories.cu       |  8 ++--
 cpp/src/lists/reverse.cu                      |  2 +-
 cpp/src/lists/sequences.cu                    | 10 ++---
 .../stream_compaction/apply_boolean_mask.cu   | 10 ++---
 cpp/src/lists/utilities.cu                    | 10 ++---
 cpp/src/quantiles/tdigest/tdigest.cu          | 16 +++----
 .../quantiles/tdigest/tdigest_aggregation.cu  | 44 +++++++++----------
 cpp/src/rolling/detail/nth_element.cuh        |  4 +-
 cpp/src/rolling/detail/rolling.cuh            |  2 +-
 .../rolling/detail/rolling_collect_list.cu    |  4 +-
 cpp/src/rolling/grouped_rolling.cu            |  4 +-
 cpp/src/strings/capitalize.cu                 | 12 ++---
 cpp/src/strings/combine/concatenate.cu        |  6 +--
 cpp/src/strings/combine/join_list_elements.cu |  6 +--
 cpp/src/strings/convert/convert_booleans.cu   |  2 +-
 cpp/src/strings/convert/convert_durations.cu  |  2 +-
 .../strings/convert/convert_fixed_point.cu    |  2 +-
 cpp/src/strings/convert/convert_hex.cu        |  4 +-
 cpp/src/strings/convert/convert_lists.cu      |  4 +-
 cpp/src/strings/convert/convert_urls.cu       | 19 ++++----
 cpp/src/strings/copying/concatenate.cu        |  4 +-
 cpp/src/strings/copying/shift.cu              | 18 ++++----
 cpp/src/strings/extract/extract_all.cu        |  8 ++--
 cpp/src/strings/json/json_path.cu             | 16 +++----
 cpp/src/strings/padding.cu                    |  4 +-
 cpp/src/strings/repeat_strings.cu             | 10 ++---
 cpp/src/strings/reverse.cu                    |  6 +--
 cpp/src/strings/search/find_multiple.cu       |  6 +--
 cpp/src/strings/search/findall.cu             |  6 +--
 cpp/src/strings/split/split_re.cu             | 12 ++---
 cpp/src/strings/strings_column_view.cpp       |  4 +-
 cpp/src/text/subword/bpe_tokenizer.cu         | 20 ++++-----
 cpp/src/text/subword/load_merges_file.cu      |  2 +-
 cpp/src/transform/row_bit_count.cu            | 12 ++---
 cpp/tests/column/factories_test.cpp           | 12 ++---
 cpp/tests/copying/concatenate_tests.cpp       |  8 ++--
 .../copying/copy_if_else_nested_tests.cpp     |  2 +-
 cpp/tests/copying/gather_struct_tests.cpp     |  4 +-
 cpp/tests/copying/get_value_tests.cpp         | 20 ++++-----
 .../copying/scatter_list_scalar_tests.cpp     |  8 ++--
 cpp/tests/copying/split_tests.cpp             |  6 +--
 cpp/tests/groupby/collect_list_tests.cpp      |  8 ++--
 cpp/tests/io/parquet_test.cpp                 | 12 ++---
 cpp/tests/lists/extract_tests.cpp             |  6 +--
 .../quantiles/percentile_approx_test.cpp      |  4 +-
 cpp/tests/reductions/tdigest_tests.cu         |  6 +--
 cpp/tests/strings/array_tests.cpp             |  2 +-
 cpp/tests/strings/contains_tests.cpp          |  6 +--
 cpp/tests/strings/factories_test.cu           |  8 ++--
 cpp/tests/transform/row_bit_count_test.cu     | 43 +++++++++---------
 cpp/tests/utilities/column_utilities.cu       |  6 +--
 cpp/tests/utilities/tdigest_utilities.cu      |  6 +--
 .../column_utilities_tests.cpp                |  2 +-
 java/src/main/native/src/ColumnViewJni.cu     |  6 +--
 java/src/main/native/src/row_conversion.cu    |  6 +--
 python/cudf/cudf/_lib/cpp/types.pxd           |  1 -
 python/cudf/cudf/_lib/pylibcudf/column.pxd    |  4 +-
 python/cudf/cudf/_lib/pylibcudf/column.pyx    |  4 +-
 83 files changed, 325 insertions(+), 345 deletions(-)

diff --git a/cpp/benchmarks/copying/contiguous_split.cu b/cpp/benchmarks/copying/contiguous_split.cu
index cad5a7c7b58..910fc689c0b 100644
--- a/cpp/benchmarks/copying/contiguous_split.cu
+++ b/cpp/benchmarks/copying/contiguous_split.cu
@@ -151,7 +151,7 @@ void BM_contiguous_split_strings(benchmark::State& state, ContiguousSplitImpl& i
   }
 
   int64_t const total_bytes =
-    total_desired_bytes + ((num_rows + 1) * sizeof(cudf::offset_type)) +
+    total_desired_bytes + ((num_rows + 1) * sizeof(cudf::size_type)) +
     (include_validity ? (max(int64_t{1}, (num_rows / 32)) * sizeof(cudf::bitmask_type) * num_cols)
                       : 0);
 
diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu
index 85d730b94fb..dbc3234dabf 100644
--- a/cpp/benchmarks/lists/copying/scatter_lists.cu
+++ b/cpp/benchmarks/lists/copying/scatter_lists.cu
@@ -62,26 +62,26 @@ void BM_lists_scatter(::benchmark::State& state)
                    target_base_col->mutable_view().end<TypeParam>());
 
   auto source_offsets =
-    make_fixed_width_column(cudf::data_type{cudf::type_to_id<cudf::offset_type>()},
+    make_fixed_width_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()},
                             num_rows + 1,
                             cudf::mask_state::UNALLOCATED,
                             stream,
                             mr);
   auto target_offsets =
-    make_fixed_width_column(cudf::data_type{cudf::type_to_id<cudf::offset_type>()},
+    make_fixed_width_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()},
                             num_rows + 1,
                             cudf::mask_state::UNALLOCATED,
                             stream,
                             mr);
 
   thrust::sequence(rmm::exec_policy(stream),
-                   source_offsets->mutable_view().begin<cudf::offset_type>(),
-                   source_offsets->mutable_view().end<cudf::offset_type>(),
+                   source_offsets->mutable_view().begin<cudf::size_type>(),
+                   source_offsets->mutable_view().end<cudf::size_type>(),
                    0,
                    num_elements_per_row);
   thrust::sequence(rmm::exec_policy(stream),
-                   target_offsets->mutable_view().begin<cudf::offset_type>(),
-                   target_offsets->mutable_view().end<cudf::offset_type>(),
+                   target_offsets->mutable_view().begin<cudf::size_type>(),
+                   target_offsets->mutable_view().end<cudf::size_type>(),
                    0,
                    num_elements_per_row);
 
diff --git a/cpp/examples/strings/custom_prealloc.cu b/cpp/examples/strings/custom_prealloc.cu
index a956550f505..0af4c47e947 100644
--- a/cpp/examples/strings/custom_prealloc.cu
+++ b/cpp/examples/strings/custom_prealloc.cu
@@ -41,7 +41,7 @@ __global__ void redact_kernel(cudf::column_device_view const d_names,
                               cudf::column_device_view const d_visibilities,
                               cudf::string_view redaction,
                               char* working_memory,
-                              cudf::offset_type const* d_offsets,
+                              cudf::size_type const* d_offsets,
                               cudf::string_view* d_output)
 {
   // The row index is resolved from the CUDA thread/block objects
diff --git a/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh b/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh
index 532d66c8483..155b1ce5691 100644
--- a/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh
+++ b/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh
@@ -303,9 +303,9 @@ std::pair<std::unique_ptr<column>, size_type> make_offsets_child_column(
 {
   auto count          = static_cast<size_type>(std::distance(begin, end));
   auto offsets_column = make_numeric_column(
-    data_type{type_to_id<offset_type>()}, count + 1, mask_state::UNALLOCATED, stream, mr);
+    data_type{type_to_id<size_type>()}, count + 1, mask_state::UNALLOCATED, stream, mr);
   auto offsets_view = offsets_column->mutable_view();
-  auto d_offsets    = offsets_view.template data<offset_type>();
+  auto d_offsets    = offsets_view.template data<size_type>();
 
   // The number of offsets is count+1 so to build the offsets from the sizes
   // using exclusive-scan technically requires count+1 input values even though
diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh
index 18cb147d1e4..f04b2fda2bf 100644
--- a/cpp/include/cudf/lists/detail/scatter.cuh
+++ b/cpp/include/cudf/lists/detail/scatter.cuh
@@ -240,11 +240,11 @@ std::unique_ptr<column> scatter(scalar const& slr,
   rmm::device_buffer null_mask =
     slr_valid ? cudf::detail::create_null_mask(1, mask_state::UNALLOCATED, stream, mr)
               : cudf::detail::create_null_mask(1, mask_state::ALL_NULL, stream, mr);
-  auto offset_column = make_numeric_column(
-    data_type{type_to_id<offset_type>()}, 2, mask_state::UNALLOCATED, stream, mr);
+  auto offset_column =
+    make_numeric_column(data_type{type_to_id<size_type>()}, 2, mask_state::UNALLOCATED, stream, mr);
   thrust::sequence(rmm::exec_policy_nosync(stream),
-                   offset_column->mutable_view().begin<offset_type>(),
-                   offset_column->mutable_view().end<offset_type>(),
+                   offset_column->mutable_view().begin<size_type>(),
+                   offset_column->mutable_view().end<size_type>(),
                    0,
                    lv->view().size());
   auto wrapped = column_view(data_type{type_id::LIST},
diff --git a/cpp/include/cudf/lists/lists_column_view.hpp b/cpp/include/cudf/lists/lists_column_view.hpp
index 60bdc654af6..8c6368eacb6 100644
--- a/cpp/include/cudf/lists/lists_column_view.hpp
+++ b/cpp/include/cudf/lists/lists_column_view.hpp
@@ -71,9 +71,7 @@ class lists_column_view : private column_view {
   using column_view::null_mask;
   using column_view::offset;
   using column_view::size;
-  static_assert(std::is_same_v<offset_type, size_type>,
-                "offset_type is expected to be the same as size_type.");
-  using offset_iterator = offset_type const*;  ///< Iterator type for offsets
+  using offset_iterator = size_type const*;  ///< Iterator type for offsets
 
   /**
    * @brief Returns the parent column.
@@ -119,7 +117,7 @@ class lists_column_view : private column_view {
    */
   [[nodiscard]] offset_iterator offsets_begin() const noexcept
   {
-    return offsets().begin<offset_type>() + offset();
+    return offsets().begin<size_type>() + offset();
   }
 
   /**
diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp
index e617dbde024..f1aa8e49f00 100644
--- a/cpp/include/cudf/strings/strings_column_view.hpp
+++ b/cpp/include/cudf/strings/strings_column_view.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -67,8 +67,8 @@ class strings_column_view : private column_view {
   using column_view::offset;
   using column_view::size;
 
-  using offset_iterator = offset_type const*;  ///< offsets iterator type
-  using chars_iterator  = char const*;         ///< character iterator type
+  using offset_iterator = size_type const*;  ///< offsets iterator type
+  using chars_iterator  = char const*;       ///< character iterator type
 
   /**
    * @brief Returns the parent column.
diff --git a/cpp/include/cudf/tdigest/tdigest_column_view.hpp b/cpp/include/cudf/tdigest/tdigest_column_view.hpp
index 89903c24c21..f2f493cbbe4 100644
--- a/cpp/include/cudf/tdigest/tdigest_column_view.hpp
+++ b/cpp/include/cudf/tdigest/tdigest_column_view.hpp
@@ -67,9 +67,7 @@ class tdigest_column_view : private column_view {
   tdigest_column_view& operator=(tdigest_column_view&&) = default;
 
   using column_view::size;
-  static_assert(std::is_same_v<offset_type, size_type>,
-                "offset_type is expected to be the same as size_type.");
-  using offset_iterator = offset_type const*;  ///< Iterator over offsets
+  using offset_iterator = size_type const*;  ///< Iterator over offsets
 
   // mean and weight column indices within tdigest inner struct columns
   static constexpr size_type mean_column_index{0};    ///< Mean column index
diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp
index 6991a90b31b..addab160b6e 100644
--- a/cpp/include/cudf/types.hpp
+++ b/cpp/include/cudf/types.hpp
@@ -80,7 +80,6 @@ class mutable_table_view;
 using size_type         = int32_t;   ///< Row index type for columns and tables
 using bitmask_type      = uint32_t;  ///< Bitmask type stored as 32-bit unsigned integer
 using valid_type        = uint8_t;   ///< Valid type in host memory
-using offset_type       = int32_t;   ///< Offset type for column offsets
 using thread_index_type = int64_t;   ///< Thread index type in kernels
 
 /**
diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp
index f288c30e313..059bd10eae1 100644
--- a/cpp/include/cudf_test/column_utilities.hpp
+++ b/cpp/include/cudf_test/column_utilities.hpp
@@ -261,8 +261,8 @@ inline std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to
       cudf::device_span<char const>(scv.chars().data<char>(), scv.chars().size()),
       cudf::get_default_stream());
     auto const h_offsets = cudf::detail::make_std_vector_sync(
-      cudf::device_span<cudf::offset_type const>(
-        scv.offsets().data<cudf::offset_type>() + scv.offset(), scv.size() + 1),
+      cudf::device_span<cudf::size_type const>(scv.offsets().data<cudf::size_type>() + scv.offset(),
+                                               scv.size() + 1),
       cudf::get_default_stream());
 
     // build std::string vector from chars and offsets
diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp
index 9c1afc64550..1e311322de1 100644
--- a/cpp/include/cudf_test/column_wrapper.hpp
+++ b/cpp/include/cudf_test/column_wrapper.hpp
@@ -1507,7 +1507,7 @@ class lists_column_wrapper : public detail::column_wrapper {
    */
   static lists_column_wrapper<T> make_one_empty_row_column(bool valid = true)
   {
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{0, 0};
+    cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 0};
     cudf::test::fixed_width_column_wrapper<int> values{};
     return lists_column_wrapper<T>(
       1,
diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu
index 170eccbcb09..a53ec295512 100644
--- a/cpp/src/copying/concatenate.cu
+++ b/cpp/src/copying/concatenate.cu
@@ -408,15 +408,15 @@ void traverse_children::operator()<cudf::string_view>(host_span<column_view cons
       return a + (scv.is_empty() ? 0
                   // if the column is unsliced, skip the offset retrieval.
                   : scv.offset() > 0
-                    ? cudf::detail::get_value<offset_type>(
+                    ? cudf::detail::get_value<size_type>(
                         scv.offsets(), scv.offset() + scv.size(), stream) -
-                        cudf::detail::get_value<offset_type>(scv.offsets(), scv.offset(), stream)
+                        cudf::detail::get_value<size_type>(scv.offsets(), scv.offset(), stream)
                   // if the offset() is 0, it can still be sliced to a shorter length. in this case
                   // we only need to read a single offset. otherwise just return the full length
                   // (chars_size())
                   : scv.size() + 1 == scv.offsets().size()
                     ? scv.chars_size()
-                    : cudf::detail::get_value<offset_type>(scv.offsets(), scv.size(), stream));
+                    : cudf::detail::get_value<size_type>(scv.offsets(), scv.size(), stream));
     });
   CUDF_EXPECTS(total_char_count <= static_cast<size_t>(std::numeric_limits<size_type>::max()),
                "Total number of concatenated chars exceeds the column size limit",
diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
index 6c61af0050d..e1a55ec5419 100644
--- a/cpp/src/copying/contiguous_split.cu
+++ b/cpp/src/copying/contiguous_split.cu
@@ -939,7 +939,7 @@ struct batch_byte_size_function {
  * @brief Get the input buffer index given the output buffer index.
  */
 struct out_to_in_index_function {
-  offset_type const* const batch_offsets;
+  size_type const* const batch_offsets;
   int const num_bufs;
   __device__ int operator()(size_type i) const
   {
@@ -1312,7 +1312,7 @@ std::unique_ptr<packed_partition_buf_size_and_dst_buf_info> compute_splits(
  */
 struct chunk_iteration_state {
   chunk_iteration_state(rmm::device_uvector<dst_buf_info> _d_batched_dst_buf_info,
-                        rmm::device_uvector<offset_type> _d_batch_offsets,
+                        rmm::device_uvector<size_type> _d_batch_offsets,
                         std::vector<std::size_t>&& _h_num_buffs_per_iteration,
                         std::vector<std::size_t>&& _h_size_of_buffs_per_iteration,
                         std::size_t total_size)
@@ -1375,11 +1375,10 @@ struct chunk_iteration_state {
   bool has_more_copies() const { return current_iteration < num_iterations; }
 
   rmm::device_uvector<dst_buf_info> d_batched_dst_buf_info;  ///< dst_buf_info per 1MB batch
-  rmm::device_uvector<offset_type> const
-    d_batch_offsets;             ///< Offset within a batch per dst_buf_info
-  std::size_t const total_size;  ///< The aggregate size of all iterations
-  int const num_iterations;      ///< The total number of iterations
-  int current_iteration;         ///< Marks the current iteration being worked on
+  rmm::device_uvector<size_type> const d_batch_offsets;  ///< Offset within a batch per dst_buf_info
+  std::size_t const total_size;                          ///< The aggregate size of all iterations
+  int const num_iterations;                              ///< The total number of iterations
+  int current_iteration;  ///< Marks the current iteration being worked on
 
  private:
   std::size_t starting_batch;  ///< Starting batch index for the current iteration
@@ -1398,7 +1397,7 @@ std::unique_ptr<chunk_iteration_state> chunk_iteration_state::create(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* temp_mr)
 {
-  rmm::device_uvector<offset_type> d_batch_offsets(num_bufs + 1, stream, temp_mr);
+  rmm::device_uvector<size_type> d_batch_offsets(num_bufs + 1, stream, temp_mr);
 
   auto const buf_count_iter = cudf::detail::make_counting_transform_iterator(
     0, [num_bufs, num_batches = num_batches_func{batches.begin()}] __device__(size_type i) {
diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu
index eddc748df7c..ce1fc71968f 100644
--- a/cpp/src/groupby/groupby.cu
+++ b/cpp/src/groupby/groupby.cu
@@ -107,7 +107,7 @@ struct empty_column_constructor {
 
     if constexpr (k == aggregation::Kind::COLLECT_LIST || k == aggregation::Kind::COLLECT_SET) {
       return make_lists_column(
-        0, make_empty_column(type_to_id<offset_type>()), empty_like(values), 0, {});
+        0, make_empty_column(type_to_id<size_type>()), empty_like(values), 0, {});
     }
 
     if constexpr (k == aggregation::Kind::RANK) {
diff --git a/cpp/src/groupby/sort/group_collect.cu b/cpp/src/groupby/sort/group_collect.cu
index c61a998a40c..f95ad72f453 100644
--- a/cpp/src/groupby/sort/group_collect.cu
+++ b/cpp/src/groupby/sort/group_collect.cu
@@ -96,12 +96,12 @@ std::unique_ptr<column> group_collect(column_view const& values,
   auto [child_column,
         offsets_column] = [null_handling, num_groups, &values, &group_offsets, stream, mr] {
     auto offsets_column = make_numeric_column(
-      data_type(type_to_id<offset_type>()), num_groups + 1, mask_state::UNALLOCATED, stream, mr);
+      data_type(type_to_id<size_type>()), num_groups + 1, mask_state::UNALLOCATED, stream, mr);
 
     thrust::copy(rmm::exec_policy(stream),
                  group_offsets.begin(),
                  group_offsets.end(),
-                 offsets_column->mutable_view().template begin<offset_type>());
+                 offsets_column->mutable_view().template begin<size_type>());
 
     // If column of grouped values contains null elements, and null_policy == EXCLUDE,
     // those elements must be filtered out, and offsets recomputed.
diff --git a/cpp/src/groupby/sort/group_merge_lists.cu b/cpp/src/groupby/sort/group_merge_lists.cu
index 3043d107635..2c72128dbfb 100644
--- a/cpp/src/groupby/sort/group_merge_lists.cu
+++ b/cpp/src/groupby/sort/group_merge_lists.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@ std::unique_ptr<column> group_merge_lists(column_view const& values,
                "Input to `group_merge_lists` must be a non-nullable lists column.");
 
   auto offsets_column = make_numeric_column(
-    data_type(type_to_id<offset_type>()), num_groups + 1, mask_state::UNALLOCATED, stream, mr);
+    data_type(type_to_id<size_type>()), num_groups + 1, mask_state::UNALLOCATED, stream, mr);
 
   // Generate offsets of the output lists column by gathering from the provided group offsets and
   // the input list offsets.
@@ -54,7 +54,7 @@ std::unique_ptr<column> group_merge_lists(column_view const& values,
                  group_offsets.begin(),
                  group_offsets.end(),
                  lists_column_view(values).offsets_begin(),
-                 offsets_column->mutable_view().template begin<offset_type>());
+                 offsets_column->mutable_view().template begin<size_type>());
 
   // The child column of the output lists column is just copied from the input column.
   auto child_column =
diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu
index 12e9fccdee7..8c586306ad5 100644
--- a/cpp/src/io/csv/writer_impl.cu
+++ b/cpp/src/io/csv/writer_impl.cu
@@ -75,10 +75,10 @@ namespace {
 struct escape_strings_fn {
   column_device_view const d_column;
   string_view const d_delimiter;  // check for column delimiter
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
-  __device__ void write_char(char_utf8 chr, char*& d_buffer, offset_type& bytes)
+  __device__ void write_char(char_utf8 chr, char*& d_buffer, size_type& bytes)
   {
     if (d_buffer)
       d_buffer += cudf::strings::detail::from_char_utf8(chr, d_buffer);
@@ -105,8 +105,8 @@ struct escape_strings_fn {
         return chr == quote || chr == new_line || chr == d_delimiter[0];
       });
 
-    char* d_buffer    = d_chars ? d_chars + d_offsets[idx] : nullptr;
-    offset_type bytes = 0;
+    char* d_buffer  = d_chars ? d_chars + d_offsets[idx] : nullptr;
+    size_type bytes = 0;
 
     if (quote_row) write_char(quote, d_buffer, bytes);
     for (auto chr : d_str) {
diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index 3a79d832d06..b18637c86d7 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -363,8 +363,8 @@ std::vector<std::string> copy_strings_to_host(device_span<SymbolT const> input,
       cudf::device_span<char const>(scv.chars().data<char>(), scv.chars().size()),
       cudf::get_default_stream());
     auto const h_offsets = cudf::detail::make_std_vector_sync(
-      cudf::device_span<cudf::offset_type const>(
-        scv.offsets().data<cudf::offset_type>() + scv.offset(), scv.size() + 1),
+      cudf::device_span<cudf::size_type const>(scv.offsets().data<cudf::size_type>() + scv.offset(),
+                                               scv.size() + 1),
       cudf::get_default_stream());
 
     // build std::string vector from chars and offsets
diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu
index 9ecf77a798a..6ee16f8866e 100644
--- a/cpp/src/io/json/write_json.cu
+++ b/cpp/src/io/json/write_json.cu
@@ -75,10 +75,10 @@ namespace {
 struct escape_strings_fn {
   column_device_view const d_column;
   bool const append_colon{false};
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
-  __device__ void write_char(char_utf8 chr, char*& d_buffer, offset_type& bytes)
+  __device__ void write_char(char_utf8 chr, char*& d_buffer, size_type& bytes)
   {
     if (d_buffer)
       d_buffer += cudf::strings::detail::from_char_utf8(chr, d_buffer);
@@ -91,7 +91,7 @@ struct escape_strings_fn {
     return nibble < 10 ? '0' + nibble : 'a' + nibble - 10;
   }
 
-  __device__ void write_utf8_codepoint(uint16_t codepoint, char*& d_buffer, offset_type& bytes)
+  __device__ void write_utf8_codepoint(uint16_t codepoint, char*& d_buffer, size_type& bytes)
   {
     if (d_buffer) {
       d_buffer[0] = '\\';
@@ -106,7 +106,7 @@ struct escape_strings_fn {
     }
   }
 
-  __device__ void write_utf16_codepoint(uint32_t codepoint, char*& d_buffer, offset_type& bytes)
+  __device__ void write_utf16_codepoint(uint32_t codepoint, char*& d_buffer, size_type& bytes)
   {
     constexpr uint16_t UTF16_HIGH_SURROGATE_BEGIN = 0xD800;
     constexpr uint16_t UTF16_LOW_SURROGATE_BEGIN  = 0xDC00;
@@ -130,8 +130,8 @@ struct escape_strings_fn {
     constexpr char_utf8 const quote = '\"';  // wrap quotes
     bool constexpr quote_row        = true;
 
-    char* d_buffer    = d_chars ? d_chars + d_offsets[idx] : nullptr;
-    offset_type bytes = 0;
+    char* d_buffer  = d_chars ? d_chars + d_offsets[idx] : nullptr;
+    size_type bytes = 0;
 
     if (quote_row) write_char(quote, d_buffer, bytes);
     for (auto utf8_char : d_str) {
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu
index 8c3bdabe6b4..c7e3de03312 100644
--- a/cpp/src/io/parquet/reader_impl_preprocess.cu
+++ b/cpp/src/io/parquet/reader_impl_preprocess.cu
@@ -1016,7 +1016,7 @@ struct row_size_functor {
 template <>
 __device__ size_t row_size_functor::operator()<list_view>(size_t num_rows, bool nullable)
 {
-  auto const offset_size = sizeof(offset_type);
+  auto const offset_size = sizeof(size_type);
   // NOTE: Adding the + 1 offset here isn't strictly correct.  There will only be 1 extra offset
   // for the entire column, whereas this is adding an extra offset per page.  So we will get a
   // small over-estimate of the real size of the order :  # of pages * 4 bytes. It seems better
@@ -1036,7 +1036,7 @@ __device__ size_t row_size_functor::operator()<string_view>(size_t num_rows, boo
 {
   // only returns the size of offsets and validity. the size of the actual string chars
   // is tracked separately.
-  auto const offset_size = sizeof(offset_type);
+  auto const offset_size = sizeof(size_type);
   // see note about offsets in the list_view template.
   return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable);
 }
diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh
index 89b26fd731a..805ca43553e 100644
--- a/cpp/src/io/statistics/statistics.cuh
+++ b/cpp/src/io/statistics/statistics.cuh
@@ -132,7 +132,7 @@ __device__ T get_element(column_device_view const& col, uint32_t row)
 {
   using et              = typename T::element_type;
   size_type const index = row + col.offset();  // account for this view's _offset
-  auto const* d_offsets = col.child(lists_column_view::offsets_column_index).data<offset_type>();
+  auto const* d_offsets = col.child(lists_column_view::offsets_column_index).data<size_type>();
   auto const* d_data    = col.child(lists_column_view::child_column_index).data<et>();
   auto const offset     = d_offsets[index];
   return T(d_data + offset, d_offsets[index + 1] - offset);
diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu
index fc3d8a9728a..3b00d7bd26e 100644
--- a/cpp/src/lists/combine/concatenate_list_elements.cu
+++ b/cpp/src/lists/combine/concatenate_list_elements.cu
@@ -53,11 +53,10 @@ std::unique_ptr<column> concatenate_lists_ignore_null(column_view const& input,
 {
   auto const num_rows = input.size();
 
-  static_assert(std::is_same_v<offset_type, int32_t> && std::is_same_v<size_type, int32_t>);
   auto out_offsets = make_numeric_column(
-    data_type{type_id::INT32}, num_rows + 1, mask_state::UNALLOCATED, stream, mr);
+    data_type{type_to_id<size_type>()}, num_rows + 1, mask_state::UNALLOCATED, stream, mr);
 
-  auto const d_out_offsets  = out_offsets->mutable_view().template begin<offset_type>();
+  auto const d_out_offsets  = out_offsets->mutable_view().template begin<size_type>();
   auto const d_row_offsets  = lists_column_view(input).offsets_begin();
   auto const d_list_offsets = lists_column_view(lists_column_view(input).child()).offsets_begin();
 
@@ -121,13 +120,12 @@ generate_list_offsets_and_validities(column_view const& input,
 {
   auto const num_rows = input.size();
 
-  static_assert(std::is_same_v<offset_type, int32_t> && std::is_same_v<size_type, int32_t>);
   auto out_offsets = make_numeric_column(
-    data_type{type_id::INT32}, num_rows + 1, mask_state::UNALLOCATED, stream, mr);
+    data_type{type_to_id<size_type>()}, num_rows + 1, mask_state::UNALLOCATED, stream, mr);
 
   auto const lists_of_lists_dv_ptr = column_device_view::create(input, stream);
   auto const lists_dv_ptr   = column_device_view::create(lists_column_view(input).child(), stream);
-  auto const d_out_offsets  = out_offsets->mutable_view().template begin<offset_type>();
+  auto const d_out_offsets  = out_offsets->mutable_view().template begin<size_type>();
   auto const d_row_offsets  = lists_column_view(input).offsets_begin();
   auto const d_list_offsets = lists_column_view(lists_column_view(input).child()).offsets_begin();
 
@@ -198,7 +196,7 @@ std::unique_ptr<column> gather_list_entries(column_view const& input,
      d_list_offsets,
      d_indices = gather_map.begin(),
      d_out_list_offsets =
-       output_list_offsets.template begin<offset_type>()] __device__(size_type const idx) {
+       output_list_offsets.template begin<size_type>()] __device__(size_type const idx) {
       // The output row has been identified as a null/empty list during list size computation.
       if (d_out_list_offsets[idx + 1] == d_out_list_offsets[idx]) { return; }
 
diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu
index 993d5e3fc78..658538b0195 100644
--- a/cpp/src/lists/combine/concatenate_rows.cu
+++ b/cpp/src/lists/combine/concatenate_rows.cu
@@ -77,11 +77,8 @@ generate_regrouped_offsets_and_null_mask(table_device_view const& input,
                                          rmm::mr::device_memory_resource* mr)
 {
   // outgoing offsets.
-  auto offsets = cudf::make_fixed_width_column(data_type{type_to_id<offset_type>()},
-                                               input.num_rows() + 1,
-                                               mask_state::UNALLOCATED,
-                                               stream,
-                                               mr);
+  auto offsets = cudf::make_fixed_width_column(
+    data_type{type_to_id<size_type>()}, input.num_rows() + 1, mask_state::UNALLOCATED, stream, mr);
 
   auto keys = thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}),
                                               [num_columns = input.num_columns()] __device__(
@@ -91,7 +88,7 @@ generate_regrouped_offsets_and_null_mask(table_device_view const& input,
   auto values = thrust::make_transform_iterator(
     thrust::make_counting_iterator(size_t{0}),
     [input, row_null_counts = row_null_counts.data(), null_policy] __device__(
-      size_t i) -> offset_type {
+      size_t i) -> size_type {
       auto const col_index = i % input.num_columns();
       auto const row_index = i / input.num_columns();
 
@@ -105,7 +102,7 @@ generate_regrouped_offsets_and_null_mask(table_device_view const& input,
         }
       }
       auto offsets =
-        input.column(col_index).child(lists_column_view::offsets_column_index).data<offset_type>() +
+        input.column(col_index).child(lists_column_view::offsets_column_index).data<size_type>() +
         input.column(col_index).offset();
       return offsets[row_index + 1] - offsets[row_index];
     });
@@ -115,13 +112,13 @@ generate_regrouped_offsets_and_null_mask(table_device_view const& input,
                         keys + (input.num_rows() * input.num_columns()),
                         values,
                         thrust::make_discard_iterator(),
-                        offsets->mutable_view().begin<offset_type>());
+                        offsets->mutable_view().begin<size_type>());
 
   // convert to offsets
   thrust::exclusive_scan(rmm::exec_policy(stream),
-                         offsets->view().begin<offset_type>(),
-                         offsets->view().begin<offset_type>() + input.num_rows() + 1,
-                         offsets->mutable_view().begin<offset_type>(),
+                         offsets->view().begin<size_type>(),
+                         offsets->view().begin<size_type>() + input.num_rows() + 1,
+                         offsets->mutable_view().begin<size_type>(),
                          0);
 
   // generate appropriate null mask
diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu
index 2cb0671c2dc..ca5358798c0 100644
--- a/cpp/src/lists/copying/scatter_helper.cu
+++ b/cpp/src/lists/copying/scatter_helper.cu
@@ -189,7 +189,7 @@ struct list_child_constructor {
       thrust::make_counting_iterator(0),
       thrust::make_counting_iterator(child_column->size()),
       child_column->mutable_view().begin<T>(),
-      [offset_begin  = list_offsets.begin<offset_type>(),
+      [offset_begin  = list_offsets.begin<size_type>(),
        offset_size   = list_offsets.size(),
        d_list_vector = list_vector.begin(),
        source_lists,
@@ -241,7 +241,7 @@ struct list_child_constructor {
       thrust::make_counting_iterator<size_type>(0),
       thrust::make_counting_iterator<size_type>(string_views.size()),
       string_views.begin(),
-      [offset_begin  = list_offsets.begin<offset_type>(),
+      [offset_begin  = list_offsets.begin<size_type>(),
        offset_size   = list_offsets.size(),
        d_list_vector = list_vector.begin(),
        source_lists,
@@ -255,7 +255,7 @@ struct list_child_constructor {
         auto row_index         = d_list_vector[list_index].row_index();
         auto actual_list_row = d_list_vector[list_index].bind_to_column(source_lists, target_lists);
         auto lists_column    = actual_list_row.get_column();
-        auto lists_offsets_ptr    = lists_column.offsets().template data<offset_type>();
+        auto lists_offsets_ptr    = lists_column.offsets().template data<size_type>();
         auto child_strings_column = lists_column.child();
         auto strings_offset       = lists_offsets_ptr[row_index] + intra_index;
 
@@ -308,7 +308,7 @@ struct list_child_constructor {
       thrust::make_counting_iterator<size_type>(0),
       thrust::make_counting_iterator<size_type>(child_list_views.size()),
       child_list_views.begin(),
-      [offset_begin  = list_offsets.begin<offset_type>(),
+      [offset_begin  = list_offsets.begin<size_type>(),
        offset_size   = list_offsets.size(),
        d_list_vector = list_vector.begin(),
        source_lists,
@@ -323,10 +323,10 @@ struct list_child_constructor {
         auto actual_list_row = d_list_vector[list_index].bind_to_column(source_lists, target_lists);
         auto lists_column    = actual_list_row.get_column();
         auto child_lists_column = lists_column.child();
-        auto lists_offsets_ptr  = lists_column.offsets().template data<offset_type>();
+        auto lists_offsets_ptr  = lists_column.offsets().template data<size_type>();
         auto child_lists_offsets_ptr =
           child_lists_column.child(lists_column_view::offsets_column_index)
-            .template data<offset_type>();
+            .template data<size_type>();
         auto child_row_index = lists_offsets_ptr[row_index] + intra_index;
         auto size =
           child_lists_offsets_ptr[child_row_index + 1] - child_lists_offsets_ptr[child_row_index];
diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu
index f76aaadaf7b..e80d63939ea 100644
--- a/cpp/src/lists/interleave_columns.cu
+++ b/cpp/src/lists/interleave_columns.cu
@@ -58,11 +58,9 @@ generate_list_offsets_and_validities(table_view const& input,
   auto const table_dv_ptr     = table_device_view::create(input, stream);
 
   // The output offsets column.
-  static_assert(sizeof(offset_type) == sizeof(int32_t));
-  static_assert(sizeof(size_type) == sizeof(int32_t));
   auto list_offsets = make_numeric_column(
-    data_type{type_id::INT32}, num_output_lists + 1, mask_state::UNALLOCATED, stream, mr);
-  auto const d_offsets = list_offsets->mutable_view().template begin<offset_type>();
+    data_type{type_to_id<size_type>()}, num_output_lists + 1, mask_state::UNALLOCATED, stream, mr);
+  auto const d_offsets = list_offsets->mutable_view().template begin<size_type>();
 
   // The array of int8_t to store validities for list elements.
   auto validities = rmm::device_uvector<int8_t>(has_null_mask ? num_output_lists : 0, stream);
@@ -82,7 +80,7 @@ generate_list_offsets_and_validities(table_view const& input,
       auto const& lists_col = table_dv.column(col_id);
       if (has_null_mask) { d_validities[idx] = static_cast<int8_t>(lists_col.is_valid(list_id)); }
       auto const list_offsets =
-        lists_col.child(lists_column_view::offsets_column_index).template data<offset_type>() +
+        lists_col.child(lists_column_view::offsets_column_index).template data<size_type>() +
         lists_col.offset();
       return list_offsets[list_id + 1] - list_offsets[list_id];
     });
@@ -139,13 +137,13 @@ struct compute_string_sizes_and_interleave_lists_fn {
   table_device_view const table_dv;
 
   // Store list offsets of the output lists column.
-  offset_type const* const dst_list_offsets;
+  size_type const* const dst_list_offsets;
 
   // Flag to specify whether to compute string validities.
   bool const has_null_mask;
 
   // Store offsets of the strings.
-  offset_type* d_offsets{nullptr};
+  size_type* d_offsets{nullptr};
 
   // If d_chars == nullptr: only compute sizes and validities of the output strings.
   // If d_chars != nullptr: only interleave lists of strings.
@@ -164,11 +162,11 @@ struct compute_string_sizes_and_interleave_lists_fn {
     if (has_null_mask and lists_col.is_null(list_id)) { return; }
 
     auto const list_offsets =
-      lists_col.child(lists_column_view::offsets_column_index).template data<offset_type>() +
+      lists_col.child(lists_column_view::offsets_column_index).template data<size_type>() +
       lists_col.offset();
     auto const& str_col = lists_col.child(lists_column_view::child_column_index);
     auto const str_offsets =
-      str_col.child(strings_column_view::offsets_column_index).template data<offset_type>();
+      str_col.child(strings_column_view::offsets_column_index).template data<size_type>();
 
     // The range of indices of the strings within the source list.
     auto const start_str_idx = list_offsets[list_id];
@@ -224,7 +222,7 @@ struct interleave_list_entries_impl<T, std::enable_if_t<std::is_same_v<T, cudf::
   {
     auto const table_dv_ptr = table_device_view::create(input, stream);
     auto comp_fn            = compute_string_sizes_and_interleave_lists_fn{
-      *table_dv_ptr, output_list_offsets.template begin<offset_type>(), data_has_null_mask};
+      *table_dv_ptr, output_list_offsets.template begin<size_type>(), data_has_null_mask};
 
     auto validities =
       rmm::device_uvector<int8_t>(data_has_null_mask ? num_output_entries : 0, stream);
@@ -276,14 +274,14 @@ struct interleave_list_entries_impl<T, std::enable_if_t<cudf::is_fixed_width<T>(
       [num_cols,
        table_dv     = *table_dv_ptr,
        d_validities = validities.begin(),
-       d_offsets    = output_list_offsets.template begin<offset_type>(),
+       d_offsets    = output_list_offsets.template begin<size_type>(),
        d_output     = output_dv_ptr->template begin<T>(),
        data_has_null_mask] __device__(size_type const idx) {
         auto const col_id     = idx % num_cols;
         auto const list_id    = idx / num_cols;
         auto const& lists_col = table_dv.column(col_id);
         auto const list_offsets =
-          lists_col.child(lists_column_view::offsets_column_index).template data<offset_type>() +
+          lists_col.child(lists_column_view::offsets_column_index).template data<size_type>() +
           lists_col.offset();
         auto const& data_col = lists_col.child(lists_column_view::child_column_index);
 
@@ -384,7 +382,7 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
   // specialized for different types.
   auto const num_output_lists = input.num_rows() * input.num_columns();
   auto const num_output_entries =
-    cudf::detail::get_value<offset_type>(offsets_view, num_output_lists, stream);
+    cudf::detail::get_value<size_type>(offsets_view, num_output_lists, stream);
   auto const data_has_null_mask =
     std::any_of(std::cbegin(input), std::cend(input), [](auto const& col) {
       return col.child(lists_column_view::child_column_index).nullable();
diff --git a/cpp/src/lists/lists_column_factories.cu b/cpp/src/lists/lists_column_factories.cu
index 754735f5a5b..7f82d32d327 100644
--- a/cpp/src/lists/lists_column_factories.cu
+++ b/cpp/src/lists/lists_column_factories.cu
@@ -39,7 +39,7 @@ std::unique_ptr<cudf::column> make_lists_column_from_scalar(list_scalar const& v
 {
   if (size == 0) {
     return make_lists_column(0,
-                             make_empty_column(type_to_id<offset_type>()),
+                             make_empty_column(type_to_id<size_type>()),
                              empty_like(value.view()),
                              0,
                              cudf::detail::create_null_mask(0, mask_state::UNALLOCATED, stream, mr),
@@ -50,7 +50,7 @@ std::unique_ptr<cudf::column> make_lists_column_from_scalar(list_scalar const& v
 
   // Handcraft a 1-row column
   auto offsets = make_numeric_column(
-    data_type{type_to_id<offset_type>()}, 2, mask_state::UNALLOCATED, stream, mr_final);
+    data_type{type_to_id<size_type>()}, 2, mask_state::UNALLOCATED, stream, mr_final);
   auto m_offsets = offsets->mutable_view();
   thrust::sequence(rmm::exec_policy(stream),
                    m_offsets.begin<size_type>(),
@@ -90,7 +90,7 @@ std::unique_ptr<column> make_empty_lists_column(data_type child_type,
                                                 rmm::cuda_stream_view stream,
                                                 rmm::mr::device_memory_resource* mr)
 {
-  auto offsets = make_empty_column(data_type(type_to_id<offset_type>()));
+  auto offsets = make_empty_column(data_type(type_to_id<size_type>()));
   auto child   = make_empty_column(child_type);
   return make_lists_column(
     0, std::move(offsets), std::move(child), 0, rmm::device_buffer{}, stream, mr);
@@ -103,7 +103,7 @@ std::unique_ptr<column> make_all_nulls_lists_column(size_type size,
 {
   auto offsets = [&] {
     auto offsets_buff =
-      cudf::detail::make_zeroed_device_uvector_async<offset_type>(size + 1, stream, mr);
+      cudf::detail::make_zeroed_device_uvector_async<size_type>(size + 1, stream, mr);
     return std::make_unique<column>(std::move(offsets_buff), rmm::device_buffer{}, 0);
   }();
   auto child     = make_empty_column(child_type);
diff --git a/cpp/src/lists/reverse.cu b/cpp/src/lists/reverse.cu
index d606f11bdb9..a2af85b5dad 100644
--- a/cpp/src/lists/reverse.cu
+++ b/cpp/src/lists/reverse.cu
@@ -56,7 +56,7 @@ std::unique_ptr<column> reverse(lists_column_view const& input,
   thrust::for_each_n(rmm::exec_policy(stream),
                      thrust::counting_iterator<size_type>(0),
                      child.size(),
-                     [list_offsets = out_offsets->view().begin<offset_type>(),
+                     [list_offsets = out_offsets->view().begin<size_type>(),
                       list_indices = labels->view().begin<size_type>(),
                       gather_map   = gather_map.begin()] __device__(auto const idx) {
                        auto const list_idx     = list_indices[idx];
diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu
index 8e1e6c37a95..aaee5608cc3 100644
--- a/cpp/src/lists/sequences.cu
+++ b/cpp/src/lists/sequences.cu
@@ -47,7 +47,7 @@ struct tabulator {
 
   T const* const starts;
   T const* const steps;
-  offset_type const* const offsets;
+  size_type const* const offsets;
 
   template <typename U>
   static std::enable_if_t<!cudf::is_duration<U>(), T> __device__ multiply(U x, size_type times)
@@ -86,7 +86,7 @@ struct sequences_dispatcher {
                                      size_type n_elements,
                                      column_view const& starts,
                                      std::optional<column_view> const& steps,
-                                     offset_type const* offsets,
+                                     size_type const* offsets,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
   {
@@ -106,7 +106,7 @@ struct sequences_functor<T, std::enable_if_t<is_supported<T>()>> {
                                         size_type n_elements,
                                         column_view const& starts,
                                         std::optional<column_view> const& steps,
-                                        offset_type const* offsets,
+                                        size_type const* offsets,
                                         rmm::cuda_stream_view stream,
                                         rmm::mr::device_memory_resource* mr)
   {
@@ -154,8 +154,8 @@ std::unique_ptr<column> sequences(column_view const& starts,
 
   // Generate list offsets for the output.
   auto list_offsets = make_numeric_column(
-    data_type(type_to_id<offset_type>()), n_lists + 1, mask_state::UNALLOCATED, stream, mr);
-  auto const offsets_begin  = list_offsets->mutable_view().template begin<offset_type>();
+    data_type(type_to_id<size_type>()), n_lists + 1, mask_state::UNALLOCATED, stream, mr);
+  auto const offsets_begin  = list_offsets->mutable_view().template begin<size_type>();
   auto const sizes_input_it = cudf::detail::indexalator_factory::make_input_iterator(sizes);
   // First copy the sizes since the exclusive_scan tries to read (n_lists+1) values
   thrust::copy_n(rmm::exec_policy(stream), sizes_input_it, sizes.size(), offsets_begin);
diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu
index 0aaa8356304..ad43fbd5b00 100644
--- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu
+++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu
@@ -74,7 +74,7 @@ std::unique_ptr<column> apply_boolean_mask(lists_column_view const& input,
                                              stream,
                                              rmm::mr::get_current_device_resource());
     auto const d_sizes     = column_device_view::create(*sizes, stream);
-    auto const sizes_begin = cudf::detail::make_null_replacement_iterator(*d_sizes, offset_type{0});
+    auto const sizes_begin = cudf::detail::make_null_replacement_iterator(*d_sizes, size_type{0});
     auto const sizes_end   = sizes_begin + sizes->size();
     auto output_offsets    = cudf::make_numeric_column(
       offset_data_type, num_rows + 1, mask_state::UNALLOCATED, stream, mr);
@@ -82,12 +82,10 @@ std::unique_ptr<column> apply_boolean_mask(lists_column_view const& input,
 
     // Could have attempted an exclusive_scan(), but it would not compute the last entry.
     // Instead, inclusive_scan(), followed by writing `0` to the head of the offsets column.
-    thrust::inclusive_scan(rmm::exec_policy(stream),
-                           sizes_begin,
-                           sizes_end,
-                           output_offsets_view.begin<offset_type>() + 1);
+    thrust::inclusive_scan(
+      rmm::exec_policy(stream), sizes_begin, sizes_end, output_offsets_view.begin<size_type>() + 1);
     CUDF_CUDA_TRY(cudaMemsetAsync(
-      output_offsets_view.begin<offset_type>(), 0, sizeof(offset_type), stream.value()));
+      output_offsets_view.begin<size_type>(), 0, sizeof(size_type), stream.value()));
     return output_offsets;
   };
 
diff --git a/cpp/src/lists/utilities.cu b/cpp/src/lists/utilities.cu
index 50a41c51f76..2c4966c969e 100644
--- a/cpp/src/lists/utilities.cu
+++ b/cpp/src/lists/utilities.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,10 +42,10 @@ std::unique_ptr<column> reconstruct_offsets(column_view const& labels,
 
 {
   auto out_offsets = make_numeric_column(
-    data_type{type_to_id<offset_type>()}, n_lists + 1, mask_state::UNALLOCATED, stream, mr);
+    data_type{type_to_id<size_type>()}, n_lists + 1, mask_state::UNALLOCATED, stream, mr);
 
   auto const labels_begin  = labels.template begin<size_type>();
-  auto const offsets_begin = out_offsets->mutable_view().template begin<offset_type>();
+  auto const offsets_begin = out_offsets->mutable_view().template begin<size_type>();
   cudf::detail::labels_to_offsets(labels_begin,
                                   labels_begin + labels.size(),
                                   offsets_begin,
@@ -60,7 +60,7 @@ std::unique_ptr<column> get_normalized_offsets(lists_column_view const& input,
 {
   if (input.is_empty()) { return empty_like(input.offsets()); }
 
-  auto out_offsets = make_numeric_column(data_type(type_to_id<offset_type>()),
+  auto out_offsets = make_numeric_column(data_type(type_to_id<size_type>()),
                                          input.size() + 1,
                                          cudf::mask_state::UNALLOCATED,
                                          stream,
@@ -68,7 +68,7 @@ std::unique_ptr<column> get_normalized_offsets(lists_column_view const& input,
   thrust::transform(rmm::exec_policy(stream),
                     input.offsets_begin(),
                     input.offsets_end(),
-                    out_offsets->mutable_view().begin<offset_type>(),
+                    out_offsets->mutable_view().begin<size_type>(),
                     [d_offsets = input.offsets_begin()] __device__(auto const offset_val) {
                       // The first offset value, used for zero-normalizing offsets.
                       return offset_val - *d_offsets;
diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu
index 0c90b0af8d2..cfdb386ff64 100644
--- a/cpp/src/quantiles/tdigest/tdigest.cu
+++ b/cpp/src/quantiles/tdigest/tdigest.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -66,7 +66,7 @@ struct make_centroid {
 
 // kernel for computing percentiles on input tdigest (mean, weight) centroid data.
 template <typename CentroidIter>
-__global__ void compute_percentiles_kernel(device_span<offset_type const> tdigest_offsets,
+__global__ void compute_percentiles_kernel(device_span<size_type const> tdigest_offsets,
                                            column_device_view percentiles,
                                            CentroidIter centroids_,
                                            double const* min_,
@@ -199,8 +199,8 @@ std::unique_ptr<column> compute_approx_percentiles(tdigest_column_view const& in
                                                           rmm::mr::get_current_device_resource());
   auto keys               = cudf::detail::make_counting_transform_iterator(
     0,
-    [offsets_begin = offsets.begin<offset_type>(),
-     offsets_end   = offsets.end<offset_type>()] __device__(size_type i) {
+    [offsets_begin = offsets.begin<size_type>(),
+     offsets_end   = offsets.end<size_type>()] __device__(size_type i) {
       return thrust::distance(
         offsets_begin,
         thrust::prev(thrust::upper_bound(thrust::seq, offsets_begin, offsets_end, i)));
@@ -239,7 +239,7 @@ std::unique_ptr<column> compute_approx_percentiles(tdigest_column_view const& in
   constexpr size_type block_size = 256;
   cudf::detail::grid_1d const grid(percentiles.size() * input.size(), block_size);
   compute_percentiles_kernel<<<grid.num_blocks, block_size, 0, stream.value()>>>(
-    {offsets.begin<offset_type>(), static_cast<size_t>(offsets.size())},
+    {offsets.begin<size_type>(), static_cast<size_t>(offsets.size())},
     *percentiles_cdv,
     centroids,
     tdv.min_begin(),
@@ -294,8 +294,8 @@ std::unique_ptr<column> make_empty_tdigest_column(rmm::cuda_stream_view stream,
   auto offsets = cudf::make_fixed_width_column(
     data_type(type_id::INT32), 2, mask_state::UNALLOCATED, stream, mr);
   thrust::fill(rmm::exec_policy(stream),
-               offsets->mutable_view().begin<offset_type>(),
-               offsets->mutable_view().end<offset_type>(),
+               offsets->mutable_view().begin<size_type>(),
+               offsets->mutable_view().end<size_type>(),
                0);
 
   auto min_col =
@@ -362,7 +362,7 @@ std::unique_ptr<column> percentile_approx(tdigest_column_view const& input,
   thrust::exclusive_scan(rmm::exec_policy(stream),
                          row_size_iter,
                          row_size_iter + input.size() + 1,
-                         offsets->mutable_view().begin<offset_type>());
+                         offsets->mutable_view().begin<size_type>());
 
   if (percentiles.size() == 0 || all_empty_rows) {
     return cudf::make_lists_column(
diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu
index 35f2ce05bb6..2ce55e10fb1 100644
--- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu
+++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu
@@ -127,7 +127,7 @@ struct merge_centroids {
  * nearest whole number <= it is floor(3.56) == 3.
  */
 struct nearest_value_scalar_weights_grouped {
-  offset_type const* group_offsets;
+  size_type const* group_offsets;
 
   thrust::pair<double, int> operator() __device__(double next_limit, size_type group_index) const
   {
@@ -167,8 +167,8 @@ struct nearest_value_scalar_weights {
 template <typename GroupOffsetsIter>
 struct nearest_value_centroid_weights {
   double const* cumulative_weights;
-  GroupOffsetsIter outer_offsets;    // groups
-  offset_type const* inner_offsets;  // tdigests within a group
+  GroupOffsetsIter outer_offsets;  // groups
+  size_type const* inner_offsets;  // tdigests within a group
 
   thrust::pair<double, int> operator() __device__(double next_limit, size_type group_index) const
   {
@@ -234,8 +234,8 @@ template <typename GroupLabelsIter, typename GroupOffsetsIter>
 struct cumulative_centroid_weight {
   double const* cumulative_weights;
   GroupLabelsIter group_labels;
-  GroupOffsetsIter outer_offsets;                      // groups
-  cudf::device_span<offset_type const> inner_offsets;  // tdigests with a group
+  GroupOffsetsIter outer_offsets;                    // groups
+  cudf::device_span<size_type const> inner_offsets;  // tdigests within a group
 
   std::tuple<size_type, size_type, double> operator() __device__(size_type value_index) const
   {
@@ -257,7 +257,7 @@ struct cumulative_centroid_weight {
 // retrieve group info (total weight, size, start offset) of scalar inputs by group index.
 struct scalar_group_info_grouped {
   size_type const* group_valid_counts;
-  offset_type const* group_offsets;
+  size_type const* group_offsets;
 
   __device__ thrust::tuple<double, size_type, size_type> operator()(size_type group_index) const
   {
@@ -283,7 +283,7 @@ template <typename GroupOffsetsIter>
 struct centroid_group_info {
   double const* cumulative_weights;
   GroupOffsetsIter outer_offsets;
-  offset_type const* inner_offsets;
+  size_type const* inner_offsets;
 
   __device__ thrust::tuple<double, size_type, size_type> operator()(size_type group_index) const
   {
@@ -375,7 +375,7 @@ __global__ void generate_cluster_limits_kernel(int delta,
                                                CumulativeWeight cumulative_weight,
                                                double* group_cluster_wl,
                                                size_type* group_num_clusters,
-                                               offset_type const* group_cluster_offsets,
+                                               size_type const* group_cluster_offsets,
                                                bool has_nulls)
 {
   int const tid = threadIdx.x + blockIdx.x * blockDim.x;
@@ -544,12 +544,12 @@ generate_group_cluster_info(int delta,
   thrust::exclusive_scan(rmm::exec_policy(stream),
                          cluster_size,
                          cluster_size + num_groups + 1,
-                         group_cluster_offsets->mutable_view().begin<offset_type>(),
+                         group_cluster_offsets->mutable_view().begin<size_type>(),
                          0);
 
   // total # of clusters
-  offset_type total_clusters =
-    cudf::detail::get_value<offset_type>(group_cluster_offsets->view(), num_groups, stream);
+  size_type total_clusters =
+    cudf::detail::get_value<size_type>(group_cluster_offsets->view(), num_groups, stream);
 
   // fill in the actual cluster weight limits
   rmm::device_uvector<double> group_cluster_wl(total_clusters, stream);
@@ -561,7 +561,7 @@ generate_group_cluster_info(int delta,
     cumulative_weight,
     group_cluster_wl.begin(),
     group_num_clusters.begin(),
-    group_cluster_offsets->view().begin<offset_type>(),
+    group_cluster_offsets->view().begin<size_type>(),
     has_nulls);
 
   return {std::move(group_cluster_wl),
@@ -584,7 +584,7 @@ std::unique_ptr<column> build_output_column(size_type num_rows,
     return weights[i] == 0;
   };
   // whether or not this particular tdigest is a stub
-  auto is_stub_digest = [offsets = offsets->view().begin<offset_type>(), is_stub_weight] __device__(
+  auto is_stub_digest = [offsets = offsets->view().begin<size_type>(), is_stub_weight] __device__(
                           size_type i) { return is_stub_weight(offsets[i]) ? 1 : 0; };
 
   size_type const num_stubs = [&]() {
@@ -622,12 +622,12 @@ std::unique_ptr<column> build_output_column(size_type num_rows,
   auto _weights = remove_stubs(*weights, num_stubs);
 
   // adjust offsets.
-  rmm::device_uvector<offset_type> sizes(num_rows, stream);
+  rmm::device_uvector<size_type> sizes(num_rows, stream);
   thrust::transform(rmm::exec_policy(stream),
                     thrust::make_counting_iterator(0),
                     thrust::make_counting_iterator(0) + num_rows,
                     sizes.begin(),
-                    [offsets = offsets->view().begin<offset_type>()] __device__(size_type i) {
+                    [offsets = offsets->view().begin<size_type>()] __device__(size_type i) {
                       return offsets[i + 1] - offsets[i];
                     });
   auto iter = cudf::detail::make_counting_transform_iterator(
@@ -637,7 +637,7 @@ std::unique_ptr<column> build_output_column(size_type num_rows,
   thrust::exclusive_scan(rmm::exec_policy(stream),
                          iter,
                          iter + num_rows + 1,
-                         offsets->mutable_view().begin<offset_type>(),
+                         offsets->mutable_view().begin<size_type>(),
                          0);
 
   // assemble final column
@@ -717,7 +717,7 @@ std::unique_ptr<column> compute_tdigests(int delta,
     thrust::make_counting_iterator(0),
     [delta,
      group_cluster_wl      = group_cluster_wl.data(),
-     group_cluster_offsets = group_cluster_offsets->view().begin<offset_type>(),
+     group_cluster_offsets = group_cluster_offsets->view().begin<size_type>(),
      group_cumulative_weight] __device__(size_type value_index) -> size_type {
       // get group index, relative value index within the group and cumulative weight.
       [[maybe_unused]] auto [group_index, relative_value_index, cumulative_weight] =
@@ -1018,10 +1018,10 @@ std::unique_ptr<column> merge_tdigests(tdigest_column_view const& tdv,
 
   // bring tdigest offsets back to the host
   auto tdigest_offsets = tdv.centroids().offsets();
-  std::vector<offset_type> h_inner_offsets(tdigest_offsets.size());
+  std::vector<size_type> h_inner_offsets(tdigest_offsets.size());
   cudaMemcpyAsync(h_inner_offsets.data(),
-                  tdigest_offsets.begin<offset_type>(),
-                  sizeof(offset_type) * tdigest_offsets.size(),
+                  tdigest_offsets.begin<size_type>(),
+                  sizeof(size_type) * tdigest_offsets.size(),
                   cudaMemcpyDefault,
                   stream);
 
@@ -1154,7 +1154,7 @@ std::unique_ptr<column> merge_tdigests(tdigest_column_view const& tdv,
       cumulative_weights->view().begin<double>(),
       group_labels,
       group_offsets,
-      {tdigest_offsets.begin<offset_type>(), static_cast<size_t>(tdigest_offsets.size())}},
+      {tdigest_offsets.begin<size_type>(), static_cast<size_t>(tdigest_offsets.size())}},
     false,
     stream,
     mr);
@@ -1174,7 +1174,7 @@ std::unique_ptr<column> merge_tdigests(tdigest_column_view const& tdv,
       cumulative_weights->view().begin<double>(),
       group_labels,
       group_offsets,
-      {tdigest_offsets.begin<offset_type>(), static_cast<size_t>(tdigest_offsets.size())}},
+      {tdigest_offsets.begin<size_type>(), static_cast<size_t>(tdigest_offsets.size())}},
     std::move(merged_min_col),
     std::move(merged_max_col),
     group_cluster_wl,
diff --git a/cpp/src/rolling/detail/nth_element.cuh b/cpp/src/rolling/detail/nth_element.cuh
index c28d96e7793..bd3cbb39168 100644
--- a/cpp/src/rolling/detail/nth_element.cuh
+++ b/cpp/src/rolling/detail/nth_element.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -157,7 +157,7 @@ std::unique_ptr<column> nth_element(size_type n,
     gather_index_calculator<null_handling, PrecedingIter, FollowingIter>{
       n, input, preceding, following, min_periods, stream});
 
-  auto gather_map = rmm::device_uvector<offset_type>(input.size(), stream);
+  auto gather_map = rmm::device_uvector<size_type>(input.size(), stream);
   thrust::copy(
     rmm::exec_policy(stream), gather_iter, gather_iter + input.size(), gather_map.begin());
 
diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh
index 84352e36550..3b6d53f43c4 100644
--- a/cpp/src/rolling/detail/rolling.cuh
+++ b/cpp/src/rolling/detail/rolling.cuh
@@ -454,7 +454,7 @@ struct agg_specific_empty_output {
 
     if constexpr (op == aggregation::COLLECT_LIST) {
       return cudf::make_lists_column(
-        0, make_empty_column(type_to_id<offset_type>()), empty_like(input), 0, {});
+        0, make_empty_column(type_to_id<size_type>()), empty_like(input), 0, {});
     }
 
     return empty_like(input);
diff --git a/cpp/src/rolling/detail/rolling_collect_list.cu b/cpp/src/rolling/detail/rolling_collect_list.cu
index f7544e81ba5..85dced0efe3 100644
--- a/cpp/src/rolling/detail/rolling_collect_list.cu
+++ b/cpp/src/rolling/detail/rolling_collect_list.cu
@@ -140,8 +140,8 @@ std::pair<std::unique_ptr<column>, std::unique_ptr<column>> purge_null_entries(
   thrust::tabulate(rmm::exec_policy(stream),
                    new_sizes->mutable_view().template begin<size_type>(),
                    new_sizes->mutable_view().template end<size_type>(),
-                   [d_gather_map  = gather_map.template begin<offset_type>(),
-                    d_old_offsets = offsets.template begin<offset_type>(),
+                   [d_gather_map  = gather_map.template begin<size_type>(),
+                    d_old_offsets = offsets.template begin<size_type>(),
                     input_row_not_null] __device__(auto i) {
                      return thrust::count_if(thrust::seq,
                                              d_gather_map + d_old_offsets[i],
diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu
index 899cd8b6c86..ca5c04d1c4f 100644
--- a/cpp/src/rolling/grouped_rolling.cu
+++ b/cpp/src/rolling/grouped_rolling.cu
@@ -327,12 +327,12 @@ std::unique_ptr<column> expand_to_column(Calculator const& calc,
                                          rmm::cuda_stream_view stream)
 {
   auto window_column = cudf::make_numeric_column(
-    cudf::data_type{type_to_id<offset_type>()}, num_rows, cudf::mask_state::UNALLOCATED, stream);
+    cudf::data_type{type_to_id<size_type>()}, num_rows, cudf::mask_state::UNALLOCATED, stream);
 
   auto begin = cudf::detail::make_counting_transform_iterator(0, calc);
 
   thrust::copy_n(
-    rmm::exec_policy(stream), begin, num_rows, window_column->mutable_view().data<offset_type>());
+    rmm::exec_policy(stream), begin, num_rows, window_column->mutable_view().data<size_type>());
 
   return window_column;
 }
diff --git a/cpp/src/strings/capitalize.cu b/cpp/src/strings/capitalize.cu
index fcb0bacad9a..4e248922702 100644
--- a/cpp/src/strings/capitalize.cu
+++ b/cpp/src/strings/capitalize.cu
@@ -63,7 +63,7 @@ struct base_fn {
   character_cases_table_type const* d_case_table;
   special_case_mapping const* d_special_case_mapping;
   column_device_view const d_column;
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
   base_fn(column_device_view const& d_column)
@@ -111,11 +111,11 @@ struct base_fn {
       return;
     }
 
-    auto& derived     = static_cast<Derived&>(*this);
-    auto const d_str  = d_column.element<string_view>(idx);
-    offset_type bytes = 0;
-    auto d_buffer     = d_chars ? d_chars + d_offsets[idx] : nullptr;
-    bool capitalize   = true;
+    auto& derived    = static_cast<Derived&>(*this);
+    auto const d_str = d_column.element<string_view>(idx);
+    size_type bytes  = 0;
+    auto d_buffer    = d_chars ? d_chars + d_offsets[idx] : nullptr;
+    bool capitalize  = true;
     for (auto const chr : d_str) {
       auto const info        = get_char_info(d_flags, chr);
       auto const flag        = info.second;
diff --git a/cpp/src/strings/combine/concatenate.cu b/cpp/src/strings/combine/concatenate.cu
index 29023fbb139..ba8acd23467 100644
--- a/cpp/src/strings/combine/concatenate.cu
+++ b/cpp/src/strings/combine/concatenate.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -48,7 +48,7 @@ struct concat_strings_base {
   table_device_view const d_table;
   string_scalar_device_view const d_narep;
   separator_on_nulls separate_nulls;
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
   /**
@@ -72,7 +72,7 @@ struct concat_strings_base {
     }
 
     char* d_buffer       = d_chars ? d_chars + d_offsets[idx] : nullptr;
-    offset_type bytes    = 0;
+    size_type bytes      = 0;
     bool write_separator = false;
 
     for (auto itr = d_table.begin(); itr < d_table.end(); ++itr) {
diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu
index 7c9acbfbc58..eee59e37478 100644
--- a/cpp/src/strings/combine/join_list_elements.cu
+++ b/cpp/src/strings/combine/join_list_elements.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -53,13 +53,13 @@ template <class Functor>
 struct compute_size_and_concatenate_fn {
   Functor const func;
   column_device_view const lists_dv;
-  offset_type const* const list_offsets;
+  size_type const* const list_offsets;
   column_device_view const strings_dv;
   string_scalar_device_view const string_narep_dv;
   separator_on_nulls const separate_nulls;
   output_if_empty_list const empty_list_policy;
 
-  offset_type* d_offsets{nullptr};
+  size_type* d_offsets{nullptr};
 
   // If d_chars == nullptr: only compute sizes and validities of the output strings.
   // If d_chars != nullptr: only concatenate strings.
diff --git a/cpp/src/strings/convert/convert_booleans.cu b/cpp/src/strings/convert/convert_booleans.cu
index 4f446c8c1cf..0d04fc74b0c 100644
--- a/cpp/src/strings/convert/convert_booleans.cu
+++ b/cpp/src/strings/convert/convert_booleans.cu
@@ -95,7 +95,7 @@ struct from_booleans_fn {
   column_device_view const d_column;
   string_view d_true;
   string_view d_false;
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
   __device__ void operator()(size_type idx) const
diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu
index 0a0f197c8a1..863f76b9b98 100644
--- a/cpp/src/strings/convert/convert_durations.cu
+++ b/cpp/src/strings/convert/convert_durations.cu
@@ -191,7 +191,7 @@ struct from_durations_fn {
   column_device_view d_durations;
   format_item const* d_format_items;
   size_type items_count;
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
   __device__ int8_t format_length(char format_char, duration_component const* const timeparts) const
diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu
index cb061d03e5a..a3336258d3e 100644
--- a/cpp/src/strings/convert/convert_fixed_point.cu
+++ b/cpp/src/strings/convert/convert_fixed_point.cu
@@ -197,7 +197,7 @@ namespace {
 template <typename DecimalType>
 struct from_fixed_point_fn {
   column_device_view d_decimals;
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
   /**
diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu
index 8728ad06964..bed682aba71 100644
--- a/cpp/src/strings/convert/convert_hex.cu
+++ b/cpp/src/strings/convert/convert_hex.cu
@@ -129,7 +129,7 @@ void dispatch_hex_to_integers_fn::operator()<bool>(column_device_view const&,
 template <typename IntegerType>
 struct integer_to_hex_fn {
   column_device_view const d_column;
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
   __device__ void byte_to_hex(uint8_t byte, char* hex)
@@ -173,7 +173,7 @@ struct integer_to_hex_fn {
         --byte_index;
       }
     } else {
-      d_offsets[idx] = static_cast<offset_type>(bytes) * 2;  // 2 hex characters per byte
+      d_offsets[idx] = static_cast<size_type>(bytes) * 2;  // 2 hex characters per byte
     }
   }
 };
diff --git a/cpp/src/strings/convert/convert_lists.cu b/cpp/src/strings/convert/convert_lists.cu
index 609ced97c26..3aef37914fd 100644
--- a/cpp/src/strings/convert/convert_lists.cu
+++ b/cpp/src/strings/convert/convert_lists.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -134,7 +134,7 @@ struct format_lists_fn {
       auto const view = get_nested_child(stack_idx);
 
       auto offsets   = view.child(cudf::lists_column_view::offsets_column_index);
-      auto d_offsets = offsets.data<offset_type>() + view.offset();
+      auto d_offsets = offsets.data<size_type>() + view.offset();
 
       // add pending separator
       if (item.separator == item_separator::LIST) {
diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu
index 8b6305b68e2..401a04cdc9d 100644
--- a/cpp/src/strings/convert/convert_urls.cu
+++ b/cpp/src/strings/convert/convert_urls.cu
@@ -197,7 +197,7 @@ __forceinline__ __device__ char escaped_sequence_to_byte(char const* const ptr)
  */
 template <int num_warps_per_threadblock, int char_block_size>
 __global__ void url_decode_char_counter(column_device_view const in_strings,
-                                        offset_type* const out_counts)
+                                        size_type* const out_counts)
 {
   constexpr int halo_size = 2;
   __shared__ char temporary_buffer[num_warps_per_threadblock][char_block_size + halo_size];
@@ -221,7 +221,7 @@ __global__ void url_decode_char_counter(column_device_view const in_strings,
     auto const in_chars      = in_string.data();
     auto const string_length = in_string.size_bytes();
     int const nblocks        = cudf::util::div_rounding_up_unsafe(string_length, char_block_size);
-    offset_type escape_char_count = 0;
+    size_type escape_char_count = 0;
 
     for (int block_idx = 0; block_idx < nblocks; block_idx++) {
       int const string_length_block =
@@ -280,7 +280,7 @@ __global__ void url_decode_char_counter(column_device_view const in_strings,
 template <int num_warps_per_threadblock, int char_block_size>
 __global__ void url_decode_char_replacer(column_device_view const in_strings,
                                          char* const out_chars,
-                                         offset_type const* const out_offsets)
+                                         size_type const* const out_offsets)
 {
   constexpr int halo_size = 2;
   __shared__ char temporary_buffer[num_warps_per_threadblock][char_block_size + halo_size * 2];
@@ -393,18 +393,17 @@ std::unique_ptr<column> url_decode(strings_column_view const& strings,
   auto offsets_mutable_view = offsets_column->mutable_view();
   url_decode_char_counter<num_warps_per_threadblock, char_block_size>
     <<<num_threadblocks, threadblock_size, 0, stream.value()>>>(
-      *d_strings, offsets_mutable_view.begin<offset_type>());
+      *d_strings, offsets_mutable_view.begin<size_type>());
 
   // use scan to transform number of bytes into offsets
   thrust::exclusive_scan(rmm::exec_policy(stream),
-                         offsets_view.begin<offset_type>(),
-                         offsets_view.end<offset_type>(),
-                         offsets_mutable_view.begin<offset_type>());
+                         offsets_view.begin<size_type>(),
+                         offsets_view.end<size_type>(),
+                         offsets_mutable_view.begin<size_type>());
 
   // copy the total number of characters of all strings combined (last element of the offset column)
   // to the host memory
-  auto out_chars_bytes =
-    cudf::detail::get_value<offset_type>(offsets_view, offset_count - 1, stream);
+  auto out_chars_bytes = cudf::detail::get_value<size_type>(offsets_view, offset_count - 1, stream);
 
   // create the chars column
   auto chars_column = create_chars_child_column(out_chars_bytes, stream, mr);
@@ -413,7 +412,7 @@ std::unique_ptr<column> url_decode(strings_column_view const& strings,
   // decode and copy the characters from the input column to the output column
   url_decode_char_replacer<num_warps_per_threadblock, char_block_size>
     <<<num_threadblocks, threadblock_size, 0, stream.value()>>>(
-      *d_strings, d_out_chars, offsets_column->view().begin<offset_type>());
+      *d_strings, d_out_chars, offsets_column->view().begin<size_type>());
 
   // copy null mask
   rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr);
diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu
index c5dfd4a8b93..287910c9a6f 100644
--- a/cpp/src/strings/copying/concatenate.cu
+++ b/cpp/src/strings/copying/concatenate.cu
@@ -287,12 +287,12 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
         column_view chars_child   = column->child(strings_column_view::chars_column_index);
 
         auto bytes_offset =
-          cudf::detail::get_value<offset_type>(offsets_child, column_offset, stream);
+          cudf::detail::get_value<size_type>(offsets_child, column_offset, stream);
 
         // copy the chars column data
         auto d_chars = chars_child.data<char>() + bytes_offset;
         auto const bytes =
-          cudf::detail::get_value<offset_type>(offsets_child, column_size + column_offset, stream) -
+          cudf::detail::get_value<size_type>(offsets_child, column_size + column_offset, stream) -
           bytes_offset;
 
         CUDF_CUDA_TRY(
diff --git a/cpp/src/strings/copying/shift.cu b/cpp/src/strings/copying/shift.cu
index bdcf01bd336..5f8fc483a34 100644
--- a/cpp/src/strings/copying/shift.cu
+++ b/cpp/src/strings/copying/shift.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,15 +36,15 @@ struct adjust_offsets_fn {
   string_view const d_filler;
   size_type const offset;
 
-  __device__ offset_type operator()(size_type idx)
+  __device__ size_type operator()(size_type idx)
   {
     if (offset < 0) {
-      auto const first      = d_column.element<offset_type>(-offset);
+      auto const first      = d_column.element<size_type>(-offset);
       auto const last_index = d_column.size() + offset;
       if (idx < last_index) {
-        return d_column.element<offset_type>(idx - offset) - first;
+        return d_column.element<size_type>(idx - offset) - first;
       } else {
-        auto const last = d_column.element<offset_type>(d_column.size() - 1);
+        auto const last = d_column.element<size_type>(d_column.size() - 1);
         return (last - first) + ((idx - last_index + 1) * d_filler.size_bytes());
       }
     } else {
@@ -52,7 +52,7 @@ struct adjust_offsets_fn {
         return idx * d_filler.size_bytes();
       } else {
         auto const total_filler = d_filler.size_bytes() * offset;
-        return total_filler + d_column.element<offset_type>(idx - offset);
+        return total_filler + d_column.element<size_type>(idx - offset);
       }
     }
   }
@@ -112,19 +112,19 @@ std::unique_ptr<column> shift(strings_column_view const& input,
   thrust::transform(rmm::exec_policy(stream),
                     thrust::counting_iterator<size_type>(0),
                     thrust::counting_iterator<size_type>(offsets_size),
-                    d_offsets->data<offset_type>(),
+                    d_offsets->data<size_type>(),
                     adjust_offsets_fn{*d_input_offsets, d_fill_str, offset});
 
   // compute the shift-offset for the output characters child column
   auto const shift_offset = [&] {
     auto const index = (offset >= 0) ? offset : offsets_size - 1 + offset;
     return (offset < 0 ? -1 : 1) *
-           cudf::detail::get_value<offset_type>(offsets_column->view(), index, stream);
+           cudf::detail::get_value<size_type>(offsets_column->view(), index, stream);
   }();
 
   // create output chars child column
   auto const chars_size =
-    cudf::detail::get_value<offset_type>(offsets_column->view(), offsets_size - 1, stream);
+    cudf::detail::get_value<size_type>(offsets_column->view(), offsets_size - 1, stream);
   auto chars_column = create_chars_child_column(chars_size, stream, mr);
   auto d_chars      = mutable_column_device_view::create(chars_column->mutable_view(), stream);
   auto const d_input_chars = column_device_view::create(input.chars(), stream);
diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu
index fcd05ee9dc6..8a2f8f0cbfc 100644
--- a/cpp/src/strings/extract/extract_all.cu
+++ b/cpp/src/strings/extract/extract_all.cu
@@ -50,7 +50,7 @@ namespace {
  */
 struct extract_fn {
   column_device_view const d_strings;
-  offset_type const* d_offsets;
+  size_type const* d_offsets;
   string_index_pair* d_indices;
 
   __device__ void operator()(size_type const idx,
@@ -119,7 +119,7 @@ std::unique_ptr<column> extract_all_record(strings_column_view const& input,
   // Get the match counts for each string.
   // This column will become the output lists child offsets column.
   auto offsets   = count_matches(*d_strings, *d_prog, strings_count + 1, stream, mr);
-  auto d_offsets = offsets->mutable_view().data<offset_type>();
+  auto d_offsets = offsets->mutable_view().data<size_type>();
 
   // Compute null output rows
   auto [null_mask, null_count] = cudf::detail::valid_if(
@@ -138,10 +138,10 @@ std::unique_ptr<column> extract_all_record(strings_column_view const& input,
     d_offsets + strings_count + 1,
     d_offsets,
     [groups] __device__(auto v) { return v * groups; },
-    offset_type{0},
+    size_type{0},
     thrust::plus{});
   auto const total_groups =
-    cudf::detail::get_value<offset_type>(offsets->view(), strings_count, stream);
+    cudf::detail::get_value<size_type>(offsets->view(), strings_count, stream);
 
   rmm::device_uvector<string_index_pair> indices(total_groups, stream);
 
diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu
index f4dfafeb51f..be5b089c6e0 100644
--- a/cpp/src/strings/json/json_path.cu
+++ b/cpp/src/strings/json/json_path.cu
@@ -901,7 +901,7 @@ template <int block_size>
 __launch_bounds__(block_size) __global__
   void get_json_object_kernel(column_device_view col,
                               path_operator const* const commands,
-                              offset_type* output_offsets,
+                              size_type* output_offsets,
                               thrust::optional<char*> out_buf,
                               thrust::optional<bitmask_type*> out_validity,
                               thrust::optional<size_type*> out_valid_count,
@@ -932,7 +932,7 @@ __launch_bounds__(block_size) __global__
 
     // filled in only during the precompute step. during the compute step, the offsets
     // are fed back in so we do -not- want to write them out
-    if (!out_buf.has_value()) { output_offsets[tid] = static_cast<offset_type>(output_size); }
+    if (!out_buf.has_value()) { output_offsets[tid] = static_cast<size_type>(output_size); }
 
     // validity filled in only during the output step
     if (out_validity.has_value()) {
@@ -995,7 +995,7 @@ std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& c
     <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
       *cdv,
       std::get<0>(preprocess).value().data(),
-      offsets_view.head<offset_type>(),
+      offsets_view.head<size_type>(),
       thrust::nullopt,
       thrust::nullopt,
       thrust::nullopt,
@@ -1003,12 +1003,12 @@ std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& c
 
   // convert sizes to offsets
   thrust::exclusive_scan(rmm::exec_policy(stream),
-                         offsets_view.head<offset_type>(),
-                         offsets_view.head<offset_type>() + col.size() + 1,
-                         offsets_view.head<offset_type>(),
+                         offsets_view.head<size_type>(),
+                         offsets_view.head<size_type>() + col.size() + 1,
+                         offsets_view.head<size_type>(),
                          0);
   size_type const output_size =
-    cudf::detail::get_value<offset_type>(offsets_view, col.size(), stream);
+    cudf::detail::get_value<size_type>(offsets_view, col.size(), stream);
 
   // allocate output string column
   auto chars = create_chars_child_column(output_size, stream, mr);
@@ -1026,7 +1026,7 @@ std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& c
     <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
       *cdv,
       std::get<0>(preprocess).value().data(),
-      offsets_view.head<offset_type>(),
+      offsets_view.head<size_type>(),
       chars_view.head<char>(),
       static_cast<bitmask_type*>(validity.data()),
       d_valid_count.data(),
diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu
index da6d01c92dc..c501a8bf7b4 100644
--- a/cpp/src/strings/padding.cu
+++ b/cpp/src/strings/padding.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -46,7 +46,7 @@ struct base_fn {
   column_device_view const d_column;
   size_type const width;
   size_type const fill_char_size;
-  offset_type* d_offsets{};
+  size_type* d_offsets{};
   char* d_chars{};
 
   base_fn(column_device_view const& d_column, size_type width, size_type fill_char_size)
diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu
index 4e0294f188c..396e1e6a2ac 100644
--- a/cpp/src/strings/repeat_strings.cu
+++ b/cpp/src/strings/repeat_strings.cu
@@ -84,10 +84,10 @@ auto generate_empty_output(strings_column_view const& input,
   auto chars_column = create_chars_child_column(0, stream, mr);
 
   auto offsets_column = make_numeric_column(
-    data_type{type_to_id<offset_type>()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
-  CUDF_CUDA_TRY(cudaMemsetAsync(offsets_column->mutable_view().template data<offset_type>(),
+    data_type{type_to_id<size_type>()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
+  CUDF_CUDA_TRY(cudaMemsetAsync(offsets_column->mutable_view().template data<size_type>(),
                                 0,
-                                offsets_column->size() * sizeof(offset_type),
+                                offsets_column->size() * sizeof(size_type),
                                 stream.value()));
 
   return make_strings_column(strings_count,
@@ -109,7 +109,7 @@ struct compute_size_and_repeat_fn {
   size_type const repeat_times;
   bool const has_nulls;
 
-  offset_type* d_offsets{nullptr};
+  size_type* d_offsets{nullptr};
 
   // If d_chars == nullptr: only compute sizes of the output strings.
   // If d_chars != nullptr: only repeat strings.
@@ -184,7 +184,7 @@ struct compute_sizes_and_repeat_fn {
   bool const strings_has_nulls;
   bool const rtimes_has_nulls;
 
-  offset_type* d_offsets{nullptr};
+  size_type* d_offsets{nullptr};
 
   // If d_chars == nullptr: only compute sizes of the output strings.
   // If d_chars != nullptr: only repeat strings.
diff --git a/cpp/src/strings/reverse.cu b/cpp/src/strings/reverse.cu
index 3c1fae7a00f..090705ac25d 100644
--- a/cpp/src/strings/reverse.cu
+++ b/cpp/src/strings/reverse.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ namespace {
  */
 struct reverse_characters_fn {
   column_device_view const d_strings;
-  offset_type const* d_offsets;
+  size_type const* d_offsets;
   char* d_chars;
 
   __device__ void operator()(size_type idx)
@@ -64,7 +64,7 @@ std::unique_ptr<column> reverse(strings_column_view const& input,
   // copy the column; replace data in the chars column
   auto result = std::make_unique<column>(input.parent(), stream, mr);
   auto const d_offsets =
-    result->view().child(strings_column_view::offsets_column_index).data<offset_type>();
+    result->view().child(strings_column_view::offsets_column_index).data<size_type>();
   auto d_chars = result->mutable_view().child(strings_column_view::chars_column_index).data<char>();
 
   auto const d_column = column_device_view::create(input.parent(), stream);
diff --git a/cpp/src/strings/search/find_multiple.cu b/cpp/src/strings/search/find_multiple.cu
index 1907c0d749b..4a823ad1dcb 100644
--- a/cpp/src/strings/search/find_multiple.cu
+++ b/cpp/src/strings/search/find_multiple.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -70,8 +70,8 @@ std::unique_ptr<column> find_multiple(strings_column_view const& input,
   results->set_null_count(0);
 
   auto offsets = cudf::detail::sequence(strings_count + 1,
-                                        numeric_scalar<offset_type>(0),
-                                        numeric_scalar<offset_type>(targets_count),
+                                        numeric_scalar<size_type>(0),
+                                        numeric_scalar<size_type>(targets_count),
                                         stream,
                                         mr);
   return make_lists_column(strings_count,
diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu
index 596fbb39d15..2df64c6a0a7 100644
--- a/cpp/src/strings/search/findall.cu
+++ b/cpp/src/strings/search/findall.cu
@@ -50,7 +50,7 @@ namespace {
  */
 struct findall_fn {
   column_device_view const d_strings;
-  offset_type const* d_offsets;
+  size_type const* d_offsets;
   string_index_pair* d_indices;
 
   __device__ void operator()(size_type const idx, reprog_device const prog, int32_t const prog_idx)
@@ -78,7 +78,7 @@ struct findall_fn {
 std::unique_ptr<column> findall_util(column_device_view const& d_strings,
                                      reprog_device& d_prog,
                                      size_type total_matches,
-                                     offset_type const* d_offsets,
+                                     size_type const* d_offsets,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
 {
@@ -106,7 +106,7 @@ std::unique_ptr<column> findall(strings_column_view const& input,
 
   // Create lists offsets column
   auto offsets   = count_matches(*d_strings, *d_prog, strings_count + 1, stream, mr);
-  auto d_offsets = offsets->mutable_view().data<offset_type>();
+  auto d_offsets = offsets->mutable_view().data<size_type>();
 
   // Convert counts into offsets
   thrust::exclusive_scan(
diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu
index 0dc92cf343c..9aeb6b69bdc 100644
--- a/cpp/src/strings/split/split_re.cu
+++ b/cpp/src/strings/split/split_re.cu
@@ -60,7 +60,7 @@ enum class split_direction {
 struct token_reader_fn {
   column_device_view const d_strings;
   split_direction const direction;
-  offset_type const* d_token_offsets;
+  size_type const* d_token_offsets;
   string_index_pair* d_tokens;
 
   __device__ void operator()(size_type const idx, reprog_device const prog, int32_t const prog_idx)
@@ -143,17 +143,17 @@ rmm::device_uvector<string_index_pair> generate_tokens(column_device_view const&
 
   auto const begin     = thrust::make_counting_iterator<size_type>(0);
   auto const end       = thrust::make_counting_iterator<size_type>(strings_count);
-  auto const d_offsets = offsets.data<offset_type>();
+  auto const d_offsets = offsets.data<size_type>();
 
   // convert match counts to token offsets
   auto map_fn = [d_strings, d_offsets, max_tokens] __device__(auto idx) {
     return d_strings.is_null(idx) ? 0 : std::min(d_offsets[idx], max_tokens) + 1;
   };
   thrust::transform_exclusive_scan(
-    rmm::exec_policy(stream), begin, end + 1, d_offsets, map_fn, 0, thrust::plus<offset_type>{});
+    rmm::exec_policy(stream), begin, end + 1, d_offsets, map_fn, 0, thrust::plus<size_type>{});
 
   // the last offset entry is the total number of tokens to be generated
-  auto const total_tokens = cudf::detail::get_value<offset_type>(offsets, strings_count, stream);
+  auto const total_tokens = cudf::detail::get_value<size_type>(offsets, strings_count, stream);
 
   rmm::device_uvector<string_index_pair> tokens(total_tokens, stream);
   if (total_tokens == 0) { return tokens; }
@@ -176,7 +176,7 @@ rmm::device_uvector<string_index_pair> generate_tokens(column_device_view const&
 struct tokens_transform_fn {
   column_device_view const d_strings;
   string_index_pair const* d_tokens;
-  offset_type const* d_token_offsets;
+  size_type const* d_token_offsets;
   size_type const column_index;
 
   __device__ string_index_pair operator()(size_type idx) const
@@ -215,7 +215,7 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
   auto offsets = count_matches(
     *d_strings, *d_prog, strings_count + 1, stream, rmm::mr::get_current_device_resource());
   auto offsets_view = offsets->mutable_view();
-  auto d_offsets    = offsets_view.data<offset_type>();
+  auto d_offsets    = offsets_view.data<size_type>();
 
   // get the split tokens from the input column; this also converts the counts into offsets
   auto tokens = generate_tokens(*d_strings, *d_prog, direction, maxsplit, offsets_view, stream);
diff --git a/cpp/src/strings/strings_column_view.cpp b/cpp/src/strings/strings_column_view.cpp
index 6de478d3e1e..4b206666d4b 100644
--- a/cpp/src/strings/strings_column_view.cpp
+++ b/cpp/src/strings/strings_column_view.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ column_view strings_column_view::offsets() const
 
 strings_column_view::offset_iterator strings_column_view::offsets_begin() const
 {
-  return offsets().begin<offset_type>() + offset();
+  return offsets().begin<size_type>() + offset();
 }
 
 strings_column_view::offset_iterator strings_column_view::offsets_end() const
diff --git a/cpp/src/text/subword/bpe_tokenizer.cu b/cpp/src/text/subword/bpe_tokenizer.cu
index 413fb2497c0..ac55fe76db1 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cu
+++ b/cpp/src/text/subword/bpe_tokenizer.cu
@@ -199,7 +199,7 @@ struct byte_pair_encoding_fn {
     }
 
     auto const offset = d_strings.child(cudf::strings_column_view::offsets_column_index)
-                          .element<cudf::offset_type>(idx);
+                          .element<cudf::size_type>(idx);
     auto const d_indices = d_byte_indices + offset;
 
     // initialize the byte indices for this string;
@@ -304,7 +304,7 @@ struct byte_pair_encoding_fn {
 struct build_encoding_fn {
   cudf::column_device_view const d_strings;
   cudf::size_type const* d_byte_indices;
-  cudf::offset_type const* d_offsets;
+  cudf::size_type const* d_offsets;
   char* d_chars{};
 
   __device__ void operator()(cudf::size_type idx)
@@ -314,7 +314,7 @@ struct build_encoding_fn {
     if (d_str.empty()) { return; }
 
     auto const offset = d_strings.child(cudf::strings_column_view::offsets_column_index)
-                          .element<cudf::offset_type>(idx);
+                          .element<cudf::size_type>(idx);
     auto const d_indices = d_byte_indices + offset;
     auto d_output        = d_chars ? d_chars + d_offsets[idx] : nullptr;
 
@@ -362,12 +362,12 @@ std::unique_ptr<cudf::column> byte_pair_encoding(
   auto const d_merges  = cudf::column_device_view::create(merge_pairs.get_merge_pairs(), stream);
   auto const d_strings = cudf::column_device_view::create(input.parent(), stream);
 
-  auto offsets   = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<cudf::offset_type>()},
+  auto offsets   = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()},
                                            static_cast<cudf::size_type>(input.size() + 1),
                                            cudf::mask_state::UNALLOCATED,
                                            stream,
                                            rmm::mr::get_current_device_resource());
-  auto d_offsets = offsets->mutable_view().data<cudf::offset_type>();
+  auto d_offsets = offsets->mutable_view().data<cudf::size_type>();
 
   byte_pair_encoding_fn fn{*d_merges,
                            *d_strings,
@@ -406,14 +406,14 @@ std::unique_ptr<cudf::column> byte_pair_encoding(
  */
 struct edge_of_space_fn {
   cudf::column_device_view const d_strings;
-  __device__ bool operator()(cudf::offset_type offset)
+  __device__ bool operator()(cudf::size_type offset)
   {
     auto const d_chars =
       d_strings.child(cudf::strings_column_view::chars_column_index).data<char>();
     if (is_whitespace(d_chars[offset]) || !is_whitespace(d_chars[offset - 1])) { return false; }
 
     auto const offsets   = d_strings.child(cudf::strings_column_view::offsets_column_index);
-    auto const d_offsets = offsets.data<cudf::offset_type>() + d_strings.offset();
+    auto const d_offsets = offsets.data<cudf::size_type>() + d_strings.offset();
     // ignore offsets outside sliced range
     if (offset < d_offsets[0] || offset >= d_offsets[d_strings.size()]) { return false; }
 
@@ -452,12 +452,12 @@ std::unique_ptr<cudf::column> space_offsets(cudf::strings_column_view const& inp
   auto const space_count = thrust::count_if(rmm::exec_policy(stream), begin, end, edge_of_space);
 
   // copy space offsets
-  rmm::device_uvector<cudf::offset_type> space_offsets(space_count, stream);
+  rmm::device_uvector<cudf::size_type> space_offsets(space_count, stream);
   thrust::copy_if(rmm::exec_policy(stream), begin, end, space_offsets.data(), edge_of_space);
 
   // create output offsets
   auto result =
-    cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<cudf::offset_type>()},
+    cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()},
                               static_cast<cudf::size_type>(space_count + input.size() + 1),
                               cudf::mask_state::UNALLOCATED,
                               stream,
@@ -469,7 +469,7 @@ std::unique_ptr<cudf::column> space_offsets(cudf::strings_column_view const& inp
                 input.offsets_end(),
                 space_offsets.begin(),
                 space_offsets.end(),
-                result->mutable_view().begin<cudf::offset_type>());
+                result->mutable_view().begin<cudf::size_type>());
 
   return result;
 }
diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu
index dffe035ad35..b39413af98f 100644
--- a/cpp/src/text/subword/load_merges_file.cu
+++ b/cpp/src/text/subword/load_merges_file.cu
@@ -78,7 +78,7 @@ std::unique_ptr<cudf::column> load_file_to_column(std::string const& filename_me
   CUDF_EXPECTS(merges_file.good(), "Could not open " + filename_merges);
 
   std::vector<char> chars{};
-  std::vector<cudf::offset_type> offsets(1, 0);
+  std::vector<cudf::size_type> offsets(1, 0);
 
   std::string line;
   std::getline(merges_file, line);
diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu
index 1507a8ce7c6..b151b44565d 100644
--- a/cpp/src/transform/row_bit_count.cu
+++ b/cpp/src/transform/row_bit_count.cu
@@ -352,10 +352,10 @@ __device__ size_type row_size_functor::operator()<string_view>(column_device_vie
     return 0;
   }
 
-  auto const offsets_size  = sizeof(offset_type) * CHAR_BIT;
+  auto const offsets_size  = sizeof(size_type) * CHAR_BIT;
   auto const validity_size = col.nullable() ? 1 : 0;
   auto const chars_size =
-    (offsets.data<offset_type>()[row_end] - offsets.data<offset_type>()[row_start]) * CHAR_BIT;
+    (offsets.data<size_type>()[row_end] - offsets.data<size_type>()[row_start]) * CHAR_BIT;
   return ((offsets_size + validity_size) * num_rows) + chars_size;
 }
 
@@ -372,7 +372,7 @@ __device__ size_type row_size_functor::operator()<list_view>(column_device_view
 {
   auto const num_rows{span.row_end - span.row_start};
 
-  auto const offsets_size  = sizeof(offset_type) * CHAR_BIT;
+  auto const offsets_size  = sizeof(size_type) * CHAR_BIT;
   auto const validity_size = col.nullable() ? 1 : 0;
   return (offsets_size + validity_size) * num_rows;
 }
@@ -451,10 +451,10 @@ __global__ void compute_row_sizes(device_span<column_device_view const> cols,
     // if this is a list column, update the working span from our offsets
     if (col.type().id() == type_id::LIST && col.size() > 0) {
       column_device_view const& offsets = col.child(lists_column_view::offsets_column_index);
-      auto const base_offset            = offsets.data<offset_type>()[col.offset()];
+      auto const base_offset            = offsets.data<size_type>()[col.offset()];
       cur_span.row_start =
-        offsets.data<offset_type>()[cur_span.row_start + col.offset()] - base_offset;
-      cur_span.row_end = offsets.data<offset_type>()[cur_span.row_end + col.offset()] - base_offset;
+        offsets.data<size_type>()[cur_span.row_start + col.offset()] - base_offset;
+      cur_span.row_end = offsets.data<size_type>()[cur_span.row_end + col.offset()] - base_offset;
     }
 
     last_branch_depth = info[idx].branch_depth_end;
diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp
index 3eccb6b2a55..66de4e19b27 100644
--- a/cpp/tests/column/factories_test.cpp
+++ b/cpp/tests/column/factories_test.cpp
@@ -508,7 +508,7 @@ TYPED_TEST_SUITE(ListsDictionaryLeafTest, cudf::test::FixedWidthTypes);
 TYPED_TEST(ListsDictionaryLeafTest, FromNonNested)
 {
   using DCW      = cudf::test::dictionary_column_wrapper<TypeParam>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto s   = cudf::make_list_scalar(DCW({1, 3, -1, 1, 3}, {1, 1, 0, 1, 1}));
   auto col = cudf::make_column_from_scalar(*s, 2);
@@ -524,7 +524,7 @@ TYPED_TEST(ListsDictionaryLeafTest, FromNonNested)
 TYPED_TEST(ListsDictionaryLeafTest, FromNested)
 {
   using DCW      = cudf::test::dictionary_column_wrapper<TypeParam>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   DCW leaf({1, 3, -1, 1, 3, 1, 3, -1, 1, 3}, {1, 1, 0, 1, 1, 1, 1, 0, 1, 1});
   offset_t offsets{0, 3, 3, 6, 6, 10};
@@ -617,7 +617,7 @@ TYPED_TEST(ListsStructsLeafTest, FromNonNested)
 {
   using LCWinner_t = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
   using StringCW   = cudf::test::strings_column_wrapper;
-  using offset_t   = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t   = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
   using valid_t    = std::vector<cudf::valid_type>;
 
   auto data = this->make_test_structs_column(
@@ -648,7 +648,7 @@ TYPED_TEST(ListsStructsLeafTest, FromNested)
 {
   using LCWinner_t = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
   using StringCW   = cudf::test::strings_column_wrapper;
-  using offset_t   = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t   = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
   using valid_t    = std::vector<cudf::valid_type>;
   auto leaf        = this->make_test_structs_column(
     {{1, 2}, {0, 1}},
@@ -702,7 +702,7 @@ TEST_F(ListsZeroLengthColumnTest, MixedTypes)
   using FCW      = cudf::test::fixed_width_column_wrapper<int32_t>;
   using StringCW = cudf::test::strings_column_wrapper;
   using LCW      = cudf::test::lists_column_wrapper<int32_t>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
   {
     auto s   = cudf::make_list_scalar(FCW{1, 2, 3});
     auto got = cudf::make_column_from_scalar(*s, 0);
@@ -759,7 +759,7 @@ TEST_F(ListsZeroLengthColumnTest, SuperimposeNulls)
   using FCW      = cudf::test::fixed_width_column_wrapper<int32_t>;
   using StringCW = cudf::test::strings_column_wrapper;
   using LCW      = cudf::test::lists_column_wrapper<int32_t>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto const lists = [&] {
     auto child = this
diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp
index 99ef1df1c2b..7701ca1ba56 100644
--- a/cpp/tests/copying/concatenate_tests.cpp
+++ b/cpp/tests/copying/concatenate_tests.cpp
@@ -377,7 +377,7 @@ TEST_F(OverflowTest, OverflowTest)
     constexpr auto size = static_cast<cudf::size_type>(static_cast<uint32_t>(1024) * 1024 * 1024);
 
     // try and concatenate 6 string columns of with 1 billion chars in each
-    auto offsets    = cudf::test::fixed_width_column_wrapper<cudf::offset_type>{0, size};
+    auto offsets    = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, size};
     auto many_chars = cudf::make_fixed_width_column(cudf::data_type{cudf::type_id::INT8}, size);
     auto col        = cudf::make_strings_column(
       1, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{});
@@ -418,7 +418,7 @@ TEST_F(OverflowTest, OverflowTest)
       cudf::make_structs_column(inner_size, std::move(children), 0, rmm::device_buffer{});
 
     // list
-    auto offsets = cudf::test::fixed_width_column_wrapper<cudf::offset_type>{0, inner_size};
+    auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, inner_size};
     auto col =
       cudf::make_lists_column(1, offsets.release(), std::move(struct_col), 0, rmm::device_buffer{});
 
@@ -435,7 +435,7 @@ TEST_F(OverflowTest, OverflowTest)
     constexpr cudf::size_type size = 3;
 
     // list
-    auto offsets = cudf::test::fixed_width_column_wrapper<cudf::offset_type>{0, 0, 0, inner_size};
+    auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 0, inner_size};
     auto many_chars =
       cudf::make_fixed_width_column(cudf::data_type{cudf::type_id::INT8}, inner_size);
     auto list_col =
@@ -643,7 +643,7 @@ TEST_F(OverflowTest, Presliced)
     constexpr cudf::size_type list_size = inner_size / num_rows;
 
     // list
-    auto offsets = cudf::test::fixed_width_column_wrapper<cudf::offset_type>{
+    auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>{
       0, list_size, (list_size * 2) - 1, list_size * 3, inner_size};
     auto many_chars =
       cudf::make_fixed_width_column(cudf::data_type{cudf::type_id::INT8}, inner_size);
diff --git a/cpp/tests/copying/copy_if_else_nested_tests.cpp b/cpp/tests/copying/copy_if_else_nested_tests.cpp
index ff28156ef1d..579e1bdce8a 100644
--- a/cpp/tests/copying/copy_if_else_nested_tests.cpp
+++ b/cpp/tests/copying/copy_if_else_nested_tests.cpp
@@ -332,7 +332,7 @@ TYPED_TEST(TypedCopyIfElseNestedTest, ListsWithStructs)
   using strings = cudf::test::strings_column_wrapper;
   using structs = cudf::test::structs_column_wrapper;
   using bools   = cudf::test::fixed_width_column_wrapper<bool, int32_t>;
-  using offsets = cudf::test::fixed_width_column_wrapper<cudf::offset_type, int32_t>;
+  using offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type, int32_t>;
 
   auto const null_at_0 = null_at(0);
   auto const null_at_3 = null_at(3);
diff --git a/cpp/tests/copying/gather_struct_tests.cpp b/cpp/tests/copying/gather_struct_tests.cpp
index ebfd950df4d..2bc18c706db 100644
--- a/cpp/tests/copying/gather_struct_tests.cpp
+++ b/cpp/tests/copying/gather_struct_tests.cpp
@@ -37,7 +37,7 @@
 #include <memory>
 
 using vector_of_columns = std::vector<std::unique_ptr<cudf::column>>;
-using gather_map_t      = std::vector<cudf::offset_type>;
+using gather_map_t      = std::vector<cudf::size_type>;
 using offsets           = cudf::test::fixed_width_column_wrapper<int32_t>;
 using structs           = cudf::test::structs_column_wrapper;
 using strings           = cudf::test::strings_column_wrapper;
@@ -54,7 +54,7 @@ using numerics = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 template <typename T>
 using lists = cudf::test::lists_column_wrapper<T, int32_t>;
 
-auto constexpr null_index = std::numeric_limits<cudf::offset_type>::max();
+auto constexpr null_index = std::numeric_limits<cudf::size_type>::max();
 
 struct StructGatherTest : public cudf::test::BaseFixture {};
 
diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp
index a18ed6a1ccf..d322fbe11f2 100644
--- a/cpp/tests/copying/get_value_tests.cpp
+++ b/cpp/tests/copying/get_value_tests.cpp
@@ -311,7 +311,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull)
 {
   using LCW      = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
   using FCW      = cudf::test::fixed_width_column_wrapper<TypeParam>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   std::vector<cudf::valid_type> valid{1, 0, 1, 0};
   // clang-format off
@@ -466,7 +466,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullEmpty)
 TEST_F(ListGetStringValueTest, NestedGetNull)
 {
   using LCW      = cudf::test::lists_column_wrapper<cudf::string_view>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
   using StringCW = cudf::test::strings_column_wrapper;
 
   std::vector<cudf::valid_type> valid{0, 0, 1, 1};
@@ -508,7 +508,7 @@ struct ListGetStructValueTest : public cudf::test::BaseFixture {
    */
   std::unique_ptr<cudf::column> make_test_lists_column(
     cudf::size_type num_lists,
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets,
+    cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets,
     std::unique_ptr<cudf::column> child,
     std::initializer_list<cudf::valid_type> null_mask)
   {
@@ -776,7 +776,7 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNull)
   // NULL                      <- cudf::get_element(2)
 
   using valid_t  = std::vector<cudf::valid_type>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto list_column = this->make_test_lists_column(2, {0, 2, 3}, this->leaf_data(), {1, 1});
   auto list_column_nested =
@@ -900,12 +900,12 @@ TEST_F(StructGetValueTest, multi_level_nested)
   // col fields
   LCW l3({LCW{1, 1, 1}, LCW{2, 2}, LCW{3}}, validity_mask_t{false, true, true}.begin());
   cudf::test::structs_column_wrapper l2{l3};
-  auto l1 = cudf::make_lists_column(
-    1,
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type>{0, 3}.release(),
-    l2.release(),
-    0,
-    cudf::create_null_mask(1, cudf::mask_state::UNALLOCATED));
+  auto l1 =
+    cudf::make_lists_column(1,
+                            cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 3}.release(),
+                            l2.release(),
+                            0,
+                            cudf::create_null_mask(1, cudf::mask_state::UNALLOCATED));
   std::vector<std::unique_ptr<cudf::column>> l0_fields;
   l0_fields.emplace_back(std::move(l1));
   cudf::test::structs_column_wrapper l0(std::move(l0_fields));
diff --git a/cpp/tests/copying/scatter_list_scalar_tests.cpp b/cpp/tests/copying/scatter_list_scalar_tests.cpp
index 9dda3c12edf..42d2e004d6b 100644
--- a/cpp/tests/copying/scatter_list_scalar_tests.cpp
+++ b/cpp/tests/copying/scatter_list_scalar_tests.cpp
@@ -307,7 +307,7 @@ TYPED_TEST_SUITE(ScatterListOfStructScalarTest, cudf::test::FixedWidthTypesWitho
 TYPED_TEST(ScatterListOfStructScalarTest, Basic)
 {
   using LCW      = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto data =
     this->make_test_structs({{42, 42, 42}, {1, 0, 1}},
@@ -346,7 +346,7 @@ TYPED_TEST(ScatterListOfStructScalarTest, Basic)
 TYPED_TEST(ScatterListOfStructScalarTest, EmptyValidScalar)
 {
   using LCW      = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto data = this->make_test_structs({}, {}, LCW{}, {});
   auto slr  = std::make_unique<cudf::list_scalar>(data, true);
@@ -379,7 +379,7 @@ TYPED_TEST(ScatterListOfStructScalarTest, EmptyValidScalar)
 TYPED_TEST(ScatterListOfStructScalarTest, NullScalar)
 {
   using LCW      = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto data = this->make_test_structs({}, {}, {}, {});
   auto slr  = std::make_unique<cudf::list_scalar>(data, false);
@@ -411,7 +411,7 @@ TYPED_TEST(ScatterListOfStructScalarTest, NullScalar)
 TYPED_TEST(ScatterListOfStructScalarTest, NullableTargetRow)
 {
   using LCW      = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
-  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offset_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto data =
     this->make_test_structs({{42, 42, 42}, {1, 0, 1}},
diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp
index c9a53d6ebe0..da85242410b 100644
--- a/cpp/tests/copying/split_tests.cpp
+++ b/cpp/tests/copying/split_tests.cpp
@@ -2072,8 +2072,7 @@ TEST_F(ContiguousSplitTableCornerCases, PreSplitList)
 
   // list<struct<float>>
   {
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{
-      0, 2, 5, 7, 10, 12, 14, 17, 20};
+    cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 2, 5, 7, 10, 12, 14, 17, 20};
     cudf::test::fixed_width_column_wrapper<float> floats{1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
                                                          11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
     cudf::test::structs_column_wrapper data({floats});
@@ -2131,8 +2130,7 @@ TEST_F(ContiguousSplitTableCornerCases, PreSplitStructs)
 
   // struct<list<struct>>
   {
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{
-      0, 2, 5, 7, 10, 12, 14, 17, 20};
+    cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 2, 5, 7, 10, 12, 14, 17, 20};
     cudf::test::fixed_width_column_wrapper<float> floats{1,  2,  3,  4,  5,  6,  7,  8,  9,  10,
                                                          11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
     cudf::test::structs_column_wrapper data({floats});
diff --git a/cpp/tests/groupby/collect_list_tests.cpp b/cpp/tests/groupby/collect_list_tests.cpp
index 485febe0d92..749f4013013 100644
--- a/cpp/tests/groupby/collect_list_tests.cpp
+++ b/cpp/tests/groupby/collect_list_tests.cpp
@@ -146,7 +146,7 @@ TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInputLists)
 
   using LCW = cudf::test::lists_column_wrapper<V, int32_t>;
 
-  auto offsets = cudf::data_type{cudf::type_to_id<cudf::offset_type>()};
+  auto offsets = cudf::data_type{cudf::type_to_id<cudf::size_type>()};
 
   cudf::test::fixed_width_column_wrapper<K, int32_t> keys{};
   auto values =
@@ -176,7 +176,7 @@ TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInputListsOfStructs)
 
   auto values =
     cudf::make_lists_column(0,
-                            cudf::make_empty_column(cudf::type_to_id<cudf::offset_type>()),
+                            cudf::make_empty_column(cudf::type_to_id<cudf::size_type>()),
                             struct_column.release(),
                             0,
                             {});
@@ -188,13 +188,13 @@ TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInputListsOfStructs)
 
   auto expect_child =
     cudf::make_lists_column(0,
-                            cudf::make_empty_column(cudf::type_to_id<cudf::offset_type>()),
+                            cudf::make_empty_column(cudf::type_to_id<cudf::size_type>()),
                             expect_struct_column.release(),
                             0,
                             {});
   auto expect_values =
     cudf::make_lists_column(0,
-                            cudf::make_empty_column(cudf::type_to_id<cudf::offset_type>()),
+                            cudf::make_empty_column(cudf::type_to_id<cudf::size_type>()),
                             std::move(expect_child),
                             0,
                             {});
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index a5054daed19..ea2bad0cabf 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -2687,8 +2687,8 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes)
   constexpr int floats_per_row = 4;
   auto c1_offset_iter          = cudf::detail::make_counting_transform_iterator(
     0, [floats_per_row](cudf::size_type idx) { return idx * floats_per_row; });
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> c1_offsets(
-    c1_offset_iter, c1_offset_iter + num_rows + 1);
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> c1_offsets(c1_offset_iter,
+                                                                     c1_offset_iter + num_rows + 1);
   cudf::test::fixed_width_column_wrapper<float> c1_floats(
     values, values + (num_rows * floats_per_row), valids);
   auto [null_mask, null_count] = cudf::test::detail::make_null_mask(valids, valids + num_rows);
@@ -2711,8 +2711,8 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes)
   cudf::test::strings_column_wrapper string_col{string_iter, string_iter + num_string_rows};
   auto offset_iter = cudf::detail::make_counting_transform_iterator(
     0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; });
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets(offset_iter,
-                                                                    offset_iter + num_rows + 1);
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets(offset_iter,
+                                                                  offset_iter + num_rows + 1);
 
   auto _c3_valids =
     cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 200; });
@@ -5034,8 +5034,8 @@ TEST_F(ParquetReaderTest, NestingOptimizationTest)
       0, [depth, rows_per_level](cudf::size_type i) { return i * rows_per_level; });
     total_values_produced += (num_rows + 1);
 
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets(offsets_iter,
-                                                                      offsets_iter + num_rows + 1);
+    cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets(offsets_iter,
+                                                                    offsets_iter + num_rows + 1);
     auto c   = cudf::make_lists_column(num_rows, offsets.release(), std::move(prev_col), 0, {});
     prev_col = std::move(c);
   }
diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp
index 2c2b3c8b29c..017cd471e01 100644
--- a/cpp/tests/lists/extract_tests.cpp
+++ b/cpp/tests/lists/extract_tests.cpp
@@ -269,7 +269,7 @@ TYPED_TEST(ListsExtractColumnIndicesTypedTest, ExtractElement)
 {
   using LCW     = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
   using FWCW    = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>;
-  using indices = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using indices = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto input_column = LCW({LCW{3, 2, 1}, LCW{}, LCW{30, 20, 10, 50}, LCW{100, 120}, LCW{0}, LCW{}},
                           cudf::test::iterators::null_at(1));
@@ -329,7 +329,7 @@ TYPED_TEST(ListsExtractColumnIndicesTypedTest, ExtractElement)
 TYPED_TEST(ListsExtractColumnIndicesTypedTest, FailureCases)
 {
   using LCW     = cudf::test::lists_column_wrapper<TypeParam, int32_t>;
-  using indices = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using indices = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   {
     // Non-empty input, with mismatched size of indices.
@@ -361,7 +361,7 @@ TEST_F(ListsExtractColumnIndicesTest, ExtractStrings)
 {
   using LCW     = cudf::test::lists_column_wrapper<cudf::string_view>;
   using strings = cudf::test::strings_column_wrapper;
-  using indices = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using indices = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   auto input_column = LCW(
     {LCW{"3", "2", "1"}, LCW{}, LCW{"30", "20", "10", "50"}, LCW{"100", "120"}, LCW{"0"}, LCW{}},
diff --git a/cpp/tests/quantiles/percentile_approx_test.cpp b/cpp/tests/quantiles/percentile_approx_test.cpp
index c6069acad8a..46d4066ddff 100644
--- a/cpp/tests/quantiles/percentile_approx_test.cpp
+++ b/cpp/tests/quantiles/percentile_approx_test.cpp
@@ -383,7 +383,7 @@ TEST_F(PercentileApproxTest, EmptyInput)
   cudf::tdigest::tdigest_column_view tdv(*empty);
   auto result = cudf::percentile_approx(tdv, percentiles);
 
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{0, 0, 0, 0};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 0, 0, 0};
   std::vector<bool> nulls{0, 0, 0};
   auto [null_mask, null_count] = cudf::test::detail::make_null_mask(nulls.begin(), nulls.end());
 
@@ -415,7 +415,7 @@ TEST_F(PercentileApproxTest, EmptyPercentiles)
   cudf::tdigest::tdigest_column_view tdv(*tdigest_column.second[0].results[0]);
   auto result = cudf::percentile_approx(tdv, percentiles);
 
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{0, 0, 0};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 0, 0};
   std::vector<bool> nulls{0, 0};
   auto [null_mask, null_count] = cudf::test::detail::make_null_mask(nulls.begin(), nulls.end());
 
diff --git a/cpp/tests/reductions/tdigest_tests.cu b/cpp/tests/reductions/tdigest_tests.cu
index b0087766c0c..c8fec51e1c9 100644
--- a/cpp/tests/reductions/tdigest_tests.cu
+++ b/cpp/tests/reductions/tdigest_tests.cu
@@ -94,7 +94,7 @@ TEST_F(ReductionTDigestMerge, FewHeavyCentroids)
   cudf::test::fixed_width_column_wrapper<double> c0c{1.0, 2.0};
   cudf::test::fixed_width_column_wrapper<double> c0w{100.0, 50.0};
   cudf::test::structs_column_wrapper c0s({c0c, c0w});
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> c0_offsets{0, 2};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> c0_offsets{0, 2};
   auto c0l =
     cudf::make_lists_column(1, c0_offsets.release(), c0s.release(), 0, rmm::device_buffer{});
   cudf::test::fixed_width_column_wrapper<double> c0min{1.0};
@@ -111,7 +111,7 @@ TEST_F(ReductionTDigestMerge, FewHeavyCentroids)
   cudf::test::fixed_width_column_wrapper<double> c1c{3.0, 4.0};
   cudf::test::fixed_width_column_wrapper<double> c1w{200.0, 50.0};
   cudf::test::structs_column_wrapper c1s({c1c, c1w});
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> c1_offsets{0, 2};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> c1_offsets{0, 2};
   auto c1l =
     cudf::make_lists_column(1, c1_offsets.release(), c1s.release(), 0, rmm::device_buffer{});
   cudf::test::fixed_width_column_wrapper<double> c1min{3.0};
@@ -147,7 +147,7 @@ TEST_F(ReductionTDigestMerge, FewHeavyCentroids)
   cudf::test::fixed_width_column_wrapper<double> ec{1.0, 2.0, 3.0, 4.0};
   cudf::test::fixed_width_column_wrapper<double> ew{100.0, 50.0, 200.0, 50.0};
   cudf::test::structs_column_wrapper es({ec, ew});
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> e_offsets{0, 4};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> e_offsets{0, 4};
   auto el = cudf::make_lists_column(1, e_offsets.release(), es.release(), 0, rmm::device_buffer{});
   cudf::test::fixed_width_column_wrapper<double> emin{1.0};
   cudf::test::fixed_width_column_wrapper<double> emax{4.0};
diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp
index e8e603f8533..ecc38dfd26e 100644
--- a/cpp/tests/strings/array_tests.cpp
+++ b/cpp/tests/strings/array_tests.cpp
@@ -152,7 +152,7 @@ TEST_F(StringsColumnTest, GatherTooBig)
 {
   std::vector<int8_t> h_chars(3000000);
   cudf::test::fixed_width_column_wrapper<int8_t> chars(h_chars.begin(), h_chars.end());
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets({0, 3000000});
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets({0, 3000000});
   auto input = cudf::column_view(
     cudf::data_type{cudf::type_id::STRING}, 1, nullptr, nullptr, 0, 0, {offsets, chars});
   auto map = thrust::constant_iterator<int8_t>(0);
diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp
index 868785b4612..0cb5023a32e 100644
--- a/cpp/tests/strings/contains_tests.cpp
+++ b/cpp/tests/strings/contains_tests.cpp
@@ -294,9 +294,9 @@ TEST_F(StringsContainsTests, HexTest)
   std::vector<char> ascii_chars(  // all possible matchable chars
     {thrust::make_counting_iterator<char>(0), thrust::make_counting_iterator<char>(127)});
   auto const count = static_cast<cudf::size_type>(ascii_chars.size());
-  std::vector<cudf::offset_type> offsets(
-    {thrust::make_counting_iterator<cudf::offset_type>(0),
-     thrust::make_counting_iterator<cudf::offset_type>(0) + count + 1});
+  std::vector<cudf::size_type> offsets(
+    {thrust::make_counting_iterator<cudf::size_type>(0),
+     thrust::make_counting_iterator<cudf::size_type>(0) + count + 1});
   auto d_chars = cudf::detail::make_device_uvector_sync(
     ascii_chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   auto d_offsets = cudf::detail::make_device_uvector_sync(
diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu
index d8594fa4923..a3d392cfed0 100644
--- a/cpp/tests/strings/factories_test.cu
+++ b/cpp/tests/strings/factories_test.cu
@@ -99,8 +99,8 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair)
     cudf::device_span<char const>(strings_view.chars().data<char>(), strings_view.chars().size()),
     cudf::get_default_stream());
   auto h_offsets_data = cudf::detail::make_std_vector_sync(
-    cudf::device_span<cudf::offset_type const>(
-      strings_view.offsets().data<cudf::offset_type>() + strings_view.offset(),
+    cudf::device_span<cudf::size_type const>(
+      strings_view.offsets().data<cudf::size_type>() + strings_view.offset(),
       strings_view.size() + 1),
     cudf::get_default_stream());
   EXPECT_EQ(memcmp(h_buffer.data(), h_chars_data.data(), h_buffer.size()), 0);
@@ -164,8 +164,8 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets)
     cudf::device_span<char const>(strings_view.chars().data<char>(), strings_view.chars().size()),
     cudf::get_default_stream());
   auto h_offsets_data = cudf::detail::make_std_vector_sync(
-    cudf::device_span<cudf::offset_type const>(
-      strings_view.offsets().data<cudf::offset_type>() + strings_view.offset(),
+    cudf::device_span<cudf::size_type const>(
+      strings_view.offsets().data<cudf::size_type>() + strings_view.offset(),
       strings_view.size() + 1),
     cudf::get_default_stream());
   EXPECT_EQ(memcmp(h_buffer.data(), h_chars_data.data(), h_buffer.size()), 0);
diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu
index 095495456e9..4832cdf816f 100644
--- a/cpp/tests/transform/row_bit_count_test.cu
+++ b/cpp/tests/transform/row_bit_count_test.cu
@@ -98,10 +98,10 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> build_li
   // }
   cudf::test::fixed_width_column_wrapper<T> values{
     1, 2, 3, 4, 5, 10, 6, 7, 8, 9, -1, -2, -3, -4, -5, -6, -7, -8, -9};
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> inner_offsets{
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> inner_offsets{
     0, 2, 5, 6, 9, 10, 12, 14, 17, 19};
   auto inner_list = cudf::make_lists_column(9, inner_offsets.release(), values.release(), 0, {});
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> outer_offsets{0, 2, 2, 3, 5, 7, 9};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> outer_offsets{0, 2, 2, 3, 5, 7, 9};
   auto list = cudf::make_lists_column(6, outer_offsets.release(), std::move(inner_list), 0, {});
 
   // expected size = (num rows at level 1 + num_rows at level 2) + # values in the leaf
@@ -142,13 +142,13 @@ TYPED_TEST(RowBitCountTyped, ListsWithNulls)
   // }
   cudf::test::fixed_width_column_wrapper<T> values{{1, 2, 3, 4, 5, 10, 6, 7, 8},
                                                    {1, 1, 1, 0, 1, 1, 0, 1, 0}};
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> inner_offsets{0, 2, 5, 6, 9, 9};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> inner_offsets{0, 2, 5, 6, 9, 9};
   std::vector<bool> inner_list_validity{1, 1, 1, 1, 0};
   auto [null_mask, null_count] =
     cudf::test::detail::make_null_mask(inner_list_validity.begin(), inner_list_validity.end());
   auto inner_list = cudf::make_lists_column(
     5, inner_offsets.release(), values.release(), null_count, std::move(null_mask));
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> outer_offsets{0, 2, 2, 3, 5};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> outer_offsets{0, 2, 2, 3, 5};
   auto list = cudf::make_lists_column(4, outer_offsets.release(), std::move(inner_list), 0, {});
 
   cudf::table_view t({*list});
@@ -177,7 +177,7 @@ TEST_F(RowBitCount, Strings)
 
   // expect 1 offset (4 bytes) + length of string per row
   auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings](int i) {
-    return (static_cast<cudf::size_type>(strings[i].size()) + sizeof(cudf::offset_type)) * CHAR_BIT;
+    return (static_cast<cudf::size_type>(strings[i].size()) + sizeof(cudf::size_type)) * CHAR_BIT;
   });
   cudf::test::fixed_width_column_wrapper<cudf::size_type> expected(size_iter,
                                                                    size_iter + strings.size());
@@ -200,7 +200,7 @@ TEST_F(RowBitCount, StringsWithNulls)
   // expect 1 offset (4 bytes) + (length of string, or 0 if null) + 1 validity bit per row
   auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings, &valids](int i) {
     return ((static_cast<cudf::size_type>(valids[i] ? strings[i].size() : 0) +
-             sizeof(cudf::offset_type)) *
+             sizeof(cudf::size_type)) *
             CHAR_BIT) +
            1;
   });
@@ -247,8 +247,8 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock)
     cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, num_rows + 1);
   auto list_offsets_view = list_offsets->mutable_view();
   thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
-                   list_offsets_view.begin<cudf::offset_type>(),
-                   list_offsets_view.end<cudf::offset_type>(),
+                   list_offsets_view.begin<cudf::size_type>(),
+                   list_offsets_view.end<cudf::size_type>(),
                    times_2{});
 
   // List<int32_t> = {{0,1}, {2,3}, {4,5}, ..., {2*(num_rows-1), 2*num_rows-1}};
@@ -267,7 +267,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock)
   thrust::fill_n(rmm::exec_policy(cudf::get_default_stream()),
                  expected_row_bit_counts->mutable_view().begin<int32_t>(),
                  num_rows,
-                 CHAR_BIT * (2 * sizeof(int32_t) + sizeof(cudf::offset_type)));
+                 CHAR_BIT * (2 * sizeof(int32_t) + sizeof(cudf::size_type)));
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(row_bit_counts->view(), expected_row_bit_counts->view());
 }
@@ -309,8 +309,7 @@ TEST_F(RowBitCount, StructsNoNulls)
   // expect 1 offset (4 bytes) + (length of string) + 1 float + 1 int16_t
   auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings](int i) {
     return ((sizeof(float) + sizeof(int16_t)) * CHAR_BIT) +
-           ((static_cast<cudf::size_type>(strings[i].size()) + sizeof(cudf::offset_type)) *
-            CHAR_BIT);
+           ((static_cast<cudf::size_type>(strings[i].size()) + sizeof(cudf::size_type)) * CHAR_BIT);
   });
   cudf::test::fixed_width_column_wrapper<cudf::size_type> expected(size_iter,
                                                                    size_iter + t.num_rows());
@@ -534,7 +533,7 @@ TEST_F(RowBitCount, NestedTypes)
 
 TEST_F(RowBitCount, NullsInStringsList)
 {
-  using offsets_wrapper = cudf::test::fixed_width_column_wrapper<cudf::offset_type>;
+  using offsets_wrapper = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
   // clang-format off
   auto strings = std::vector<std::string>{ "daïs", "def", "", "z", "bananas", "warp", "", "zing" };
@@ -552,7 +551,7 @@ TEST_F(RowBitCount, NullsInStringsList)
     {});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
     cudf::row_bit_count(cudf::table_view{{lists_col->view()}})->view(),
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type>{138, 106, 130, 130});
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{138, 106, 130, 130});
 }
 
 TEST_F(RowBitCount, EmptyChildColumnInListOfStrings)
@@ -560,13 +559,13 @@ TEST_F(RowBitCount, EmptyChildColumnInListOfStrings)
   // Test with a list<string> column with 4 empty list rows.
   // Note: Since there are no strings in any of the lists,
   //       the lists column's child can be empty.
-  auto offsets   = cudf::test::fixed_width_column_wrapper<cudf::offset_type>{0, 0, 0, 0, 0};
+  auto offsets   = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 0, 0, 0};
   auto lists_col = cudf::make_lists_column(
     4, offsets.release(), cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING}), 0, {});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
     cudf::row_bit_count(cudf::table_view{{lists_col->view()}})->view(),
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type>{32, 32, 32, 32});
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{32, 32, 32, 32});
 }
 
 TEST_F(RowBitCount, EmptyChildColumnInListOfLists)
@@ -579,12 +578,12 @@ TEST_F(RowBitCount, EmptyChildColumnInListOfLists)
     return cudf::empty_like(exemplar);
   };
 
-  auto offsets   = cudf::test::fixed_width_column_wrapper<cudf::offset_type>{0, 0, 0, 0, 0};
+  auto offsets   = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 0, 0, 0};
   auto lists_col = cudf::make_lists_column(4, offsets.release(), empty_child_lists_column(), 0, {});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
     cudf::row_bit_count(cudf::table_view{{lists_col->view()}})->view(),
-    cudf::test::fixed_width_column_wrapper<cudf::offset_type>{32, 32, 32, 32});
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{32, 32, 32, 32});
 }
 
 struct sum_functor {
@@ -639,12 +638,12 @@ TEST_F(RowBitCount, DepthJump)
   // the jump occurs from depth 2 (the leafmost int column)
   // to depth 0 (the topmost int column)
   cudf::test::fixed_width_column_wrapper<T> ____c0{1, 2, 3, 5, 5, 6, 7, 8};
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> ___offsets{0, 2, 4, 6, 8};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> ___offsets{0, 2, 4, 6, 8};
   auto ___c0 = cudf::make_lists_column(4, ___offsets.release(), ____c0.release(), 0, {});
   std::vector<std::unique_ptr<cudf::column>> __children;
   __children.push_back(std::move(___c0));
   cudf::test::structs_column_wrapper __c0(std::move(__children));
-  cudf::test::fixed_width_column_wrapper<cudf::offset_type> _offsets{0, 3, 4};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> _offsets{0, 3, 4};
   auto _c0 = cudf::make_lists_column(2, _offsets.release(), __c0.release(), 0, {});
   cudf::test::fixed_width_column_wrapper<int> _c1{3, 4};
   std::vector<std::unique_ptr<cudf::column>> children;
@@ -657,7 +656,7 @@ TEST_F(RowBitCount, DepthJump)
 
   // expected size = (num rows at level 1 + num_rows at level 2) + (# values the leaf int column) +
   // 1 (value in topmost int column)
-  constexpr cudf::size_type offset_size = sizeof(cudf::offset_type) * CHAR_BIT;
+  constexpr cudf::size_type offset_size = sizeof(cudf::size_type) * CHAR_BIT;
   constexpr cudf::size_type type_size   = sizeof(T) * CHAR_BIT;
   cudf::test::fixed_width_column_wrapper<cudf::size_type> expected{
     ((1 + 3) * offset_size) + (6 * type_size) + (1 * type_size),
@@ -693,7 +692,7 @@ TEST_F(RowBitCount, SlicedColumnsStrings)
 
   // expect 1 offset (4 bytes) + length of string per row
   auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings](int i) {
-    return (static_cast<cudf::size_type>(strings[i].size()) + sizeof(cudf::offset_type)) * CHAR_BIT;
+    return (static_cast<cudf::size_type>(strings[i].size()) + sizeof(cudf::size_type)) * CHAR_BIT;
   });
   cudf::test::fixed_width_column_wrapper<cudf::size_type> expected(size_iter + 3,
                                                                    size_iter + 3 + slice_size);
@@ -736,7 +735,7 @@ TEST_F(RowBitCount, SlicedColumnsStructs)
 
   // expect 1 offset (4 bytes) + length of string per row + 1 int16_t per row
   auto size_iter = cudf::detail::make_counting_transform_iterator(0, [&strings](int i) {
-    return (static_cast<cudf::size_type>(strings[i].size()) + sizeof(cudf::offset_type) +
+    return (static_cast<cudf::size_type>(strings[i].size()) + sizeof(cudf::size_type) +
             sizeof(int16_t)) *
            CHAR_BIT;
   });
diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu
index 3e0545935ef..fcaf23fd456 100644
--- a/cpp/tests/utilities/column_utilities.cu
+++ b/cpp/tests/utilities/column_utilities.cu
@@ -123,7 +123,7 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
     0,
     [row_indices = row_indices.begin<size_type>(),
      validity    = c.null_mask(),
-     offsets     = c.offsets().begin<offset_type>(),
+     offsets     = c.offsets().begin<size_type>(),
      offset      = c.offset()] __device__(int index) {
       // both null mask and offsets data are not pre-sliced. so we need to add the column offset to
       // every incoming index.
@@ -168,9 +168,9 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
   auto output_row_iter = cudf::detail::make_counting_transform_iterator(
     0,
     [row_indices  = row_indices.begin<size_type>(),
-     offsets      = c.offsets().begin<offset_type>(),
+     offsets      = c.offsets().begin<size_type>(),
      offset       = c.offset(),
-     first_offset = cudf::detail::get_value<offset_type>(
+     first_offset = cudf::detail::get_value<size_type>(
        c.offsets(), c.offset(), cudf::test::get_default_stream())] __device__(int index) {
       auto const true_index = row_indices[index] + offset;
       return offsets[true_index] - first_offset;
diff --git a/cpp/tests/utilities/tdigest_utilities.cu b/cpp/tests/utilities/tdigest_utilities.cu
index d2e95812894..9294aa0f681 100644
--- a/cpp/tests/utilities/tdigest_utilities.cu
+++ b/cpp/tests/utilities/tdigest_utilities.cu
@@ -110,12 +110,12 @@ std::unique_ptr<column> make_expected_tdigest_column(std::vector<expected_tdiges
     auto tdigests =
       cudf::make_structs_column(tdigest.mean.size(), std::move(inner_children), 0, {});
 
-    std::vector<offset_type> h_offsets{0, tdigest.mean.size()};
+    std::vector<size_type> h_offsets{0, tdigest.mean.size()};
     auto offsets =
       cudf::make_fixed_width_column(data_type{type_id::INT32}, 2, mask_state::UNALLOCATED);
-    CUDF_CUDA_TRY(cudaMemcpy(offsets->mutable_view().begin<offset_type>(),
+    CUDF_CUDA_TRY(cudaMemcpy(offsets->mutable_view().begin<size_type>(),
                              h_offsets.data(),
-                             sizeof(offset_type) * 2,
+                             sizeof(size_type) * 2,
                              cudaMemcpyDefault));
 
     auto list = cudf::make_lists_column(1, std::move(offsets), std::move(tdigests), 0, {});
diff --git a/cpp/tests/utilities_tests/column_utilities_tests.cpp b/cpp/tests/utilities_tests/column_utilities_tests.cpp
index 6cdcdd22dd7..e90a3f9ac6e 100644
--- a/cpp/tests/utilities_tests/column_utilities_tests.cpp
+++ b/cpp/tests/utilities_tests/column_utilities_tests.cpp
@@ -385,7 +385,7 @@ TEST_F(ColumnUtilitiesListsTest, UnsanitaryLists)
   //    0, 1, 2
   std::vector<std::unique_ptr<cudf::column>> children;
   children.emplace_back(
-    std::move(cudf::test::fixed_width_column_wrapper<cudf::offset_type>{0, 3}.release()));
+    std::move(cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 3}.release()));
   children.emplace_back(std::move(cudf::test::fixed_width_column_wrapper<int>{0, 1, 2}.release()));
 
   auto l0 = std::make_unique<cudf::column>(cudf::data_type{cudf::type_id::LIST},
diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu
index 580f63d73b2..56aea0b45e2 100644
--- a/java/src/main/native/src/ColumnViewJni.cu
+++ b/java/src/main/native/src/ColumnViewJni.cu
@@ -208,10 +208,10 @@ std::unique_ptr<cudf::column> lists_distinct_by_key(cudf::lists_column_view cons
       cudf::make_structs_column(out_labels.size(), std::move(out_structs_members), 0, {});
 
   // Assemble a lists column of structs<out_keys, out_vals>.
-  auto out_offsets = make_numeric_column(data_type{type_to_id<offset_type>()}, input.size() + 1,
+  auto out_offsets = make_numeric_column(data_type{type_to_id<size_type>()}, input.size() + 1,
                                          mask_state::UNALLOCATED, stream);
-  auto const offsets_begin = out_offsets->mutable_view().template begin<offset_type>();
-  auto const labels_begin = out_labels.template begin<offset_type>();
+  auto const offsets_begin = out_offsets->mutable_view().template begin<size_type>();
+  auto const labels_begin = out_labels.template begin<size_type>();
   cudf::detail::labels_to_offsets(labels_begin, labels_begin + out_labels.size(), offsets_begin,
                                   offsets_begin + out_offsets->size(), stream);
 
diff --git a/java/src/main/native/src/row_conversion.cu b/java/src/main/native/src/row_conversion.cu
index a0dbfb3b38c..d93d38c7758 100644
--- a/java/src/main/native/src/row_conversion.cu
+++ b/java/src/main/native/src/row_conversion.cu
@@ -179,9 +179,9 @@ struct tile_info {
  *
  */
 struct row_batch {
-  size_type num_bytes;                     // number of bytes in this batch
-  size_type row_count;                     // number of rows in the batch
-  device_uvector<offset_type> row_offsets; // offsets column of output cudf column
+  size_type num_bytes;                   // number of bytes in this batch
+  size_type row_count;                   // number of rows in the batch
+  device_uvector<size_type> row_offsets; // offsets column of output cudf column
 };
 
 /**
diff --git a/python/cudf/cudf/_lib/cpp/types.pxd b/python/cudf/cudf/_lib/cpp/types.pxd
index ee871f06231..11480d774ef 100644
--- a/python/cudf/cudf/_lib/cpp/types.pxd
+++ b/python/cudf/cudf/_lib/cpp/types.pxd
@@ -5,7 +5,6 @@ from libc.stdint cimport int32_t, uint32_t
 
 cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
     ctypedef int32_t size_type
-    ctypedef int32_t offset_type
     ctypedef uint32_t bitmask_type
     ctypedef uint32_t char_utf8
 
diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd
index 740db51db6c..2b08e6863a1 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd
@@ -5,7 +5,7 @@ from libcpp.vector cimport vector
 
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.types cimport bitmask_type, offset_type, size_type
+from cudf._lib.cpp.types cimport bitmask_type, size_type
 
 from .gpumemoryview cimport gpumemoryview
 from .types cimport DataType
@@ -20,7 +20,7 @@ cdef class Column:
         gpumemoryview data
         gpumemoryview mask
         size_type null_count
-        offset_type offset
+        size_type offset
         # children: List[Column]
         list children
 
diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx
index 35c30b280c9..be4eff4c49d 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -6,7 +6,7 @@ from libcpp.utility cimport move
 from rmm._lib.device_buffer cimport DeviceBuffer
 
 from cudf._lib.cpp.column.column cimport column, column_contents
-from cudf._lib.cpp.types cimport offset_type, size_type
+from cudf._lib.cpp.types cimport size_type
 
 from .gpumemoryview cimport gpumemoryview
 from .types cimport DataType
@@ -42,7 +42,7 @@ cdef class Column:
     """
     def __init__(
         self, DataType data_type not None, size_type size, gpumemoryview data,
-        gpumemoryview mask, size_type null_count, offset_type offset,
+        gpumemoryview mask, size_type null_count, size_type offset,
         list children
     ):
         self.data_type = data_type

From b7994bc16b1b1743b0743860b4f02ac4da8245d5 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 3 Aug 2023 07:54:33 -1000
Subject: [PATCH 5/5] Raise NotImplementedError for pd.SparseDtype (#13798)

Currently cuDF seems to cast `pd.SparseDtype` to its subtype instead of maintaining the sparse data type from pandas. Since `pd.SparseDtype` is not supported in cuDF, it is better to raise and tell users to cast directly to the sparse subtype.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/13798
---
 python/cudf/cudf/core/column/column.py   | 10 ++++++++++
 python/cudf/cudf/tests/test_dataframe.py |  6 ++++++
 python/cudf/cudf/tests/test_series.py    |  6 ++++++
 3 files changed, 22 insertions(+)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index b4f3f533d44..da3d04c15c0 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -2010,6 +2010,11 @@ def as_column(
                 return as_column(arbitrary.array)
             elif PANDAS_GE_150 and isinstance(arbitrary.dtype, pd.ArrowDtype):
                 return as_column(pa.array(arbitrary.array, from_pandas=True))
+            elif isinstance(arbitrary.dtype, pd.SparseDtype):
+                raise NotImplementedError(
+                    f"{arbitrary.dtype} is not supported. Convert first to "
+                    f"{arbitrary.dtype.subtype}."
+                )
         if is_categorical_dtype(arbitrary):
             data = as_column(pa.array(arbitrary, from_pandas=True))
         elif is_interval_dtype(arbitrary.dtype):
@@ -2214,6 +2219,11 @@ def as_column(
             )
         if dtype is not None:
             data = data.astype(dtype)
+    elif isinstance(arbitrary, pd.arrays.SparseArray):
+        raise NotImplementedError(
+            f"{arbitrary.dtype} is not supported. Convert first to "
+            f"{arbitrary.dtype.subtype}."
+        )
     elif isinstance(arbitrary, memoryview):
         data = as_column(
             np.asarray(arbitrary), dtype=dtype, nan_as_null=nan_as_null
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e35ab147bf4..0898cb2ef3d 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -10245,6 +10245,12 @@ def test_dataframe_init_columns_named_index():
     assert_eq(gdf, pdf)
 
 
+def test_dataframe_from_pandas_sparse():
+    pdf = pd.DataFrame(range(2), dtype=pd.SparseDtype(np.int64, 0))
+    with pytest.raises(NotImplementedError):
+        cudf.DataFrame(pdf)
+
+
 def test_dataframe_constructor_unbounded_sequence():
     class A:
         def __getitem__(self, key):
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 83d22bbca2d..58eaebae925 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -2206,6 +2206,12 @@ def test_series_contains(data, index):
     assert_eq(False in ps, False in gs)
 
 
+def test_series_from_pandas_sparse():
+    pser = pd.Series(range(2), dtype=pd.SparseDtype(np.int64, 0))
+    with pytest.raises(NotImplementedError):
+        cudf.Series(pser)
+
+
 def test_series_constructor_unbounded_sequence():
     class A:
         def __getitem__(self, key):