From 41c74c05539978d20f1d5ba129242b7af85ed37d Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 6 Nov 2024 18:23:29 +0000
Subject: [PATCH 01/16] Fix pylibcudf isort sections

---
 python/pylibcudf/pyproject.toml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml
index a80c85a1fa8..7ddb70b180e 100644
--- a/python/pylibcudf/pyproject.toml
+++ b/python/pylibcudf/pyproject.toml
@@ -58,11 +58,10 @@ extend = "../../pyproject.toml"
 
 [tool.ruff.lint.isort]
 combine-as-imports = true
-known-first-party = ["cudf"]
-section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"]
+known-first-party = ["pylibcudf"]
+section-order = ["future", "standard-library", "third-party", "rapids", "first-party", "local-folder"]
 
 [tool.ruff.lint.isort.sections]
-dask = ["dask", "distributed", "dask_cuda"]
 rapids = ["rmm"]
 
 [tool.ruff.lint.per-file-ignores]

From 2bea16a9c6fdac000449aeb774e7f86223f485cf Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 6 Nov 2024 13:44:26 +0000
Subject: [PATCH 02/16] Missing pxd signature for apply_boolean_mask

---
 python/pylibcudf/pylibcudf/stream_compaction.pxd | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd
index a4f39792f0c..a20a23e2e58 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pxd
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd
@@ -17,6 +17,8 @@ cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold)
 
 cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold)
 
+cpdef Table apply_boolean_mask(Table source_table, Column boolean_mask)
+
 cpdef Table unique(
     Table input,
     list keys,

From 2522e8418edc1de3f036baaeb1e8e094af678230 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 6 Nov 2024 15:41:54 +0000
Subject: [PATCH 03/16] Import quote style as QuoteStyle

---
 python/pylibcudf/pylibcudf/io/types.pyx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx
index 967d05e7057..e2439fbad88 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyx
+++ b/python/pylibcudf/pylibcudf/io/types.pyx
@@ -27,6 +27,7 @@ from pylibcudf.libcudf.io.types import (
     compression_type as CompressionType,  # no-cython-lint
     column_encoding as ColumnEncoding,  # no-cython-lint
     dictionary_policy as DictionaryPolicy,  # no-cython-lint
+    quote_style as QuoteStyle,  # no-cython-lint
     statistics_freq as StatisticsFreq, # no-cython-lint
 )
 

From 6232dbf510d5d8043e93420a7d101c1a838ea650 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 6 Nov 2024 13:46:06 +0000
Subject: [PATCH 04/16] Type stubs for pylibcudf

---
 python/pylibcudf/pylibcudf/__init__.pyi       |  95 +++++++++++++++
 python/pylibcudf/pylibcudf/aggregation.pyi    | 109 ++++++++++++++++++
 python/pylibcudf/pylibcudf/binaryop.pyi       |  54 +++++++++
 python/pylibcudf/pylibcudf/column.pyi         |  48 ++++++++
 .../pylibcudf/pylibcudf/column_factories.pyi  |  20 ++++
 python/pylibcudf/pylibcudf/concatenate.pyi    |   8 ++
 .../pylibcudf/pylibcudf/contiguous_split.pyi  |  13 +++
 python/pylibcudf/pylibcudf/copying.pyi        |  54 +++++++++
 python/pylibcudf/pylibcudf/datetime.pyi       |  45 ++++++++
 python/pylibcudf/pylibcudf/experimental.pyi   |   5 +
 python/pylibcudf/pylibcudf/expressions.pyi    |  78 +++++++++++++
 python/pylibcudf/pylibcudf/filling.pyi        |  14 +++
 python/pylibcudf/pylibcudf/gpumemoryview.pyi  |   9 ++
 python/pylibcudf/pylibcudf/groupby.pyi        |  38 ++++++
 python/pylibcudf/pylibcudf/hashing.pyi        |  22 ++++
 python/pylibcudf/pylibcudf/interop.pyi        |  50 ++++++++
 python/pylibcudf/pylibcudf/io/__init__.pyi    |  27 +++++
 python/pylibcudf/pylibcudf/io/avro.pyi        |  11 ++
 python/pylibcudf/pylibcudf/io/csv.pyi         |  54 +++++++++
 python/pylibcudf/pylibcudf/io/datasource.pyi  |   4 +
 python/pylibcudf/pylibcudf/io/json.pyi        |  50 ++++++++
 python/pylibcudf/pylibcudf/io/orc.pyi         |  39 +++++++
 python/pylibcudf/pylibcudf/io/parquet.pyi     |  36 ++++++
 python/pylibcudf/pylibcudf/io/timezone.pyi    |   7 ++
 python/pylibcudf/pylibcudf/io/types.pyi       |  97 ++++++++++++++++
 python/pylibcudf/pylibcudf/join.pyi           |  78 +++++++++++++
 python/pylibcudf/pylibcudf/json.pyi           |  23 ++++
 python/pylibcudf/pylibcudf/labeling.pyi       |  10 ++
 python/pylibcudf/pylibcudf/lists.pyi          |  42 +++++++
 python/pylibcudf/pylibcudf/merge.pyi          |  11 ++
 python/pylibcudf/pylibcudf/null_mask.pyi      |  14 +++
 .../pylibcudf/pylibcudf/nvtext/__init__.pyi   |  29 +++++
 .../pylibcudf/nvtext/byte_pair_encode.pyi     |  11 ++
 .../pylibcudf/nvtext/edit_distance.pyi        |   6 +
 .../pylibcudf/nvtext/generate_ngrams.pyi      |  10 ++
 python/pylibcudf/pylibcudf/nvtext/jaccard.pyi |   5 +
 python/pylibcudf/pylibcudf/nvtext/minhash.pyi |  13 +++
 .../pylibcudf/nvtext/ngrams_tokenize.pyi      |   8 ++
 .../pylibcudf/pylibcudf/nvtext/normalize.pyi  |   6 +
 python/pylibcudf/pylibcudf/nvtext/replace.pyi |  17 +++
 python/pylibcudf/pylibcudf/nvtext/stemmer.pyi |   8 ++
 .../pylibcudf/nvtext/subword_tokenize.pyi     |  15 +++
 .../pylibcudf/pylibcudf/nvtext/tokenize.pyi   |  26 +++++
 python/pylibcudf/pylibcudf/partitioning.pyi   |  14 +++
 python/pylibcudf/pylibcudf/py.typed           |   0
 python/pylibcudf/pylibcudf/quantiles.pyi      |  23 ++++
 python/pylibcudf/pylibcudf/reduce.pyi         |  16 +++
 python/pylibcudf/pylibcudf/replace.pyi        |  29 +++++
 python/pylibcudf/pylibcudf/reshape.pyi        |   7 ++
 python/pylibcudf/pylibcudf/rolling.pyi        |  12 ++
 python/pylibcudf/pylibcudf/round.pyi          |  15 +++
 python/pylibcudf/pylibcudf/scalar.pyi         |  10 ++
 python/pylibcudf/pylibcudf/search.pyi         |  19 +++
 python/pylibcudf/pylibcudf/sorting.pyi        |  62 ++++++++++
 .../pylibcudf/pylibcudf/stream_compaction.pyi |  53 +++++++++
 .../pylibcudf/pylibcudf/strings/__init__.pyi  |  55 +++++++++
 .../pylibcudf/strings/attributes.pyi          |   7 ++
 .../pylibcudf/strings/capitalize.pyi          |   8 ++
 python/pylibcudf/pylibcudf/strings/case.pyi   |   7 ++
 .../pylibcudf/strings/char_types.pyi          |  30 +++++
 .../pylibcudf/pylibcudf/strings/combine.pyi   |  34 ++++++
 .../pylibcudf/pylibcudf/strings/contains.pyi  |  14 +++
 .../pylibcudf/strings/convert/__init__.py     |  12 ++
 .../pylibcudf/strings/convert/__init__.pyi    |  24 ++++
 .../strings/convert/convert_booleans.pyi      |   9 ++
 .../strings/convert/convert_datetime.pyi      |  12 ++
 .../strings/convert/convert_durations.pyi     |   9 ++
 .../strings/convert/convert_fixed_point.pyi   |  10 ++
 .../strings/convert/convert_floats.pyi        |   8 ++
 .../strings/convert/convert_integers.pyi      |  11 ++
 .../strings/convert/convert_ipv4.pyi          |   7 ++
 .../strings/convert/convert_lists.pyi         |  10 ++
 .../strings/convert/convert_urls.pyi          |   6 +
 .../pylibcudf/pylibcudf/strings/extract.pyi   |   8 ++
 python/pylibcudf/pylibcudf/strings/find.pyi   |  14 +++
 .../pylibcudf/strings/find_multiple.pyi       |   5 +
 .../pylibcudf/pylibcudf/strings/findall.pyi   |   7 ++
 .../pylibcudf/pylibcudf/strings/padding.pyi   |   9 ++
 .../pylibcudf/strings/regex_flags.pyi         |   7 ++
 .../pylibcudf/strings/regex_program.pyi       |   7 ++
 python/pylibcudf/pylibcudf/strings/repeat.pyi |   5 +
 .../pylibcudf/pylibcudf/strings/replace.pyi   |  14 +++
 .../pylibcudf/strings/replace_re.pyi          |  27 +++++
 .../pylibcudf/pylibcudf/strings/side_type.pyi |   7 ++
 python/pylibcudf/pylibcudf/strings/slice.pyi  |  11 ++
 .../pylibcudf/strings/split/__init__.py       |   2 +
 .../pylibcudf/strings/split/__init__.pyi      |   4 +
 .../pylibcudf/strings/split/partition.pyi     |   8 ++
 .../pylibcudf/strings/split/split.pyi         |  27 +++++
 python/pylibcudf/pylibcudf/strings/strip.pyi  |  11 ++
 .../pylibcudf/pylibcudf/strings/translate.pyi |  20 ++++
 python/pylibcudf/pylibcudf/strings/wrap.pyi   |   5 +
 python/pylibcudf/pylibcudf/table.pyi          |   9 ++
 python/pylibcudf/pylibcudf/traits.pyi         |  22 ++++
 python/pylibcudf/pylibcudf/transform.pyi      |  16 +++
 python/pylibcudf/pylibcudf/transpose.pyi      |   4 +
 python/pylibcudf/pylibcudf/types.pyi          |  85 ++++++++++++++
 python/pylibcudf/pylibcudf/unary.pyi          |  38 ++++++
 98 files changed, 2224 insertions(+)
 create mode 100644 python/pylibcudf/pylibcudf/__init__.pyi
 create mode 100644 python/pylibcudf/pylibcudf/aggregation.pyi
 create mode 100644 python/pylibcudf/pylibcudf/binaryop.pyi
 create mode 100644 python/pylibcudf/pylibcudf/column.pyi
 create mode 100644 python/pylibcudf/pylibcudf/column_factories.pyi
 create mode 100644 python/pylibcudf/pylibcudf/concatenate.pyi
 create mode 100644 python/pylibcudf/pylibcudf/contiguous_split.pyi
 create mode 100644 python/pylibcudf/pylibcudf/copying.pyi
 create mode 100644 python/pylibcudf/pylibcudf/datetime.pyi
 create mode 100644 python/pylibcudf/pylibcudf/experimental.pyi
 create mode 100644 python/pylibcudf/pylibcudf/expressions.pyi
 create mode 100644 python/pylibcudf/pylibcudf/filling.pyi
 create mode 100644 python/pylibcudf/pylibcudf/gpumemoryview.pyi
 create mode 100644 python/pylibcudf/pylibcudf/groupby.pyi
 create mode 100644 python/pylibcudf/pylibcudf/hashing.pyi
 create mode 100644 python/pylibcudf/pylibcudf/interop.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/__init__.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/avro.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/csv.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/datasource.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/json.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/orc.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/parquet.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/timezone.pyi
 create mode 100644 python/pylibcudf/pylibcudf/io/types.pyi
 create mode 100644 python/pylibcudf/pylibcudf/join.pyi
 create mode 100644 python/pylibcudf/pylibcudf/json.pyi
 create mode 100644 python/pylibcudf/pylibcudf/labeling.pyi
 create mode 100644 python/pylibcudf/pylibcudf/lists.pyi
 create mode 100644 python/pylibcudf/pylibcudf/merge.pyi
 create mode 100644 python/pylibcudf/pylibcudf/null_mask.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/__init__.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/jaccard.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/minhash.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/normalize.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/replace.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/stemmer.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
 create mode 100644 python/pylibcudf/pylibcudf/partitioning.pyi
 create mode 100644 python/pylibcudf/pylibcudf/py.typed
 create mode 100644 python/pylibcudf/pylibcudf/quantiles.pyi
 create mode 100644 python/pylibcudf/pylibcudf/reduce.pyi
 create mode 100644 python/pylibcudf/pylibcudf/replace.pyi
 create mode 100644 python/pylibcudf/pylibcudf/reshape.pyi
 create mode 100644 python/pylibcudf/pylibcudf/rolling.pyi
 create mode 100644 python/pylibcudf/pylibcudf/round.pyi
 create mode 100644 python/pylibcudf/pylibcudf/scalar.pyi
 create mode 100644 python/pylibcudf/pylibcudf/search.pyi
 create mode 100644 python/pylibcudf/pylibcudf/sorting.pyi
 create mode 100644 python/pylibcudf/pylibcudf/stream_compaction.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/__init__.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/attributes.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/capitalize.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/case.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/char_types.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/combine.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/contains.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/__init__.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/extract.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/find.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/find_multiple.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/findall.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/padding.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/regex_flags.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/regex_program.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/repeat.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/replace.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/replace_re.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/side_type.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/slice.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/split/__init__.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/split/partition.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/split/split.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/strip.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/translate.pyi
 create mode 100644 python/pylibcudf/pylibcudf/strings/wrap.pyi
 create mode 100644 python/pylibcudf/pylibcudf/table.pyi
 create mode 100644 python/pylibcudf/pylibcudf/traits.pyi
 create mode 100644 python/pylibcudf/pylibcudf/transform.pyi
 create mode 100644 python/pylibcudf/pylibcudf/transpose.pyi
 create mode 100644 python/pylibcudf/pylibcudf/types.pyi
 create mode 100644 python/pylibcudf/pylibcudf/unary.pyi

diff --git a/python/pylibcudf/pylibcudf/__init__.pyi b/python/pylibcudf/pylibcudf/__init__.pyi
new file mode 100644
index 00000000000..a728647f82f
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/__init__.pyi
@@ -0,0 +1,95 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+# Type stub for the top-level pylibcudf package. It re-exports the public
+# submodules plus the Column, Scalar, Table, gpumemoryview and type classes.
+from pylibcudf import (
+    aggregation,
+    binaryop,
+    column_factories,
+    concatenate,
+    contiguous_split,
+    copying,
+    datetime,
+    experimental,
+    expressions,
+    filling,
+    groupby,
+    hashing,
+    interop,
+    io,
+    join,
+    json,
+    labeling,
+    lists,
+    merge,
+    null_mask,
+    nvtext,
+    partitioning,
+    quantiles,
+    reduce,
+    replace,
+    reshape,
+    rolling,
+    round,
+    search,
+    sorting,
+    stream_compaction,
+    strings,
+    traits,
+    transform,
+    transpose,
+    types,
+    unary,
+)
+from pylibcudf.column import Column
+from pylibcudf.gpumemoryview import gpumemoryview
+from pylibcudf.scalar import Scalar
+from pylibcudf.table import Table
+from pylibcudf.types import DataType, MaskState, TypeId
+
+__all__ = [
+    "Column",
+    "DataType",
+    "MaskState",
+    "Scalar",
+    "Table",
+    "TypeId",
+    "aggregation",
+    "binaryop",
+    "column_factories",
+    "contiguous_split",
+    "concatenate",
+    "copying",
+    "datetime",
+    "experimental",
+    "expressions",
+    "filling",
+    "gpumemoryview",
+    "groupby",
+    "hashing",
+    "interop",
+    "io",
+    "join",
+    "json",
+    "labeling",
+    "lists",
+    "merge",
+    "null_mask",
+    "partitioning",
+    "quantiles",
+    "reduce",
+    "replace",
+    "reshape",
+    "rolling",
+    "round",
+    "search",
+    "stream_compaction",
+    "strings",
+    "sorting",
+    "traits",
+    "transform",
+    "transpose",
+    "types",
+    "unary",
+    "nvtext",
+]
diff --git a/python/pylibcudf/pylibcudf/aggregation.pyi b/python/pylibcudf/pylibcudf/aggregation.pyi
new file mode 100644
index 00000000000..f6fdf0273ae
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/aggregation.pyi
@@ -0,0 +1,109 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.types import (
+    DataType,
+    Interpolation,
+    NanEquality,
+    NullEquality,
+    NullOrder,
+    NullPolicy,
+    Order,
+)
+
+class Kind(IntEnum):
+    SUM = auto()
+    PRODUCT = auto()
+    MIN = auto()
+    MAX = auto()
+    COUNT_VALID = auto()
+    COUNT_ALL = auto()
+    ANY = auto()
+    ALL = auto()
+    SUM_OF_SQUARES = auto()
+    MEAN = auto()
+    VARIANCE = auto()
+    STD = auto()
+    MEDIAN = auto()
+    QUANTILE = auto()
+    ARGMAX = auto()
+    ARGMIN = auto()
+    NUNIQUE = auto()
+    NTH_ELEMENT = auto()
+    RANK = auto()
+    COLLECT_LIST = auto()
+    COLLECT_SET = auto()
+    PTX = auto()
+    CUDA = auto()
+    CORRELATION = auto()
+    COVARIANCE = auto()
+
+class CorrelationType(IntEnum):
+    PEARSON = auto()
+    KENDALL = auto()
+    SPEARMAN = auto()
+
+class EWMHistory(IntEnum):
+    INFINITE = auto()
+    FINITE = auto()
+
+class RankMethod(IntEnum):
+    FIRST = auto()
+    AVERAGE = auto()
+    MIN = auto()
+    MAX = auto()
+    DENSE = auto()
+
+class RankPercentage(IntEnum):
+    NONE = auto()
+    ZERO_NORMALIZED = auto()
+    ONE_NORMALIZED = auto()
+
+class UdfType(IntEnum):
+    CUDA = auto()
+    PTX = auto()
+
+class Aggregation:
+    def kind(self) -> Kind: ...
+
+def sum() -> Aggregation: ...
+def product() -> Aggregation: ...
+def min() -> Aggregation: ...
+def max() -> Aggregation: ...
+def count(null_handling: NullPolicy = NullPolicy.INCLUDE) -> Aggregation: ...
+def any() -> Aggregation: ...
+def all() -> Aggregation: ...
+def sum_of_squares() -> Aggregation: ...
+def mean() -> Aggregation: ...
+def variance(ddof: int = 1) -> Aggregation: ...
+def std(ddof: int = 1) -> Aggregation: ...
+def median() -> Aggregation: ...
+def quantile(
+    quantiles: list[float], interp: Interpolation = Interpolation.LINEAR
+) -> Aggregation: ...
+def argmax() -> Aggregation: ...
+def argmin() -> Aggregation: ...
+def ewma(center_of_mass: float, history: EWMHistory) -> Aggregation: ...
+def nunique(null_handling: NullPolicy = NullPolicy.EXCLUDE) -> Aggregation: ...
+def nth_element(
+    n: int, null_handling: NullPolicy = NullPolicy.INCLUDE
+) -> Aggregation: ...
+def collect_list(
+    null_handling: NullPolicy = NullPolicy.INCLUDE,
+) -> Aggregation: ...
+def collect_set(
+    null_handling: NullPolicy = NullPolicy.INCLUDE,
+    nulls_equal: NullEquality = NullEquality.EQUAL,
+    nans_equal: NanEquality = NanEquality.ALL_EQUAL,
+) -> Aggregation: ...
+def udf(operation: str, output_type: DataType) -> Aggregation: ...
+def correlation(type: CorrelationType, min_periods: int) -> Aggregation: ...
+def covariance(min_periods: int, ddof: int) -> Aggregation: ...
+def rank(
+    method: RankMethod,
+    column_order: Order = Order.ASCENDING,
+    null_handling: NullPolicy = NullPolicy.EXCLUDE,
+    null_precedence: NullOrder = NullOrder.AFTER,
+    percentage: RankPercentage = RankPercentage.NONE,
+) -> Aggregation: ...
diff --git a/python/pylibcudf/pylibcudf/binaryop.pyi b/python/pylibcudf/pylibcudf/binaryop.pyi
new file mode 100644
index 00000000000..9cbaeb4549e
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/binaryop.pyi
@@ -0,0 +1,54 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.types import DataType
+
+class BinaryOperator(IntEnum):
+    ADD = auto()
+    SUB = auto()
+    MUL = auto()
+    DIV = auto()
+    TRUE_DIV = auto()
+    FLOOR_DIV = auto()
+    MOD = auto()
+    PMOD = auto()
+    PYMOD = auto()
+    POW = auto()
+    INT_POW = auto()
+    LOG_BASE = auto()
+    ATAN2 = auto()
+    SHIFT_LEFT = auto()
+    SHIFT_RIGHT = auto()
+    SHIFT_RIGHT_UNSIGNED = auto()
+    BITWISE_AND = auto()
+    BITWISE_OR = auto()
+    BITWISE_XOR = auto()
+    LOGICAL_AND = auto()
+    LOGICAL_OR = auto()
+    EQUAL = auto()
+    NOT_EQUAL = auto()
+    LESS = auto()
+    GREATER = auto()
+    LESS_EQUAL = auto()
+    GREATER_EQUAL = auto()
+    NULL_EQUALS = auto()
+    NULL_MAX = auto()
+    NULL_MIN = auto()
+    NULL_NOT_EQUALS = auto()
+    GENERIC_BINARY = auto()
+    NULL_LOGICAL_AND = auto()
+    NULL_LOGICAL_OR = auto()
+    INVALID_BINARY = auto()
+
+def binary_operation(
+    lhs: Column | Scalar,
+    rhs: Column | Scalar,
+    op: BinaryOperator,
+    output_type: DataType,
+) -> Column: ...
+def is_supported_operation(
+    out: DataType, lhs: DataType, rhs: DataType, op: BinaryOperator
+) -> bool: ...
diff --git a/python/pylibcudf/pylibcudf/column.pyi b/python/pylibcudf/pylibcudf/column.pyi
new file mode 100644
index 00000000000..72b41a9be5e
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/column.pyi
@@ -0,0 +1,48 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from collections.abc import Sequence
+from typing import Any
+
+from pylibcudf.gpumemoryview import gpumemoryview
+from pylibcudf.scalar import Scalar
+from pylibcudf.types import DataType
+
+class Column:
+    def __init__(
+        self,
+        data_type: DataType,
+        size: int,
+        data: gpumemoryview | None,
+        mask: gpumemoryview | None,
+        null_count: int,
+        offset: int,
+        children: list[Column],
+    ) -> None: ...
+    def type(self) -> DataType: ...
+    def child(self, index: int) -> Column: ...
+    def size(self) -> int: ...
+    def null_count(self) -> int: ...
+    def offset(self) -> int: ...
+    def data(self) -> gpumemoryview | None: ...
+    def null_mask(self) -> gpumemoryview | None: ...
+    def children(self) -> list[Column]: ...
+    def copy(self) -> Column: ...
+    def with_mask(
+        self, mask: gpumemoryview | None, null_count: int
+    ) -> Column: ...
+    def list_view(self) -> ListColumnView: ...
+    @staticmethod
+    def from_scalar(scalar: Scalar, size: int) -> Column: ...
+    @staticmethod
+    def all_null_like(like: Column, size: int) -> Column: ...
+    @staticmethod
+    def from_cuda_array_interface_obj(obj: Any) -> Column: ...
+
+class ListColumnView:
+    def __init__(self, column: Column) -> None: ...
+    def child(self) -> Column: ...
+    def offsets(self) -> Column: ...
+
+def is_c_contiguous(
+    shape: Sequence[int], strides: Sequence[int], itemsize: int
+) -> bool: ...
diff --git a/python/pylibcudf/pylibcudf/column_factories.pyi b/python/pylibcudf/pylibcudf/column_factories.pyi
new file mode 100644
index 00000000000..c87fe423acb
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/column_factories.pyi
@@ -0,0 +1,20 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from pylibcudf.column import Column
+from pylibcudf.types import DataType, MaskState, TypeId
+
+def make_empty_column(type_or_id: DataType | TypeId) -> Column: ...
+def make_numeric_column(
+    type_: DataType, size: int, mstate: MaskState
+) -> Column: ...
+def make_fixed_point_column(
+    type_: DataType, size: int, mstate: MaskState
+) -> Column: ...
+def make_timestamp_column(
+    type_: DataType, size: int, mstate: MaskState
+) -> Column: ...
+def make_duration_column(
+    type_: DataType, size: int, mstate: MaskState
+) -> Column: ...
+def make_fixed_width_column(
+    type_: DataType, size: int, mstate: MaskState
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/concatenate.pyi b/python/pylibcudf/pylibcudf/concatenate.pyi
new file mode 100644
index 00000000000..79076f509e0
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/concatenate.pyi
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.table import Table
+
+def concatenate[ColumnOrTable: (Column, Table)](
+    objects: list[ColumnOrTable],
+) -> ColumnOrTable: ...
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyi b/python/pylibcudf/pylibcudf/contiguous_split.pyi
new file mode 100644
index 00000000000..66e6c5e50c5
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/contiguous_split.pyi
@@ -0,0 +1,13 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.gpumemoryview import gpumemoryview
+from pylibcudf.table import Table
+
+class PackedColumns:
+    def release(self) -> tuple[memoryview, gpumemoryview]: ...
+
+def pack(input: Table) -> PackedColumns: ...
+def unpack(input: PackedColumns) -> Table: ...
+def unpack_from_memoryviews(
+    metadata: memoryview, gpu_data: gpumemoryview
+) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/copying.pyi b/python/pylibcudf/pylibcudf/copying.pyi
new file mode 100644
index 00000000000..07bfced4a55
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/copying.pyi
@@ -0,0 +1,54 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+from typing import TypeVar
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.table import Table
+
+class MaskAllocationPolicy(IntEnum):
+    NEVER = auto()
+    RETAIN = auto()
+    ALWAYS = auto()
+
+class OutOfBoundsPolicy(IntEnum):
+    NULLIFY = auto()
+    DONT_CHECK = auto()
+
+ColumnOrTable = TypeVar("ColumnOrTable", Column, Table)
+
+def gather(
+    source_table: Table, gather_map: Column, bounds_policy: OutOfBoundsPolicy
+) -> Table: ...
+def scatter(
+    source: Table | list[Scalar], scatter_map: Column, target_table: Table
+) -> Table: ...
+def empty_like(input: ColumnOrTable) -> ColumnOrTable: ...
+def allocate_like(
+    input_column: Column, policy: MaskAllocationPolicy, size: int | None = None
+) -> Column: ...
+def copy_range_in_place(
+    input_column: Column,
+    target_column: Column,
+    input_begin: int,
+    input_end: int,
+    target_begin: int,
+) -> None: ...  # NOTE(review): confirm against copying.pyx
+def copy_range(
+    input_column: Column,
+    target_column: Column,
+    input_begin: int,
+    input_end: int,
+    target_begin: int,
+) -> Column: ...
+def shift(input: Column, offset: int, fill_value: Scalar) -> Column: ...
+def slice(input: ColumnOrTable, indices: list[int]) -> list[ColumnOrTable]: ...
+def split(input: ColumnOrTable, splits: list[int]) -> list[ColumnOrTable]: ...
+def copy_if_else(
+    lhs: Column | Scalar, rhs: Column | Scalar, boolean_mask: Column
+) -> Column: ...
+def boolean_mask_scatter(
+    input: Table | list[Scalar], target: Table, boolean_mask: Column
+) -> Table: ...
+def get_element(input_column: Column, index: int) -> Scalar: ...
diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi
new file mode 100644
index 00000000000..30ff3edf4fb
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/datetime.pyi
@@ -0,0 +1,45 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+class DatetimeComponent(IntEnum):
+    YEAR = auto()
+    MONTH = auto()
+    DAY = auto()
+    WEEKDAY = auto()
+    HOUR = auto()
+    MINUTE = auto()
+    SECOND = auto()
+    MILLISECOND = auto()
+    MICROSECOND = auto()
+    NANOSECOND = auto()
+
+class RoundingFrequency(IntEnum):
+    DAY = auto()
+    HOUR = auto()
+    MINUTE = auto()
+    SECOND = auto()
+    MILLISECOND = auto()
+    MICROSECOND = auto()
+    NANOSECOND = auto()
+
+def extract_millisecond_fraction(input: Column) -> Column: ...
+def extract_microsecond_fraction(input: Column) -> Column: ...
+def extract_nanosecond_fraction(input: Column) -> Column: ...
+def extract_datetime_component(
+    input: Column, component: DatetimeComponent
+) -> Column: ...
+def ceil_datetimes(input: Column, freq: RoundingFrequency) -> Column: ...
+def floor_datetimes(input: Column, freq: RoundingFrequency) -> Column: ...
+def round_datetimes(input: Column, freq: RoundingFrequency) -> Column: ...
+def add_calendrical_months(
+    timestamps: Column, months: Column | Scalar
+) -> Column: ...
+def day_of_year(input: Column) -> Column: ...
+def is_leap_year(input: Column) -> Column: ...
+def last_day_of_month(input: Column) -> Column: ...
+def extract_quarter(input: Column) -> Column: ...
+def days_in_month(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/experimental.pyi b/python/pylibcudf/pylibcudf/experimental.pyi
new file mode 100644
index 00000000000..bbfb86b0ff6
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/experimental.pyi
@@ -0,0 +1,5 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+def enable_prefetching(key: str) -> None: ...
+def disable_prefetching(key: str) -> None: ...
+def prefetch_debugging(enable: bool) -> None: ...
diff --git a/python/pylibcudf/pylibcudf/expressions.pyi b/python/pylibcudf/pylibcudf/expressions.pyi
new file mode 100644
index 00000000000..c3769bbfb85
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/expressions.pyi
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from enum import IntEnum, auto
+
+from pylibcudf.scalar import Scalar
+
+class TableReference(IntEnum):
+    LEFT = auto()
+    RIGHT = auto()
+
+class ASTOperator(IntEnum):
+    ADD = auto()
+    SUB = auto()
+    MUL = auto()
+    DIV = auto()
+    TRUE_DIV = auto()
+    FLOOR_DIV = auto()
+    MOD = auto()
+    PYMOD = auto()
+    POW = auto()
+    EQUAL = auto()
+    NULL_EQUAL = auto()
+    NOT_EQUAL = auto()
+    LESS = auto()
+    GREATER = auto()
+    LESS_EQUAL = auto()
+    GREATER_EQUAL = auto()
+    BITWISE_AND = auto()
+    BITWISE_OR = auto()
+    BITWISE_XOR = auto()
+    NULL_LOGICAL_AND = auto()
+    LOGICAL_AND = auto()
+    NULL_LOGICAL_OR = auto()
+    LOGICAL_OR = auto()
+    IDENTITY = auto()
+    IS_NULL = auto()
+    SIN = auto()
+    COS = auto()
+    TAN = auto()
+    ARCSIN = auto()
+    ARCCOS = auto()
+    ARCTAN = auto()
+    SINH = auto()
+    COSH = auto()
+    TANH = auto()
+    ARCSINH = auto()
+    ARCCOSH = auto()
+    ARCTANH = auto()
+    EXP = auto()
+    LOG = auto()
+    SQRT = auto()
+    CBRT = auto()
+    CEIL = auto()
+    FLOOR = auto()
+    ABS = auto()
+    RINT = auto()
+    BIT_INVERT = auto()
+    NOT = auto()
+
+class Expression: ...
+
+class Literal(Expression):
+    def __init__(self, value: Scalar) -> None: ...
+
+class ColumnReference(Expression):
+    def __init__(
+        self, index: int, table_source: TableReference = TableReference.LEFT
+    ) -> None: ...
+
+class ColumnNameReference(Expression):
+    def __init__(self, name: str) -> None: ...
+
+class Operation(Expression):
+    def __init__(
+        self,
+        op: ASTOperator,
+        left: Expression,
+        right: Expression | None = None,
+    ) -> None: ...
diff --git a/python/pylibcudf/pylibcudf/filling.pyi b/python/pylibcudf/pylibcudf/filling.pyi
new file mode 100644
index 00000000000..c0534f1344b
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/filling.pyi
@@ -0,0 +1,14 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.table import Table
+
+def fill(
+    destination: Column, begin: int, end: int, value: Scalar
+) -> Column: ...
+def fill_in_place(
+    destination: Column, c_begin: int, c_end: int, value: Scalar
+) -> None: ...
+def sequence(size: int, init: Scalar, step: Scalar) -> Column: ...
+def repeat(input_table: Table, count: Column | int) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyi b/python/pylibcudf/pylibcudf/gpumemoryview.pyi
new file mode 100644
index 00000000000..0491ba896e5
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/gpumemoryview.pyi
@@ -0,0 +1,9 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from collections.abc import Mapping
+from typing import Any
+
+class gpumemoryview:
+    def __init__(self, data: Any) -> None: ...
+    @property
+    def __cuda_array_interface__(self) -> Mapping[str, Any]: ...
diff --git a/python/pylibcudf/pylibcudf/groupby.pyi b/python/pylibcudf/pylibcudf/groupby.pyi
new file mode 100644
index 00000000000..e933cafdeef
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/groupby.pyi
@@ -0,0 +1,38 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.aggregation import Aggregation
+from pylibcudf.column import Column
+from pylibcudf.replace import ReplacePolicy
+from pylibcudf.scalar import Scalar
+from pylibcudf.table import Table
+from pylibcudf.types import NullOrder, NullPolicy, Order, Sorted
+
+class GroupByRequest:
+    def __init__(
+        self, values: Column, aggregations: list[Aggregation]
+    ) -> None: ...
+
+class GroupBy:
+    def __init__(
+        self,
+        keys: Table,
+        null_handling: NullPolicy = NullPolicy.EXCLUDE,
+        keys_are_sorted: Sorted = Sorted.NO,
+        column_order: list[Order] | None = None,
+        null_precedence: list[NullOrder] | None = None,
+    ) -> None: ...
+    def aggregate(
+        self, requests: list[GroupByRequest]
+    ) -> tuple[Table, list[Table]]: ...
+    def scan(
+        self, requests: list[GroupByRequest]
+    ) -> tuple[Table, list[Table]]: ...
+    def shift(
+        self, values: Table, offset: list[int], fill_values: list[Scalar]
+    ) -> tuple[Table, Table]: ...
+    def replace_nulls(
+        self, values: Table, replace_policies: list[ReplacePolicy]
+    ) -> tuple[Table, Table]: ...
+    def get_groups(
+        self, values: Table | None = None
+    ) -> tuple[list[int], Table, Table]: ...
diff --git a/python/pylibcudf/pylibcudf/hashing.pyi b/python/pylibcudf/pylibcudf/hashing.pyi
new file mode 100644
index 00000000000..69a72aa4783
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/hashing.pyi
@@ -0,0 +1,22 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.table import Table
+
+LIBCUDF_DEFAULT_HASH_SEED: int
+
+def murmurhash3_x86_32(
+    input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED
+) -> Column: ...
+def murmurhash3_x64_128(
+    input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED
+) -> Table: ...
+def xxhash_64(
+    input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED
+) -> Column: ...
+def md5(input: Table) -> Column: ...
+def sha1(input: Table) -> Column: ...
+def sha224(input: Table) -> Column: ...
+def sha256(input: Table) -> Column: ...
+def sha384(input: Table) -> Column: ...
+def sha512(input: Table) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/interop.pyi b/python/pylibcudf/pylibcudf/interop.pyi
new file mode 100644
index 00000000000..cd8cb0c4a2c
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/interop.pyi
@@ -0,0 +1,50 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from collections.abc import Iterable, Mapping
+from typing import Any, overload
+
+import pyarrow as pa
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.table import Table
+from pylibcudf.types import DataType
+
+class ColumnMetadata:
+    name: str
+    children_meta: list[ColumnMetadata]
+
+@overload
+def from_arrow(obj: pa.DataType) -> DataType: ...
+@overload
+def from_arrow(
+    obj: pa.Scalar[Any], *, data_type: DataType | None = None
+) -> Scalar: ...
+@overload
+def from_arrow(obj: pa.Array[Any]) -> Column: ...
+@overload
+def from_arrow(obj: pa.Table) -> Table: ...
+@overload
+def to_arrow(
+    obj: DataType,
+    *,
+    precision: int | None = None,
+    fields: Iterable[pa.Field | tuple[str, pa.DataType]]
+    | Mapping[str, pa.DataType]
+    | None = None,
+    value_type: pa.DataType | None = None,
+) -> pa.DataType: ...
+@overload
+def to_arrow(
+    obj: Table, metadata: list[ColumnMetadata | str] | None = None
+) -> pa.Table: ...
+@overload
+def to_arrow(
+    obj: Column, metadata: ColumnMetadata | str | None = None
+) -> pa.Array[Any]: ...
+@overload
+def to_arrow(
+    obj: Scalar, metadata: ColumnMetadata | str | None = None
+) -> pa.Scalar[Any]: ...
+def from_dlpack(managed_tensor: Any) -> Table: ...
+def to_dlpack(input: Table) -> Any: ...
diff --git a/python/pylibcudf/pylibcudf/io/__init__.pyi b/python/pylibcudf/pylibcudf/io/__init__.pyi
new file mode 100644
index 00000000000..e1a93ce08e3
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/__init__.pyi
@@ -0,0 +1,27 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.io import (
+    avro,
+    csv,
+    datasource,
+    json,
+    orc,
+    parquet,
+    timezone,
+    types,
+)
+from pylibcudf.io.types import SinkInfo, SourceInfo, TableWithMetadata
+
+__all__ = [
+    "avro",
+    "csv",
+    "datasource",
+    "json",
+    "orc",
+    "parquet",
+    "timezone",
+    "types",
+    "SinkInfo",
+    "SourceInfo",
+    "TableWithMetadata",
+]
diff --git a/python/pylibcudf/pylibcudf/io/avro.pyi b/python/pylibcudf/pylibcudf/io/avro.pyi
new file mode 100644
index 00000000000..49c2f083702
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/avro.pyi
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from pylibcudf.io.types import SourceInfo, TableWithMetadata
+
+__all__ = ["read_avro"]
+
+def read_avro(
+    source_info: SourceInfo,
+    columns: list[str] | None = None,
+    skip_rows: int = 0,
+    num_rows: int = -1,
+) -> TableWithMetadata: ...
diff --git a/python/pylibcudf/pylibcudf/io/csv.pyi b/python/pylibcudf/pylibcudf/io/csv.pyi
new file mode 100644
index 00000000000..356825a927d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/csv.pyi
@@ -0,0 +1,54 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from collections.abc import Mapping
+
+from pylibcudf.io.types import (
+    CompressionType,
+    QuoteStyle,
+    SourceInfo,
+    TableWithMetadata,
+)
+from pylibcudf.types import DataType
+
+def read_csv(
+    source_info: SourceInfo,
+    *,
+    compression: CompressionType = CompressionType.AUTO,
+    byte_range_offset: int = 0,
+    byte_range_size: int = 0,
+    col_names: list[str] | None = None,
+    prefix: str = "",
+    mangle_dupe_cols: bool = True,
+    usecols: list[int] | list[str] | None = None,
+    nrows: int = -1,
+    skiprows: int = 0,
+    skipfooter: int = 0,
+    header: int = 0,
+    lineterminator: str = "\n",
+    delimiter: str | None = None,
+    thousands: str | None = None,
+    decimal: str = ".",
+    comment: str | None = None,
+    delim_whitespace: bool = False,
+    skipinitialspace: bool = False,
+    skip_blank_lines: bool = True,
+    quoting: QuoteStyle = QuoteStyle.MINIMAL,
+    quotechar: str = '"',
+    doublequote: bool = True,
+    parse_dates: list[str] | list[int] | None = None,
+    parse_hex: list[str] | list[int] | None = None,
+    # Technically this should be dict/list
+    # but using a fused type prevents using None as default
+    dtypes: Mapping[str, DataType] | list[DataType] | None = None,
+    true_values: list[str] | None = None,
+    false_values: list[str] | None = None,
+    na_values: list[str] | None = None,
+    keep_default_na: bool = True,
+    na_filter: bool = True,
+    dayfirst: bool = False,
+    # Note: These options are supported by the libcudf reader
+    # but are not exposed here since there is no demand for them
+    # on the Python side yet.
+    # detect_whitespace_around_quotes: bool = False,
+    # timestamp_type: DataType = DataType(type_id.EMPTY),
+) -> TableWithMetadata: ...
diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyi b/python/pylibcudf/pylibcudf/io/datasource.pyi
new file mode 100644
index 00000000000..c4184208b0c
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/datasource.pyi
@@ -0,0 +1,4 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+class Datasource:  # this stub declares no Python-visible members
+    ...
diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi
new file mode 100644
index 00000000000..33794afb208
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/json.pyi
@@ -0,0 +1,50 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from collections.abc import Mapping
+from typing import TypeAlias
+
+from pylibcudf.column import Column
+from pylibcudf.io.types import (
+    CompressionType,
+    JSONRecoveryMode,
+    SinkInfo,
+    SourceInfo,
+    TableWithMetadata,
+)
+from pylibcudf.types import DataType
+
+ChildNameToTypeMap: TypeAlias = Mapping[str, ChildNameToTypeMap]
+
+NameAndType: TypeAlias = tuple[str, DataType, list[NameAndType]]
+
+def read_json(
+    source_info: SourceInfo,
+    dtypes: list[NameAndType] | None = None,
+    compression: CompressionType = CompressionType.AUTO,
+    lines: bool = False,
+    byte_range_offset: int = 0,
+    byte_range_size: int = 0,
+    keep_quotes: bool = False,
+    mixed_types_as_string: bool = False,
+    prune_columns: bool = False,
+    recovery_mode: JSONRecoveryMode = JSONRecoveryMode.FAIL,
+) -> TableWithMetadata: ...
+def write_json(
+    sink_info: SinkInfo,
+    tbl: TableWithMetadata,
+    na_rep: str = "",
+    include_nulls: bool = False,
+    lines: bool = False,
+    rows_per_chunk: int = 2**32 - 1,
+    true_value: str = "true",
+    false_value: str = "false",
+) -> None: ...
+def chunked_read_json(
+    source_info: SourceInfo,
+    dtypes: list[NameAndType] | None = None,
+    compression: CompressionType = CompressionType.AUTO,
+    keep_quotes: bool = False,
+    mixed_types_as_string: bool = False,
+    prune_columns: bool = False,
+    recovery_mode: JSONRecoveryMode = JSONRecoveryMode.FAIL,
+    chunk_size: int = 100_000_000,
+) -> tuple[list[Column], list[str], ChildNameToTypeMap]: ...
diff --git a/python/pylibcudf/pylibcudf/io/orc.pyi b/python/pylibcudf/pylibcudf/io/orc.pyi
new file mode 100644
index 00000000000..87ea9088b44
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/orc.pyi
@@ -0,0 +1,39 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from typing import Any
+
+from pylibcudf.io.types import SourceInfo, TableWithMetadata
+from pylibcudf.types import DataType
+
+def read_orc(
+    source_info: SourceInfo,
+    columns: list[str] | None = None,
+    stripes: list[list[int]] | None = None,
+    skip_rows: int = 0,
+    nrows: int = -1,
+    use_index: bool = True,
+    use_np_dtypes: bool = True,
+    timestamp_type: DataType | None = None,
+    decimal128_columns: list[str] | None = None,
+) -> TableWithMetadata: ...
+
+class OrcColumnStatistics:
+    @property
+    def number_of_values(self) -> int | None: ...
+    @property
+    def has_null(self) -> bool | None: ...
+    def __getitem__(self, item: str) -> Any: ...
+    def __contains__(self, item: str) -> bool: ...
+    def get[T](self, item: str, default: None | T = None) -> T | None: ...
+
+class ParsedOrcStatistics:
+    @property
+    def column_names(self) -> list[str]: ...
+    @property
+    def file_stats(self) -> list[OrcColumnStatistics]: ...
+    @property
+    def stripes_stats(self) -> list[OrcColumnStatistics]: ...
+
+def read_parsed_orc_statistics(
+    source_info: SourceInfo,
+) -> ParsedOrcStatistics: ...
diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyi b/python/pylibcudf/pylibcudf/io/parquet.pyi
new file mode 100644
index 00000000000..bcf1d1cce09
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/parquet.pyi
@@ -0,0 +1,36 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.expressions import Expression
+from pylibcudf.io.types import SourceInfo, TableWithMetadata
+
+class ChunkedParquetReader:
+    def __init__(
+        self,
+        source_info: SourceInfo,
+        columns: list[str] | None = None,
+        row_groups: list[list[int]] | None = None,
+        use_pandas_metadata: bool = True,
+        convert_strings_to_categories: bool = False,
+        skip_rows: int = 0,
+        nrows: int = -1,  # same default as read_parquet (verify in .pyx)
+        chunk_read_limit: int = 0,
+        pass_read_limit: int = 1024000000,
+        allow_mismatched_pq_schemas: bool = False,
+    ) -> None: ...
+    def has_next(self) -> bool: ...
+    def read_chunk(self) -> TableWithMetadata: ...
+
+def read_parquet(
+    source_info: SourceInfo,
+    columns: list[str] | None = None,
+    row_groups: list[list[int]] | None = None,
+    filters: Expression | None = None,
+    convert_strings_to_categories: bool = False,
+    use_pandas_metadata: bool = True,
+    skip_rows: int = 0,
+    nrows: int = -1,
+    allow_mismatched_pq_schemas: bool = False,
+    # The two options below are disabled; see the comment in parquet.pyx.
+    # reader_column_schema: ReaderColumnSchema = *,
+    # timestamp_type: DataType = *
+) -> TableWithMetadata: ...
diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyi b/python/pylibcudf/pylibcudf/io/timezone.pyi
new file mode 100644
index 00000000000..0582800c4af
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/timezone.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.table import Table
+
+def make_timezone_transition_table(
+    tzif_dir: str, timezone_name: str
+) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi
new file mode 100644
index 00000000000..f668c07f940
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/io/types.pyi
@@ -0,0 +1,97 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import io
+import os
+from collections.abc import Mapping
+from enum import IntEnum, auto
+from typing import Literal, TypeAlias, overload
+
+from pylibcudf.column import Column
+from pylibcudf.io.datasource import Datasource
+from pylibcudf.table import Table
+
+class JSONRecoveryMode(IntEnum):
+    FAIL = auto()
+    RECOVER_WITH_NULL = auto()
+
+class CompressionType(IntEnum):
+    NONE = auto()
+    AUTO = auto()
+    SNAPPY = auto()
+    GZIP = auto()
+    BZIP2 = auto()
+    BROTLI = auto()
+    ZIP = auto()
+    XZ = auto()
+    ZLIB = auto()
+    LZ4 = auto()
+    LZO = auto()
+    ZSTD = auto()
+
+class ColumnEncoding(IntEnum):
+    USE_DEFAULT = auto()
+    DICTIONARY = auto()
+    PLAIN = auto()
+    DELTA_BINARY_PACKED = auto()
+    DELTA_LENGTH_BYTE_ARRAY = auto()
+    DELTA_BYTE_ARRAY = auto()
+    BYTE_STREAM_SPLIT = auto()
+    DIRECT = auto()
+    DIRECT_V2 = auto()
+    DICTIONARY_V2 = auto()
+
+class DictionaryPolicy(IntEnum):
+    NEVER = auto()
+    ADAPTIVE = auto()
+    ALWAYS = auto()
+
+class StatisticsFreq(IntEnum):
+    STATISTICS_NONE = auto()
+    STATISTICS_ROWGROUP = auto()
+    STATISTICS_PAGE = auto()
+    STATISTICS_COLUMN = auto()
+
+class QuoteStyle(IntEnum):
+    MINIMAL = auto()
+    ALL = auto()
+    NONNUMERIC = auto()
+    NONE = auto()
+
+ColumnNameSpec: TypeAlias = tuple[str, list[ColumnNameSpec]]
+ChildNameSpec: TypeAlias = Mapping[str, ChildNameSpec]
+
+class TableWithMetadata:
+    tbl: Table
+    def __init__(
+        self, tbl: Table, column_names: list[ColumnNameSpec]
+    ) -> None: ...
+    @property
+    def columns(self) -> list[Column]: ...
+    @overload  # literal False narrows the result to plain names
+    def column_names(self, include_children: Literal[False]) -> list[str]: ...
+    @overload  # literal True narrows the result to nested name specs
+    def column_names(
+        self, include_children: Literal[True]
+    ) -> list[ColumnNameSpec]: ...
+    @overload  # fallback for a non-literal bool or an omitted argument
+    def column_names(
+        self, include_children: bool = False
+    ) -> list[str] | list[ColumnNameSpec]: ...
+    @property
+    def child_names(self) -> ChildNameSpec: ...
+    @property
+    def per_file_user_data(self) -> list[Mapping[str, str]]: ...
+
+class SourceInfo:
+    def __init__(
+        self, sources: list[str] | list[os.PathLike] | list[Datasource]
+    ) -> None: ...
+
+class SinkInfo:
+    def __init__(
+        self,
+        sinks: list[os.PathLike]
+        | list[io.StringIO]
+        | list[io.BytesIO]
+        | list[io.TextIOBase]
+        | list[str],
+    ) -> None: ...
diff --git a/python/pylibcudf/pylibcudf/join.pyi b/python/pylibcudf/pylibcudf/join.pyi
new file mode 100644
index 00000000000..f34357baa67
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/join.pyi
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.expressions import Expression
+from pylibcudf.table import Table
+from pylibcudf.types import NullEquality
+
+def inner_join(
+    left_keys: Table, right_keys: Table, nulls_equal: NullEquality
+) -> tuple[Column, Column]: ...
+def left_join(
+    left_keys: Table, right_keys: Table, nulls_equal: NullEquality
+) -> tuple[Column, Column]: ...
+def full_join(
+    left_keys: Table, right_keys: Table, nulls_equal: NullEquality
+) -> tuple[Column, Column]: ...
+def left_semi_join(
+    left_keys: Table, right_keys: Table, nulls_equal: NullEquality
+) -> Column: ...
+def left_anti_join(
+    left_keys: Table, right_keys: Table, nulls_equal: NullEquality
+) -> Column: ...
+def cross_join(left: Table, right: Table) -> Table: ...
+def conditional_inner_join(
+    left: Table, right: Table, binary_predicate: Expression
+) -> tuple[Column, Column]: ...
+def conditional_left_join(
+    left: Table, right: Table, binary_predicate: Expression
+) -> tuple[Column, Column]: ...
+def conditional_full_join(
+    left: Table, right: Table, binary_predicate: Expression
+) -> tuple[Column, Column]: ...
+def conditional_left_semi_join(
+    left: Table, right: Table, binary_predicate: Expression
+) -> Column: ...
+def conditional_left_anti_join(
+    left: Table, right: Table, binary_predicate: Expression
+) -> Column: ...
+def mixed_inner_join(
+    left_keys: Table,
+    right_keys: Table,
+    left_conditional: Table,
+    right_conditional: Table,
+    binary_predicate: Expression,
+    nulls_equal: NullEquality,
+) -> tuple[Column, Column]: ...
+def mixed_left_join(
+    left_keys: Table,
+    right_keys: Table,
+    left_conditional: Table,
+    right_conditional: Table,
+    binary_predicate: Expression,
+    nulls_equal: NullEquality,
+) -> tuple[Column, Column]: ...
+def mixed_full_join(
+    left_keys: Table,
+    right_keys: Table,
+    left_conditional: Table,
+    right_conditional: Table,
+    binary_predicate: Expression,
+    nulls_equal: NullEquality,
+) -> tuple[Column, Column]: ...
+def mixed_left_semi_join(
+    left_keys: Table,
+    right_keys: Table,
+    left_conditional: Table,
+    right_conditional: Table,
+    binary_predicate: Expression,
+    nulls_equal: NullEquality,
+) -> Column: ...
+def mixed_left_anti_join(
+    left_keys: Table,
+    right_keys: Table,
+    left_conditional: Table,
+    right_conditional: Table,
+    binary_predicate: Expression,
+    nulls_equal: NullEquality,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/json.pyi b/python/pylibcudf/pylibcudf/json.pyi
new file mode 100644
index 00000000000..41872c037de
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/json.pyi
@@ -0,0 +1,23 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+class GetJsonObjectOptions:
+    def __init__(
+        self,
+        *,
+        allow_single_quotes: bool = False,
+        strip_quotes_from_single_strings: bool = True,
+        missing_fields_as_nulls: bool = False,
+    ) -> None: ...
+    def get_allow_single_quotes(self) -> bool: ...
+    def get_strip_quotes_from_single_strings(self) -> bool: ...
+    def get_missing_fields_as_nulls(self) -> bool: ...
+    def set_allow_single_quotes(self, value: bool) -> None: ...
+    def set_strip_quotes_from_single_strings(self, value: bool) -> None: ...
+    def set_missing_fields_as_nulls(self, value: bool) -> None: ...
+
+def get_json_object(
+    col: Column, json_path: Scalar, options: GetJsonObjectOptions | None = None
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi
new file mode 100644
index 00000000000..362a92ca7c1
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/labeling.pyi
@@ -0,0 +1,10 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from pylibcudf.column import Column
+
+def label_bins(
+    input: Column,
+    left_edges: Column,
+    left_inclusive: bool,
+    right_edges: Column,
+    right_inclusive: bool,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi
new file mode 100644
index 00000000000..e5d186572cd
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/lists.pyi
@@ -0,0 +1,42 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.table import Table
+from pylibcudf.types import NullOrder
+
+def explode_outer(input: Table, explode_column_idx: int) -> Table: ...
+def concatenate_rows(input: Table) -> Column: ...
+def concatenate_list_elements(input: Column, dropna: bool) -> Column: ...
+def contains(input: Column, search_key: Column | Scalar) -> Column: ...
+def contains_nulls(input: Column) -> Column: ...
+def index_of(
+    input: Column, search_key: Column | Scalar, find_first_option: bool
+) -> Column: ...
+def reverse(input: Column) -> Column: ...
+def segmented_gather(input: Column, gather_map_list: Column) -> Column: ...
+def extract_list_element(input: Column, index: Column | int) -> Column: ...
+def count_elements(input: Column) -> Column: ...
+def sequences(
+    starts: Column, sizes: Column, steps: Column | None = None
+) -> Column: ...
+def sort_lists(
+    input: Column,
+    ascending: bool,
+    na_position: NullOrder,
+    stable: bool = False,
+) -> Column: ...
+def difference_distinct(
+    lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True
+) -> Column: ...
+def have_overlap(
+    lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True
+) -> Column: ...
+def intersect_distinct(
+    lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True
+) -> Column: ...
+def union_distinct(
+    lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True
+) -> Column: ...
+def apply_boolean_mask(input: Column, mask: Column) -> Column: ...
+def distinct(input: Column, nulls_equal: bool, nans_equal: bool) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/merge.pyi b/python/pylibcudf/pylibcudf/merge.pyi
new file mode 100644
index 00000000000..b18eb01f8a2
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/merge.pyi
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.table import Table
+from pylibcudf.types import NullOrder, Order
+
+def merge(
+    tables_to_merge: list[Table],
+    key_cols: list[int],
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/null_mask.pyi b/python/pylibcudf/pylibcudf/null_mask.pyi
new file mode 100644
index 00000000000..1a6d96a0822
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/null_mask.pyi
@@ -0,0 +1,14 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from rmm.pylibrmm.device_buffer import DeviceBuffer
+
+from pylibcudf.column import Column
+from pylibcudf.types import MaskState
+
+def copy_bitmask(col: Column) -> DeviceBuffer: ...
+def bitmask_allocation_size_bytes(number_of_bits: int) -> int: ...
+def create_null_mask(
+    size: int, state: MaskState = MaskState.UNINITIALIZED
+) -> DeviceBuffer: ...
+def bitmask_and(columns: list[Column]) -> tuple[DeviceBuffer, int]: ...
+def bitmask_or(columns: list[Column]) -> tuple[DeviceBuffer, int]: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pyi b/python/pylibcudf/pylibcudf/nvtext/__init__.pyi
new file mode 100644
index 00000000000..aa51eff6bf5
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pyi
@@ -0,0 +1,29 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.nvtext import (
+    byte_pair_encode,
+    edit_distance,
+    generate_ngrams,
+    jaccard,
+    minhash,
+    ngrams_tokenize,
+    normalize,
+    replace,
+    stemmer,
+    subword_tokenize,
+    tokenize,
+)
+
+__all__ = [
+    "byte_pair_encode",
+    "edit_distance",
+    "generate_ngrams",
+    "jaccard",
+    "minhash",
+    "ngrams_tokenize",
+    "normalize",
+    "replace",
+    "stemmer",
+    "subword_tokenize",
+    "tokenize",
+]
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
new file mode 100644
index 00000000000..eb85acd56e5
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+class BPEMergePairs:
+    def __init__(self, merge_pairs: Column) -> None: ...
+
+def byte_pair_encoding(
+    input: Column, merge_pairs: BPEMergePairs, separator: Scalar | None = None
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi
new file mode 100644
index 00000000000..85bbbb880ee
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi
@@ -0,0 +1,6 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def edit_distance(input: Column, targets: Column) -> Column: ...
+def edit_distance_matrix(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi
new file mode 100644
index 00000000000..2757518379d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi
@@ -0,0 +1,10 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def generate_ngrams(
+    input: Column, ngrams: int, separator: Scalar
+) -> Column: ...
+def generate_character_ngrams(input: Column, ngrams: int = 2) -> Column: ...
+def hash_character_ngrams(input: Column, ngrams: int = 2) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi
new file mode 100644
index 00000000000..18263c5c8fd
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi
@@ -0,0 +1,5 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def jaccard_index(input1: Column, input2: Column, width: int) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi
new file mode 100644
index 00000000000..a2d9b6364f7
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi
@@ -0,0 +1,13 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def minhash(
+    input: Column, seeds: Column | Scalar, width: int = 4
+) -> Column: ...
+def minhash64(
+    input: Column, seeds: Column | Scalar, width: int = 4
+) -> Column: ...
+def word_minhash(input: Column, seeds: Column) -> Column: ...
+def word_minhash64(input: Column, seeds: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi
new file mode 100644
index 00000000000..224640ed44d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def ngrams_tokenize(
+    input: Column, ngrams: int, delimiter: Scalar, separator: Scalar
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyi b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi
new file mode 100644
index 00000000000..1d90a5a8960
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi
@@ -0,0 +1,6 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def normalize_spaces(input: Column) -> Column: ...
+def normalize_characters(input: Column, do_lower_case: bool) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyi b/python/pylibcudf/pylibcudf/nvtext/replace.pyi
new file mode 100644
index 00000000000..1f1ac72ce7c
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyi
@@ -0,0 +1,17 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def replace_tokens(
+    input: Column,
+    targets: Column,
+    replacements: Column,
+    delimiter: Scalar | None = None,
+) -> Column: ...
+def filter_tokens(
+    input: Column,
+    min_token_length: int,
+    replacement: Scalar | None = None,
+    delimiter: Scalar | None = None,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi
new file mode 100644
index 00000000000..d6ba1d189bd
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def is_letter(
+    input: Column, check_vowels: bool, indices: Column | int
+) -> Column: ...
+def porter_stemmer_measure(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi
new file mode 100644
index 00000000000..996bd093eb4
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi
@@ -0,0 +1,15 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+class HashedVocabulary:
+    def __init__(self, hash_file: str) -> None: ...
+
+def subword_tokenize(
+    input: Column,
+    vocabulary_table: HashedVocabulary,
+    max_sequence_length: int,
+    stride: int,
+    do_lower_case: bool,
+    do_truncate: bool,
+) -> tuple[Column, Column, Column]: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
new file mode 100644
index 00000000000..516011eff61
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+class TokenizeVocabulary:
+    def __init__(self, vocab: Column) -> None: ...
+
+def tokenize_scalar(
+    input: Column, delimiter: Scalar | None = None
+) -> Column: ...
+def tokenize_column(input: Column, delimiters: Column) -> Column: ...
+def count_tokens_scalar(
+    input: Column, delimiter: Scalar | None = None
+) -> Column: ...
+def count_tokens_column(input: Column, delimiters: Column) -> Column: ...
+def character_tokenize(input: Column) -> Column: ...
+def detokenize(
+    input: Column, row_indices: Column, separator: Scalar | None = None
+) -> Column: ...
+def tokenize_with_vocabulary(
+    input: Column,
+    vocabulary: TokenizeVocabulary,
+    delimiter: Scalar,
+    default_id: int = -1,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyi b/python/pylibcudf/pylibcudf/partitioning.pyi
new file mode 100644
index 00000000000..48a2ade23f1
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/partitioning.pyi
@@ -0,0 +1,14 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.table import Table
+
+def hash_partition(
+    input: Table, columns_to_hash: list[int], num_partitions: int
+) -> tuple[Table, list[int]]: ...
+def partition(
+    t: Table, partition_map: Column, num_partitions: int
+) -> tuple[Table, list[int]]: ...
+def round_robin_partition(
+    input: Table, num_partitions: int, start_partition: int = 0
+) -> tuple[Table, list[int]]: ...
diff --git a/python/pylibcudf/pylibcudf/py.typed b/python/pylibcudf/pylibcudf/py.typed
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/pylibcudf/pylibcudf/quantiles.pyi b/python/pylibcudf/pylibcudf/quantiles.pyi
new file mode 100644
index 00000000000..dca6eed013a
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/quantiles.pyi
@@ -0,0 +1,23 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from collections.abc import Sequence
+
+from pylibcudf.column import Column
+from pylibcudf.table import Table
+from pylibcudf.types import Interpolation, NullOrder, Order, Sorted
+
+def quantile(
+    input: Column,
+    q: Sequence[float],
+    interp: Interpolation = Interpolation.LINEAR,
+    ordered_indices: Column | None = None,
+    exact: bool = True,
+) -> Column: ...
+def quantiles(
+    input: Table,
+    q: Sequence[float],
+    interp: Interpolation = Interpolation.NEAREST,
+    is_input_sorted: Sorted = Sorted.NO,
+    column_order: list[Order] | None = None,
+    null_precedence: list[NullOrder] | None = None,
+) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/reduce.pyi b/python/pylibcudf/pylibcudf/reduce.pyi
new file mode 100644
index 00000000000..03193d3d0d9
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/reduce.pyi
@@ -0,0 +1,16 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.aggregation import Aggregation
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.types import DataType
+
+class ScanType(IntEnum):
+    INCLUSIVE = auto()
+    EXCLUSIVE = auto()
+
+def reduce(col: Column, agg: Aggregation, data_type: DataType) -> Scalar: ...
+def scan(col: Column, agg: Aggregation, inclusive: ScanType) -> Column: ...
+def minmax(col: Column) -> tuple[Scalar, Scalar]: ...
diff --git a/python/pylibcudf/pylibcudf/replace.pyi b/python/pylibcudf/pylibcudf/replace.pyi
new file mode 100644
index 00000000000..b4d65e76f76
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/replace.pyi
@@ -0,0 +1,29 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+class ReplacePolicy(IntEnum):
+    PRECEDING = auto()
+    FOLLOWING = auto()
+
+def replace_nulls(
+    source_column: Column, replacement: Column | Scalar | ReplacePolicy
+) -> Column: ...
+def find_and_replace_all(
+    source_column: Column,
+    values_to_replace: Column,
+    replacement_values: Column,
+) -> Column: ...
+def clamp(
+    source_column: Column,
+    lo: Scalar,
+    hi: Scalar,
+    lo_replace: Scalar | None = None,
+    hi_replace: Scalar | None = None,
+) -> Column: ...
+def normalize_nans_and_zeros(
+    source_column: Column, inplace: bool = False
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/reshape.pyi b/python/pylibcudf/pylibcudf/reshape.pyi
new file mode 100644
index 00000000000..d8d0ffcc3e0
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/reshape.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.table import Table
+
+def interleave_columns(source_table: Table) -> Column: ...
+def tile(source_table: Table, count: int) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/rolling.pyi b/python/pylibcudf/pylibcudf/rolling.pyi
new file mode 100644
index 00000000000..ca0111e01ec
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/rolling.pyi
@@ -0,0 +1,12 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.aggregation import Aggregation
+from pylibcudf.column import Column
+
+def rolling_window[WindowType: (Column, int)](
+    source: Column,
+    preceding_window: WindowType,
+    following_window: WindowType,
+    min_periods: int,
+    agg: Aggregation,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/round.pyi b/python/pylibcudf/pylibcudf/round.pyi
new file mode 100644
index 00000000000..0099ad3c510
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/round.pyi
@@ -0,0 +1,15 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+
+class RoundingMethod(IntEnum):
+    HALF_UP = auto()
+    HALF_EVEN = auto()
+
+def round(
+    source: Column,
+    decimal_places: int = 0,
+    round_method: RoundingMethod = RoundingMethod.HALF_UP,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/scalar.pyi b/python/pylibcudf/pylibcudf/scalar.pyi
new file mode 100644
index 00000000000..0b72b10ef86
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/scalar.pyi
@@ -0,0 +1,10 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.types import DataType
+
+class Scalar:
+    def type(self) -> DataType: ...
+    def is_valid(self) -> bool: ...
+    @staticmethod
+    def empty_like(column: Column) -> Scalar: ...
diff --git a/python/pylibcudf/pylibcudf/search.pyi b/python/pylibcudf/pylibcudf/search.pyi
new file mode 100644
index 00000000000..7f292b129b2
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/search.pyi
@@ -0,0 +1,19 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.table import Table
+from pylibcudf.types import NullOrder, Order
+
+def lower_bound(
+    haystack: Table,
+    needles: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Column: ...
+def upper_bound(
+    haystack: Table,
+    needles: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Column: ...
+def contains(haystack: Column, needles: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi
new file mode 100644
index 00000000000..60a42023a6a
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/sorting.pyi
@@ -0,0 +1,64 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.aggregation import RankMethod
+from pylibcudf.column import Column
+from pylibcudf.table import Table
+from pylibcudf.types import NullOrder, NullPolicy, Order
+
+def sorted_order(
+    source_table: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Column: ...
+def stable_sorted_order(
+    source_table: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Column: ...
+def rank(
+    input_view: Column,
+    method: RankMethod,
+    column_order: Order,
+    null_handling: NullPolicy,
+    null_precedence: NullOrder,
+    percentage: bool,
+) -> Column: ...
+def is_sorted(
+    table: Table, column_order: list[Order], null_precedence: list[NullOrder]
+) -> bool: ...
+def segmented_sort_by_key(
+    values: Table,
+    keys: Table,
+    segment_offsets: Column,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Table: ...
+def stable_segmented_sort_by_key(
+    values: Table,
+    keys: Table,
+    segment_offsets: Column,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Table: ...
+def sort_by_key(
+    values: Table,
+    keys: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Table: ...
+def stable_sort_by_key(
+    values: Table,
+    keys: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Table: ...
+def sort(
+    source_table: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Table: ...
+def stable_sort(
+    source_table: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
+) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi
new file mode 100644
index 00000000000..fe1cf6ee4fc
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi
@@ -0,0 +1,53 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+from pylibcudf.table import Table
+from pylibcudf.types import NanEquality, NanPolicy, NullEquality, NullPolicy
+
+class DuplicateKeepOption(IntEnum):
+    KEEP_ANY = auto()
+    KEEP_FIRST = auto()
+    KEEP_LAST = auto()
+    KEEP_NONE = auto()
+
+def drop_nulls(
+    source_table: Table, keys: list[int], keep_threshold: int
+) -> Table: ...
+def drop_nans(
+    source_table: Table, keys: list[int], keep_threshold: int
+) -> Table: ...
+def apply_boolean_mask(source_table: Table, boolean_mask: Column) -> Table: ...
+def unique(
+    input: Table,
+    keys: list[int],
+    keep: DuplicateKeepOption,
+    nulls_equal: NullEquality,
+) -> Table: ...
+def distinct(
+    input: Table,
+    keys: list[int],
+    keep: DuplicateKeepOption,
+    nulls_equal: NullEquality,
+    nans_equal: NanEquality,
+) -> Table: ...
+def distinct_indices(
+    input: Table,
+    keep: DuplicateKeepOption,
+    nulls_equal: NullEquality,
+    nans_equal: NanEquality,
+) -> Column: ...
+def stable_distinct(
+    input: Table,
+    keys: list[int],
+    keep: DuplicateKeepOption,
+    nulls_equal: NullEquality,
+    nans_equal: NanEquality,
+) -> Table: ...
+def unique_count(
+    column: Column, null_handling: NullPolicy, nan_handling: NanPolicy
+) -> int: ...
+def distinct_count(
+    column: Column, null_handling: NullPolicy, nan_handling: NanPolicy
+) -> int: ...
diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pyi b/python/pylibcudf/pylibcudf/strings/__init__.pyi
new file mode 100644
index 00000000000..492ed311c28
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/__init__.pyi
@@ -0,0 +1,55 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.strings import (
+    attributes,
+    capitalize,
+    case,
+    char_types,
+    combine,
+    contains,
+    convert,
+    extract,
+    find,
+    find_multiple,
+    findall,
+    padding,
+    regex_flags,
+    regex_program,
+    repeat,
+    replace,
+    replace_re,
+    side_type,
+    slice,
+    split,
+    strip,
+    translate,
+    wrap,
+)
+from pylibcudf.strings.side_type import SideType
+
+__all__ = [
+    "SideType",
+    "attributes",
+    "capitalize",
+    "case",
+    "char_types",
+    "combine",
+    "contains",
+    "convert",
+    "extract",
+    "find",
+    "find_multiple",
+    "findall",
+    "padding",
+    "regex_flags",
+    "regex_program",
+    "repeat",
+    "replace",
+    "replace_re",
+    "side_type",
+    "slice",
+    "split",
+    "strip",
+    "translate",
+    "wrap",
+]
diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyi b/python/pylibcudf/pylibcudf/strings/attributes.pyi
new file mode 100644
index 00000000000..7fd5c9773d4
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/attributes.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def count_characters(source_strings: Column) -> Column: ...
+def count_bytes(source_strings: Column) -> Column: ...
+def code_points(source_strings: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyi b/python/pylibcudf/pylibcudf/strings/capitalize.pyi
new file mode 100644
index 00000000000..05bf8043727
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyi
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def capitalize(input: Column, delimiters: Scalar | None = None) -> Column: ...
+def title(input: Column) -> Column: ...
+def is_title(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/case.pyi b/python/pylibcudf/pylibcudf/strings/case.pyi
new file mode 100644
index 00000000000..4e50db4d1da
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/case.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def to_lower(input: Column) -> Column: ...
+def to_upper(input: Column) -> Column: ...
+def swapcase(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyi b/python/pylibcudf/pylibcudf/strings/char_types.pyi
new file mode 100644
index 00000000000..1e3f57082ef
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/char_types.pyi
@@ -0,0 +1,30 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+class StringCharacterTypes(IntEnum):
+    DECIMAL = auto()
+    NUMERIC = auto()
+    DIGIT = auto()
+    ALPHA = auto()
+    SPACE = auto()
+    UPPER = auto()
+    LOWER = auto()
+    ALPHANUM = auto()
+    CASE_TYPES = auto()
+    ALL_TYPES = auto()
+
+def all_characters_of_type(
+    source_strings: Column,
+    types: StringCharacterTypes,
+    verify_types: StringCharacterTypes,
+) -> Column: ...
+def filter_characters_of_type(
+    source_strings: Column,
+    types_to_remove: StringCharacterTypes,
+    replacement: Scalar,
+    types_to_keep: StringCharacterTypes,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi
new file mode 100644
index 00000000000..0833ac006c0
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/combine.pyi
@@ -0,0 +1,34 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.table import Table
+
+class SeparatorOnNulls(IntEnum):
+    YES = auto()
+    NO = auto()
+
+class OutputIfEmptyList(IntEnum):
+    EMPTY_STRING = auto()
+    NULL_ELEMENT = auto()
+
+def concatenate(
+    strings_columns: Table,
+    separator: Column | Scalar,
+    narep: Scalar | None = None,
+    col_narep: Scalar | None = None,
+    separate_nulls: SeparatorOnNulls = SeparatorOnNulls.YES,
+) -> Column: ...
+def join_strings(
+    input: Column, separator: Scalar, narep: Scalar
+) -> Column: ...
+def join_list_elements(
+    source_strings: Column,
+    separator: Column | Scalar,
+    separator_narep: Scalar,
+    string_narep: Scalar,
+    separate_nulls: SeparatorOnNulls,
+    empty_list_policy: OutputIfEmptyList,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyi b/python/pylibcudf/pylibcudf/strings/contains.pyi
new file mode 100644
index 00000000000..1f0620383b3
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/contains.pyi
@@ -0,0 +1,14 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.strings.regex_program import RegexProgram
+
+def contains_re(input: Column, prog: RegexProgram) -> Column: ...
+def count_re(input: Column, prog: RegexProgram) -> Column: ...
+def matches_re(input: Column, prog: RegexProgram) -> Column: ...
+def like(
+    input: Column,
+    pattern: Column | Scalar,
+    escape_character: Scalar | None = None,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
index aa27a7c8929..08b5034456e 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
@@ -10,3 +10,15 @@
     convert_lists,
     convert_urls,
 )
+
+__all__ = [
+    "convert_booleans",
+    "convert_datetime",
+    "convert_durations",
+    "convert_fixed_point",
+    "convert_floats",
+    "convert_integers",
+    "convert_ipv4",
+    "convert_lists",
+    "convert_urls",
+]
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi b/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi
new file mode 100644
index 00000000000..edf615376c2
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi
@@ -0,0 +1,24 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from pylibcudf.strings.convert import (
+    convert_booleans,
+    convert_datetime,
+    convert_durations,
+    convert_fixed_point,
+    convert_floats,
+    convert_integers,
+    convert_ipv4,
+    convert_lists,
+    convert_urls,
+)
+
+__all__ = [
+    "convert_booleans",
+    "convert_datetime",
+    "convert_durations",
+    "convert_fixed_point",
+    "convert_floats",
+    "convert_integers",
+    "convert_ipv4",
+    "convert_lists",
+    "convert_urls",
+]
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi
new file mode 100644
index 00000000000..77c09242e9a
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi
@@ -0,0 +1,9 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def to_booleans(input: Column, true_string: Scalar) -> Column: ...
+def from_booleans(
+    booleans: Column, true_string: Scalar, false_string: Scalar
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi
new file mode 100644
index 00000000000..c6857169765
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi
@@ -0,0 +1,12 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.types import DataType
+
+def to_timestamps(
+    input: Column, timestamp_type: DataType, format: str
+) -> Column: ...
+def from_timestamps(
+    timestamps: Column, format: str, input_strings_names: Column
+) -> Column: ...
+def is_timestamp(input: Column, format: str) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi
new file mode 100644
index 00000000000..a5787a5fe49
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi
@@ -0,0 +1,9 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.types import DataType
+
+def to_durations(
+    input: Column, duration_type: DataType, format: str
+) -> Column: ...
+def from_durations(durations: Column, format: str | None = None) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi
new file mode 100644
index 00000000000..1192d3dfcd6
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi
@@ -0,0 +1,10 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.types import DataType
+
+def to_fixed_point(input: Column, output_type: DataType) -> Column: ...
+def from_fixed_point(input: Column) -> Column: ...
+def is_fixed_point(
+    input: Column, decimal_type: DataType | None = None
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi
new file mode 100644
index 00000000000..ddf4042e10d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.types import DataType
+
+def to_floats(strings: Column, output_type: DataType) -> Column: ...
+def from_floats(floats: Column) -> Column: ...
+def is_float(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi
new file mode 100644
index 00000000000..b96226fba90
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.types import DataType
+
+def to_integers(input: Column, output_type: DataType) -> Column: ...
+def from_integers(integers: Column) -> Column: ...
+def is_integer(input: Column, int_type: DataType | None = None) -> Column: ...
+def hex_to_integers(input: Column, output_type: DataType) -> Column: ...
+def is_hex(input: Column) -> Column: ...
+def integers_to_hex(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi
new file mode 100644
index 00000000000..b017b32598c
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def ipv4_to_integers(input: Column) -> Column: ...
+def integers_to_ipv4(integers: Column) -> Column: ...
+def is_ipv4(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi
new file mode 100644
index 00000000000..6ab3a4183e9
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi
@@ -0,0 +1,10 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def format_list_column(
+    input: Column,
+    na_rep: Scalar | None = None,
+    separators: Column | None = None,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
new file mode 100644
index 00000000000..40321c3ae66
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
@@ -0,0 +1,6 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def url_encode(Input: Column) -> Column: ...
+def url_decode(Input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyi b/python/pylibcudf/pylibcudf/strings/extract.pyi
new file mode 100644
index 00000000000..4354bd3072d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/extract.pyi
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.strings.regex_program import RegexProgram
+from pylibcudf.table import Table
+
+def extract(input: Column, prog: RegexProgram) -> Table: ...
+def extract_all_record(input: Column, prog: RegexProgram) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/find.pyi b/python/pylibcudf/pylibcudf/strings/find.pyi
new file mode 100644
index 00000000000..3d04a9c3161
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/find.pyi
@@ -0,0 +1,14 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def find(
+    input: Column, target: Column | Scalar, start: int = 0, stop: int = -1
+) -> Column: ...
+def rfind(
+    input: Column, target: Scalar, start: int = 0, stop: int = -1
+) -> Column: ...
+def contains(input: Column, target: Column | Scalar) -> Column: ...
+def starts_with(input: Column, target: Column | Scalar) -> Column: ...
+def ends_with(input: Column, target: Column | Scalar) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyi b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi
new file mode 100644
index 00000000000..3d46fd2fa6d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi
@@ -0,0 +1,5 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def find_multiple(input: Column, targets: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyi b/python/pylibcudf/pylibcudf/strings/findall.pyi
new file mode 100644
index 00000000000..77e38581d22
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/findall.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.strings.regex_program import RegexProgram
+
+def find_re(input: Column, pattern: RegexProgram) -> Column: ...
+def findall(input: Column, pattern: RegexProgram) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyi b/python/pylibcudf/pylibcudf/strings/padding.pyi
new file mode 100644
index 00000000000..a991935e6e5
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/padding.pyi
@@ -0,0 +1,9 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.strings.side_type import SideType
+
+def pad(
+    input: Column, width: int, side: SideType, fill_char: str
+) -> Column: ...
+def zfill(input: Column, width: int) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/regex_flags.pyi b/python/pylibcudf/pylibcudf/strings/regex_flags.pyi
new file mode 100644
index 00000000000..2576b5575de
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from enum import IntEnum, auto
+
+class RegexFlags(IntEnum):
+    DEFAULT = auto()
+    MULTILINE = auto()
+    DOTALL = auto()
diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyi b/python/pylibcudf/pylibcudf/strings/regex_program.pyi
new file mode 100644
index 00000000000..6c853bcfc44
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.strings.regex_flags import RegexFlags
+
+class RegexProgram:
+    @staticmethod
+    def create(pattern: str, flags: RegexFlags) -> RegexProgram: ...
diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyi b/python/pylibcudf/pylibcudf/strings/repeat.pyi
new file mode 100644
index 00000000000..93a46b71caa
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/repeat.pyi
@@ -0,0 +1,5 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def repeat_strings(input: Column, repeat_times: Column | int) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyi b/python/pylibcudf/pylibcudf/strings/replace.pyi
new file mode 100644
index 00000000000..64df09ef7e8
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/replace.pyi
@@ -0,0 +1,14 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def replace(
+    input: Column, target: Scalar, repl: Scalar, maxrepl: int = -1
+) -> Column: ...
+def replace_multiple(
+    input: Column, target: Column, repl: Column, maxrepl: int = -1
+) -> Column: ...
+def replace_slice(
+    input: Column, repl: Scalar | None = None, start: int = 0, stop: int = -1
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyi b/python/pylibcudf/pylibcudf/strings/replace_re.pyi
new file mode 100644
index 00000000000..056bafbf7ef
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyi
@@ -0,0 +1,27 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from typing import overload
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.strings.regex_flags import RegexFlags
+from pylibcudf.strings.regex_program import RegexProgram
+
+@overload
+def replace_re(
+    input: Column,
+    pattern: RegexProgram,
+    replacement: Scalar,
+    max_replace_count: int = -1,
+) -> Column: ...
+@overload
+def replace_re(
+    input: Column,
+    patterns: list[str],
+    replacement: Column,
+    max_replace_count: int = -1,
+    flags: RegexFlags = RegexFlags.DEFAULT,
+) -> Column: ...
+def replace_with_backrefs(
+    input: Column, prog: RegexProgram, replacement: str
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/side_type.pyi b/python/pylibcudf/pylibcudf/strings/side_type.pyi
new file mode 100644
index 00000000000..15083120be0
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/side_type.pyi
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from enum import IntEnum, auto
+
+class SideType(IntEnum):
+    LEFT = auto()
+    RIGHT = auto()
+    BOTH = auto()
diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyi b/python/pylibcudf/pylibcudf/strings/slice.pyi
new file mode 100644
index 00000000000..7bf9a7cb8c6
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/slice.pyi
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+def slice_strings(
+    input: Column,
+    start: Column | Scalar | None = None,
+    stop: Column | Scalar | None = None,
+    step: Scalar | None = None,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/split/__init__.py b/python/pylibcudf/pylibcudf/strings/split/__init__.py
index 2033e5e275b..db2a597882e 100644
--- a/python/pylibcudf/pylibcudf/strings/split/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/split/__init__.py
@@ -1,2 +1,4 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from . import partition, split
+
+__all__ = ["partition", "split"]
diff --git a/python/pylibcudf/pylibcudf/strings/split/__init__.pyi b/python/pylibcudf/pylibcudf/strings/split/__init__.pyi
new file mode 100644
index 00000000000..c44bce048b6
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/split/__init__.pyi
@@ -0,0 +1,4 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from pylibcudf.strings.split import partition, split
+
+__all__ = ["partition", "split"]
diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyi b/python/pylibcudf/pylibcudf/strings/split/partition.pyi
new file mode 100644
index 00000000000..f19a463bd7e
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyi
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.table import Table
+
+def partition(input: Column, delimiter: Scalar | None = None) -> Table: ...
+def rpartition(input: Column, delimiter: Scalar | None = None) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyi b/python/pylibcudf/pylibcudf/strings/split/split.pyi
new file mode 100644
index 00000000000..3ccf0bc2a01
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/split/split.pyi
@@ -0,0 +1,27 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.strings.regex_program import RegexProgram
+from pylibcudf.table import Table
+
+def split(
+    strings_column: Column, delimiter: Scalar, maxsplit: int
+) -> Table: ...
+def rsplit(
+    strings_column: Column, delimiter: Scalar, maxsplit: int
+) -> Table: ...
+def split_record(
+    strings: Column, delimiter: Scalar, maxsplit: int
+) -> Column: ...
+def rsplit_record(
+    strings: Column, delimiter: Scalar, maxsplit: int
+) -> Column: ...
+def split_re(input: Column, prog: RegexProgram, maxsplit: int) -> Table: ...
+def rsplit_re(input: Column, prog: RegexProgram, maxsplit: int) -> Table: ...
+def split_record_re(
+    input: Column, prog: RegexProgram, maxsplit: int
+) -> Column: ...
+def rsplit_record_re(
+    input: Column, prog: RegexProgram, maxsplit: int
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyi b/python/pylibcudf/pylibcudf/strings/strip.pyi
new file mode 100644
index 00000000000..680355fc88f
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/strip.pyi
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+from pylibcudf.strings.side_type import SideType
+
+def strip(
+    input: Column,
+    side: SideType = SideType.BOTH,
+    to_strip: Scalar | None = None,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyi b/python/pylibcudf/pylibcudf/strings/translate.pyi
new file mode 100644
index 00000000000..adeafcc2641
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/translate.pyi
@@ -0,0 +1,20 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from collections.abc import Mapping
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+from pylibcudf.scalar import Scalar
+
+class FilterType(IntEnum):
+    KEEP = auto()
+    REMOVE = auto()
+
+def translate(
+    input: Column, chars_table: Mapping[int | str, int | str]
+) -> Column: ...
+def filter_characters(
+    input: Column,
+    characters_to_filter: Mapping[int | str, int | str],
+    keep_characters: FilterType,
+    replacement: Scalar,
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyi b/python/pylibcudf/pylibcudf/strings/wrap.pyi
new file mode 100644
index 00000000000..5658f279197
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/wrap.pyi
@@ -0,0 +1,5 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def wrap(input: Column, width: int) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi
new file mode 100644
index 00000000000..ad4c9146feb
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/table.pyi
@@ -0,0 +1,9 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+class Table:
+    def __init__(self, columns: list[Column]) -> None: ...
+    def num_columns(self) -> int: ...
+    def num_rows(self) -> int: ...
+    def columns(self) -> list[Column]: ...
diff --git a/python/pylibcudf/pylibcudf/traits.pyi b/python/pylibcudf/pylibcudf/traits.pyi
new file mode 100644
index 00000000000..4c76a07a95c
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/traits.pyi
@@ -0,0 +1,22 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.types import DataType
+
+def is_relationally_comparable(typ: DataType) -> bool: ...
+def is_equality_comparable(typ: DataType) -> bool: ...
+def is_numeric(typ: DataType) -> bool: ...
+def is_index_type(typ: DataType) -> bool: ...
+def is_unsigned(typ: DataType) -> bool: ...
+def is_integral(typ: DataType) -> bool: ...
+def is_integral_not_bool(typ: DataType) -> bool: ...
+def is_floating_point(typ: DataType) -> bool: ...
+def is_boolean(typ: DataType) -> bool: ...
+def is_timestamp(typ: DataType) -> bool: ...
+def is_fixed_point(typ: DataType) -> bool: ...
+def is_duration(typ: DataType) -> bool: ...
+def is_chrono(typ: DataType) -> bool: ...
+def is_dictionary(typ: DataType) -> bool: ...
+def is_fixed_width(typ: DataType) -> bool: ...
+def is_compound(typ: DataType) -> bool: ...
+def is_nested(typ: DataType) -> bool: ...
+def is_bit_castable(source: DataType, target: DataType) -> bool: ...
diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi
new file mode 100644
index 00000000000..103b9ec36ab
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/transform.pyi
@@ -0,0 +1,16 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from pylibcudf.column import Column
+from pylibcudf.expressions import Expression
+from pylibcudf.gpumemoryview import gpumemoryview
+from pylibcudf.table import Table
+from pylibcudf.types import DataType
+
+def nans_to_nulls(input: Column) -> tuple[gpumemoryview, int]: ...
+def compute_column(input: Table, expr: Expression) -> Column: ...
+def bools_to_mask(input: Column) -> tuple[gpumemoryview, int]: ...
+def mask_to_bools(bitmask: int, begin_bit: int, end_bit: int) -> Column: ...
+def transform(
+    input: Column, unary_udf: str, output_type: DataType, is_ptx: bool
+) -> Column: ...
+def encode(input: Table) -> tuple[Table, Column]: ...
+def one_hot_encode(input_column: Column, categories: Column) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/transpose.pyi b/python/pylibcudf/pylibcudf/transpose.pyi
new file mode 100644
index 00000000000..a84ab8a60ea
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/transpose.pyi
@@ -0,0 +1,4 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from pylibcudf.table import Table
+
+def transpose(input_table: Table) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi
new file mode 100644
index 00000000000..ce000cafe9d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/types.pyi
@@ -0,0 +1,85 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from enum import IntEnum, auto
+
+class Interpolation(IntEnum):
+    LINEAR = auto()
+    LOWER = auto()
+    HIGHER = auto()
+    MIDPOINT = auto()
+    NEAREST = auto()
+
+class MaskState(IntEnum):
+    UNALLOCATED = auto()
+    UNINITIALIZED = auto()
+    ALL_VALID = auto()
+    ALL_NULL = auto()
+
+class NanEquality(IntEnum):
+    ALL_EQUAL = auto()
+    UNEQUAL = auto()
+
+class NanPolicy(IntEnum):
+    NAN_IS_NULL = auto()
+    NAN_IS_VALID = auto()
+
+class NullEquality(IntEnum):
+    EQUAL = auto()
+    UNEQUAL = auto()
+
+class NullOrder(IntEnum):
+    AFTER = auto()
+    BEFORE = auto()
+
+class NullPolicy(IntEnum):
+    EXCLUDE = auto()
+    INCLUDE = auto()
+
+class Order(IntEnum):
+    ASCENDING = auto()
+    DESCENDING = auto()
+
+class Sorted(IntEnum):
+    NO = auto()
+    YES = auto()
+
+class TypeId(IntEnum):
+    EMPTY = auto()
+    INT8 = auto()
+    INT16 = auto()
+    INT32 = auto()
+    INT64 = auto()
+    UINT8 = auto()
+    UINT16 = auto()
+    UINT32 = auto()
+    UINT64 = auto()
+    FLOAT32 = auto()
+    FLOAT64 = auto()
+    BOOL8 = auto()
+    TIMESTAMP_DAYS = auto()
+    TIMESTAMP_SECONDS = auto()
+    TIMESTAMP_MILLISECONDS = auto()
+    TIMESTAMP_MICROSECONDS = auto()
+    TIMESTAMP_NANOSECONDS = auto()
+    DURATION_DAYS = auto()
+    DURATION_SECONDS = auto()
+    DURATION_MILLISECONDS = auto()
+    DURATION_MICROSECONDS = auto()
+    DURATION_NANOSECONDS = auto()
+    DICTIONARY32 = auto()
+    STRING = auto()
+    LIST = auto()
+    DECIMAL32 = auto()
+    DECIMAL64 = auto()
+    DECIMAL128 = auto()
+    STRUCT = auto()
+    NUM_TYPE_IDS = auto()
+
+class DataType:
+    def __init__(self, type_id: TypeId, scale: int = 0) -> None: ...
+    def id(self) -> TypeId: ...
+    def scale(self) -> int: ...
+
+def size_of(dtype: DataType) -> int: ...
+
+SIZE_TYPE: DataType
+SIZE_TYPE_ID: TypeId
diff --git a/python/pylibcudf/pylibcudf/unary.pyi b/python/pylibcudf/pylibcudf/unary.pyi
new file mode 100644
index 00000000000..d3095e56528
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/unary.pyi
@@ -0,0 +1,38 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
+from pylibcudf.column import Column
+from pylibcudf.types import DataType
+
+class UnaryOperator(IntEnum):
+    SIN = auto()
+    COS = auto()
+    TAN = auto()
+    ARCSIN = auto()
+    ARCCOS = auto()
+    ARCTAN = auto()
+    SINH = auto()
+    COSH = auto()
+    TANH = auto()
+    ARCSINH = auto()
+    ARCCOSH = auto()
+    ARCTANH = auto()
+    EXP = auto()
+    LOG = auto()
+    SQRT = auto()
+    CBRT = auto()
+    CEIL = auto()
+    FLOOR = auto()
+    ABS = auto()
+    RINT = auto()
+    BIT_INVERT = auto()
+    NOT = auto()
+
+def unary_operation(input: Column, op: UnaryOperator) -> Column: ...
+def is_null(input: Column) -> Column: ...
+def is_valid(input: Column) -> Column: ...
+def cast(input: Column, data_type: DataType) -> Column: ...
+def is_nan(input: Column) -> Column: ...
+def is_not_nan(input: Column) -> Column: ...
+def is_supported_cast(from_: DataType, to: DataType) -> bool: ...

From af3dc64f863cd561248de02a241a538af534dc50 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 6 Nov 2024 17:26:43 +0000
Subject: [PATCH 05/16] Minor adaptations in response to type annotations in
 pylibcudf

---
 python/cudf_polars/cudf_polars/containers/dataframe.py     | 2 +-
 python/cudf_polars/cudf_polars/dsl/expressions/datetime.py | 4 +++-
 python/cudf_polars/cudf_polars/dsl/expressions/literal.py  | 2 +-
 python/cudf_polars/cudf_polars/dsl/ir.py                   | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py
index 08bc9d0ea3f..7560a0f5a64 100644
--- a/python/cudf_polars/cudf_polars/containers/dataframe.py
+++ b/python/cudf_polars/cudf_polars/containers/dataframe.py
@@ -60,7 +60,7 @@ def to_polars(self) -> pl.DataFrame:
         # To guarantee we produce correct names, we therefore
         # serialise with names we control and rename with that map.
         name_map = {f"column_{i}": name for i, name in enumerate(self.column_map)}
-        table: pa.Table = plc.interop.to_arrow(
+        table = plc.interop.to_arrow(
             self.table,
             [plc.interop.ColumnMetadata(name=name) for name in name_map],
         )
diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py
index 65fa4bfa62f..cd8e5c6a4eb 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py
@@ -27,7 +27,9 @@
 
 class TemporalFunction(Expr):
     __slots__ = ("name", "options")
-    _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = {
+    _COMPONENT_MAP: ClassVar[
+        dict[pl_expr.TemporalFunction, plc.datetime.DatetimeComponent]
+    ] = {
         pl_expr.TemporalFunction.Year: plc.datetime.DatetimeComponent.YEAR,
         pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH,
         pl_expr.TemporalFunction.Day: plc.datetime.DatetimeComponent.DAY,
diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py
index c16313bf83c..7eba0c110ab 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py
@@ -58,7 +58,7 @@ def collect_agg(self, *, depth: int) -> AggInfo:
 class LiteralColumn(Expr):
     __slots__ = ("value",)
     _non_child = ("dtype", "value")
-    value: pa.Array[Any, Any]
+    value: pa.Array[Any]
 
     def __init__(self, dtype: plc.DataType, value: pl.Series) -> None:
         self.dtype = dtype
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index a242ff9300f..727e8ce7666 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -498,7 +498,7 @@ def do_evaluate(
                 # Mask must have been applied.
                 return df
         elif typ == "ndjson":
-            json_schema: list[tuple[str, str, list]] = [
+            json_schema: list[plc.io.json.NameAndType] = [
                 (name, typ, []) for name, typ in schema.items()
             ]
             plc_tbl_w_meta = plc.io.json.read_json(

From 5eb87fb8771993c827117d6433da513d3b3b3798 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 6 Nov 2024 18:14:49 +0000
Subject: [PATCH 06/16] Use typed enum for inclusive parameter in label_bins

---
 python/cudf/cudf/_lib/labeling.pyx      |  4 ++--
 python/pylibcudf/pylibcudf/labeling.pxd |  4 ++--
 python/pylibcudf/pylibcudf/labeling.pyi | 11 +++++++++--
 python/pylibcudf/pylibcudf/labeling.pyx | 23 ++++++-----------------
 4 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/python/cudf/cudf/_lib/labeling.pyx b/python/cudf/cudf/_lib/labeling.pyx
index 3966cce8981..524bfd3b2e8 100644
--- a/python/cudf/cudf/_lib/labeling.pyx
+++ b/python/cudf/cudf/_lib/labeling.pyx
@@ -17,8 +17,8 @@ def label_bins(Column input, Column left_edges, cbool left_inclusive,
     plc_column = plc.labeling.label_bins(
         input.to_pylibcudf(mode="read"),
         left_edges.to_pylibcudf(mode="read"),
-        left_inclusive,
+        plc.labeling.Inclusive.YES if left_inclusive else plc.labeling.Inclusive.NO,
         right_edges.to_pylibcudf(mode="read"),
-        right_inclusive
+        plc.labeling.Inclusive.YES if right_inclusive else plc.labeling.Inclusive.NO,
     )
     return Column.from_pylibcudf(plc_column)
diff --git a/python/pylibcudf/pylibcudf/labeling.pxd b/python/pylibcudf/pylibcudf/labeling.pxd
index 6f8797ae7d3..b1f9f2e806d 100644
--- a/python/pylibcudf/pylibcudf/labeling.pxd
+++ b/python/pylibcudf/pylibcudf/labeling.pxd
@@ -8,7 +8,7 @@ from .column cimport Column
 cpdef Column label_bins(
     Column input,
     Column left_edges,
-    bool left_inclusive,
+    inclusive left_inclusive,
     Column right_edges,
-    bool right_inclusive
+    inclusive right_inclusive
 )
diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi
index 362a92ca7c1..0a6f2c13719 100644
--- a/python/pylibcudf/pylibcudf/labeling.pyi
+++ b/python/pylibcudf/pylibcudf/labeling.pyi
@@ -1,10 +1,17 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
+
+from enum import IntEnum, auto
+
 from pylibcudf.column import Column
 
+class Inclusive(IntEnum):
+    YES = auto()
+    NO = auto()
+
 def label_bins(
     input: Column,
     left_edges: Column,
-    left_inclusive: bool,
+    left_inclusive: Inclusive,
     right_edges: Column,
-    right_inclusive: bool,
+    right_inclusive: Inclusive,
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx
index 226a9e14172..84a7d42283b 100644
--- a/python/pylibcudf/pylibcudf/labeling.pyx
+++ b/python/pylibcudf/pylibcudf/labeling.pyx
@@ -14,9 +14,9 @@ from .column cimport Column
 cpdef Column label_bins(
     Column input,
     Column left_edges,
-    bool left_inclusive,
+    inclusive left_inclusive,
     Column right_edges,
-    bool right_inclusive
+    inclusive right_inclusive
 ):
     """Labels elements based on membership in the specified bins.
 
@@ -28,11 +28,11 @@ cpdef Column label_bins(
         Column of input elements to label according to the specified bins.
     left_edges : Column
         Column of the left edge of each bin.
-    left_inclusive : bool
+    left_inclusive : Inclusive
         Whether or not the left edge is inclusive.
     right_edges : Column
         Column of the right edge of each bin.
-    right_inclusive : bool
+    right_inclusive : Inclusive
         Whether or not the right edge is inclusive.
 
     Returns
@@ -42,24 +42,13 @@ cpdef Column label_bins(
         according to the specified bins.
     """
     cdef unique_ptr[column] c_result
-    cdef inclusive c_left_inclusive = (
-        inclusive.YES
-        if left_inclusive
-        else inclusive.NO
-    )
-    cdef inclusive c_right_inclusive = (
-        inclusive.YES
-        if right_inclusive
-        else inclusive.NO
-    )
-
     with nogil:
         c_result = cpp_labeling.label_bins(
             input.view(),
             left_edges.view(),
-            c_left_inclusive,
+            left_inclusive,
             right_edges.view(),
-            c_right_inclusive,
+            right_inclusive,
         )
 
     return Column.from_libcudf(move(c_result))

From 4ef56b8839d4357d6ea6e10f76f622a66da57322 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 6 Nov 2024 18:17:18 +0000
Subject: [PATCH 07/16] Use typed enums for null/nan equality in list methods

---
 python/cudf/cudf/_lib/lists.pyx               |  18 +-
 .../pylibcudf/libcudf/CMakeLists.txt          |   1 +
 .../pylibcudf/libcudf/lists/CMakeLists.txt    |  23 +++
 .../pylibcudf/libcudf/lists/combine.pxd       |   8 +-
 .../pylibcudf/libcudf/lists/combine.pyx       |   0
 .../pylibcudf/libcudf/lists/contains.pyx      |   0
 python/pylibcudf/pylibcudf/lists.pxd          |  30 +++-
 python/pylibcudf/pylibcudf/lists.pyi          |  46 ++++-
 python/pylibcudf/pylibcudf/lists.pyx          | 163 ++++++------------
 9 files changed, 154 insertions(+), 135 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/libcudf/lists/CMakeLists.txt
 create mode 100644 python/pylibcudf/pylibcudf/libcudf/lists/combine.pyx
 create mode 100644 python/pylibcudf/pylibcudf/libcudf/lists/contains.pyx

diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index 12432ac6d5d..f28afd10f86 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -4,7 +4,9 @@ from cudf.core.buffer import acquire_spill_lock
 
 from libcpp cimport bool
 
-from pylibcudf.libcudf.types cimport null_order, size_type
+from pylibcudf.libcudf.types cimport (
+    nan_equality, null_equality, null_order, order, size_type
+)
 
 from cudf._lib.column cimport Column
 from cudf._lib.utils cimport columns_from_pylibcudf_table
@@ -37,8 +39,8 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal):
     return Column.from_pylibcudf(
         plc.lists.distinct(
             col.to_pylibcudf(mode="read"),
-            nulls_equal,
-            nans_all_equal,
+            null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL,
+            nan_equality.ALL_EQUAL if nans_all_equal else nan_equality.UNEQUAL,
         )
     )
 
@@ -48,7 +50,7 @@ def sort_lists(Column col, bool ascending, str na_position):
     return Column.from_pylibcudf(
         plc.lists.sort_lists(
             col.to_pylibcudf(mode="read"),
-            ascending,
+            order.ASCENDING if ascending else order.DESCENDING,
             null_order.BEFORE if na_position == "first" else null_order.AFTER,
             False,
         )
@@ -91,7 +93,7 @@ def index_of_scalar(Column col, object py_search_key):
         plc.lists.index_of(
             col.to_pylibcudf(mode="read"),
             <Scalar> py_search_key.device_value.c_value,
-            True,
+            plc.lists.DuplicateFindOption.FIND_FIRST,
         )
     )
 
@@ -102,7 +104,7 @@ def index_of_column(Column col, Column search_keys):
         plc.lists.index_of(
             col.to_pylibcudf(mode="read"),
             search_keys.to_pylibcudf(mode="read"),
-            True,
+            plc.lists.DuplicateFindOption.FIND_FIRST,
         )
     )
 
@@ -123,7 +125,9 @@ def concatenate_list_elements(Column input_column, dropna=False):
     return Column.from_pylibcudf(
         plc.lists.concatenate_list_elements(
             input_column.to_pylibcudf(mode="read"),
-            dropna,
+            plc.lists.ConcatenateNullPolicy.IGNORE
+            if dropna
+            else plc.lists.ConcatenateNullPolicy.NULLIFY_OUTPUT_ROW,
         )
     )
     )
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
index 15beaee47d4..00669ff579a 100644
--- a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
@@ -24,4 +24,5 @@ rapids_cython_create_modules(
   LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp
 )
 add_subdirectory(io)
+add_subdirectory(lists)
 add_subdirectory(strings)
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/lists/CMakeLists.txt
new file mode 100644
index 00000000000..c896db2c85a
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/CMakeLists.txt
@@ -0,0 +1,23 @@
+# =============================================================================
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+set(cython_sources combine.pyx contains.pyx)
+
+set(linked_libraries cudf::cudf)
+
+rapids_cython_create_modules(
+  CXX
+  SOURCE_FILES "${cython_sources}"
+  LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp_lists
+)
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
index d077958ce03..09a5d84c64f 100644
--- a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd
@@ -1,5 +1,6 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
+from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
@@ -9,10 +10,9 @@ from pylibcudf.libcudf.table.table_view cimport table_view
 cdef extern from "cudf/lists/combine.hpp" namespace \
         "cudf::lists" nogil:
 
-    ctypedef enum concatenate_null_policy:
-        IGNORE "cudf::lists::concatenate_null_policy::IGNORE"
-        NULLIFY_OUTPUT_ROW \
-            "cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW"
+    cpdef enum class concatenate_null_policy(int32_t):
+        IGNORE
+        NULLIFY_OUTPUT_ROW
 
     cdef unique_ptr[column] concatenate_rows(
         const table_view input_table
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pyx b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pyx
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pyx b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pyx
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd
index e7d006e6e2e..10c1c26e24e 100644
--- a/python/pylibcudf/pylibcudf/lists.pxd
+++ b/python/pylibcudf/pylibcudf/lists.pxd
@@ -1,7 +1,11 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
 from libcpp cimport bool
-from pylibcudf.libcudf.types cimport null_order, size_type
+from pylibcudf.libcudf.types cimport (
+    nan_equality, null_equality, null_order, order, size_type
+)
+from pylibcudf.libcudf.lists.combine cimport concatenate_null_policy
+from pylibcudf.libcudf.lists.contains cimport duplicate_find_option
 
 from .column cimport Column
 from .scalar cimport Scalar
@@ -19,13 +23,13 @@ cpdef Table explode_outer(Table, size_type explode_column_idx)
 
 cpdef Column concatenate_rows(Table)
 
-cpdef Column concatenate_list_elements(Column, bool dropna)
+cpdef Column concatenate_list_elements(Column, concatenate_null_policy null_policy)
 
 cpdef Column contains(Column, ColumnOrScalar)
 
 cpdef Column contains_nulls(Column)
 
-cpdef Column index_of(Column, ColumnOrScalar, bool)
+cpdef Column index_of(Column, ColumnOrScalar, duplicate_find_option)
 
 cpdef Column reverse(Column)
 
@@ -37,16 +41,24 @@ cpdef Column count_elements(Column)
 
 cpdef Column sequences(Column, Column, Column steps = *)
 
-cpdef Column sort_lists(Column, bool, null_order, bool stable = *)
+cpdef Column sort_lists(Column, order, null_order, bool stable = *)
 
-cpdef Column difference_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*)
+cpdef Column difference_distinct(
+    Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=*
+)
 
-cpdef Column have_overlap(Column, Column, bool nulls_equal=*, bool nans_equal=*)
+cpdef Column have_overlap(
+    Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=*
+)
 
-cpdef Column intersect_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*)
+cpdef Column intersect_distinct(
+    Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=*
+)
 
-cpdef Column union_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*)
+cpdef Column union_distinct(
+    Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=*
+)
 
 cpdef Column apply_boolean_mask(Column, Column)
 
-cpdef Column distinct(Column, bool, bool)
+cpdef Column distinct(Column, null_equality, nan_equality)
diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi
index e5d186572cd..4e8966ce98a 100644
--- a/python/pylibcudf/pylibcudf/lists.pyi
+++ b/python/pylibcudf/pylibcudf/lists.pyi
@@ -1,17 +1,31 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
+from enum import IntEnum, auto
+
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
-from pylibcudf.types import NullOrder
+from pylibcudf.types import NanEquality, NullEquality, NullOrder, Order
+
+class ConcatenateNullPolicy(IntEnum):
+    IGNORE = auto()
+    NULLIFY_OUTPUT_ROW = auto()
+
+class DuplicateFindOption(IntEnum):
+    FIND_FIRST = auto()
+    FIND_LAST = auto()
 
 def explode_outer(input: Table, explode_column_idx: int) -> Table: ...
 def concatenate_rows(input: Table) -> Column: ...
-def concatenate_list_elements(input: Column, dropna: bool) -> Column: ...
+def concatenate_list_elements(
+    input: Column, null_policy: ConcatenateNullPolicy
+) -> Column: ...
 def contains(input: Column, search_key: Column | Scalar) -> Column: ...
 def contains_nulls(input: Column) -> Column: ...
 def index_of(
-    input: Column, search_key: Column | Scalar, find_first_option: bool
+    input: Column,
+    search_key: Column | Scalar,
+    find_option: DuplicateFindOption,
 ) -> Column: ...
 def reverse(input: Column) -> Column: ...
 def segmented_gather(input: Column, gather_map_list: Column) -> Column: ...
@@ -22,21 +36,35 @@ def sequences(
 ) -> Column: ...
 def sort_lists(
     input: Column,
-    ascending: bool,
+    sort_order: Order,
     na_position: NullOrder,
     stable: bool = False,
 ) -> Column: ...
 def difference_distinct(
-    lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True
+    lhs: Column,
+    rhs: Column,
+    nulls_equal: NullEquality = NullEquality.EQUAL,
+    nans_equal: NanEquality = NanEquality.ALL_EQUAL,
 ) -> Column: ...
 def have_overlap(
-    lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True
+    lhs: Column,
+    rhs: Column,
+    nulls_equal: NullEquality = NullEquality.EQUAL,
+    nans_equal: NanEquality = NanEquality.ALL_EQUAL,
 ) -> Column: ...
 def intersect_distinct(
-    lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True
+    lhs: Column,
+    rhs: Column,
+    nulls_equal: NullEquality = NullEquality.EQUAL,
+    nans_equal: NanEquality = NanEquality.ALL_EQUAL,
 ) -> Column: ...
 def union_distinct(
-    lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True
+    lhs: Column,
+    rhs: Column,
+    nulls_equal: NullEquality = NullEquality.EQUAL,
+    nans_equal: NanEquality = NanEquality.ALL_EQUAL,
 ) -> Column: ...
 def apply_boolean_mask(input: Column, mask: Column) -> Column: ...
-def distinct(input: Column, nulls_equal: bool, nans_equal: bool) -> Column: ...
+def distinct(
+    input: Column, nulls_equal: NullEquality, nans_equal: NanEquality
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx
index ecaf62d6895..f6ffb1874b9 100644
--- a/python/pylibcudf/pylibcudf/lists.pyx
+++ b/python/pylibcudf/pylibcudf/lists.pyx
@@ -42,6 +42,9 @@ from pylibcudf.libcudf.types cimport (
 )
 from pylibcudf.lists cimport ColumnOrScalar, ColumnOrSizeType
 
+from pylibcudf.libcudf.lists.combine import concatenate_null_policy as ConcatenateNullPolicy # no-cython-lint
+from pylibcudf.libcudf.lists.contains import duplicate_find_option as DuplicateFindOption # no-cython-lint
+
 from .column cimport Column, ListColumnView
 from .scalar cimport Scalar
 from .table cimport Table
@@ -97,7 +100,9 @@ cpdef Column concatenate_rows(Table input):
     return Column.from_libcudf(move(c_result))
 
 
-cpdef Column concatenate_list_elements(Column input, bool dropna):
+cpdef Column concatenate_list_elements(
+    Column input, concatenate_null_policy null_policy
+):
     """Concatenate multiple lists on the same row into a single list.
 
     For details, see :cpp:func:`concatenate_list_elements`.
@@ -106,20 +111,14 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
     ----------
     input : Column
         The input column
-    dropna : bool
-        If true, null list elements will be ignored
-        from concatenation. Otherwise any input null values will result in
-        the corresponding output row being set to null.
+    null_policy : ConcatenateNullPolicy
+        How to treat null list elements.
 
     Returns
     -------
     Column
         A new Column of concatenated list elements
     """
-    cdef concatenate_null_policy null_policy = (
-        concatenate_null_policy.IGNORE if dropna
-        else concatenate_null_policy.NULLIFY_OUTPUT_ROW
-    )
     cdef unique_ptr[column] c_result
 
     with nogil:
@@ -191,7 +190,9 @@ cpdef Column contains_nulls(Column input):
     return Column.from_libcudf(move(c_result))
 
 
-cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_option):
+cpdef Column index_of(
+    Column input, ColumnOrScalar search_key, duplicate_find_option find_option
+):
     """Create a column of index values indicating the position of a search
     key row within the corresponding list row in the lists column.
 
@@ -207,9 +208,8 @@ cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_o
         The input column.
     search_key : Union[Column, Scalar]
         The search key.
-    find_first_option : bool
-        If true, index_of returns the first match.
-        Otherwise the last match is returned.
+    find_option : DuplicateFindOption
+        Which match to return if there are duplicates.
 
     Returns
     -------
@@ -220,11 +220,6 @@ cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_o
     """
     cdef unique_ptr[column] c_result
     cdef ListColumnView list_view = input.list_view()
-    cdef cpp_contains.duplicate_find_option find_option = (
-        cpp_contains.duplicate_find_option.FIND_FIRST if find_first_option
-        else cpp_contains.duplicate_find_option.FIND_LAST
-    )
-
     with nogil:
         c_result = cpp_contains.index_of(
             list_view.view(),
@@ -380,7 +375,7 @@ cpdef Column sequences(Column starts, Column sizes, Column steps = None):
 
 cpdef Column sort_lists(
     Column input,
-    bool ascending,
+    order sort_order,
     null_order na_position,
     bool stable = False
 ):
@@ -392,8 +387,8 @@ cpdef Column sort_lists(
     ----------
     input : Column
         The input column.
-    ascending : bool
-        If true, the sort order is ascending. Otherwise, the sort order is descending.
+    sort_order : Order
+        Sort order (ascending or descending) of the elements within each list.
     na_position : NullOrder
         If na_position equals NullOrder.FIRST, then the null values in the output
         column are placed first. Otherwise, they are be placed after.
@@ -409,21 +404,17 @@ cpdef Column sort_lists(
     cdef unique_ptr[column] c_result
     cdef ListColumnView list_view = input.list_view()
 
-    cdef order c_sort_order = (
-        order.ASCENDING if ascending else order.DESCENDING
-    )
-
     with nogil:
         if stable:
             c_result = cpp_stable_sort_lists(
                     list_view.view(),
-                    c_sort_order,
+                    sort_order,
                     na_position,
             )
         else:
             c_result = cpp_sort_lists(
                     list_view.view(),
-                    c_sort_order,
+                    sort_order,
                     na_position,
             )
     return Column.from_libcudf(move(c_result))
@@ -432,8 +423,8 @@ cpdef Column sort_lists(
 cpdef Column difference_distinct(
     Column lhs,
     Column rhs,
-    bool nulls_equal=True,
-    bool nans_equal=True
+    null_equality nulls_equal=null_equality.EQUAL,
+    nan_equality nans_equal=nan_equality.ALL_EQUAL,
 ):
     """Create a column of index values indicating the position of a search
     key row within the corresponding list row in the lists column.
@@ -446,11 +437,10 @@ cpdef Column difference_distinct(
         The input lists column of elements that may be included.
     rhs : Column
         The input lists column of elements to exclude.
-    nulls_equal : bool, default True
-        If true, null elements are considered equal. Otherwise, unequal.
-    nans_equal : bool, default True
-        If true, libcudf will treat nan elements from {-nan, +nan}
-        as equal. Otherwise, unequal. Otherwise, unequal.
+    nulls_equal : NullEquality, default EQUAL
+        Are nulls considered equal.
+    nans_equal : NanEquality, default ALL_EQUAL
+        Are nans considered equal.
 
     Returns
     -------
@@ -461,19 +451,12 @@ cpdef Column difference_distinct(
     cdef ListColumnView lhs_view = lhs.list_view()
     cdef ListColumnView rhs_view = rhs.list_view()
 
-    cdef null_equality c_nulls_equal = (
-        null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL
-    )
-    cdef nan_equality c_nans_equal = (
-        nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL
-    )
-
     with nogil:
         c_result = cpp_set_operations.difference_distinct(
             lhs_view.view(),
             rhs_view.view(),
-            c_nulls_equal,
-            c_nans_equal,
+            nulls_equal,
+            nans_equal,
         )
     return Column.from_libcudf(move(c_result))
 
@@ -481,8 +464,8 @@ cpdef Column difference_distinct(
 cpdef Column have_overlap(
     Column lhs,
     Column rhs,
-    bool nulls_equal=True,
-    bool nans_equal=True
+    null_equality nulls_equal=null_equality.EQUAL,
+    nan_equality nans_equal=nan_equality.ALL_EQUAL,
 ):
     """Check if lists at each row of the given lists columns overlap.
 
@@ -494,11 +477,10 @@ cpdef Column have_overlap(
         The input lists column for one side.
     rhs : Column
         The input lists column for the other side.
-    nulls_equal : bool, default True
-        If true, null elements are considered equal. Otherwise, unequal.
-    nans_equal : bool, default True
-        If true, libcudf will treat nan elements from {-nan, +nan}
-        as equal. Otherwise, unequal. Otherwise, unequal.
+    nulls_equal : NullEquality, default EQUAL
+        Are nulls considered equal.
+    nans_equal : NanEquality, default ALL_EQUAL
+        Are nans considered equal.
 
     Returns
     -------
@@ -509,19 +491,12 @@ cpdef Column have_overlap(
     cdef ListColumnView lhs_view = lhs.list_view()
     cdef ListColumnView rhs_view = rhs.list_view()
 
-    cdef null_equality c_nulls_equal = (
-        null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL
-    )
-    cdef nan_equality c_nans_equal = (
-        nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL
-    )
-
     with nogil:
         c_result = cpp_set_operations.have_overlap(
             lhs_view.view(),
             rhs_view.view(),
-            c_nulls_equal,
-            c_nans_equal,
+            nulls_equal,
+            nans_equal,
         )
     return Column.from_libcudf(move(c_result))
 
@@ -529,8 +504,8 @@ cpdef Column have_overlap(
 cpdef Column intersect_distinct(
     Column lhs,
     Column rhs,
-    bool nulls_equal=True,
-    bool nans_equal=True
+    null_equality nulls_equal=null_equality.EQUAL,
+    nan_equality nans_equal=nan_equality.ALL_EQUAL,
 ):
     """Create a lists column of distinct elements common to two input lists columns.
 
@@ -542,11 +517,10 @@ cpdef Column intersect_distinct(
         The input lists column of elements that may be included.
     rhs : Column
         The input lists column of elements to exclude.
-    nulls_equal : bool, default True
-        If true, null elements are considered equal. Otherwise, unequal.
-    nans_equal : bool, default True
-        If true, libcudf will treat nan elements from {-nan, +nan}
-        as equal. Otherwise, unequal. Otherwise, unequal.
+    nulls_equal : NullEquality, default EQUAL
+        Are nulls considered equal.
+    nans_equal : NanEquality, default ALL_EQUAL
+        Are nans considered equal.
 
     Returns
     -------
@@ -557,19 +531,12 @@ cpdef Column intersect_distinct(
     cdef ListColumnView lhs_view = lhs.list_view()
     cdef ListColumnView rhs_view = rhs.list_view()
 
-    cdef null_equality c_nulls_equal = (
-        null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL
-    )
-    cdef nan_equality c_nans_equal = (
-        nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL
-    )
-
     with nogil:
         c_result = cpp_set_operations.intersect_distinct(
             lhs_view.view(),
             rhs_view.view(),
-            c_nulls_equal,
-            c_nans_equal,
+            nulls_equal,
+            nans_equal,
         )
     return Column.from_libcudf(move(c_result))
 
@@ -577,8 +544,8 @@ cpdef Column intersect_distinct(
 cpdef Column union_distinct(
     Column lhs,
     Column rhs,
-    bool nulls_equal=True,
-    bool nans_equal=True
+    null_equality nulls_equal=null_equality.EQUAL,
+    nan_equality nans_equal=nan_equality.ALL_EQUAL,
 ):
     """Create a lists column of distinct elements found in
     either of two input lists columns.
@@ -591,11 +558,10 @@ cpdef Column union_distinct(
         The input lists column of elements that may be included.
     rhs : Column
         The input lists column of elements to exclude.
-    nulls_equal : bool, default True
-        If true, null elements are considered equal. Otherwise, unequal.
-    nans_equal : bool, default True
-        If true, libcudf will treat nan elements from {-nan, +nan}
-        as equal. Otherwise, unequal. Otherwise, unequal.
+    nulls_equal : NullEquality, default EQUAL
+        Are nulls considered equal.
+    nans_equal : NanEquality, default ALL_EQUAL
+        Are nans considered equal.
 
     Returns
     -------
@@ -606,19 +572,12 @@ cpdef Column union_distinct(
     cdef ListColumnView lhs_view = lhs.list_view()
     cdef ListColumnView rhs_view = rhs.list_view()
 
-    cdef null_equality c_nulls_equal = (
-        null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL
-    )
-    cdef nan_equality c_nans_equal = (
-        nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL
-    )
-
     with nogil:
         c_result = cpp_set_operations.union_distinct(
             lhs_view.view(),
             rhs_view.view(),
-            c_nulls_equal,
-            c_nans_equal,
+            nulls_equal,
+            nans_equal,
         )
     return Column.from_libcudf(move(c_result))
 
@@ -651,7 +610,7 @@ cpdef Column apply_boolean_mask(Column input, Column boolean_mask):
     return Column.from_libcudf(move(c_result))
 
 
-cpdef Column distinct(Column input, bool nulls_equal, bool nans_equal):
+cpdef Column distinct(Column input, null_equality nulls_equal, nan_equality nans_equal):
     """Create a new list column without duplicate elements in each list.
 
     For details, see :cpp:func:`distinct`.
@@ -660,11 +619,10 @@ cpdef Column distinct(Column input, bool nulls_equal, bool nans_equal):
     ----------
     input : Column
         The input column.
-    nulls_equal : bool
-        If true, null elements are considered equal. Otherwise, unequal.
-    nans_equal : bool
-        If true, libcudf will treat nan elements from {-nan, +nan}
-        as equal. Otherwise, unequal. Otherwise, unequal.
+    nulls_equal : NullEquality
+        Are nulls considered equal.
+    nans_equal : NanEquality
+        Are nans considered equal.
 
     Returns
     -------
@@ -674,17 +632,10 @@ cpdef Column distinct(Column input, bool nulls_equal, bool nans_equal):
     cdef unique_ptr[column] c_result
     cdef ListColumnView list_view = input.list_view()
 
-    cdef null_equality c_nulls_equal = (
-        null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL
-    )
-    cdef nan_equality c_nans_equal = (
-        nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL
-    )
-
     with nogil:
         c_result = cpp_distinct(
             list_view.view(),
-            c_nulls_equal,
-            c_nans_equal,
+            nulls_equal,
+            nans_equal,
         )
     return Column.from_libcudf(move(c_result))

From bd9f6f8f9727f930b48d1294ddfcdde08e8646c4 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 6 Nov 2024 18:40:24 +0000
Subject: [PATCH 08/16] Add some guidance about type stubs

---
 docs/cudf/source/developer_guide/pylibcudf.md | 73 ++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/docs/cudf/source/developer_guide/pylibcudf.md b/docs/cudf/source/developer_guide/pylibcudf.md
index 39840e72e21..1ee828e7c4e 100644
--- a/docs/cudf/source/developer_guide/pylibcudf.md
+++ b/docs/cudf/source/developer_guide/pylibcudf.md
@@ -15,7 +15,8 @@ To satisfy the goals of pylibcudf, we impose the following set of design princip
 - All typing in code should be written using Cython syntax, not PEP 484 Python typing syntax. Not only does this ensure compatibility with Cython < 3, but even with Cython 3 PEP 484 support remains incomplete as of this writing.
 - All cudf code should interact only with pylibcudf, never with libcudf directly. This is not currently the case, but is the direction that the library is moving towards.
 - Ideally, pylibcudf should depend on no RAPIDS component other than rmm, and should in general have minimal runtime dependencies.
-
+- Type stubs are provided and maintained manually. When adding new
+  functionality, ensure that the matching type stub is appropriately updated.
 
 ## Relationship to libcudf
 
@@ -249,3 +250,73 @@ In the event that libcudf provides multiple overloads for the same function with
 and set arguments not shared between overloads to `None`. If a user tries to pass in an unsupported argument for a specific overload type, you should raise `ValueError`.
 
 Finally, consider making an libcudf issue if you think this inconsistency can be addressed on the libcudf side.
+
+### Type stubs
+
+Since static type checkers like `mypy` and `pyright` cannot parse
+Cython code, we provide type stubs for the pylibcudf package. These
+are currently maintained manually, alongside the matching pylibcudf
+files.
+
+Every `pyx` file should have a matching `pyi` file that provides the
+type stubs. Most functions can be exposed straightforwardly. Some
+guiding principles:
+
+- For typed integer arguments in libcudf, use `int` as a type
+  annotation.
+- For functions with arguments annotated as a `list` in Cython, where
+  the function body does more detailed checking, try to encode the
+  detailed information in the type.
+- For Cython fused types there are two options:
+    1. If the fused type appears only once in the function signature,
+       use a `Union` type;
+    2. If the fused type appears more than once (or as both an input
+       and output type), use a `TypeVar` with
+       the variants in the fused type provided as constraints.
+
+
+As an example, `pylibcudf.copying.split` is typed in Cython as:
+
+```cython
+ctypedef fused ColumnOrTable:
+    Table
+    Column
+
+cpdef list split(ColumnOrTable input, list splits): ...
+```
+
+Here the fused type appears only once in the signature, and the `list`
+annotations do not specify their element types. However, the output
+type depends on the input type: if we provide a `Column` we receive a
+`list[Column]`, and if we provide a `Table` we receive a `list[Table]`.
+
+In the type stub, we can encode this with a `TypeVar`. We can also
+provide typing for the `splits` argument that indicates that the split
+values must be integers:
+
+```python
+ColumnOrTable = TypeVar("ColumnOrTable", Column, Table)
+
+def split(input: ColumnOrTable, splits: list[int]) -> list[ColumnOrTable]: ...
+```
+
+Conversely, `pylibcudf.copying.scatter` uses a fused type only once in
+its input:
+
+```cython
+ctypedef fused TableOrListOfScalars:
+    Table
+    list
+
+cpdef Table scatter(
+    TableOrListOfScalars source, Column scatter_map, Table target
+)
+```
+
+In the type stub, we can use a normal union in this case:
+
+```python
+def scatter(
+    source: Table | list[Scalar], scatter_map: Column, target: Table
+) -> Table: ...
+```

From 1801379712beb8f59c44276b3cfb7ada644b0a28 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Thu, 7 Nov 2024 14:38:46 +0000
Subject: [PATCH 09/16] Add __all__ to all pylibcudf modules

---
 python/pylibcudf/pylibcudf/aggregation.pyx    | 34 +++++++++++++++++++
 python/pylibcudf/pylibcudf/binaryop.pyx       |  1 +
 python/pylibcudf/pylibcudf/column.pyx         |  1 +
 .../pylibcudf/pylibcudf/column_factories.pyx  |  9 +++++
 python/pylibcudf/pylibcudf/concatenate.pyx    |  1 +
 .../pylibcudf/pylibcudf/contiguous_split.pyx  |  7 ++++
 python/pylibcudf/pylibcudf/copying.pyx        | 17 ++++++++++
 python/pylibcudf/pylibcudf/datetime.pyx       | 18 ++++++++++
 python/pylibcudf/pylibcudf/experimental.pyx   |  2 ++
 python/pylibcudf/pylibcudf/expressions.pyx    | 10 ++++++
 python/pylibcudf/pylibcudf/filling.pyx        |  2 ++
 python/pylibcudf/pylibcudf/gpumemoryview.pyx  |  1 +
 python/pylibcudf/pylibcudf/groupby.pyx        |  2 ++
 python/pylibcudf/pylibcudf/hashing.pyx        | 13 +++++++
 python/pylibcudf/pylibcudf/interop.pyx        |  8 +++++
 python/pylibcudf/pylibcudf/io/__init__.py     | 14 ++++++++
 python/pylibcudf/pylibcudf/io/avro.pyx        |  2 ++
 python/pylibcudf/pylibcudf/io/csv.pyx         |  2 ++
 python/pylibcudf/pylibcudf/io/datasource.pyx  |  1 +
 python/pylibcudf/pylibcudf/io/json.pyx        |  1 +
 python/pylibcudf/pylibcudf/io/orc.pyx         |  6 ++++
 python/pylibcudf/pylibcudf/io/parquet.pyx     |  2 ++
 python/pylibcudf/pylibcudf/io/timezone.pyx    |  1 +
 python/pylibcudf/pylibcudf/io/types.pyx       | 11 ++++++
 python/pylibcudf/pylibcudf/join.pyx           | 18 ++++++++++
 python/pylibcudf/pylibcudf/json.pyx           |  1 +
 python/pylibcudf/pylibcudf/labeling.pyx       |  1 +
 python/pylibcudf/pylibcudf/lists.pyx          | 22 ++++++++++++
 python/pylibcudf/pylibcudf/merge.pyx          |  1 +
 python/pylibcudf/pylibcudf/null_mask.pyx      |  7 ++++
 .../pylibcudf/nvtext/byte_pair_encode.pyx     |  1 +
 .../pylibcudf/nvtext/edit_distance.pyx        |  1 +
 .../pylibcudf/nvtext/generate_ngrams.pyx      |  5 +++
 python/pylibcudf/pylibcudf/nvtext/jaccard.pyx |  1 +
 python/pylibcudf/pylibcudf/nvtext/minhash.pyx |  6 ++++
 .../pylibcudf/nvtext/ngrams_tokenize.pyx      |  1 +
 .../pylibcudf/pylibcudf/nvtext/normalize.pyx  |  1 +
 python/pylibcudf/pylibcudf/nvtext/replace.pyx |  1 +
 python/pylibcudf/pylibcudf/nvtext/stemmer.pyx |  1 +
 .../pylibcudf/nvtext/subword_tokenize.pyx     |  1 +
 .../pylibcudf/pylibcudf/nvtext/tokenize.pyx   | 10 ++++++
 python/pylibcudf/pylibcudf/partitioning.pyx   |  5 +++
 python/pylibcudf/pylibcudf/quantiles.pyx      |  1 +
 python/pylibcudf/pylibcudf/reduce.pyx         |  1 +
 python/pylibcudf/pylibcudf/replace.pyx        |  8 +++++
 python/pylibcudf/pylibcudf/reshape.pyx        |  1 +
 python/pylibcudf/pylibcudf/rolling.pyx        |  1 +
 python/pylibcudf/pylibcudf/round.pyx          |  1 +
 python/pylibcudf/pylibcudf/scalar.pyx         |  2 ++
 python/pylibcudf/pylibcudf/search.pyx         |  1 +
 python/pylibcudf/pylibcudf/sorting.pyx        | 12 +++++++
 .../pylibcudf/pylibcudf/stream_compaction.pyx | 12 +++++++
 .../pylibcudf/pylibcudf/strings/__init__.py   |  4 +--
 .../pylibcudf/strings/attributes.pyx          |  1 +
 .../pylibcudf/strings/capitalize.pyx          |  1 +
 python/pylibcudf/pylibcudf/strings/case.pyx   |  1 +
 .../pylibcudf/strings/char_types.pyx          |  5 +++
 .../pylibcudf/pylibcudf/strings/combine.pyx   |  7 ++++
 .../pylibcudf/pylibcudf/strings/contains.pyx  |  1 +
 .../strings/convert/convert_booleans.pyx      |  1 +
 .../strings/convert/convert_datetime.pyx      |  1 +
 .../strings/convert/convert_durations.pyx     |  1 +
 .../strings/convert/convert_fixed_point.pyx   |  2 ++
 .../strings/convert/convert_floats.pyx        |  1 +
 .../strings/convert/convert_integers.pyx      |  8 +++++
 .../strings/convert/convert_ipv4.pyx          |  1 +
 .../strings/convert/convert_lists.pyx         |  1 +
 .../strings/convert/convert_urls.pyx          |  1 +
 .../pylibcudf/pylibcudf/strings/extract.pyx   |  1 +
 python/pylibcudf/pylibcudf/strings/find.pyx   |  1 +
 .../pylibcudf/strings/find_multiple.pyx       |  1 +
 .../pylibcudf/pylibcudf/strings/findall.pyx   |  1 +
 .../pylibcudf/pylibcudf/strings/padding.pyx   |  1 +
 .../pylibcudf/strings/regex_flags.pyx         |  2 ++
 .../pylibcudf/strings/regex_program.pyx       |  1 +
 python/pylibcudf/pylibcudf/strings/repeat.pyx |  1 +
 .../pylibcudf/pylibcudf/strings/replace.pyx   |  1 +
 .../pylibcudf/strings/replace_re.pyx          |  1 +
 .../pylibcudf/pylibcudf/strings/side_type.pyx |  2 ++
 python/pylibcudf/pylibcudf/strings/slice.pyx  |  1 +
 .../pylibcudf/strings/split/partition.pyx     |  1 +
 .../pylibcudf/strings/split/split.pyx         | 10 ++++++
 python/pylibcudf/pylibcudf/strings/strip.pyx  |  1 +
 .../pylibcudf/pylibcudf/strings/translate.pyx |  1 +
 python/pylibcudf/pylibcudf/strings/wrap.pyx   |  1 +
 python/pylibcudf/pylibcudf/table.pyx          |  1 +
 python/pylibcudf/pylibcudf/traits.pyx         | 20 +++++++++++
 python/pylibcudf/pylibcudf/transform.pyx      |  9 +++++
 python/pylibcudf/pylibcudf/transpose.pyx      |  1 +
 python/pylibcudf/pylibcudf/types.pyx          | 16 +++++++++
 python/pylibcudf/pylibcudf/unary.pyx          | 10 ++++++
 91 files changed, 410 insertions(+), 2 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/aggregation.pyx b/python/pylibcudf/pylibcudf/aggregation.pyx
index e510b738f70..662f76d5c8e 100644
--- a/python/pylibcudf/pylibcudf/aggregation.pyx
+++ b/python/pylibcudf/pylibcudf/aggregation.pyx
@@ -64,6 +64,40 @@ from pylibcudf.libcudf.aggregation import udf_type as UdfType  # no-cython-lint
 from .types cimport DataType
 
 
+__all__ = [
+    "Aggregation",
+    "CorrelationType",
+    "EWMHistory",
+    "Kind",
+    "RankMethod",
+    "RankPercentage",
+    "UdfType",
+    "all",
+    "any",
+    "argmax",
+    "argmin",
+    "collect_list",
+    "collect_set",
+    "correlation",
+    "count",
+    "covariance",
+    "ewma",
+    "max",
+    "mean",
+    "median",
+    "min",
+    "nth_element",
+    "nunique",
+    "product",
+    "quantile",
+    "rank",
+    "std",
+    "sum",
+    "sum_of_squares",
+    "udf",
+    "variance",
+]
+
 cdef class Aggregation:
     """A type of aggregation to perform.
 
diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx
index eef73bf4e9d..b7b4ecc6e83 100644
--- a/python/pylibcudf/pylibcudf/binaryop.pyx
+++ b/python/pylibcudf/pylibcudf/binaryop.pyx
@@ -16,6 +16,7 @@ from .column cimport Column
 from .scalar cimport Scalar
 from .types cimport DataType
 
+__all__ = ["BinaryOperator", "binary_operation", "is_supported_operation"]
 
 cpdef Column binary_operation(
     LeftBinaryOperand lhs,
diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx
index 4e5698566d0..794c76438f3 100644
--- a/python/pylibcudf/pylibcudf/column.pyx
+++ b/python/pylibcudf/pylibcudf/column.pyx
@@ -17,6 +17,7 @@ from .utils cimport int_to_bitmask_ptr, int_to_void_ptr
 
 import functools
 
+__all__ = ["Column", "ListColumnView", "is_c_contiguous"]
 
 cdef class Column:
     """A container of nullable device data as a column of elements.
diff --git a/python/pylibcudf/pylibcudf/column_factories.pyx b/python/pylibcudf/pylibcudf/column_factories.pyx
index ac942a620b5..c4969a7f502 100644
--- a/python/pylibcudf/pylibcudf/column_factories.pyx
+++ b/python/pylibcudf/pylibcudf/column_factories.pyx
@@ -17,6 +17,15 @@ from .types cimport DataType, type_id
 from .types import MaskState, TypeId
 
 
+__all__ = [
+    "make_duration_column",
+    "make_empty_column",
+    "make_fixed_point_column",
+    "make_fixed_width_column",
+    "make_numeric_column",
+    "make_timestamp_column",
+]
+
 cpdef Column make_empty_column(MakeEmptyColumnOperand type_or_id):
     """Creates an empty column of the specified type.
 
diff --git a/python/pylibcudf/pylibcudf/concatenate.pyx b/python/pylibcudf/pylibcudf/concatenate.pyx
index 10c860d97bb..42c5f34cf3e 100644
--- a/python/pylibcudf/pylibcudf/concatenate.pyx
+++ b/python/pylibcudf/pylibcudf/concatenate.pyx
@@ -12,6 +12,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view
 from .column cimport Column
 from .table cimport Table
 
+__all__ = ["concatenate"]
 
 cpdef concatenate(list objects):
     """Concatenate columns or tables.
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx
index ed926a3fcc0..451757eea10 100644
--- a/python/pylibcudf/pylibcudf/contiguous_split.pyx
+++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx
@@ -20,6 +20,13 @@ from .table cimport Table
 from .utils cimport int_to_void_ptr
 
 
+__all__ = [
+    "PackedColumns",
+    "pack",
+    "unpack",
+    "unpack_from_memoryviews",
+]
+
 cdef class HostBuffer:
     """Owning host buffer that implements the buffer protocol"""
     cdef unique_ptr[vector[uint8_t]] c_obj
diff --git a/python/pylibcudf/pylibcudf/copying.pyx b/python/pylibcudf/pylibcudf/copying.pyx
index 4938f1a3dda..fb8b6f9890e 100644
--- a/python/pylibcudf/pylibcudf/copying.pyx
+++ b/python/pylibcudf/pylibcudf/copying.pyx
@@ -36,6 +36,23 @@ from .table cimport Table
 from .utils cimport _as_vector
 
 
+__all__ = [
+    "MaskAllocationPolicy",
+    "OutOfBoundsPolicy",
+    "allocate_like",
+    "boolean_mask_scatter",
+    "copy_if_else",
+    "copy_range",
+    "copy_range_in_place",
+    "empty_like",
+    "gather",
+    "get_element",
+    "scatter",
+    "shift",
+    "slice",
+    "split",
+]
+
 cpdef Table gather(
     Table source_table,
     Column gather_map,
diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx
index 9e5e709d81d..b100e3e22d0 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyx
+++ b/python/pylibcudf/pylibcudf/datetime.pyx
@@ -29,6 +29,24 @@ from cython.operator cimport dereference
 
 from .column cimport Column
 
+__all__ = [
+    "DatetimeComponent",
+    "RoundingFrequency",
+    "add_calendrical_months",
+    "ceil_datetimes",
+    "day_of_year",
+    "days_in_month",
+    "extract_datetime_component",
+    "extract_microsecond_fraction",
+    "extract_millisecond_fraction",
+    "extract_nanosecond_fraction",
+    "extract_quarter",
+    "floor_datetimes",
+    "is_leap_year",
+    "last_day_of_month",
+    "round_datetimes",
+]
+
 cpdef Column extract_millisecond_fraction(
     Column input
 ):
diff --git a/python/pylibcudf/pylibcudf/experimental.pyx b/python/pylibcudf/pylibcudf/experimental.pyx
index b25a53e13b2..d94d6d087ac 100644
--- a/python/pylibcudf/pylibcudf/experimental.pyx
+++ b/python/pylibcudf/pylibcudf/experimental.pyx
@@ -5,6 +5,8 @@ from libcpp.string cimport string
 from pylibcudf.libcudf cimport experimental as cpp_experimental
 
 
+__all__ = ["disable_prefetching", "enable_prefetching", "prefetch_debugging"]
+
 cpdef enable_prefetching(str key):
     """Turn on prefetch instructions for the given key.
 
diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx
index 1535f68366b..b0db533dba9 100644
--- a/python/pylibcudf/pylibcudf/expressions.pyx
+++ b/python/pylibcudf/pylibcudf/expressions.pyx
@@ -49,6 +49,16 @@ from .types cimport DataType
 # Aliases for simplicity
 ctypedef unique_ptr[libcudf_exp.expression] expression_ptr
 
+__all__ = [
+    "ASTOperator",
+    "ColumnNameReference",
+    "ColumnReference",
+    "Expression",
+    "Literal",
+    "Operation",
+    "TableReference",
+]
+
 # Define this class just to have a docstring for it
 cdef class Expression:
     """
diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx
index a47004a1e42..883dd49280b 100644
--- a/python/pylibcudf/pylibcudf/filling.pyx
+++ b/python/pylibcudf/pylibcudf/filling.pyx
@@ -18,6 +18,8 @@ from .scalar cimport Scalar
 from .table cimport Table
 
 
+__all__ = ["fill", "fill_in_place", "repeat", "sequence"]
+
 cpdef Column fill(
     Column destination,
     size_type begin,
diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyx b/python/pylibcudf/pylibcudf/gpumemoryview.pyx
index 0904022a944..605a19ea0de 100644
--- a/python/pylibcudf/pylibcudf/gpumemoryview.pyx
+++ b/python/pylibcudf/pylibcudf/gpumemoryview.pyx
@@ -1,5 +1,6 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
+__all__ = ["gpumemoryview"]
 
 cdef class gpumemoryview:
     """Minimal representation of a memory buffer.
diff --git a/python/pylibcudf/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx
index 71f9ecb0453..2760516b316 100644
--- a/python/pylibcudf/pylibcudf/groupby.pyx
+++ b/python/pylibcudf/pylibcudf/groupby.pyx
@@ -25,6 +25,8 @@ from .types cimport null_order, null_policy, order, sorted
 from .utils cimport _as_vector
 
 
+__all__ = ["GroupBy", "GroupByRequest"]
+
 cdef class GroupByRequest:
     """A request for a groupby aggregation or scan.
 
diff --git a/python/pylibcudf/pylibcudf/hashing.pyx b/python/pylibcudf/pylibcudf/hashing.pyx
index 9ea3d4d1bda..548cffc0ce8 100644
--- a/python/pylibcudf/pylibcudf/hashing.pyx
+++ b/python/pylibcudf/pylibcudf/hashing.pyx
@@ -20,6 +20,19 @@ from pylibcudf.libcudf.table.table cimport table
 from .column cimport Column
 from .table cimport Table
 
+__all__ = [
+    "LIBCUDF_DEFAULT_HASH_SEED",
+    "md5",
+    "murmurhash3_x64_128",
+    "murmurhash3_x86_32",
+    "sha1",
+    "sha224",
+    "sha256",
+    "sha384",
+    "sha512",
+    "xxhash_64",
+]
+
 LIBCUDF_DEFAULT_HASH_SEED = DEFAULT_HASH_SEED
 
 cpdef Column murmurhash3_x86_32(
diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx
index 61e812353b7..bd5397ac328 100644
--- a/python/pylibcudf/pylibcudf/interop.pyx
+++ b/python/pylibcudf/pylibcudf/interop.pyx
@@ -38,6 +38,14 @@ from .scalar cimport Scalar
 from .table cimport Table
 from .types cimport DataType, type_id
 
+__all__ = [
+    "ColumnMetadata",
+    "from_arrow",
+    "from_dlpack",
+    "to_arrow",
+    "to_dlpack",
+]
+
 ARROW_TO_PYLIBCUDF_TYPES = {
     pa.int8(): type_id.INT8,
     pa.int16(): type_id.INT16,
diff --git a/python/pylibcudf/pylibcudf/io/__init__.py b/python/pylibcudf/pylibcudf/io/__init__.py
index 2e4f215b12c..08891e454cd 100644
--- a/python/pylibcudf/pylibcudf/io/__init__.py
+++ b/python/pylibcudf/pylibcudf/io/__init__.py
@@ -2,3 +2,17 @@
 
 from . import avro, csv, datasource, json, orc, parquet, timezone, types
 from .types import SinkInfo, SourceInfo, TableWithMetadata
+
+__all__ = [
+    "SinkInfo",
+    "SourceInfo",
+    "TableWithMetadata",
+    "avro",
+    "csv",
+    "datasource",
+    "json",
+    "orc",
+    "parquet",
+    "timezone",
+    "types",
+]
diff --git a/python/pylibcudf/pylibcudf/io/avro.pyx b/python/pylibcudf/pylibcudf/io/avro.pyx
index fe765b34f82..4271333511a 100644
--- a/python/pylibcudf/pylibcudf/io/avro.pyx
+++ b/python/pylibcudf/pylibcudf/io/avro.pyx
@@ -10,6 +10,8 @@ from pylibcudf.libcudf.io.avro cimport (
 )
 from pylibcudf.libcudf.types cimport size_type
 
+__all__ = ["read_avro"]
+
 
 cpdef TableWithMetadata read_avro(
     SourceInfo source_info,
diff --git a/python/pylibcudf/pylibcudf/io/csv.pyx b/python/pylibcudf/pylibcudf/io/csv.pyx
index 2c61cc42d82..858e580ab34 100644
--- a/python/pylibcudf/pylibcudf/io/csv.pyx
+++ b/python/pylibcudf/pylibcudf/io/csv.pyx
@@ -19,6 +19,8 @@ from pylibcudf.libcudf.types cimport data_type, size_type
 from pylibcudf.types cimport DataType
 
 
+__all__ = ["read_csv"]
+
 cdef tuple _process_parse_dates_hex(list cols):
     cdef vector[string] str_cols
     cdef vector[int] int_cols
diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyx b/python/pylibcudf/pylibcudf/io/datasource.pyx
index 02418444caa..4e7c9c8e385 100644
--- a/python/pylibcudf/pylibcudf/io/datasource.pyx
+++ b/python/pylibcudf/pylibcudf/io/datasource.pyx
@@ -2,6 +2,7 @@
 
 from pylibcudf.libcudf.io.datasource cimport datasource
 
+__all__ = ["Datasource"]
 
 cdef class Datasource:
     cdef datasource* get_datasource(self) except * nogil:
diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx
index 65f78f830f1..ad2989925c9 100644
--- a/python/pylibcudf/pylibcudf/io/json.pyx
+++ b/python/pylibcudf/pylibcudf/io/json.pyx
@@ -23,6 +23,7 @@ from pylibcudf.libcudf.io.types cimport (
 from pylibcudf.libcudf.types cimport data_type, size_type
 from pylibcudf.types cimport DataType
 
+__all__ = ["chunked_read_json", "read_json", "write_json"]
 
 cdef map[string, schema_element] _generate_schema_map(list dtypes):
     cdef map[string, schema_element] schema_map
diff --git a/python/pylibcudf/pylibcudf/io/orc.pyx b/python/pylibcudf/pylibcudf/io/orc.pyx
index 70e0a7995a2..f2f644a32f9 100644
--- a/python/pylibcudf/pylibcudf/io/orc.pyx
+++ b/python/pylibcudf/pylibcudf/io/orc.pyx
@@ -30,6 +30,12 @@ from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.types cimport DataType
 from pylibcudf.variant cimport get_if, holds_alternative
 
+__all__ = [
+    "OrcColumnStatistics",
+    "ParsedOrcStatistics",
+    "read_orc",
+    "read_parsed_orc_statistics",
+]
 
 cdef class OrcColumnStatistics:
     def __init__(self):
diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx
index 981ca7b8159..7f93c763298 100644
--- a/python/pylibcudf/pylibcudf/io/parquet.pyx
+++ b/python/pylibcudf/pylibcudf/io/parquet.pyx
@@ -16,6 +16,8 @@ from pylibcudf.libcudf.io.parquet cimport (
 from pylibcudf.libcudf.io.types cimport table_with_metadata
 from pylibcudf.libcudf.types cimport size_type
 
+__all__ = ["ChunkedParquetReader", "read_parquet"]
+
 
 cdef parquet_reader_options _setup_parquet_reader_options(
     SourceInfo source_info,
diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyx b/python/pylibcudf/pylibcudf/io/timezone.pyx
index f120b65fb2c..af7cf8a4ee5 100644
--- a/python/pylibcudf/pylibcudf/io/timezone.pyx
+++ b/python/pylibcudf/pylibcudf/io/timezone.pyx
@@ -11,6 +11,7 @@ from pylibcudf.libcudf.table.table cimport table
 
 from ..table cimport Table
 
+__all__ = ["make_timezone_transition_table"]
 
 cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name):
     """
diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx
index e2439fbad88..8bc226d83e5 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyx
+++ b/python/pylibcudf/pylibcudf/io/types.pyx
@@ -31,6 +31,17 @@ from pylibcudf.libcudf.io.types import (
     statistics_freq as StatisticsFreq, # no-cython-lint
 )
 
+__all__ = [
+    "ColumnEncoding",
+    "CompressionType",
+    "DictionaryPolicy",
+    "JSONRecoveryMode",
+    "QuoteStyle",
+    "SinkInfo",
+    "SourceInfo",
+    "StatisticsFreq",
+    "TableWithMetadata",
+]
 
 cdef class TableWithMetadata:
     """A container holding a table and its associated metadata
diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx
index 0d841eee194..c2efe05ffc4 100644
--- a/python/pylibcudf/pylibcudf/join.pyx
+++ b/python/pylibcudf/pylibcudf/join.pyx
@@ -15,6 +15,24 @@ from .column cimport Column
 from .expressions cimport Expression
 from .table cimport Table
 
+__all__ = [
+    "conditional_full_join",
+    "conditional_inner_join",
+    "conditional_left_anti_join",
+    "conditional_left_join",
+    "conditional_left_semi_join",
+    "cross_join",
+    "full_join",
+    "inner_join",
+    "left_anti_join",
+    "left_join",
+    "left_semi_join",
+    "mixed_full_join",
+    "mixed_inner_join",
+    "mixed_left_anti_join",
+    "mixed_left_join",
+    "mixed_left_semi_join",
+]
 
 cdef Column _column_from_gather_map(cpp_join.gather_map_type gather_map):
     # helper to convert a gather map to a Column
diff --git a/python/pylibcudf/pylibcudf/json.pyx b/python/pylibcudf/pylibcudf/json.pyx
index ebb82f80408..9c5dd023509 100644
--- a/python/pylibcudf/pylibcudf/json.pyx
+++ b/python/pylibcudf/pylibcudf/json.pyx
@@ -10,6 +10,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.scalar cimport Scalar
 
+__all__ = ["GetJsonObjectOptions", "get_json_object"]
 
 cdef class GetJsonObjectOptions:
     """Settings for ``get_json_object()``"""
diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx
index 84a7d42283b..cae1830f6b9 100644
--- a/python/pylibcudf/pylibcudf/labeling.pyx
+++ b/python/pylibcudf/pylibcudf/labeling.pyx
@@ -10,6 +10,7 @@ from pylibcudf.libcudf.labeling import inclusive as Inclusive  # no-cython-lint
 
 from .column cimport Column
 
+__all__ = ["Inclusive", "label_bins"]
 
 cpdef Column label_bins(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx
index f6ffb1874b9..ccc56eaa520 100644
--- a/python/pylibcudf/pylibcudf/lists.pyx
+++ b/python/pylibcudf/pylibcudf/lists.pyx
@@ -49,6 +49,28 @@ from .column cimport Column, ListColumnView
 from .scalar cimport Scalar
 from .table cimport Table
 
+__all__ = [
+    "ConcatenateNullPolicy",
+    "DuplicateFindOption",
+    "apply_boolean_mask",
+    "concatenate_list_elements",
+    "concatenate_rows",
+    "contains",
+    "contains_nulls",
+    "count_elements",
+    "difference_distinct",
+    "distinct",
+    "explode_outer",
+    "extract_list_element",
+    "have_overlap",
+    "index_of",
+    "intersect_distinct",
+    "reverse",
+    "segmented_gather",
+    "sequences",
+    "sort_lists",
+    "union_distinct",
+]
 
 cpdef Table explode_outer(Table input, size_type explode_column_idx):
     """Explode a column of lists into rows.
diff --git a/python/pylibcudf/pylibcudf/merge.pyx b/python/pylibcudf/pylibcudf/merge.pyx
index 61a21aafdb2..c051cdc0c66 100644
--- a/python/pylibcudf/pylibcudf/merge.pyx
+++ b/python/pylibcudf/pylibcudf/merge.pyx
@@ -10,6 +10,7 @@ from pylibcudf.libcudf.types cimport null_order, order, size_type
 
 from .table cimport Table
 
+__all__ = ["merge"]
 
 cpdef Table merge (
     list tables_to_merge,
diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx
index 74180951562..adc264e9af6 100644
--- a/python/pylibcudf/pylibcudf/null_mask.pyx
+++ b/python/pylibcudf/pylibcudf/null_mask.pyx
@@ -14,6 +14,13 @@ from pylibcudf.libcudf.types import mask_state as MaskState  # no-cython-lint
 from .column cimport Column
 from .table cimport Table
 
+__all__ = [
+    "bitmask_allocation_size_bytes",
+    "bitmask_and",
+    "bitmask_or",
+    "copy_bitmask",
+    "create_null_mask",
+]
 
 cdef DeviceBuffer buffer_to_python(device_buffer buf):
     return DeviceBuffer.c_from_unique_ptr(make_unique[device_buffer](move(buf)))
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
index 76caad276d4..c63b92328f3 100644
--- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
@@ -16,6 +16,7 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport (
 )
 from pylibcudf.scalar cimport Scalar
 
+__all__ = ["BPEMergePairs", "byte_pair_encoding"]
 
 cdef class BPEMergePairs:
     """The table of merge pairs for the BPE encoder.
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
index dcacb2e1267..eceeaff24e3 100644
--- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
@@ -9,6 +9,7 @@ from pylibcudf.libcudf.nvtext.edit_distance cimport (
     edit_distance_matrix as cpp_edit_distance_matrix,
 )
 
+__all__ = ["edit_distance", "edit_distance_matrix"]
 
 cpdef Column edit_distance(Column input, Column targets):
     """
diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
index 09859d09e9e..521bc0ef4a4 100644
--- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
@@ -14,6 +14,11 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 
+__all__ = [
+    "generate_character_ngrams",
+    "generate_ngrams",
+    "hash_character_ngrams",
+]
 
 cpdef Column generate_ngrams(Column input, size_type ngrams, Scalar separator):
     """
diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx
index 3d8669865d9..90cace088f7 100644
--- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx
@@ -10,6 +10,7 @@ from pylibcudf.libcudf.nvtext.jaccard cimport (
 )
 from pylibcudf.libcudf.types cimport size_type
 
+__all__ = ["jaccard_index"]
 
 cpdef Column jaccard_index(Column input1, Column input2, size_type width):
     """
diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
index f1e012e60e5..96b849b6b48 100644
--- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
@@ -17,6 +17,12 @@ from pylibcudf.scalar cimport Scalar
 
 from cython.operator import dereference
 
+__all__ = [
+    "minhash",
+    "minhash64",
+    "word_minhash",
+    "word_minhash64",
+]
 
 cpdef Column minhash(Column input, ColumnOrScalar seeds, size_type width=4):
     """
diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx
index 8a1854c5f0d..771c7c019fc 100644
--- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx
@@ -12,6 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 
+__all__ = ["ngrams_tokenize"]
 
 cpdef Column ngrams_tokenize(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx
index 637d900b659..b259ccaefa6 100644
--- a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx
@@ -10,6 +10,7 @@ from pylibcudf.libcudf.nvtext.normalize cimport (
     normalize_spaces as cpp_normalize_spaces,
 )
 
+__all__ = ["normalize_characters", "normalize_spaces"]
 
 cpdef Column normalize_spaces(Column input):
     """
diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyx b/python/pylibcudf/pylibcudf/nvtext/replace.pyx
index b65348ce14d..a27592fb434 100644
--- a/python/pylibcudf/pylibcudf/nvtext/replace.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyx
@@ -16,6 +16,7 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport (
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 
+__all__ = ["filter_tokens", "replace_tokens"]
 
 cpdef Column replace_tokens(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx
index 854d1053624..c9e4f1274e4 100644
--- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx
@@ -12,6 +12,7 @@ from pylibcudf.libcudf.nvtext.stemmer cimport (
 )
 from pylibcudf.libcudf.types cimport size_type
 
+__all__ = ["is_letter", "porter_stemmer_measure"]
 
 cpdef Column is_letter(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx
index 04643d3bd84..a346eef4619 100644
--- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx
@@ -13,6 +13,7 @@ from pylibcudf.libcudf.nvtext.subword_tokenize cimport (
     tokenizer_result as cpp_tokenizer_result,
 )
 
+__all__ = ["HashedVocabulary", "subword_tokenize"]
 
 cdef class HashedVocabulary:
     """The vocabulary data for use with the subword_tokenize function.
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
index ec02e8ebf4e..26b055fd5ae 100644
--- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
@@ -20,6 +20,16 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport (
 )
 from pylibcudf.libcudf.types cimport size_type
 
+__all__ = [
+    "TokenizeVocabulary",
+    "character_tokenize",
+    "count_tokens_column",
+    "count_tokens_scalar",
+    "detokenize",
+    "tokenize_column",
+    "tokenize_scalar",
+    "tokenize_with_vocabulary",
+]
 
 cdef class TokenizeVocabulary:
     """The Vocabulary object to be used with ``tokenize_with_vocabulary``.
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx
index 3cff4843735..1dacabceb06 100644
--- a/python/pylibcudf/pylibcudf/partitioning.pyx
+++ b/python/pylibcudf/pylibcudf/partitioning.pyx
@@ -11,6 +11,11 @@ from pylibcudf.libcudf.table.table cimport table
 from .column cimport Column
 from .table cimport Table
 
+__all__ = [
+    "hash_partition",
+    "partition",
+    "round_robin_partition",
+]
 
 cpdef tuple[Table, list] hash_partition(
     Table input,
diff --git a/python/pylibcudf/pylibcudf/quantiles.pyx b/python/pylibcudf/pylibcudf/quantiles.pyx
index 7d92b598bd0..634218586ac 100644
--- a/python/pylibcudf/pylibcudf/quantiles.pyx
+++ b/python/pylibcudf/pylibcudf/quantiles.pyx
@@ -17,6 +17,7 @@ from .column cimport Column
 from .table cimport Table
 from .types cimport interpolation
 
+__all__ = ["quantile", "quantiles"]
 
 cpdef Column quantile(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/reduce.pyx b/python/pylibcudf/pylibcudf/reduce.pyx
index d9ec3a9bdc4..1d6ffd9de10 100644
--- a/python/pylibcudf/pylibcudf/reduce.pyx
+++ b/python/pylibcudf/pylibcudf/reduce.pyx
@@ -16,6 +16,7 @@ from .types cimport DataType
 
 from pylibcudf.libcudf.reduce import scan_type as ScanType  # no-cython-lint
 
+__all__ = ["ScanType", "minmax", "reduce", "scan"]
 
 cpdef Scalar reduce(Column col, Aggregation agg, DataType data_type):
     """Perform a reduction on a column
diff --git a/python/pylibcudf/pylibcudf/replace.pyx b/python/pylibcudf/pylibcudf/replace.pyx
index f77eba7ace5..51be2b29277 100644
--- a/python/pylibcudf/pylibcudf/replace.pyx
+++ b/python/pylibcudf/pylibcudf/replace.pyx
@@ -15,6 +15,14 @@ from pylibcudf.libcudf.replace import \
 from .column cimport Column
 from .scalar cimport Scalar
 
+__all__ = [
+    "ReplacePolicy",
+    "clamp",
+    "find_and_replace_all",
+    "normalize_nans_and_zeros",
+    "replace_nulls",
+]
+
 
 cpdef Column replace_nulls(Column source_column, ReplacementType replacement):
     """Replace nulls in source_column.
diff --git a/python/pylibcudf/pylibcudf/reshape.pyx b/python/pylibcudf/pylibcudf/reshape.pyx
index 6540b5198ab..bdc212a1985 100644
--- a/python/pylibcudf/pylibcudf/reshape.pyx
+++ b/python/pylibcudf/pylibcudf/reshape.pyx
@@ -13,6 +13,7 @@ from pylibcudf.libcudf.types cimport size_type
 from .column cimport Column
 from .table cimport Table
 
+__all__ = ["interleave_columns", "tile"]
 
 cpdef Column interleave_columns(Table source_table):
     """Interleave columns of a table into a single column.
diff --git a/python/pylibcudf/pylibcudf/rolling.pyx b/python/pylibcudf/pylibcudf/rolling.pyx
index 4fd0b005431..11acf57ccf4 100644
--- a/python/pylibcudf/pylibcudf/rolling.pyx
+++ b/python/pylibcudf/pylibcudf/rolling.pyx
@@ -11,6 +11,7 @@ from pylibcudf.libcudf.types cimport size_type
 from .aggregation cimport Aggregation
 from .column cimport Column
 
+__all__ = ["rolling_window"]
 
 cpdef Column rolling_window(
     Column source,
diff --git a/python/pylibcudf/pylibcudf/round.pyx b/python/pylibcudf/pylibcudf/round.pyx
index 689363e652d..09e5a9cc3bc 100644
--- a/python/pylibcudf/pylibcudf/round.pyx
+++ b/python/pylibcudf/pylibcudf/round.pyx
@@ -11,6 +11,7 @@ from pylibcudf.libcudf.column.column cimport column
 
 from .column cimport Column
 
+__all__ = ["RoundingMethod", "round"]
 
 cpdef Column round(
     Column source,
diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx
index d4888a62ad1..e522a2a4670 100644
--- a/python/pylibcudf/pylibcudf/scalar.pyx
+++ b/python/pylibcudf/pylibcudf/scalar.pyx
@@ -11,6 +11,8 @@ from rmm.pylibrmm.memory_resource cimport get_current_device_resource
 from .column cimport Column
 from .types cimport DataType
 
+__all__ = ["Scalar"]
+
 
 # The DeviceMemoryResource attribute could be released prematurely
 # by the gc if the Scalar is in a reference cycle. Removing the tp_clear
diff --git a/python/pylibcudf/pylibcudf/search.pyx b/python/pylibcudf/pylibcudf/search.pyx
index 1a870248046..50353fcd0cc 100644
--- a/python/pylibcudf/pylibcudf/search.pyx
+++ b/python/pylibcudf/pylibcudf/search.pyx
@@ -10,6 +10,7 @@ from pylibcudf.libcudf.types cimport null_order, order
 from .column cimport Column
 from .table cimport Table
 
+__all__ = ["contains", "lower_bound", "upper_bound"]
 
 cpdef Column lower_bound(
     Table haystack,
diff --git a/python/pylibcudf/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx
index fc40f03e1fd..fb29ef8c571 100644
--- a/python/pylibcudf/pylibcudf/sorting.pyx
+++ b/python/pylibcudf/pylibcudf/sorting.pyx
@@ -12,6 +12,18 @@ from pylibcudf.libcudf.types cimport null_order, null_policy, order
 from .column cimport Column
 from .table cimport Table
 
+__all__ = [
+    "is_sorted",
+    "rank",
+    "segmented_sort_by_key",
+    "sort",
+    "sort_by_key",
+    "sorted_order",
+    "stable_segmented_sort_by_key",
+    "stable_sort",
+    "stable_sort_by_key",
+    "stable_sorted_order",
+]
 
 cpdef Column sorted_order(Table source_table, list column_order, list null_precedence):
     """Computes the row indices required to sort the table.
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx
index 2145398a191..6e403ca1b07 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pyx
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx
@@ -21,6 +21,18 @@ from pylibcudf.libcudf.stream_compaction import \
 from .column cimport Column
 from .table cimport Table
 
+__all__ = [
+    "DuplicateKeepOption",
+    "apply_boolean_mask",
+    "distinct",
+    "distinct_count",
+    "distinct_indices",
+    "drop_nans",
+    "drop_nulls",
+    "stable_distinct",
+    "unique",
+    "unique_count",
+]
 
 cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold):
     """Filters out rows from the input table based on the presence of nulls.
diff --git a/python/pylibcudf/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py
index fa7294c7dbd..67054f0b447 100644
--- a/python/pylibcudf/pylibcudf/strings/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/__init__.py
@@ -28,6 +28,7 @@
 from .side_type import SideType
 
 __all__ = [
+    "SideType",
     "attributes",
     "capitalize",
     "case",
@@ -46,9 +47,8 @@
     "replace",
     "replace_re",
     "slice",
-    "strip",
     "split",
-    "SideType",
+    "strip",
     "translate",
     "wrap",
 ]
diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyx b/python/pylibcudf/pylibcudf/strings/attributes.pyx
index 8e46a32835d..f1eb09b4965 100644
--- a/python/pylibcudf/pylibcudf/strings/attributes.pyx
+++ b/python/pylibcudf/pylibcudf/strings/attributes.pyx
@@ -6,6 +6,7 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings cimport attributes as cpp_attributes
 
+__all__ = ["code_points", "count_bytes", "count_characters"]
 
 cpdef Column count_characters(Column source_strings):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyx b/python/pylibcudf/pylibcudf/strings/capitalize.pyx
index 06b991c3cf1..a54480b8e4a 100644
--- a/python/pylibcudf/pylibcudf/strings/capitalize.pyx
+++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyx
@@ -14,6 +14,7 @@ from pylibcudf.strings.char_types cimport string_character_types
 
 from cython.operator import dereference
 
+__all__ = ["capitalize", "is_title", "title"]
 
 cpdef Column capitalize(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/case.pyx b/python/pylibcudf/pylibcudf/strings/case.pyx
index 9e6cd7717d3..d0e054bef72 100644
--- a/python/pylibcudf/pylibcudf/strings/case.pyx
+++ b/python/pylibcudf/pylibcudf/strings/case.pyx
@@ -6,6 +6,7 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings cimport case as cpp_case
 
+__all__ = ["swapcase", "to_lower", "to_upper"]
 
 cpdef Column to_lower(Column input):
     cdef unique_ptr[column] c_result
diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/strings/char_types.pyx
index cb04efe5e8f..0af4a1f9c37 100644
--- a/python/pylibcudf/pylibcudf/strings/char_types.pyx
+++ b/python/pylibcudf/pylibcudf/strings/char_types.pyx
@@ -12,6 +12,11 @@ from cython.operator import dereference
 from pylibcudf.libcudf.strings.char_types import \
     string_character_types as StringCharacterTypes  # no-cython-lint
 
+__all__ = [
+    "StringCharacterTypes",
+    "all_characters_of_type",
+    "filter_characters_of_type",
+]
 
 cpdef Column all_characters_of_type(
     Column source_strings,
diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyx b/python/pylibcudf/pylibcudf/strings/combine.pyx
index f17d5265ab4..dc1e72c799b 100644
--- a/python/pylibcudf/pylibcudf/strings/combine.pyx
+++ b/python/pylibcudf/pylibcudf/strings/combine.pyx
@@ -17,6 +17,13 @@ from pylibcudf.libcudf.strings.combine import \
 from pylibcudf.libcudf.strings.combine import \
     separator_on_nulls as SeparatorOnNulls  # no-cython-lint
 
+__all__ = [
+    "OutputIfEmptyList",
+    "SeparatorOnNulls",
+    "concatenate",
+    "join_list_elements",
+    "join_strings",
+]
 
 cpdef Column concatenate(
     Table strings_columns,
diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyx b/python/pylibcudf/pylibcudf/strings/contains.pyx
index d4b1130241d..7b4c53ed853 100644
--- a/python/pylibcudf/pylibcudf/strings/contains.pyx
+++ b/python/pylibcudf/pylibcudf/strings/contains.pyx
@@ -12,6 +12,7 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport (
 from pylibcudf.libcudf.strings cimport contains as cpp_contains
 from pylibcudf.strings.regex_program cimport RegexProgram
 
+__all__ = ["contains_re", "count_re", "like", "matches_re"]
 
 cpdef Column contains_re(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
index dc12b291b11..1899a3b27cc 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
@@ -12,6 +12,7 @@ from pylibcudf.scalar cimport Scalar
 
 from cython.operator import dereference
 
+__all__ = ["from_booleans", "to_booleans"]
 
 cpdef Column to_booleans(Column input, Scalar true_string):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
index 0ee60812e00..f1cd684166c 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
@@ -11,6 +11,7 @@ from pylibcudf.libcudf.strings.convert cimport (
 
 from pylibcudf.types import DataType
 
+__all__ = ["from_timestamps", "is_timestamp", "to_timestamps"]
 
 cpdef Column to_timestamps(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
index 31980ace418..a9654afd00a 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
@@ -11,6 +11,7 @@ from pylibcudf.libcudf.strings.convert cimport (
 
 from pylibcudf.types import DataType
 
+__all__ = ["from_durations", "to_durations"]
 
 cpdef Column to_durations(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
index 962a47dfadf..00cbc822f36 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
@@ -9,6 +9,8 @@ from pylibcudf.libcudf.strings.convert cimport (
 )
 from pylibcudf.types cimport DataType, type_id
 
+__all__ = ["from_fixed_point", "is_fixed_point", "to_fixed_point"]
+
 
 cpdef Column to_fixed_point(Column input, DataType output_type):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx
index 1296f4f9db5..b5199aac577 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx
@@ -9,6 +9,7 @@ from pylibcudf.libcudf.strings.convert cimport (
 )
 from pylibcudf.types cimport DataType
 
+__all__ = ["from_floats", "is_float", "to_floats"]
 
 cpdef Column to_floats(Column strings, DataType output_type):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx
index 5558683a502..12984e15ce9 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx
@@ -9,6 +9,14 @@ from pylibcudf.libcudf.strings.convert cimport (
 )
 from pylibcudf.types cimport DataType
 
+__all__ = [
+    "from_integers",
+    "hex_to_integers",
+    "integers_to_hex",
+    "is_hex",
+    "is_integer",
+    "to_integers",
+]
 
 cpdef Column to_integers(Column input, DataType output_type):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
index 834781f95f3..e7c6aae4fa8 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
@@ -6,6 +6,7 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4
 
+__all__ = ["integers_to_ipv4", "ipv4_to_integers", "is_ipv4"]
 
 cpdef Column ipv4_to_integers(Column input):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx
index cbfe5f5aa8b..518f72f6644 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx
@@ -17,6 +17,7 @@ from pylibcudf.types cimport type_id
 
 from cython.operator import dereference
 
+__all__ = ["format_list_column"]
 
 cpdef Column format_list_column(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx
index 82f8a75f1d9..bd5e23bca43 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx
@@ -6,6 +6,7 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings.convert cimport convert_urls as cpp_convert_urls
 
+__all__ = ["url_decode", "url_encode"]
 
 cpdef Column url_encode(Column input):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyx b/python/pylibcudf/pylibcudf/strings/extract.pyx
index b56eccc8287..0ce70666e92 100644
--- a/python/pylibcudf/pylibcudf/strings/extract.pyx
+++ b/python/pylibcudf/pylibcudf/strings/extract.pyx
@@ -9,6 +9,7 @@ from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.strings.regex_program cimport RegexProgram
 from pylibcudf.table cimport Table
 
+__all__ = ["extract", "extract_all_record"]
 
 cpdef Table extract(Column input, RegexProgram prog):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/find.pyx b/python/pylibcudf/pylibcudf/strings/find.pyx
index 6fc6dca24fd..f0af339ff08 100644
--- a/python/pylibcudf/pylibcudf/strings/find.pyx
+++ b/python/pylibcudf/pylibcudf/strings/find.pyx
@@ -10,6 +10,7 @@ from cython.operator import dereference
 
 from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 
+__all__ = ["contains", "ends_with", "find", "rfind", "starts_with"]
 
 cpdef Column find(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx
index 672aa606bd0..c9ce734b4be 100644
--- a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx
+++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx
@@ -6,6 +6,7 @@ from pylibcudf.column cimport Column
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings cimport find_multiple as cpp_find_multiple
 
+__all__ = ["find_multiple"]
 
 cpdef Column find_multiple(Column input, Column targets):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx
index 89fa4302824..23c84675a16 100644
--- a/python/pylibcudf/pylibcudf/strings/findall.pyx
+++ b/python/pylibcudf/pylibcudf/strings/findall.pyx
@@ -7,6 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings cimport findall as cpp_findall
 from pylibcudf.strings.regex_program cimport RegexProgram
 
+__all__ = ["find_re", "findall"]
 
 cpdef Column findall(Column input, RegexProgram pattern):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyx b/python/pylibcudf/pylibcudf/strings/padding.pyx
index f6950eecf60..0e349a7be47 100644
--- a/python/pylibcudf/pylibcudf/strings/padding.pyx
+++ b/python/pylibcudf/pylibcudf/strings/padding.pyx
@@ -6,6 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings cimport padding as cpp_padding
 from pylibcudf.libcudf.strings.side_type cimport side_type
 
+__all__ = ["pad", "zfill"]
 
 cpdef Column pad(Column input, size_type width, side_type side, str fill_char):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/regex_flags.pyx b/python/pylibcudf/pylibcudf/strings/regex_flags.pyx
index ce3b6b10a42..65b504e0dc7 100644
--- a/python/pylibcudf/pylibcudf/strings/regex_flags.pyx
+++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pyx
@@ -2,3 +2,5 @@
 
 from pylibcudf.libcudf.strings.regex_flags import \
     regex_flags as RegexFlags  # no-cython-lint
+
+__all__ = ["RegexFlags"]
diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyx b/python/pylibcudf/pylibcudf/strings/regex_program.pyx
index 91f585cd637..cf278d7039d 100644
--- a/python/pylibcudf/pylibcudf/strings/regex_program.pyx
+++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyx
@@ -11,6 +11,7 @@ from pylibcudf.strings.regex_flags import RegexFlags
 
 from pylibcudf.strings.regex_flags cimport regex_flags
 
+__all__ = ["RegexProgram"]
 
 cdef class RegexProgram:
     """Regex program class.
diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyx b/python/pylibcudf/pylibcudf/strings/repeat.pyx
index fb2bb13c666..a497b1f438e 100644
--- a/python/pylibcudf/pylibcudf/strings/repeat.pyx
+++ b/python/pylibcudf/pylibcudf/strings/repeat.pyx
@@ -6,6 +6,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings cimport repeat as cpp_repeat
 from pylibcudf.libcudf.types cimport size_type
 
+__all__ = ["repeat_strings"]
 
 cpdef Column repeat_strings(Column input, ColumnorSizeType repeat_times):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx
index 2b94f5e3fee..3ba6c1b5530 100644
--- a/python/pylibcudf/pylibcudf/strings/replace.pyx
+++ b/python/pylibcudf/pylibcudf/strings/replace.pyx
@@ -16,6 +16,7 @@ from pylibcudf.libcudf.strings.replace cimport (
 from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.scalar cimport Scalar
 
+__all__ = ["replace", "replace_multiple", "replace_slice"]
 
 cpdef Column replace(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyx b/python/pylibcudf/pylibcudf/strings/replace_re.pyx
index ccc33fd4425..bdabc779ddf 100644
--- a/python/pylibcudf/pylibcudf/strings/replace_re.pyx
+++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyx
@@ -16,6 +16,7 @@ from pylibcudf.scalar cimport Scalar
 from pylibcudf.strings.regex_flags cimport regex_flags
 from pylibcudf.strings.regex_program cimport RegexProgram
 
+__all__ = ["replace_re", "replace_with_backrefs"]
 
 cpdef Column replace_re(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/side_type.pyx b/python/pylibcudf/pylibcudf/strings/side_type.pyx
index cf0c770cc11..87db4206a9c 100644
--- a/python/pylibcudf/pylibcudf/strings/side_type.pyx
+++ b/python/pylibcudf/pylibcudf/strings/side_type.pyx
@@ -1,3 +1,5 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from pylibcudf.libcudf.strings.side_type import \
     side_type as SideType  # no-cython-lint
+
+__all__ = ["SideType"]
diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyx b/python/pylibcudf/pylibcudf/strings/slice.pyx
index 70d10cab36c..d32de7c50e0 100644
--- a/python/pylibcudf/pylibcudf/strings/slice.pyx
+++ b/python/pylibcudf/pylibcudf/strings/slice.pyx
@@ -14,6 +14,7 @@ from pylibcudf.scalar cimport Scalar
 
 from cython.operator import dereference
 
+__all__ = ["slice_strings"]
 
 cpdef Column slice_strings(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyx b/python/pylibcudf/pylibcudf/strings/split/partition.pyx
index 0fb4f186c41..75537ea46d3 100644
--- a/python/pylibcudf/pylibcudf/strings/split/partition.pyx
+++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyx
@@ -13,6 +13,7 @@ from pylibcudf.table cimport Table
 
 from cython.operator import dereference
 
+__all__ = ["partition", "rpartition"]
 
 cpdef Table partition(Column input, Scalar delimiter=None):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyx b/python/pylibcudf/pylibcudf/strings/split/split.pyx
index e3827f6645e..90087f996f0 100644
--- a/python/pylibcudf/pylibcudf/strings/split/split.pyx
+++ b/python/pylibcudf/pylibcudf/strings/split/split.pyx
@@ -13,6 +13,16 @@ from pylibcudf.table cimport Table
 
 from cython.operator import dereference
 
+__all__ = [
+    "rsplit",
+    "rsplit_re",
+    "rsplit_record",
+    "rsplit_record_re",
+    "split",
+    "split_re",
+    "split_record",
+    "split_record_re",
+]
 
 cpdef Table split(Column strings_column, Scalar delimiter, size_type maxsplit):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyx b/python/pylibcudf/pylibcudf/strings/strip.pyx
index 429a23c3cdf..805d959891b 100644
--- a/python/pylibcudf/pylibcudf/strings/strip.pyx
+++ b/python/pylibcudf/pylibcudf/strings/strip.pyx
@@ -13,6 +13,7 @@ from pylibcudf.libcudf.strings cimport strip as cpp_strip
 from pylibcudf.scalar cimport Scalar
 from pylibcudf.strings.side_type cimport side_type
 
+__all__ = ["strip"]
 
 cpdef Column strip(
     Column input,
diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyx b/python/pylibcudf/pylibcudf/strings/translate.pyx
index d85da8e6cdd..ba1e8dc5d27 100644
--- a/python/pylibcudf/pylibcudf/strings/translate.pyx
+++ b/python/pylibcudf/pylibcudf/strings/translate.pyx
@@ -14,6 +14,7 @@ from cython.operator import dereference
 from pylibcudf.libcudf.strings.translate import \
     filter_type as FilterType  # no-cython-lint
 
+__all__ = ["FilterType", "filter_characters", "translate"]
 
 cdef vector[pair[char_utf8, char_utf8]] _table_to_c_table(dict table):
     """
diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyx b/python/pylibcudf/pylibcudf/strings/wrap.pyx
index 2ced250f837..b696eb48e47 100644
--- a/python/pylibcudf/pylibcudf/strings/wrap.pyx
+++ b/python/pylibcudf/pylibcudf/strings/wrap.pyx
@@ -7,6 +7,7 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.strings cimport wrap as cpp_wrap
 from pylibcudf.libcudf.types cimport size_type
 
+__all__ = ["wrap"]
 
 cpdef Column wrap(Column input, size_type width):
     """
diff --git a/python/pylibcudf/pylibcudf/table.pyx b/python/pylibcudf/pylibcudf/table.pyx
index d0d6f2343d0..97955aa0ae6 100644
--- a/python/pylibcudf/pylibcudf/table.pyx
+++ b/python/pylibcudf/pylibcudf/table.pyx
@@ -10,6 +10,7 @@ from pylibcudf.libcudf.table.table cimport table
 
 from .column cimport Column
 
+__all__ = ["Table"]
 
 cdef class Table:
     """A list of columns of the same size.
diff --git a/python/pylibcudf/pylibcudf/traits.pyx b/python/pylibcudf/pylibcudf/traits.pyx
index 5a1c67e1f6c..c191a1fa88d 100644
--- a/python/pylibcudf/pylibcudf/traits.pyx
+++ b/python/pylibcudf/pylibcudf/traits.pyx
@@ -5,6 +5,26 @@ from pylibcudf.libcudf.utilities cimport traits
 
 from .types cimport DataType
 
+__all__ = [
+    "is_bit_castable",
+    "is_boolean",
+    "is_chrono",
+    "is_compound",
+    "is_dictionary",
+    "is_duration",
+    "is_equality_comparable",
+    "is_fixed_point",
+    "is_fixed_width",
+    "is_floating_point",
+    "is_index_type",
+    "is_integral",
+    "is_integral_not_bool",
+    "is_nested",
+    "is_numeric",
+    "is_relationally_comparable",
+    "is_timestamp",
+    "is_unsigned",
+]
 
 cpdef bool is_relationally_comparable(DataType typ):
     """Checks if the given data type supports relational comparisons.
diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx
index e8d95cadb0c..9700bcff221 100644
--- a/python/pylibcudf/pylibcudf/transform.pyx
+++ b/python/pylibcudf/pylibcudf/transform.pyx
@@ -18,6 +18,15 @@ from .gpumemoryview cimport gpumemoryview
 from .types cimport DataType
 from .utils cimport int_to_bitmask_ptr
 
+__all__ = [
+    "bools_to_mask",
+    "compute_column",
+    "encode",
+    "mask_to_bools",
+    "nans_to_nulls",
+    "one_hot_encode",
+    "transform",
+]
 
 cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input):
     """Create a null mask preserving existing nulls and converting nans to null.
diff --git a/python/pylibcudf/pylibcudf/transpose.pyx b/python/pylibcudf/pylibcudf/transpose.pyx
index a24f937ced3..5eb3e58cebc 100644
--- a/python/pylibcudf/pylibcudf/transpose.pyx
+++ b/python/pylibcudf/pylibcudf/transpose.pyx
@@ -9,6 +9,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view
 from .column cimport Column
 from .table cimport Table
 
+__all__ = ["transpose"]
 
 cpdef Table transpose(Table input_table):
     """Transpose a Table.
diff --git a/python/pylibcudf/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx
index a0c31f994a3..afa1b56f38a 100644
--- a/python/pylibcudf/pylibcudf/types.pyx
+++ b/python/pylibcudf/pylibcudf/types.pyx
@@ -20,6 +20,22 @@ from pylibcudf.libcudf.types import null_order as NullOrder  # no-cython-lint, i
 from pylibcudf.libcudf.types import order as Order  # no-cython-lint, isort:skip
 from pylibcudf.libcudf.types import sorted as Sorted  # no-cython-lint, isort:skip
 
+__all__ = [
+    "DataType",
+    "Interpolation",
+    "MaskState",
+    "NanEquality",
+    "NanPolicy",
+    "NullEquality",
+    "NullOrder",
+    "NullPolicy",
+    "Order",
+    "SIZE_TYPE",
+    "SIZE_TYPE_ID",
+    "Sorted",
+    "TypeId",
+    "size_of",
+]
 
 cdef class DataType:
     """Indicator for the logical data type of an element in a column.
diff --git a/python/pylibcudf/pylibcudf/unary.pyx b/python/pylibcudf/pylibcudf/unary.pyx
index 53e8c382b5e..b738ab53d1b 100644
--- a/python/pylibcudf/pylibcudf/unary.pyx
+++ b/python/pylibcudf/pylibcudf/unary.pyx
@@ -13,6 +13,16 @@ from pylibcudf.libcudf.unary import \
 from .column cimport Column
 from .types cimport DataType
 
+__all__ = [
+    "UnaryOperator",
+    "cast",
+    "is_nan",
+    "is_not_nan",
+    "is_null",
+    "is_supported_cast",
+    "is_valid",
+    "unary_operation",
+]
 
 cpdef Column unary_operation(Column input, unary_operator op):
     """Perform a unary operation on a column.

From 7493605016d75bba0374aeb049e040a73a0f14b6 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 8 Nov 2024 10:38:25 +0000
Subject: [PATCH 10/16] Pylibcudf classes are typically not hashable

---
 python/pylibcudf/pylibcudf/column.pyx                  | 4 ++++
 python/pylibcudf/pylibcudf/contiguous_split.pyx        | 4 ++++
 python/pylibcudf/pylibcudf/expressions.pyx             | 2 +-
 python/pylibcudf/pylibcudf/gpumemoryview.pyx           | 2 ++
 python/pylibcudf/pylibcudf/groupby.pyx                 | 4 ++++
 python/pylibcudf/pylibcudf/io/datasource.pyx           | 1 +
 python/pylibcudf/pylibcudf/io/orc.pyx                  | 4 ++++
 python/pylibcudf/pylibcudf/io/parquet.pyx              | 2 ++
 python/pylibcudf/pylibcudf/io/types.pyx                | 6 ++++++
 python/pylibcudf/pylibcudf/json.pyx                    | 2 ++
 python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx | 2 ++
 python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx | 2 ++
 python/pylibcudf/pylibcudf/nvtext/tokenize.pyx         | 2 ++
 python/pylibcudf/pylibcudf/scalar.pyx                  | 2 ++
 python/pylibcudf/pylibcudf/strings/regex_program.pyx   | 2 ++
 python/pylibcudf/pylibcudf/table.pyx                   | 2 ++
 16 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx
index 794c76438f3..9bb5574608e 100644
--- a/python/pylibcudf/pylibcudf/column.pyx
+++ b/python/pylibcudf/pylibcudf/column.pyx
@@ -62,6 +62,8 @@ cdef class Column:
         self._children = children
         self._num_children = len(children)
 
+    __hash__ = None
+
     cdef column_view view(self) nogil:
         """Generate a libcudf column_view to pass to libcudf algorithms.
 
@@ -385,6 +387,8 @@ cdef class ListColumnView:
             raise TypeError("Column is not a list type")
         self._column = col
 
+    __hash__ = None
+
     cpdef child(self):
         """The data column of the underlying list column."""
         return self._column.child(1)
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx
index 451757eea10..94873e079c9 100644
--- a/python/pylibcudf/pylibcudf/contiguous_split.pyx
+++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx
@@ -45,6 +45,8 @@ cdef class HostBuffer:
         out.strides[0] = 1
         return out
 
+    __hash__ = None
+
     def __getbuffer__(self, Py_buffer *buffer, int flags):
         buffer.buf = dereference(self.c_obj).data()
         buffer.format = NULL  # byte
@@ -76,6 +78,8 @@ cdef class PackedColumns:
             "Use one of the factories."
         )
 
+    __hash__ = None
+
     @staticmethod
     cdef PackedColumns from_libcudf(unique_ptr[packed_columns] data):
         """Create a Python PackedColumns from a libcudf packed_columns."""
diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx
index b0db533dba9..0f12cfe313c 100644
--- a/python/pylibcudf/pylibcudf/expressions.pyx
+++ b/python/pylibcudf/pylibcudf/expressions.pyx
@@ -68,7 +68,7 @@ cdef class Expression:
 
     For details, see :cpp:class:`cudf::ast::expression`.
     """
-    pass
+    __hash__ = None
 
 cdef class Literal(Expression):
     """
diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyx b/python/pylibcudf/pylibcudf/gpumemoryview.pyx
index 605a19ea0de..41316eddb60 100644
--- a/python/pylibcudf/pylibcudf/gpumemoryview.pyx
+++ b/python/pylibcudf/pylibcudf/gpumemoryview.pyx
@@ -26,3 +26,5 @@ cdef class gpumemoryview:
     @property
     def __cuda_array_interface__(self):
         return self.obj.__cuda_array_interface__
+
+    __hash__ = None
diff --git a/python/pylibcudf/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx
index 2760516b316..e6cb3ac81a7 100644
--- a/python/pylibcudf/pylibcudf/groupby.pyx
+++ b/python/pylibcudf/pylibcudf/groupby.pyx
@@ -47,6 +47,8 @@ cdef class GroupByRequest:
         self._values = values
         self._aggregations = aggregations
 
+    __hash__ = None
+
     cdef aggregation_request _to_libcudf_agg_request(self) except *:
         """Convert to a libcudf aggregation_request object.
 
@@ -129,6 +131,8 @@ cdef class GroupBy:
         # deallocated from under us:
         self._keys = keys
 
+    __hash__ = None
+
     @staticmethod
     cdef tuple _parse_outputs(
         pair[unique_ptr[table], vector[aggregation_result]] c_res
diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyx b/python/pylibcudf/pylibcudf/io/datasource.pyx
index 4e7c9c8e385..aac1c0d1014 100644
--- a/python/pylibcudf/pylibcudf/io/datasource.pyx
+++ b/python/pylibcudf/pylibcudf/io/datasource.pyx
@@ -5,6 +5,7 @@ from pylibcudf.libcudf.io.datasource cimport datasource
 __all__ = ["Datasource"]
 
 cdef class Datasource:
+    __hash__ = None
     cdef datasource* get_datasource(self) except * nogil:
         with gil:
             raise NotImplementedError("get_datasource() should not "
diff --git a/python/pylibcudf/pylibcudf/io/orc.pyx b/python/pylibcudf/pylibcudf/io/orc.pyx
index f2f644a32f9..4270f5b4f95 100644
--- a/python/pylibcudf/pylibcudf/io/orc.pyx
+++ b/python/pylibcudf/pylibcudf/io/orc.pyx
@@ -45,6 +45,8 @@ cdef class OrcColumnStatistics:
             "use `OrcColumnStatistics.from_libcudf` instead."
         )
 
+    __hash__ = None
+
     @property
     def number_of_values(self):
         if self.number_of_values_c.has_value():
@@ -189,6 +191,8 @@ cdef class OrcColumnStatistics:
 
 cdef class ParsedOrcStatistics:
 
+    __hash__ = None
+
     @property
     def column_names(self):
         return [name.decode() for name in self.c_obj.column_names]
diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx
index 7f93c763298..b76a352d633 100644
--- a/python/pylibcudf/pylibcudf/io/parquet.pyx
+++ b/python/pylibcudf/pylibcudf/io/parquet.pyx
@@ -125,6 +125,8 @@ cdef class ChunkedParquetReader:
                 )
             )
 
+    __hash__ = None
+
     cpdef bool has_next(self):
         """
         Returns True if there is another chunk in the Parquet file
diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx
index 8bc226d83e5..85eb03fe3b9 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyx
+++ b/python/pylibcudf/pylibcudf/io/types.pyx
@@ -65,6 +65,8 @@ cdef class TableWithMetadata:
 
         self.metadata.schema_info = self._make_column_info(column_names)
 
+    __hash__ = None
+
     cdef vector[column_name_info] _make_column_info(self, list column_names):
         cdef vector[column_name_info] col_name_infos
         cdef column_name_info info
@@ -229,6 +231,8 @@ cdef class SourceInfo:
 
         self.c_obj = source_info(c_host_buffers)
 
+    __hash__ = None
+
 
 # Adapts a python io.IOBase object as a libcudf IO data_sink. This lets you
 # write from cudf to any python file-like object (File/BytesIO/SocketIO etc)
@@ -311,3 +315,5 @@ cdef class SinkInfo:
         else:
             # we don't have sinks so we must have paths to sinks
             self.c_obj = sink_info(paths)
+
+    __hash__ = None
diff --git a/python/pylibcudf/pylibcudf/json.pyx b/python/pylibcudf/pylibcudf/json.pyx
index 9c5dd023509..5ec1e1be971 100644
--- a/python/pylibcudf/pylibcudf/json.pyx
+++ b/python/pylibcudf/pylibcudf/json.pyx
@@ -27,6 +27,8 @@ cdef class GetJsonObjectOptions:
         )
         self.set_missing_fields_as_nulls(missing_fields_as_nulls)
 
+    __hash__ = None
+
     def get_allow_single_quotes(self):
         """
         Returns true/false depending on whether single-quotes for representing strings
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
index c63b92328f3..7565b21084f 100644
--- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx
@@ -28,6 +28,8 @@ cdef class BPEMergePairs:
         with nogil:
             self.c_obj = move(cpp_load_merge_pairs(c_pairs))
 
+    __hash__ = None
+
 cpdef Column byte_pair_encoding(
     Column input,
     BPEMergePairs merge_pairs,
diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx
index a346eef4619..14fb6f5fe1e 100644
--- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx
@@ -25,6 +25,8 @@ cdef class HashedVocabulary:
         with nogil:
             self.c_obj = move(cpp_load_vocabulary_file(c_hash_file))
 
+    __hash__ = None
+
 cpdef tuple[Column, Column, Column] subword_tokenize(
     Column input,
     HashedVocabulary vocabulary_table,
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
index 26b055fd5ae..43d426489b4 100644
--- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx
@@ -41,6 +41,8 @@ cdef class TokenizeVocabulary:
         with nogil:
             self.c_obj = move(cpp_load_vocabulary(c_vocab))
 
+    __hash__ = None
+
 cpdef Column tokenize_scalar(Column input, Scalar delimiter=None):
     """
     Returns a single column of strings by tokenizing the input
diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx
index e522a2a4670..1ac014e891e 100644
--- a/python/pylibcudf/pylibcudf/scalar.pyx
+++ b/python/pylibcudf/pylibcudf/scalar.pyx
@@ -39,6 +39,8 @@ cdef class Scalar:
         # DeviceScalar.
         raise ValueError("Scalar should be constructed with a factory")
 
+    __hash__ = None
+
     cdef const scalar* get(self) noexcept nogil:
         return self.c_obj.get()
 
diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyx b/python/pylibcudf/pylibcudf/strings/regex_program.pyx
index cf278d7039d..46bfde074d2 100644
--- a/python/pylibcudf/pylibcudf/strings/regex_program.pyx
+++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyx
@@ -25,6 +25,8 @@ cdef class RegexProgram:
     def __init__(self, *args, **kwargs):
         raise ValueError("Do not instantiate RegexProgram directly, use create")
 
+    __hash__ = None
+
     @staticmethod
     def create(str pattern, int flags):
         """Create a program from a pattern.
diff --git a/python/pylibcudf/pylibcudf/table.pyx b/python/pylibcudf/pylibcudf/table.pyx
index 97955aa0ae6..0c1e88a927c 100644
--- a/python/pylibcudf/pylibcudf/table.pyx
+++ b/python/pylibcudf/pylibcudf/table.pyx
@@ -25,6 +25,8 @@ cdef class Table:
             raise ValueError("All columns must be pylibcudf Column objects")
         self._columns = columns
 
+    __hash__ = None
+
     cdef table_view view(self) nogil:
         """Generate a libcudf table_view to pass to libcudf algorithms.
 

From a4e8617b807a066b3d54cc90127594453d47f5c1 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 8 Nov 2024 10:38:54 +0000
Subject: [PATCH 11/16] Minor fixes in response to review

---
 python/pylibcudf/pylibcudf/aggregation.pyi    |  82 ++++++-------
 python/pylibcudf/pylibcudf/binaryop.pyi       |  72 ++++++------
 python/pylibcudf/pylibcudf/column.pyi         |   2 +-
 python/pylibcudf/pylibcudf/copying.pyi        |  12 +-
 python/pylibcudf/pylibcudf/datetime.pyi       |  36 +++---
 python/pylibcudf/pylibcudf/expressions.pyi    | 108 ++++++++---------
 python/pylibcudf/pylibcudf/gpumemoryview.pyi  |   2 +-
 python/pylibcudf/pylibcudf/io/types.pyi       |  72 ++++++------
 python/pylibcudf/pylibcudf/labeling.pyi       |   6 +-
 python/pylibcudf/pylibcudf/lists.pyi          |  10 +-
 .../pylibcudf/nvtext/byte_pair_encode.pyi     |   4 +-
 .../pylibcudf/nvtext/subword_tokenize.pyi     |   2 +-
 .../pylibcudf/pylibcudf/nvtext/tokenize.pyi   |   2 +-
 python/pylibcudf/pylibcudf/reduce.pyi         |   6 +-
 python/pylibcudf/pylibcudf/replace.pyi        |   6 +-
 python/pylibcudf/pylibcudf/round.pyi          |   6 +-
 .../pylibcudf/pylibcudf/stream_compaction.pyi |  10 +-
 .../pylibcudf/strings/char_types.pyi          |  22 ++--
 .../pylibcudf/pylibcudf/strings/combine.pyi   |  10 +-
 .../pylibcudf/strings/regex_flags.pyi         |   8 +-
 .../pylibcudf/pylibcudf/strings/side_type.pyi |   8 +-
 .../pylibcudf/pylibcudf/strings/translate.pyi |   6 +-
 python/pylibcudf/pylibcudf/table.pyi          |   2 +-
 python/pylibcudf/pylibcudf/types.pyi          | 110 +++++++++---------
 python/pylibcudf/pylibcudf/unary.pyi          |  46 ++++----
 25 files changed, 325 insertions(+), 325 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/aggregation.pyi b/python/pylibcudf/pylibcudf/aggregation.pyi
index f6fdf0273ae..230249995a5 100644
--- a/python/pylibcudf/pylibcudf/aggregation.pyi
+++ b/python/pylibcudf/pylibcudf/aggregation.pyi
@@ -1,6 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.types import (
     DataType,
@@ -13,56 +13,56 @@ from pylibcudf.types import (
 )
 
 class Kind(IntEnum):
-    SUM = auto()
-    PRODUCT = auto()
-    MIN = auto()
-    MAX = auto()
-    COUNT_VALID = auto()
-    COUNT_ALL = auto()
-    ANY = auto()
-    ALL = auto()
-    SUM_OF_SQUARES = auto()
-    MEAN = auto()
-    VARIANCE = auto()
-    STD = auto()
-    MEDIAN = auto()
-    QUANTILE = auto()
-    ARGMAX = auto()
-    ARGMIN = auto()
-    NUNIQUE = auto()
-    NTH_ELEMENT = auto()
-    RANK = auto()
-    COLLECT_LIST = auto()
-    COLLECT_SET = auto()
-    PTX = auto()
-    CUDA = auto()
-    CORRELATION = auto()
-    COVARIANCE = auto()
+    SUM = ...
+    PRODUCT = ...
+    MIN = ...
+    MAX = ...
+    COUNT_VALID = ...
+    COUNT_ALL = ...
+    ANY = ...
+    ALL = ...
+    SUM_OF_SQUARES = ...
+    MEAN = ...
+    VARIANCE = ...
+    STD = ...
+    MEDIAN = ...
+    QUANTILE = ...
+    ARGMAX = ...
+    ARGMIN = ...
+    NUNIQUE = ...
+    NTH_ELEMENT = ...
+    RANK = ...
+    COLLECT_LIST = ...
+    COLLECT_SET = ...
+    PTX = ...
+    CUDA = ...
+    CORRELATION = ...
+    COVARIANCE = ...
 
 class CorrelationType(IntEnum):
-    PEARSON = auto()
-    KENDALL = auto()
-    SPEARMAN = auto()
+    PEARSON = ...
+    KENDALL = ...
+    SPEARMAN = ...
 
 class EWMHistory(IntEnum):
-    INFINITE = auto()
-    FINITE = auto()
+    INFINITE = ...
+    FINITE = ...
 
 class RankMethod(IntEnum):
-    FIRST = auto()
-    AVERAGE = auto()
-    MIN = auto()
-    MAX = auto()
-    DENSE = auto()
+    FIRST = ...
+    AVERAGE = ...
+    MIN = ...
+    MAX = ...
+    DENSE = ...
 
 class RankPercentage(IntEnum):
-    NONE = auto()
-    ZERO_NORMALIZED = auto()
-    ONE_NORMALIZED = auto()
+    NONE = ...
+    ZERO_NORMALIZED = ...
+    ONE_NORMALIZED = ...
 
 class UdfType(IntEnum):
-    CUDA = auto()
-    PTX = auto()
+    CUDA = ...
+    PTX = ...
 
 class Aggregation:
     def kind(self) -> Kind: ...
diff --git a/python/pylibcudf/pylibcudf/binaryop.pyi b/python/pylibcudf/pylibcudf/binaryop.pyi
index 9cbaeb4549e..f745e6c6854 100644
--- a/python/pylibcudf/pylibcudf/binaryop.pyi
+++ b/python/pylibcudf/pylibcudf/binaryop.pyi
@@ -1,47 +1,47 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.types import DataType
 
 class BinaryOperator(IntEnum):
-    ADD = auto()
-    SUB = auto()
-    MUL = auto()
-    DIV = auto()
-    TRUE_DIV = auto()
-    FLOOR_DIV = auto()
-    MOD = auto()
-    PMOD = auto()
-    PYMOD = auto()
-    POW = auto()
-    INT_POW = auto()
-    LOG_BASE = auto()
-    ATAN2 = auto()
-    SHIFT_LEFT = auto()
-    SHIFT_RIGHT = auto()
-    SHIFT_RIGHT_UNSIGNED = auto()
-    BITWISE_AND = auto()
-    BITWISE_OR = auto()
-    BITWISE_XOR = auto()
-    LOGICAL_AND = auto()
-    LOGICAL_OR = auto()
-    EQUAL = auto()
-    NOT_EQUAL = auto()
-    LESS = auto()
-    GREATER = auto()
-    LESS_EQUAL = auto()
-    GREATER_EQUAL = auto()
-    NULL_EQUALS = auto()
-    NULL_MAX = auto()
-    NULL_MIN = auto()
-    NULL_NOT_EQUALS = auto()
-    GENERIC_BINARY = auto()
-    NULL_LOGICAL_AND = auto()
-    NULL_LOGICAL_OR = auto()
-    INVALID_BINARY = auto()
+    ADD = ...
+    SUB = ...
+    MUL = ...
+    DIV = ...
+    TRUE_DIV = ...
+    FLOOR_DIV = ...
+    MOD = ...
+    PMOD = ...
+    PYMOD = ...
+    POW = ...
+    INT_POW = ...
+    LOG_BASE = ...
+    ATAN2 = ...
+    SHIFT_LEFT = ...
+    SHIFT_RIGHT = ...
+    SHIFT_RIGHT_UNSIGNED = ...
+    BITWISE_AND = ...
+    BITWISE_OR = ...
+    BITWISE_XOR = ...
+    LOGICAL_AND = ...
+    LOGICAL_OR = ...
+    EQUAL = ...
+    NOT_EQUAL = ...
+    LESS = ...
+    GREATER = ...
+    LESS_EQUAL = ...
+    GREATER_EQUAL = ...
+    NULL_EQUALS = ...
+    NULL_MAX = ...
+    NULL_MIN = ...
+    NULL_NOT_EQUALS = ...
+    GENERIC_BINARY = ...
+    NULL_LOGICAL_AND = ...
+    NULL_LOGICAL_OR = ...
+    INVALID_BINARY = ...
 
 def binary_operation(
     lhs: Column | Scalar,
diff --git a/python/pylibcudf/pylibcudf/column.pyi b/python/pylibcudf/pylibcudf/column.pyi
index 72b41a9be5e..c9f70de3dbf 100644
--- a/python/pylibcudf/pylibcudf/column.pyi
+++ b/python/pylibcudf/pylibcudf/column.pyi
@@ -39,7 +39,7 @@ class Column:
     def from_cuda_array_interface_obj(obj: Any) -> Column: ...
 
 class ListColumnView:
-    def __init__(self, column: Column) -> None: ...
+    def __init__(self, column: Column): ...
     def child(self) -> Column: ...
     def offsets(self) -> Column: ...
 
diff --git a/python/pylibcudf/pylibcudf/copying.pyi b/python/pylibcudf/pylibcudf/copying.pyi
index 07bfced4a55..6cf4ed48724 100644
--- a/python/pylibcudf/pylibcudf/copying.pyi
+++ b/python/pylibcudf/pylibcudf/copying.pyi
@@ -1,6 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 from typing import TypeVar
 
 from pylibcudf.column import Column
@@ -8,13 +8,13 @@ from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
 
 class MaskAllocationPolicy(IntEnum):
-    NEVER = auto()
-    RETAIN = auto()
-    ALWAYS = auto()
+    NEVER = ...
+    RETAIN = ...
+    ALWAYS = ...
 
 class OutOfBoundsPolicy(IntEnum):
-    NULLIFY = auto()
-    DONT_CHECK = auto()
+    NULLIFY = ...
+    DONT_CHECK = ...
 
 ColumnOrTable = TypeVar("ColumnOrTable", Column, Table)
 
diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi
index 30ff3edf4fb..91df1bfb92b 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyi
+++ b/python/pylibcudf/pylibcudf/datetime.pyi
@@ -1,30 +1,30 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 
 class DatetimeComponent(IntEnum):
-    YEAR = auto()
-    MONTH = auto()
-    DAY = auto()
-    WEEKDAY = auto()
-    HOUR = auto()
-    MINUTE = auto()
-    SECOND = auto()
-    MILLISECOND = auto()
-    MICROSECOND = auto()
-    NANOSECOND = auto()
+    YEAR = ...
+    MONTH = ...
+    DAY = ...
+    WEEKDAY = ...
+    HOUR = ...
+    MINUTE = ...
+    SECOND = ...
+    MILLISECOND = ...
+    MICROSECOND = ...
+    NANOSECOND = ...
 
 class RoundingFrequency(IntEnum):
-    DAY = auto()
-    HOUR = auto()
-    MINUTE = auto()
-    SECOND = auto()
-    MILLISECOND = auto()
-    MICROSECOND = auto()
-    NANOSECOND = auto()
+    DAY = ...
+    HOUR = ...
+    MINUTE = ...
+    SECOND = ...
+    MILLISECOND = ...
+    MICROSECOND = ...
+    NANOSECOND = ...
 
 def extract_millisecond_fraction(input: Column) -> Column: ...
 def extract_microsecond_fraction(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/expressions.pyi b/python/pylibcudf/pylibcudf/expressions.pyi
index c3769bbfb85..5b5c6755392 100644
--- a/python/pylibcudf/pylibcudf/expressions.pyi
+++ b/python/pylibcudf/pylibcudf/expressions.pyi
@@ -1,73 +1,73 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.scalar import Scalar
 
 class TableReference(IntEnum):
-    LEFT = auto()
-    RIGHT = auto()
+    LEFT = ...
+    RIGHT = ...
 
 class ASTOperator(IntEnum):
-    ADD = auto()
-    SUB = auto()
-    MUL = auto()
-    DIV = auto()
-    TRUE_DIV = auto()
-    FLOOR_DIV = auto()
-    MOD = auto()
-    PYMOD = auto()
-    POW = auto()
-    EQUAL = auto()
-    NULL_EQUAL = auto()
-    NOT_EQUAL = auto()
-    LESS = auto()
-    GREATER = auto()
-    LESS_EQUAL = auto()
-    GREATER_EQUAL = auto()
-    BITWISE_AND = auto()
-    BITWISE_OR = auto()
-    BITWISE_XOR = auto()
-    NULL_LOGICAL_AND = auto()
-    LOGICAL_AND = auto()
-    NULL_LOGICAL_OR = auto()
-    LOGICAL_OR = auto()
-    IDENTITY = auto()
-    IS_NULL = auto()
-    SIN = auto()
-    COS = auto()
-    TAN = auto()
-    ARCSIN = auto()
-    ARCCOS = auto()
-    ARCTAN = auto()
-    SINH = auto()
-    COSH = auto()
-    TANH = auto()
-    ARCSINH = auto()
-    ARCCOSH = auto()
-    ARCTANH = auto()
-    EXP = auto()
-    LOG = auto()
-    SQRT = auto()
-    CBRT = auto()
-    CEIL = auto()
-    FLOOR = auto()
-    ABS = auto()
-    RINT = auto()
-    BIT_INVERT = auto()
-    NOT = auto()
+    ADD = ...
+    SUB = ...
+    MUL = ...
+    DIV = ...
+    TRUE_DIV = ...
+    FLOOR_DIV = ...
+    MOD = ...
+    PYMOD = ...
+    POW = ...
+    EQUAL = ...
+    NULL_EQUAL = ...
+    NOT_EQUAL = ...
+    LESS = ...
+    GREATER = ...
+    LESS_EQUAL = ...
+    GREATER_EQUAL = ...
+    BITWISE_AND = ...
+    BITWISE_OR = ...
+    BITWISE_XOR = ...
+    NULL_LOGICAL_AND = ...
+    LOGICAL_AND = ...
+    NULL_LOGICAL_OR = ...
+    LOGICAL_OR = ...
+    IDENTITY = ...
+    IS_NULL = ...
+    SIN = ...
+    COS = ...
+    TAN = ...
+    ARCSIN = ...
+    ARCCOS = ...
+    ARCTAN = ...
+    SINH = ...
+    COSH = ...
+    TANH = ...
+    ARCSINH = ...
+    ARCCOSH = ...
+    ARCTANH = ...
+    EXP = ...
+    LOG = ...
+    SQRT = ...
+    CBRT = ...
+    CEIL = ...
+    FLOOR = ...
+    ABS = ...
+    RINT = ...
+    BIT_INVERT = ...
+    NOT = ...
 
 class Expression: ...
 
 class Literal(Expression):
-    def __init__(self, value: Scalar) -> None: ...
+    def __init__(self, value: Scalar): ...
 
 class ColumnReference(Expression):
     def __init__(
         self, index: int, table_source: TableReference = TableReference.LEFT
-    ) -> None: ...
+    ): ...
 
 class ColumnNameReference(Expression):
-    def __init__(self, name: str) -> None: ...
+    def __init__(self, name: str): ...
 
 class Operation(Expression):
     def __init__(
@@ -75,4 +75,4 @@ class Operation(Expression):
         op: ASTOperator,
         left: Expression,
         right: Expression | None = None,
-    ) -> None: ...
+    ): ...
diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyi b/python/pylibcudf/pylibcudf/gpumemoryview.pyi
index 0491ba896e5..50f1f39a515 100644
--- a/python/pylibcudf/pylibcudf/gpumemoryview.pyi
+++ b/python/pylibcudf/pylibcudf/gpumemoryview.pyi
@@ -4,6 +4,6 @@ from collections.abc import Mapping
 from typing import Any
 
 class gpumemoryview:
-    def __init__(self, data: Any) -> None: ...
+    def __init__(self, data: Any): ...
     @property
     def __cuda_array_interface__(self) -> Mapping[str, Any]: ...
diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi
index f668c07f940..3ddf21f84dd 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyi
+++ b/python/pylibcudf/pylibcudf/io/types.pyi
@@ -2,7 +2,7 @@
 import io
 import os
 from collections.abc import Mapping
-from enum import IntEnum, auto
+from enum import IntEnum
 from typing import Literal, TypeAlias, overload
 
 from pylibcudf.column import Column
@@ -10,51 +10,51 @@ from pylibcudf.io.datasource import Datasource
 from pylibcudf.table import Table
 
 class JSONRecoveryMode(IntEnum):
-    FAIL = auto()
-    RECOVER_WITH_NULL = auto()
+    FAIL = ...
+    RECOVER_WITH_NULL = ...
 
 class CompressionType(IntEnum):
-    NONE = auto()
-    AUTO = auto()
-    SNAPPY = auto()
-    GZIP = auto()
-    BZIP2 = auto()
-    BROTLI = auto()
-    ZIP = auto()
-    XZ = auto()
-    ZLIB = auto()
-    LZ4 = auto()
-    LZO = auto()
-    ZSTD = auto()
+    NONE = ...
+    AUTO = ...
+    SNAPPY = ...
+    GZIP = ...
+    BZIP2 = ...
+    BROTLI = ...
+    ZIP = ...
+    XZ = ...
+    ZLIB = ...
+    LZ4 = ...
+    LZO = ...
+    ZSTD = ...
 
 class ColumnEncoding(IntEnum):
-    USE_DEFAULT = auto()
-    DICTIONARY = auto()
-    PLAIN = auto()
-    DELTA_BINARY_PACKED = auto()
-    DELTA_LENGTH_BYTE_ARRAY = auto()
-    DELTA_BYTE_ARRAY = auto()
-    BYTE_STREAM_SPLIT = auto()
-    DIRECT = auto()
-    DIRECT_V2 = auto()
-    DICTIONARY_V2 = auto()
+    USE_DEFAULT = ...
+    DICTIONARY = ...
+    PLAIN = ...
+    DELTA_BINARY_PACKED = ...
+    DELTA_LENGTH_BYTE_ARRAY = ...
+    DELTA_BYTE_ARRAY = ...
+    BYTE_STREAM_SPLIT = ...
+    DIRECT = ...
+    DIRECT_V2 = ...
+    DICTIONARY_V2 = ...
 
 class DictionaryPolicy(IntEnum):
-    NEVER = auto()
-    ADAPTIVE = auto()
-    ALWAYS = auto()
+    NEVER = ...
+    ADAPTIVE = ...
+    ALWAYS = ...
 
 class StatisticsFreq(IntEnum):
-    STATISTICS_NONE = auto()
-    STATISTICS_ROWGROUP = auto()
-    STATISTICS_PAGE = auto()
-    STATISTICS_COLUMN = auto()
+    STATISTICS_NONE = ...
+    STATISTICS_ROWGROUP = ...
+    STATISTICS_PAGE = ...
+    STATISTICS_COLUMN = ...
 
 class QuoteStyle(IntEnum):
-    MINIMAL = auto()
-    ALL = auto()
-    NONNUMERIC = auto()
-    NONE = auto()
+    MINIMAL = ...
+    ALL = ...
+    NONNUMERIC = ...
+    NONE = ...
 
 ColumnNameSpec: TypeAlias = tuple[str, list[ColumnNameSpec]]
 ChildNameSpec: TypeAlias = Mapping[str, ChildNameSpec]
diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi
index 0a6f2c13719..c3a75d10baf 100644
--- a/python/pylibcudf/pylibcudf/labeling.pyi
+++ b/python/pylibcudf/pylibcudf/labeling.pyi
@@ -1,12 +1,12 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 
 class Inclusive(IntEnum):
-    YES = auto()
-    NO = auto()
+    YES = ...
+    NO = ...
 
 def label_bins(
     input: Column,
diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi
index 4e8966ce98a..6e86aca40aa 100644
--- a/python/pylibcudf/pylibcudf/lists.pyi
+++ b/python/pylibcudf/pylibcudf/lists.pyi
@@ -1,6 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
@@ -8,12 +8,12 @@ from pylibcudf.table import Table
 from pylibcudf.types import NanEquality, NullEquality, NullOrder, Order
 
 class ConcatenateNullPolicy(IntEnum):
-    IGNORE = auto()
-    NULLIFY_OUTPUT_ROW = auto()
+    IGNORE = ...
+    NULLIFY_OUTPUT_ROW = ...
 
 class DuplicateFindOption(IntEnum):
-    FIND_FIRST = auto()
-    FIND_LAST = auto()
+    FIND_FIRST = ...
+    FIND_LAST = ...
 
 def explode_outer(input: Table, explode_column_idx: int) -> Table: ...
 def concatenate_rows(input: Table) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
index eb85acd56e5..a75714d9648 100644
--- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
@@ -4,8 +4,8 @@ from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 
 class BPEMergePairs:
-    def __init__(self, merge_pairs: Column) -> None: ...
+    def __init__(self, merge_pairs: Column): ...
 
 def byte_pair_encoding(
     input: Column, merge_pairs: BPEMergePairs, separator: Scalar | None = None
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi
index 996bd093eb4..f6618e296b1 100644
--- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi
@@ -3,7 +3,7 @@
 from pylibcudf.column import Column
 
 class HashedVocabulary:
-    def __init__(self, hash_file: str) -> None: ...
+    def __init__(self, hash_file: str): ...
 
 def subword_tokenize(
     input: Column,
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
index 516011eff61..b9aa2393514 100644
--- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi
@@ -4,7 +4,7 @@ from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 
 class TokenizeVocabulary:
-    def __init__(self, vocab: Column) -> None: ...
+    def __init__(self, vocab: Column): ...
 
 def tokenize_scalar(
     input: Column, delimiter: Scalar | None = None
diff --git a/python/pylibcudf/pylibcudf/reduce.pyi b/python/pylibcudf/pylibcudf/reduce.pyi
index 03193d3d0d9..a09949b7b30 100644
--- a/python/pylibcudf/pylibcudf/reduce.pyi
+++ b/python/pylibcudf/pylibcudf/reduce.pyi
@@ -1,6 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.aggregation import Aggregation
 from pylibcudf.column import Column
@@ -8,8 +8,8 @@ from pylibcudf.scalar import Scalar
 from pylibcudf.types import DataType
 
 class ScanType(IntEnum):
-    INCLUSIVE = auto()
-    EXCLUSIVE = auto()
+    INCLUSIVE = ...
+    EXCLUSIVE = ...
 
 def reduce(col: Column, agg: Aggregation, data_type: DataType) -> Scalar: ...
 def scan(col: Column, agg: Aggregation, inclusive: ScanType) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/replace.pyi b/python/pylibcudf/pylibcudf/replace.pyi
index b4d65e76f76..eed7a2a6c52 100644
--- a/python/pylibcudf/pylibcudf/replace.pyi
+++ b/python/pylibcudf/pylibcudf/replace.pyi
@@ -1,13 +1,13 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 
 class ReplacePolicy(IntEnum):
-    PRECEDING = auto()
-    FOLLOWING = auto()
+    PRECEDING = ...
+    FOLLOWING = ...
 
 def replace_nulls(
     source_column: Column, replacement: Column | Scalar | ReplacePolicy
diff --git a/python/pylibcudf/pylibcudf/round.pyi b/python/pylibcudf/pylibcudf/round.pyi
index 0099ad3c510..410cf5de586 100644
--- a/python/pylibcudf/pylibcudf/round.pyi
+++ b/python/pylibcudf/pylibcudf/round.pyi
@@ -1,12 +1,12 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 
 class RoundingMethod(IntEnum):
-    HALF_UP = auto()
-    HALF_EVEN = auto()
+    HALF_UP = ...
+    HALF_EVEN = ...
 
 def round(
     source: Column,
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi
index fe1cf6ee4fc..5db6875b7c0 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pyi
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi
@@ -1,16 +1,16 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.table import Table
 from pylibcudf.types import NanEquality, NanPolicy, NullEquality, NullPolicy
 
 class DuplicateKeepOption(IntEnum):
-    KEEP_ANY = auto()
-    KEEP_FIRST = auto()
-    KEEP_LAST = auto()
-    KEEP_NONE = auto()
+    KEEP_ANY = ...
+    KEEP_FIRST = ...
+    KEEP_LAST = ...
+    KEEP_NONE = ...
 
 def drop_nulls(
     source_table: Table, keys: list[int], keep_threshold: int
diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyi b/python/pylibcudf/pylibcudf/strings/char_types.pyi
index 1e3f57082ef..daa36cbb68d 100644
--- a/python/pylibcudf/pylibcudf/strings/char_types.pyi
+++ b/python/pylibcudf/pylibcudf/strings/char_types.pyi
@@ -1,21 +1,21 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 
 class StringCharacterTypes(IntEnum):
-    DECIMAL = auto()
-    NUMERIC = auto()
-    DIGIT = auto()
-    ALPHA = auto()
-    SPACE = auto()
-    UPPER = auto()
-    LOWER = auto()
-    ALPHANUM = auto()
-    CASE_TYPES = auto()
-    ALL_TYPES = auto()
+    DECIMAL = ...
+    NUMERIC = ...
+    DIGIT = ...
+    ALPHA = ...
+    SPACE = ...
+    UPPER = ...
+    LOWER = ...
+    ALPHANUM = ...
+    CASE_TYPES = ...
+    ALL_TYPES = ...
 
 def all_characters_of_type(
     source_strings: Column,
diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi
index 0833ac006c0..d5780c0a923 100644
--- a/python/pylibcudf/pylibcudf/strings/combine.pyi
+++ b/python/pylibcudf/pylibcudf/strings/combine.pyi
@@ -1,18 +1,18 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
 
 class SeparatorOnNulls(IntEnum):
-    YES = auto()
-    NO = auto()
+    YES = ...
+    NO = ...
 
 class OutputIfEmptyList(IntEnum):
-    EMPTY_STRING = auto()
-    NULL_ELEMENT = auto()
+    EMPTY_STRING = ...
+    NULL_ELEMENT = ...
 
 def concatenate(
     strings_columns: Table,
diff --git a/python/pylibcudf/pylibcudf/strings/regex_flags.pyi b/python/pylibcudf/pylibcudf/strings/regex_flags.pyi
index 2576b5575de..c551cebf181 100644
--- a/python/pylibcudf/pylibcudf/strings/regex_flags.pyi
+++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pyi
@@ -1,7 +1,7 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from enum import IntEnum, auto
+from enum import IntEnum
 
 class RegexFlags(IntEnum):
-    DEFAULT = auto()
-    MULTILINE = auto()
-    DOTALL = auto()
+    DEFAULT = ...
+    MULTILINE = ...
+    DOTALL = ...
diff --git a/python/pylibcudf/pylibcudf/strings/side_type.pyi b/python/pylibcudf/pylibcudf/strings/side_type.pyi
index 15083120be0..532edd60077 100644
--- a/python/pylibcudf/pylibcudf/strings/side_type.pyi
+++ b/python/pylibcudf/pylibcudf/strings/side_type.pyi
@@ -1,7 +1,7 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from enum import IntEnum, auto
+from enum import IntEnum
 
 class SideType(IntEnum):
-    LEFT = auto()
-    RIGHT = auto()
-    BOTH = auto()
+    LEFT = ...
+    RIGHT = ...
+    BOTH = ...
diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyi b/python/pylibcudf/pylibcudf/strings/translate.pyi
index adeafcc2641..7158b6eb05c 100644
--- a/python/pylibcudf/pylibcudf/strings/translate.pyi
+++ b/python/pylibcudf/pylibcudf/strings/translate.pyi
@@ -1,13 +1,13 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from collections.abc import Mapping
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
 
 class FilterType(IntEnum):
-    KEEP = auto()
-    REMOVE = auto()
+    KEEP = ...
+    REMOVE = ...
 
 def translate(
     input: Column, chars_table: Mapping[int | str, int | str]
diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi
index ad4c9146feb..e84075215c3 100644
--- a/python/pylibcudf/pylibcudf/table.pyi
+++ b/python/pylibcudf/pylibcudf/table.pyi
@@ -3,7 +3,7 @@
 from pylibcudf.column import Column
 
 class Table:
-    def __init__(self, columns: list[Column]) -> None: ...
+    def __init__(self, columns: list[Column]): ...
     def num_columns(self) -> int: ...
     def num_rows(self) -> int: ...
     def columns(self) -> list[Column]: ...
diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi
index ce000cafe9d..5ff5f9ac273 100644
--- a/python/pylibcudf/pylibcudf/types.pyi
+++ b/python/pylibcudf/pylibcudf/types.pyi
@@ -1,81 +1,81 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from enum import IntEnum, auto
+from enum import IntEnum
 
 class Interpolation(IntEnum):
-    LINEAR = auto()
-    LOWER = auto()
-    HIGHER = auto()
-    MIDPOINT = auto()
-    NEAREST = auto()
+    LINEAR = ...
+    LOWER = ...
+    HIGHER = ...
+    MIDPOINT = ...
+    NEAREST = ...
 
 class MaskState(IntEnum):
-    UNALLOCATED = auto()
-    UNINITIALIZED = auto()
-    ALL_VALID = auto()
-    ALL_NULL = auto()
+    UNALLOCATED = ...
+    UNINITIALIZED = ...
+    ALL_VALID = ...
+    ALL_NULL = ...
 
 class NanEquality(IntEnum):
-    ALL_EQUAL = auto()
-    UNEQUAL = auto()
+    ALL_EQUAL = ...
+    UNEQUAL = ...
 
 class NanPolicy(IntEnum):
-    NAN_IS_NULL = auto()
-    NAN_IS_VALID = auto()
+    NAN_IS_NULL = ...
+    NAN_IS_VALID = ...
 
 class NullEquality(IntEnum):
-    EQUAL = auto()
-    UNEQUAL = auto()
+    EQUAL = ...
+    UNEQUAL = ...
 
 class NullOrder(IntEnum):
-    AFTER = auto()
-    BEFORE = auto()
+    AFTER = ...
+    BEFORE = ...
 
 class NullPolicy(IntEnum):
-    EXCLUDE = auto()
-    INCLUDE = auto()
+    EXCLUDE = ...
+    INCLUDE = ...
 
 class Order(IntEnum):
-    ASCENDING = auto()
-    DESCENDING = auto()
+    ASCENDING = ...
+    DESCENDING = ...
 
 class Sorted(IntEnum):
-    NO = auto()
-    YES = auto()
+    NO = ...
+    YES = ...
 
 class TypeId(IntEnum):
-    EMPTY = auto()
-    INT8 = auto()
-    INT16 = auto()
-    INT32 = auto()
-    INT64 = auto()
-    UINT8 = auto()
-    UINT16 = auto()
-    UINT32 = auto()
-    UINT64 = auto()
-    FLOAT32 = auto()
-    FLOAT64 = auto()
-    BOOL8 = auto()
-    TIMESTAMP_DAYS = auto()
-    TIMESTAMP_SECONDS = auto()
-    TIMESTAMP_MILLISECONDS = auto()
-    TIMESTAMP_MICROSECONDS = auto()
-    TIMESTAMP_NANOSECONDS = auto()
-    DURATION_DAYS = auto()
-    DURATION_SECONDS = auto()
-    DURATION_MILLISECONDS = auto()
-    DURATION_MICROSECONDS = auto()
-    DURATION_NANOSECONDS = auto()
-    DICTIONARY32 = auto()
-    STRING = auto()
-    LIST = auto()
-    DECIMAL32 = auto()
-    DECIMAL64 = auto()
-    DECIMAL128 = auto()
-    STRUCT = auto()
-    NUM_TYPE_IDS = auto()
+    EMPTY = ...
+    INT8 = ...
+    INT16 = ...
+    INT32 = ...
+    INT64 = ...
+    UINT8 = ...
+    UINT16 = ...
+    UINT32 = ...
+    UINT64 = ...
+    FLOAT32 = ...
+    FLOAT64 = ...
+    BOOL8 = ...
+    TIMESTAMP_DAYS = ...
+    TIMESTAMP_SECONDS = ...
+    TIMESTAMP_MILLISECONDS = ...
+    TIMESTAMP_MICROSECONDS = ...
+    TIMESTAMP_NANOSECONDS = ...
+    DURATION_DAYS = ...
+    DURATION_SECONDS = ...
+    DURATION_MILLISECONDS = ...
+    DURATION_MICROSECONDS = ...
+    DURATION_NANOSECONDS = ...
+    DICTIONARY32 = ...
+    STRING = ...
+    LIST = ...
+    DECIMAL32 = ...
+    DECIMAL64 = ...
+    DECIMAL128 = ...
+    STRUCT = ...
+    NUM_TYPE_IDS = ...
 
 class DataType:
-    def __init__(self, type_id: TypeId, scale: int = 0) -> None: ...
+    def __init__(self, type_id: TypeId, scale: int = 0): ...
     def id(self) -> TypeId: ...
     def scale(self) -> int: ...
 
diff --git a/python/pylibcudf/pylibcudf/unary.pyi b/python/pylibcudf/pylibcudf/unary.pyi
index d3095e56528..7aa23b618f4 100644
--- a/python/pylibcudf/pylibcudf/unary.pyi
+++ b/python/pylibcudf/pylibcudf/unary.pyi
@@ -1,33 +1,33 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from enum import IntEnum, auto
+from enum import IntEnum
 
 from pylibcudf.column import Column
 from pylibcudf.types import DataType
 
 class UnaryOperator(IntEnum):
-    SIN = auto()
-    COS = auto()
-    TAN = auto()
-    ARCSIN = auto()
-    ARCCOS = auto()
-    ARCTAN = auto()
-    SINH = auto()
-    COSH = auto()
-    TANH = auto()
-    ARCSINH = auto()
-    ARCCOSH = auto()
-    ARCTANH = auto()
-    EXP = auto()
-    LOG = auto()
-    SQRT = auto()
-    CBRT = auto()
-    CEIL = auto()
-    FLOOR = auto()
-    ABS = auto()
-    RINT = auto()
-    BIT_INVERT = auto()
-    NOT = auto()
+    SIN = ...
+    COS = ...
+    TAN = ...
+    ARCSIN = ...
+    ARCCOS = ...
+    ARCTAN = ...
+    SINH = ...
+    COSH = ...
+    TANH = ...
+    ARCSINH = ...
+    ARCCOSH = ...
+    ARCTANH = ...
+    EXP = ...
+    LOG = ...
+    SQRT = ...
+    CBRT = ...
+    CEIL = ...
+    FLOOR = ...
+    ABS = ...
+    RINT = ...
+    BIT_INVERT = ...
+    NOT = ...
 
 def unary_operation(input: Column, op: UnaryOperator) -> Column: ...
 def is_null(input: Column) -> Column: ...

From 635da9ee9e349f1980bb6fba24a30811e642bfa4 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 8 Nov 2024 10:47:06 +0000
Subject: [PATCH 12/16] pylibcudf: enable flake8-tidy/type-checking rules

---
 python/pylibcudf/pyproject.toml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml
index 7ddb70b180e..d4b7eff1149 100644
--- a/python/pylibcudf/pyproject.toml
+++ b/python/pylibcudf/pyproject.toml
@@ -56,6 +56,12 @@ Documentation = "https://docs.rapids.ai/api/cudf/stable/"
 [tool.ruff]
 extend = "../../pyproject.toml"
 
+[tool.ruff.lint]
+extend-select = [
+  "TCH", # flake8-type-checking
+  "TID", # flake8-tidy-imports
+]
+
 [tool.ruff.lint.isort]
 combine-as-imports = true
 known-first-party = ["pylibcudf"]

From 1b68bfebf67acfa42d9aaa54d9197c429422bd3c Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 8 Nov 2024 11:22:29 +0000
Subject: [PATCH 13/16] Tighten stub annotations missed in earlier commits

---
 python/pylibcudf/pylibcudf/hashing.pyi           | 16 ++++++----------
 python/pylibcudf/pylibcudf/interop.pyi           |  4 +++-
 python/pylibcudf/pylibcudf/io/types.pyi          |  6 +++---
 .../pylibcudf/nvtext/byte_pair_encode.pyi        |  2 +-
 python/pylibcudf/pylibcudf/sorting.pyi           |  8 +++++---
 python/pylibcudf/pylibcudf/types.pyi             |  5 +++--
 6 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/hashing.pyi b/python/pylibcudf/pylibcudf/hashing.pyi
index 69a72aa4783..a849f5d0729 100644
--- a/python/pylibcudf/pylibcudf/hashing.pyi
+++ b/python/pylibcudf/pylibcudf/hashing.pyi
@@ -1,19 +1,15 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
+from typing import Final
+
 from pylibcudf.column import Column
 from pylibcudf.table import Table
 
-LIBCUDF_DEFAULT_HASH_SEED: int
+LIBCUDF_DEFAULT_HASH_SEED: Final[int]
 
-def murmurhash3_x86_32(
-    input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED
-) -> Column: ...
-def murmurhash3_x64_128(
-    input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED
-) -> Table: ...
-def xxhash_64(
-    input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED
-) -> Column: ...
+def murmurhash3_x86_32(input: Table, seed: int = ...) -> Column: ...
+def murmurhash3_x64_128(input: Table, seed: int = ...) -> Table: ...
+def xxhash_64(input: Table, seed: int = ...) -> Column: ...
 def md5(input: Table) -> Column: ...
 def sha1(input: Table) -> Column: ...
 def sha224(input: Table) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/interop.pyi b/python/pylibcudf/pylibcudf/interop.pyi
index cd8cb0c4a2c..5d1696c4f4f 100644
--- a/python/pylibcudf/pylibcudf/interop.pyi
+++ b/python/pylibcudf/pylibcudf/interop.pyi
@@ -1,6 +1,7 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
 from collections.abc import Iterable, Mapping
+from dataclasses import dataclass
 from typing import Any, overload
 
 import pyarrow as pa
@@ -10,6 +11,7 @@ from pylibcudf.scalar import Scalar
 from pylibcudf.table import Table
 from pylibcudf.types import DataType
 
+@dataclass
 class ColumnMetadata:
     name: str
     children_meta: list[ColumnMetadata]
@@ -29,7 +31,7 @@ def to_arrow(
     obj: DataType,
     *,
     precision: int | None = None,
-    fields: Iterable[pa.Field | tuple[str, pa.DataType]]
+    fields: Iterable[pa.Field[pa.DataType] | tuple[str, pa.DataType]]
     | Mapping[str, pa.DataType]
     | None = None,
     value_type: pa.DataType | None = None,
diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi
index 3ddf21f84dd..a4f4fc13bdc 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyi
+++ b/python/pylibcudf/pylibcudf/io/types.pyi
@@ -3,7 +3,7 @@ import io
 import os
 from collections.abc import Mapping
 from enum import IntEnum
-from typing import Literal, TypeAlias, overload
+from typing import Any, Literal, TypeAlias, overload
 
 from pylibcudf.column import Column
 from pylibcudf.io.datasource import Datasource
@@ -83,13 +83,13 @@ class TableWithMetadata:
 
 class SourceInfo:
     def __init__(
-        self, sources: list[str] | list[os.PathLike] | list[Datasource]
+        self, sources: list[str] | list[os.PathLike[Any]] | list[Datasource]
     ) -> None: ...
 
 class SinkInfo:
     def __init__(
         self,
-        sinks: list[os.PathLike]
+        sinks: list[os.PathLike[Any]]
         | list[io.StringIO]
         | list[io.BytesIO]
         | list[io.TextIOBase]
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
index a75714d9648..ca39aa16d7e 100644
--- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi
@@ -8,4 +8,4 @@ class BPEMergePairs:
 
 def byte_pair_encoding(
     input: Column, merge_pairs: BPEMergePairs, separator: Scalar | None = None
-): ...
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi
index 60a42023a6a..4ba0f07e986 100644
--- a/python/pylibcudf/pylibcudf/sorting.pyi
+++ b/python/pylibcudf/pylibcudf/sorting.pyi
@@ -6,12 +6,14 @@ from pylibcudf.table import Table
 from pylibcudf.types import NullOrder, NullPolicy, Order
 
 def sorted_order(
-    source_table: Table, column_order: list, null_precedence: list
+    source_table: Table,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
 ) -> Column: ...
 def stable_sorted_order(
     source_table: Table,
-    column_order: list,
-    null_precedence: list,
+    column_order: list[Order],
+    null_precedence: list[NullOrder],
 ) -> Column: ...
 def rank(
     input_view: Column,
diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi
index 5ff5f9ac273..c67555dfeb4 100644
--- a/python/pylibcudf/pylibcudf/types.pyi
+++ b/python/pylibcudf/pylibcudf/types.pyi
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 from enum import IntEnum
+from typing import Final
 
 class Interpolation(IntEnum):
     LINEAR = ...
@@ -81,5 +82,5 @@ class DataType:
 
 def size_of(dtype: DataType) -> int: ...
 
-SIZE_TYPE: DataType
-SIZE_TYPE_ID: TypeId
+SIZE_TYPE: Final[DataType]
+SIZE_TYPE_ID: Final[TypeId]

From 42be70bae2ccb59d79e1c4c69fa6c1574f2c574d Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 8 Nov 2024 11:30:41 +0000
Subject: [PATCH 14/16] Fix NULLIFY_OUTPUT_ROW typo in cudf._lib.lists

---
 python/cudf/cudf/_lib/lists.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index f28afd10f86..9a2aa4a6130 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -127,7 +127,7 @@ def concatenate_list_elements(Column input_column, dropna=False):
             input_column.to_pylibcudf(mode="read"),
             plc.lists.ConcatenateNullPolicy.IGNORE
             if dropna
-            else plc.lists.ConcatenateNullPolicy.NULLIFTY_OUTPUT_ROW,
+            else plc.lists.ConcatenateNullPolicy.NULLIFY_OUTPUT_ROW,
         )
     )
 

From 9f3f5fbaa0dc46c85f94a960b59d593a9630d51a Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 8 Nov 2024 11:45:04 +0000
Subject: [PATCH 15/16] No need for __init__.pyi

---
 python/pylibcudf/pylibcudf/__init__.pyi       | 95 -------------------
 python/pylibcudf/pylibcudf/io/__init__.pyi    | 27 ------
 .../pylibcudf/pylibcudf/nvtext/__init__.pyi   | 29 ------
 .../pylibcudf/pylibcudf/strings/__init__.pyi  | 55 -----------
 .../pylibcudf/strings/convert/__init__.pyi    | 24 -----
 .../pylibcudf/strings/split/__init__.pyi      |  4 -
 6 files changed, 234 deletions(-)
 delete mode 100644 python/pylibcudf/pylibcudf/__init__.pyi
 delete mode 100644 python/pylibcudf/pylibcudf/io/__init__.pyi
 delete mode 100644 python/pylibcudf/pylibcudf/nvtext/__init__.pyi
 delete mode 100644 python/pylibcudf/pylibcudf/strings/__init__.pyi
 delete mode 100644 python/pylibcudf/pylibcudf/strings/convert/__init__.pyi
 delete mode 100644 python/pylibcudf/pylibcudf/strings/split/__init__.pyi

diff --git a/python/pylibcudf/pylibcudf/__init__.pyi b/python/pylibcudf/pylibcudf/__init__.pyi
deleted file mode 100644
index a728647f82f..00000000000
--- a/python/pylibcudf/pylibcudf/__init__.pyi
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-# If libcudf was installed as a wheel, we must request it to load the library symbols.
-# Otherwise, we assume that the library was installed in a system path that ld can find.
-from pylibcudf import (
-    aggregation,
-    binaryop,
-    column_factories,
-    concatenate,
-    contiguous_split,
-    copying,
-    datetime,
-    experimental,
-    expressions,
-    filling,
-    groupby,
-    hashing,
-    interop,
-    io,
-    join,
-    json,
-    labeling,
-    lists,
-    merge,
-    null_mask,
-    nvtext,
-    partitioning,
-    quantiles,
-    reduce,
-    replace,
-    reshape,
-    rolling,
-    round,
-    search,
-    sorting,
-    stream_compaction,
-    strings,
-    traits,
-    transform,
-    transpose,
-    types,
-    unary,
-)
-from pylibcudf.column import Column
-from pylibcudf.gpumemoryview import gpumemoryview
-from pylibcudf.scalar import Scalar
-from pylibcudf.table import Table
-from pylibcudf.types import DataType, MaskState, TypeId
-
-__all__ = [
-    "Column",
-    "DataType",
-    "MaskState",
-    "Scalar",
-    "Table",
-    "TypeId",
-    "aggregation",
-    "binaryop",
-    "column_factories",
-    "contiguous_split",
-    "concatenate",
-    "copying",
-    "datetime",
-    "experimental",
-    "expressions",
-    "filling",
-    "gpumemoryview",
-    "groupby",
-    "hashing",
-    "interop",
-    "io",
-    "join",
-    "json",
-    "labeling",
-    "lists",
-    "merge",
-    "null_mask",
-    "partitioning",
-    "quantiles",
-    "reduce",
-    "replace",
-    "reshape",
-    "rolling",
-    "round",
-    "search",
-    "stream_compaction",
-    "strings",
-    "sorting",
-    "traits",
-    "transform",
-    "transpose",
-    "types",
-    "unary",
-    "nvtext",
-]
diff --git a/python/pylibcudf/pylibcudf/io/__init__.pyi b/python/pylibcudf/pylibcudf/io/__init__.pyi
deleted file mode 100644
index e1a93ce08e3..00000000000
--- a/python/pylibcudf/pylibcudf/io/__init__.pyi
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-from pylibcudf.io import (
-    avro,
-    csv,
-    datasource,
-    json,
-    orc,
-    parquet,
-    timezone,
-    types,
-)
-from pylibcudf.io.types import SinkInfo, SourceInfo, TableWithMetadata
-
-__all__ = [
-    "avro",
-    "csv",
-    "datasource",
-    "json",
-    "orc",
-    "parquet",
-    "timezone",
-    "types",
-    "SinkInfo",
-    "SourceInfo",
-    "TableWithMetadata",
-]
diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pyi b/python/pylibcudf/pylibcudf/nvtext/__init__.pyi
deleted file mode 100644
index aa51eff6bf5..00000000000
--- a/python/pylibcudf/pylibcudf/nvtext/__init__.pyi
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-from pylibcudf.nvtext import (
-    byte_pair_encode,
-    edit_distance,
-    generate_ngrams,
-    jaccard,
-    minhash,
-    ngrams_tokenize,
-    normalize,
-    replace,
-    stemmer,
-    subword_tokenize,
-    tokenize,
-)
-
-__all__ = [
-    "byte_pair_encode",
-    "edit_distance",
-    "generate_ngrams",
-    "jaccard",
-    "minhash",
-    "ngrams_tokenize",
-    "normalize",
-    "replace",
-    "stemmer",
-    "subword_tokenize",
-    "tokenize",
-]
diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pyi b/python/pylibcudf/pylibcudf/strings/__init__.pyi
deleted file mode 100644
index 492ed311c28..00000000000
--- a/python/pylibcudf/pylibcudf/strings/__init__.pyi
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-from pylibcudf.strings import (
-    attributes,
-    capitalize,
-    case,
-    char_types,
-    combine,
-    contains,
-    convert,
-    extract,
-    find,
-    find_multiple,
-    findall,
-    padding,
-    regex_flags,
-    regex_program,
-    repeat,
-    replace,
-    replace_re,
-    side_type,
-    slice,
-    split,
-    strip,
-    translate,
-    wrap,
-)
-from pylibcudf.strings.side_type import SideType
-
-__all__ = [
-    "SideType",
-    "attributes",
-    "capitalize",
-    "case",
-    "char_types",
-    "combine",
-    "contains",
-    "convert",
-    "extract",
-    "find",
-    "find_multiple",
-    "findall",
-    "padding",
-    "regex_flags",
-    "regex_program",
-    "repeat",
-    "replace",
-    "replace_re",
-    "side_type",
-    "slice",
-    "split",
-    "strip",
-    "translate",
-    "wrap",
-]
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi b/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi
deleted file mode 100644
index edf615376c2..00000000000
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-from pylibcudf.strings.convert import (
-    convert_booleans,
-    convert_datetime,
-    convert_durations,
-    convert_fixed_point,
-    convert_floats,
-    convert_integers,
-    convert_ipv4,
-    convert_lists,
-    convert_urls,
-)
-
-__all__ = [
-    "convert_booleans",
-    "convert_datetime",
-    "convert_durations",
-    "convert_fixed_point",
-    "convert_floats",
-    "convert_integers",
-    "convert_ipv4",
-    "convert_lists",
-    "convert_urls",
-]
diff --git a/python/pylibcudf/pylibcudf/strings/split/__init__.pyi b/python/pylibcudf/pylibcudf/strings/split/__init__.pyi
deleted file mode 100644
index c44bce048b6..00000000000
--- a/python/pylibcudf/pylibcudf/strings/split/__init__.pyi
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-from pylibcudf.strings.split import partition, split
-
-__all__ = ["partition", "split"]

From 1dcf8ec54bae8b017436fc7e43116b3917698f35 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 8 Nov 2024 18:39:42 +0000
Subject: [PATCH 16/16] Minor fixes from work in progress checker

---
 python/pylibcudf/pylibcudf/aggregation.pyi                  | 1 +
 python/pylibcudf/pylibcudf/contiguous_split.pyi             | 1 +
 python/pylibcudf/pylibcudf/datetime.pyi                     | 2 +-
 python/pylibcudf/pylibcudf/expressions.pyi                  | 3 ++-
 python/pylibcudf/pylibcudf/filling.pyi                      | 2 +-
 python/pylibcudf/pylibcudf/groupby.pyi                      | 2 +-
 python/pylibcudf/pylibcudf/io/datasource.pyi                | 2 +-
 python/pylibcudf/pylibcudf/io/json.pyi                      | 2 +-
 python/pylibcudf/pylibcudf/io/orc.pyi                       | 2 ++
 python/pylibcudf/pylibcudf/json.pyi                         | 6 +++---
 python/pylibcudf/pylibcudf/lists.pyi                        | 2 +-
 python/pylibcudf/pylibcudf/sorting.pyi                      | 2 +-
 python/pylibcudf/pylibcudf/stream_compaction.pyi            | 4 ++--
 python/pylibcudf/pylibcudf/strings/capitalize.pyi           | 6 +++++-
 python/pylibcudf/pylibcudf/strings/combine.pyi              | 2 +-
 python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi | 4 ++--
 python/pylibcudf/pylibcudf/strings/regex_program.pyi        | 1 +
 python/pylibcudf/pylibcudf/table.pyi                        | 2 +-
 python/pylibcudf/pylibcudf/transform.pyi                    | 2 +-
 python/pylibcudf/pylibcudf/types.pyi                        | 2 +-
 20 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/aggregation.pyi b/python/pylibcudf/pylibcudf/aggregation.pyi
index 230249995a5..a59e2a9dc93 100644
--- a/python/pylibcudf/pylibcudf/aggregation.pyi
+++ b/python/pylibcudf/pylibcudf/aggregation.pyi
@@ -65,6 +65,7 @@ class UdfType(IntEnum):
     PTX = ...
 
 class Aggregation:
+    def __init__(self): ...
     def kind(self) -> Kind: ...
 
 def sum() -> Aggregation: ...
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyi b/python/pylibcudf/pylibcudf/contiguous_split.pyi
index 66e6c5e50c5..dd6328fbf23 100644
--- a/python/pylibcudf/pylibcudf/contiguous_split.pyi
+++ b/python/pylibcudf/pylibcudf/contiguous_split.pyi
@@ -4,6 +4,7 @@ from pylibcudf.gpumemoryview import gpumemoryview
 from pylibcudf.table import Table
 
 class PackedColumns:
+    def __init__(self): ...
     def release(self) -> tuple[memoryview, gpumemoryview]: ...
 
 def pack(input: Table) -> PackedColumns: ...
diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi
index 91df1bfb92b..6a3ae7953d9 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyi
+++ b/python/pylibcudf/pylibcudf/datetime.pyi
@@ -36,7 +36,7 @@ def ceil_datetimes(input: Column, freq: RoundingFrequency) -> Column: ...
 def floor_datetimes(input: Column, freq: RoundingFrequency) -> Column: ...
 def round_datetimes(input: Column, freq: RoundingFrequency) -> Column: ...
 def add_calendrical_months(
-    timestamps: Column, months: Column | Scalar
+    input: Column, months: Column | Scalar
 ) -> Column: ...
 def day_of_year(input: Column) -> Column: ...
 def is_leap_year(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/expressions.pyi b/python/pylibcudf/pylibcudf/expressions.pyi
index 5b5c6755392..12b473d8605 100644
--- a/python/pylibcudf/pylibcudf/expressions.pyi
+++ b/python/pylibcudf/pylibcudf/expressions.pyi
@@ -56,7 +56,8 @@ class ASTOperator(IntEnum):
     BIT_INVERT = ...
     NOT = ...
 
-class Expression: ...
+class Expression:
+    def __init__(self): ...
 
 class Literal(Expression):
     def __init__(self, value: Scalar): ...
diff --git a/python/pylibcudf/pylibcudf/filling.pyi b/python/pylibcudf/pylibcudf/filling.pyi
index c0534f1344b..c88cdd8b16f 100644
--- a/python/pylibcudf/pylibcudf/filling.pyi
+++ b/python/pylibcudf/pylibcudf/filling.pyi
@@ -8,7 +8,7 @@ def fill(
     destination: Column, begin: int, end: int, value: Scalar
 ) -> Column: ...
 def fill_in_place(
-    destination: Column, c_begin: int, c_end: int, value: Scalar
+    destination: Column, begin: int, end: int, value: Scalar
 ) -> None: ...
 def sequence(size: int, init: Scalar, step: Scalar) -> Column: ...
 def repeat(input_table: Table, count: Column | int) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/groupby.pyi b/python/pylibcudf/pylibcudf/groupby.pyi
index e933cafdeef..883ad6e34cf 100644
--- a/python/pylibcudf/pylibcudf/groupby.pyi
+++ b/python/pylibcudf/pylibcudf/groupby.pyi
@@ -31,7 +31,7 @@ class GroupBy:
         self, values: Table, offset: list[int], fill_values: list[Scalar]
     ) -> tuple[Table, Table]: ...
     def replace_nulls(
-        self, values: Table, replace_policies: list[ReplacePolicy]
+        self, value: Table, replace_policies: list[ReplacePolicy]
     ) -> tuple[Table, Table]: ...
     def get_groups(
         self, values: Table | None = None
diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyi b/python/pylibcudf/pylibcudf/io/datasource.pyi
index c4184208b0c..e52197f793b 100644
--- a/python/pylibcudf/pylibcudf/io/datasource.pyi
+++ b/python/pylibcudf/pylibcudf/io/datasource.pyi
@@ -1,4 +1,4 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
 class Datasource:
-    pass
+    def __init__(self): ...
diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi
index 33794afb208..b2bc6a43700 100644
--- a/python/pylibcudf/pylibcudf/io/json.pyi
+++ b/python/pylibcudf/pylibcudf/io/json.pyi
@@ -30,7 +30,7 @@ def read_json(
 ) -> TableWithMetadata: ...
 def write_json(
     sink_info: SinkInfo,
-    tbl: TableWithMetadata,
+    table_w_meta: TableWithMetadata,
     na_rep: str = "",
     include_nulls: bool = False,
     lines: bool = False,
diff --git a/python/pylibcudf/pylibcudf/io/orc.pyi b/python/pylibcudf/pylibcudf/io/orc.pyi
index 87ea9088b44..4cf87f1a832 100644
--- a/python/pylibcudf/pylibcudf/io/orc.pyi
+++ b/python/pylibcudf/pylibcudf/io/orc.pyi
@@ -18,6 +18,7 @@ def read_orc(
 ) -> TableWithMetadata: ...
 
 class OrcColumnStatistics:
+    def __init__(self): ...
     @property
     def number_of_values(self) -> int | None: ...
     @property
@@ -27,6 +28,7 @@ class OrcColumnStatistics:
     def get[T](self, item: str, default: None | T = None) -> T | None: ...
 
 class ParsedOrcStatistics:
+    def __init__(self): ...
     @property
     def column_names(self) -> list[str]: ...
     @property
diff --git a/python/pylibcudf/pylibcudf/json.pyi b/python/pylibcudf/pylibcudf/json.pyi
index 41872c037de..b93d4876dab 100644
--- a/python/pylibcudf/pylibcudf/json.pyi
+++ b/python/pylibcudf/pylibcudf/json.pyi
@@ -14,9 +14,9 @@ class GetJsonObjectOptions:
     def get_allow_single_quotes(self) -> bool: ...
     def get_strip_quotes_from_single_strings(self) -> bool: ...
     def get_missing_fields_as_nulls(self) -> bool: ...
-    def set_allow_single_quotes(self, value: bool) -> None: ...
-    def set_strip_quotes_from_single_strings(self, value: bool) -> None: ...
-    def set_missing_fields_as_nulls(self, value: bool) -> None: ...
+    def set_allow_single_quotes(self, val: bool) -> None: ...
+    def set_strip_quotes_from_single_strings(self, val: bool) -> None: ...
+    def set_missing_fields_as_nulls(self, val: bool) -> None: ...
 
 def get_json_object(
     col: Column, json_path: Scalar, options: GetJsonObjectOptions | None = None
diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi
index 6e86aca40aa..dff6c400638 100644
--- a/python/pylibcudf/pylibcudf/lists.pyi
+++ b/python/pylibcudf/pylibcudf/lists.pyi
@@ -64,7 +64,7 @@ def union_distinct(
     nulls_equal: NullEquality = NullEquality.EQUAL,
     nans_equal: NanEquality = NanEquality.ALL_EQUAL,
 ) -> Column: ...
-def apply_boolean_mask(input: Column, mask: Column) -> Column: ...
+def apply_boolean_mask(input: Column, boolean_mask: Column) -> Column: ...
 def distinct(
     input: Column, nulls_equal: NullEquality, nans_equal: NanEquality
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi
index 4ba0f07e986..5255d869a4d 100644
--- a/python/pylibcudf/pylibcudf/sorting.pyi
+++ b/python/pylibcudf/pylibcudf/sorting.pyi
@@ -24,7 +24,7 @@ def rank(
     percentage: bool,
 ) -> Column: ...
 def is_sorted(
-    table: Table, column_order: list[Order], null_precedence: list[NullOrder]
+    tbl: Table, column_order: list[Order], null_precedence: list[NullOrder]
 ) -> bool: ...
 def segmented_sort_by_key(
     values: Table,
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi
index 5db6875b7c0..99cade48309 100644
--- a/python/pylibcudf/pylibcudf/stream_compaction.pyi
+++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi
@@ -46,8 +46,8 @@ def stable_distinct(
     nans_equal: NanEquality,
 ) -> Table: ...
 def unique_count(
-    column: Column, null_handling: NullPolicy, nan_handling: NanPolicy
+    source: Column, null_handling: NullPolicy, nan_handling: NanPolicy
 ) -> int: ...
 def distinct_count(
-    column: Column, null_handling: NullPolicy, nan_handling: NanPolicy
+    source: Column, null_handling: NullPolicy, nan_handling: NanPolicy
 ) -> int: ...
diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyi b/python/pylibcudf/pylibcudf/strings/capitalize.pyi
index 05bf8043727..5c6689418e2 100644
--- a/python/pylibcudf/pylibcudf/strings/capitalize.pyi
+++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyi
@@ -2,7 +2,11 @@
 
 from pylibcudf.column import Column
 from pylibcudf.scalar import Scalar
+from pylibcudf.strings.char_types import StringCharacterTypes
 
 def capitalize(input: Column, delimiters: Scalar | None = None) -> Column: ...
-def title(input: Column) -> Column: ...
+def title(
+    input: Column,
+    sequence_type: StringCharacterTypes = StringCharacterTypes.ALPHA,
+) -> Column: ...
 def is_title(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi
index d5780c0a923..3094b20f141 100644
--- a/python/pylibcudf/pylibcudf/strings/combine.pyi
+++ b/python/pylibcudf/pylibcudf/strings/combine.pyi
@@ -25,7 +25,7 @@ def join_strings(
     input: Column, separator: Scalar, narep: Scalar
 ) -> Column: ...
 def join_list_elements(
-    source_strings: Column,
+    lists_strings_column: Column,
     separator: Column | Scalar,
     separator_narep: Scalar,
     string_narep: Scalar,
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
index 40321c3ae66..49b8468957c 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi
@@ -2,5 +2,5 @@
 
 from pylibcudf.column import Column
 
-def url_encode(Input: Column) -> Column: ...
-def url_decode(Input: Column) -> Column: ...
+def url_encode(input: Column) -> Column: ...
+def url_decode(input: Column) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyi b/python/pylibcudf/pylibcudf/strings/regex_program.pyi
index 6c853bcfc44..9abd6fa7802 100644
--- a/python/pylibcudf/pylibcudf/strings/regex_program.pyi
+++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyi
@@ -3,5 +3,6 @@
 from pylibcudf.strings.regex_flags import RegexFlags
 
 class RegexProgram:
+    def __init__(self): ...
     @staticmethod
     def create(pattern: str, flags: RegexFlags) -> RegexProgram: ...
diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi
index e84075215c3..5aef7e009c8 100644
--- a/python/pylibcudf/pylibcudf/table.pyi
+++ b/python/pylibcudf/pylibcudf/table.pyi
@@ -3,7 +3,7 @@
 from pylibcudf.column import Column
 
 class Table:
-    def __init__(self, columns: list[Column]): ...
+    def __init__(self, column: list[Column]): ...
     def num_columns(self) -> int: ...
     def num_rows(self) -> int: ...
     def columns(self) -> list[Column]: ...
diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi
index 103b9ec36ab..5cbd2e635f0 100644
--- a/python/pylibcudf/pylibcudf/transform.pyi
+++ b/python/pylibcudf/pylibcudf/transform.pyi
@@ -13,4 +13,4 @@ def transform(
     input: Column, unary_udf: str, output_type: DataType, is_ptx: bool
 ) -> Column: ...
 def encode(input: Table) -> tuple[Table, Column]: ...
-def one_hot_encode(input_column: Column, categories: Column) -> Table: ...
+def one_hot_encode(input: Column, categories: Column) -> Table: ...
diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi
index c67555dfeb4..c91a95414bd 100644
--- a/python/pylibcudf/pylibcudf/types.pyi
+++ b/python/pylibcudf/pylibcudf/types.pyi
@@ -80,7 +80,7 @@ class DataType:
     def id(self) -> TypeId: ...
     def scale(self) -> int: ...
 
-def size_of(dtype: DataType) -> int: ...
+def size_of(t: DataType) -> int: ...
 
 SIZE_TYPE: Final[DataType]
 SIZE_TYPE_ID: Final[TypeId]