From 41c74c05539978d20f1d5ba129242b7af85ed37d Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 6 Nov 2024 18:23:29 +0000 Subject: [PATCH 01/16] Fix pylibcudf isort sections --- python/pylibcudf/pyproject.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index a80c85a1fa8..7ddb70b180e 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -58,11 +58,10 @@ extend = "../../pyproject.toml" [tool.ruff.lint.isort] combine-as-imports = true -known-first-party = ["cudf"] -section-order = ["future", "standard-library", "third-party", "dask", "rapids", "first-party", "local-folder"] +known-first-party = ["pylibcudf"] +section-order = ["future", "standard-library", "third-party", "rapids", "first-party", "local-folder"] [tool.ruff.lint.isort.sections] -dask = ["dask", "distributed", "dask_cuda"] rapids = ["rmm"] [tool.ruff.lint.per-file-ignores] From 2bea16a9c6fdac000449aeb774e7f86223f485cf Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 6 Nov 2024 13:44:26 +0000 Subject: [PATCH 02/16] Missing pxd signature for apply_boolean_mask --- python/pylibcudf/pylibcudf/stream_compaction.pxd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd index a4f39792f0c..a20a23e2e58 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd @@ -17,6 +17,8 @@ cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold) cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold) +cpdef Table apply_boolean_mask(Table source_table, Column boolean_mask) + cpdef Table unique( Table input, list keys, From 2522e8418edc1de3f036baaeb1e8e094af678230 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 6 Nov 2024 15:41:54 +0000 Subject: [PATCH 03/16] Import quote 
style as QuoteStyle --- python/pylibcudf/pylibcudf/io/types.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 967d05e7057..e2439fbad88 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -27,6 +27,7 @@ from pylibcudf.libcudf.io.types import ( compression_type as CompressionType, # no-cython-lint column_encoding as ColumnEncoding, # no-cython-lint dictionary_policy as DictionaryPolicy, # no-cython-lint + quote_style as QuoteStyle, # no-cython-lint statistics_freq as StatisticsFreq, # no-cython-lint ) From 6232dbf510d5d8043e93420a7d101c1a838ea650 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 6 Nov 2024 13:46:06 +0000 Subject: [PATCH 04/16] Type stubs for pylibcudf --- python/pylibcudf/pylibcudf/__init__.pyi | 95 +++++++++++++++ python/pylibcudf/pylibcudf/aggregation.pyi | 109 ++++++++++++++++++ python/pylibcudf/pylibcudf/binaryop.pyi | 54 +++++++++ python/pylibcudf/pylibcudf/column.pyi | 48 ++++++++ .../pylibcudf/pylibcudf/column_factories.pyi | 20 ++++ python/pylibcudf/pylibcudf/concatenate.pyi | 8 ++ .../pylibcudf/pylibcudf/contiguous_split.pyi | 13 +++ python/pylibcudf/pylibcudf/copying.pyi | 54 +++++++++ python/pylibcudf/pylibcudf/datetime.pyi | 45 ++++++++ python/pylibcudf/pylibcudf/experimental.pyi | 5 + python/pylibcudf/pylibcudf/expressions.pyi | 78 +++++++++++++ python/pylibcudf/pylibcudf/filling.pyi | 14 +++ python/pylibcudf/pylibcudf/gpumemoryview.pyi | 9 ++ python/pylibcudf/pylibcudf/groupby.pyi | 38 ++++++ python/pylibcudf/pylibcudf/hashing.pyi | 22 ++++ python/pylibcudf/pylibcudf/interop.pyi | 50 ++++++++ python/pylibcudf/pylibcudf/io/__init__.pyi | 27 +++++ python/pylibcudf/pylibcudf/io/avro.pyi | 11 ++ python/pylibcudf/pylibcudf/io/csv.pyi | 54 +++++++++ python/pylibcudf/pylibcudf/io/datasource.pyi | 4 + python/pylibcudf/pylibcudf/io/json.pyi | 50 ++++++++ python/pylibcudf/pylibcudf/io/orc.pyi | 39 
+++++++ python/pylibcudf/pylibcudf/io/parquet.pyi | 36 ++++++ python/pylibcudf/pylibcudf/io/timezone.pyi | 7 ++ python/pylibcudf/pylibcudf/io/types.pyi | 97 ++++++++++++++++ python/pylibcudf/pylibcudf/join.pyi | 78 +++++++++++++ python/pylibcudf/pylibcudf/json.pyi | 23 ++++ python/pylibcudf/pylibcudf/labeling.pyi | 10 ++ python/pylibcudf/pylibcudf/lists.pyi | 42 +++++++ python/pylibcudf/pylibcudf/merge.pyi | 11 ++ python/pylibcudf/pylibcudf/null_mask.pyi | 14 +++ .../pylibcudf/pylibcudf/nvtext/__init__.pyi | 29 +++++ .../pylibcudf/nvtext/byte_pair_encode.pyi | 11 ++ .../pylibcudf/nvtext/edit_distance.pyi | 6 + .../pylibcudf/nvtext/generate_ngrams.pyi | 10 ++ python/pylibcudf/pylibcudf/nvtext/jaccard.pyi | 5 + python/pylibcudf/pylibcudf/nvtext/minhash.pyi | 13 +++ .../pylibcudf/nvtext/ngrams_tokenize.pyi | 8 ++ .../pylibcudf/pylibcudf/nvtext/normalize.pyi | 6 + python/pylibcudf/pylibcudf/nvtext/replace.pyi | 17 +++ python/pylibcudf/pylibcudf/nvtext/stemmer.pyi | 8 ++ .../pylibcudf/nvtext/subword_tokenize.pyi | 15 +++ .../pylibcudf/pylibcudf/nvtext/tokenize.pyi | 26 +++++ python/pylibcudf/pylibcudf/partitioning.pyi | 14 +++ python/pylibcudf/pylibcudf/py.typed | 0 python/pylibcudf/pylibcudf/quantiles.pyi | 23 ++++ python/pylibcudf/pylibcudf/reduce.pyi | 16 +++ python/pylibcudf/pylibcudf/replace.pyi | 29 +++++ python/pylibcudf/pylibcudf/reshape.pyi | 7 ++ python/pylibcudf/pylibcudf/rolling.pyi | 12 ++ python/pylibcudf/pylibcudf/round.pyi | 15 +++ python/pylibcudf/pylibcudf/scalar.pyi | 10 ++ python/pylibcudf/pylibcudf/search.pyi | 19 +++ python/pylibcudf/pylibcudf/sorting.pyi | 62 ++++++++++ .../pylibcudf/pylibcudf/stream_compaction.pyi | 53 +++++++++ .../pylibcudf/pylibcudf/strings/__init__.pyi | 55 +++++++++ .../pylibcudf/strings/attributes.pyi | 7 ++ .../pylibcudf/strings/capitalize.pyi | 8 ++ python/pylibcudf/pylibcudf/strings/case.pyi | 7 ++ .../pylibcudf/strings/char_types.pyi | 30 +++++ .../pylibcudf/pylibcudf/strings/combine.pyi | 34 ++++++ 
.../pylibcudf/pylibcudf/strings/contains.pyi | 14 +++ .../pylibcudf/strings/convert/__init__.py | 12 ++ .../pylibcudf/strings/convert/__init__.pyi | 24 ++++ .../strings/convert/convert_booleans.pyi | 9 ++ .../strings/convert/convert_datetime.pyi | 12 ++ .../strings/convert/convert_durations.pyi | 9 ++ .../strings/convert/convert_fixed_point.pyi | 10 ++ .../strings/convert/convert_floats.pyi | 8 ++ .../strings/convert/convert_integers.pyi | 11 ++ .../strings/convert/convert_ipv4.pyi | 7 ++ .../strings/convert/convert_lists.pyi | 10 ++ .../strings/convert/convert_urls.pyi | 6 + .../pylibcudf/pylibcudf/strings/extract.pyi | 8 ++ python/pylibcudf/pylibcudf/strings/find.pyi | 14 +++ .../pylibcudf/strings/find_multiple.pyi | 5 + .../pylibcudf/pylibcudf/strings/findall.pyi | 7 ++ .../pylibcudf/pylibcudf/strings/padding.pyi | 9 ++ .../pylibcudf/strings/regex_flags.pyi | 7 ++ .../pylibcudf/strings/regex_program.pyi | 7 ++ python/pylibcudf/pylibcudf/strings/repeat.pyi | 5 + .../pylibcudf/pylibcudf/strings/replace.pyi | 14 +++ .../pylibcudf/strings/replace_re.pyi | 27 +++++ .../pylibcudf/pylibcudf/strings/side_type.pyi | 7 ++ python/pylibcudf/pylibcudf/strings/slice.pyi | 11 ++ .../pylibcudf/strings/split/__init__.py | 2 + .../pylibcudf/strings/split/__init__.pyi | 4 + .../pylibcudf/strings/split/partition.pyi | 8 ++ .../pylibcudf/strings/split/split.pyi | 27 +++++ python/pylibcudf/pylibcudf/strings/strip.pyi | 11 ++ .../pylibcudf/pylibcudf/strings/translate.pyi | 20 ++++ python/pylibcudf/pylibcudf/strings/wrap.pyi | 5 + python/pylibcudf/pylibcudf/table.pyi | 9 ++ python/pylibcudf/pylibcudf/traits.pyi | 22 ++++ python/pylibcudf/pylibcudf/transform.pyi | 16 +++ python/pylibcudf/pylibcudf/transpose.pyi | 4 + python/pylibcudf/pylibcudf/types.pyi | 85 ++++++++++++++ python/pylibcudf/pylibcudf/unary.pyi | 38 ++++++ 98 files changed, 2224 insertions(+) create mode 100644 python/pylibcudf/pylibcudf/__init__.pyi create mode 100644 python/pylibcudf/pylibcudf/aggregation.pyi create 
mode 100644 python/pylibcudf/pylibcudf/binaryop.pyi create mode 100644 python/pylibcudf/pylibcudf/column.pyi create mode 100644 python/pylibcudf/pylibcudf/column_factories.pyi create mode 100644 python/pylibcudf/pylibcudf/concatenate.pyi create mode 100644 python/pylibcudf/pylibcudf/contiguous_split.pyi create mode 100644 python/pylibcudf/pylibcudf/copying.pyi create mode 100644 python/pylibcudf/pylibcudf/datetime.pyi create mode 100644 python/pylibcudf/pylibcudf/experimental.pyi create mode 100644 python/pylibcudf/pylibcudf/expressions.pyi create mode 100644 python/pylibcudf/pylibcudf/filling.pyi create mode 100644 python/pylibcudf/pylibcudf/gpumemoryview.pyi create mode 100644 python/pylibcudf/pylibcudf/groupby.pyi create mode 100644 python/pylibcudf/pylibcudf/hashing.pyi create mode 100644 python/pylibcudf/pylibcudf/interop.pyi create mode 100644 python/pylibcudf/pylibcudf/io/__init__.pyi create mode 100644 python/pylibcudf/pylibcudf/io/avro.pyi create mode 100644 python/pylibcudf/pylibcudf/io/csv.pyi create mode 100644 python/pylibcudf/pylibcudf/io/datasource.pyi create mode 100644 python/pylibcudf/pylibcudf/io/json.pyi create mode 100644 python/pylibcudf/pylibcudf/io/orc.pyi create mode 100644 python/pylibcudf/pylibcudf/io/parquet.pyi create mode 100644 python/pylibcudf/pylibcudf/io/timezone.pyi create mode 100644 python/pylibcudf/pylibcudf/io/types.pyi create mode 100644 python/pylibcudf/pylibcudf/join.pyi create mode 100644 python/pylibcudf/pylibcudf/json.pyi create mode 100644 python/pylibcudf/pylibcudf/labeling.pyi create mode 100644 python/pylibcudf/pylibcudf/lists.pyi create mode 100644 python/pylibcudf/pylibcudf/merge.pyi create mode 100644 python/pylibcudf/pylibcudf/null_mask.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/__init__.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi create mode 100644 
python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/jaccard.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/minhash.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/normalize.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/replace.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/stemmer.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi create mode 100644 python/pylibcudf/pylibcudf/nvtext/tokenize.pyi create mode 100644 python/pylibcudf/pylibcudf/partitioning.pyi create mode 100644 python/pylibcudf/pylibcudf/py.typed create mode 100644 python/pylibcudf/pylibcudf/quantiles.pyi create mode 100644 python/pylibcudf/pylibcudf/reduce.pyi create mode 100644 python/pylibcudf/pylibcudf/replace.pyi create mode 100644 python/pylibcudf/pylibcudf/reshape.pyi create mode 100644 python/pylibcudf/pylibcudf/rolling.pyi create mode 100644 python/pylibcudf/pylibcudf/round.pyi create mode 100644 python/pylibcudf/pylibcudf/scalar.pyi create mode 100644 python/pylibcudf/pylibcudf/search.pyi create mode 100644 python/pylibcudf/pylibcudf/sorting.pyi create mode 100644 python/pylibcudf/pylibcudf/stream_compaction.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/__init__.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/attributes.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/capitalize.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/case.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/char_types.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/combine.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/contains.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/__init__.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi create mode 100644 
python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/extract.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/find.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/find_multiple.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/findall.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/padding.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/regex_flags.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/regex_program.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/repeat.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/replace.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/replace_re.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/side_type.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/slice.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/split/__init__.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/split/partition.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/split/split.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/strip.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/translate.pyi create mode 100644 python/pylibcudf/pylibcudf/strings/wrap.pyi create mode 100644 python/pylibcudf/pylibcudf/table.pyi create mode 100644 python/pylibcudf/pylibcudf/traits.pyi create mode 100644 
python/pylibcudf/pylibcudf/transform.pyi create mode 100644 python/pylibcudf/pylibcudf/transpose.pyi create mode 100644 python/pylibcudf/pylibcudf/types.pyi create mode 100644 python/pylibcudf/pylibcudf/unary.pyi diff --git a/python/pylibcudf/pylibcudf/__init__.pyi b/python/pylibcudf/pylibcudf/__init__.pyi new file mode 100644 index 00000000000..a728647f82f --- /dev/null +++ b/python/pylibcudf/pylibcudf/__init__.pyi @@ -0,0 +1,95 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# If libcudf was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. +from pylibcudf import ( + aggregation, + binaryop, + column_factories, + concatenate, + contiguous_split, + copying, + datetime, + experimental, + expressions, + filling, + groupby, + hashing, + interop, + io, + join, + json, + labeling, + lists, + merge, + null_mask, + nvtext, + partitioning, + quantiles, + reduce, + replace, + reshape, + rolling, + round, + search, + sorting, + stream_compaction, + strings, + traits, + transform, + transpose, + types, + unary, +) +from pylibcudf.column import Column +from pylibcudf.gpumemoryview import gpumemoryview +from pylibcudf.scalar import Scalar +from pylibcudf.table import Table +from pylibcudf.types import DataType, MaskState, TypeId + +__all__ = [ + "Column", + "DataType", + "MaskState", + "Scalar", + "Table", + "TypeId", + "aggregation", + "binaryop", + "column_factories", + "contiguous_split", + "concatenate", + "copying", + "datetime", + "experimental", + "expressions", + "filling", + "gpumemoryview", + "groupby", + "hashing", + "interop", + "io", + "join", + "json", + "labeling", + "lists", + "merge", + "null_mask", + "partitioning", + "quantiles", + "reduce", + "replace", + "reshape", + "rolling", + "round", + "search", + "stream_compaction", + "strings", + "sorting", + "traits", + "transform", + "transpose", + "types", + "unary", + "nvtext", +] diff --git 
a/python/pylibcudf/pylibcudf/aggregation.pyi b/python/pylibcudf/pylibcudf/aggregation.pyi new file mode 100644 index 00000000000..f6fdf0273ae --- /dev/null +++ b/python/pylibcudf/pylibcudf/aggregation.pyi @@ -0,0 +1,109 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from enum import IntEnum, auto + +from pylibcudf.types import ( + DataType, + Interpolation, + NanEquality, + NullEquality, + NullOrder, + NullPolicy, + Order, +) + +class Kind(IntEnum): + SUM = auto() + PRODUCT = auto() + MIN = auto() + MAX = auto() + COUNT_VALID = auto() + COUNT_ALL = auto() + ANY = auto() + ALL = auto() + SUM_OF_SQUARES = auto() + MEAN = auto() + VARIANCE = auto() + STD = auto() + MEDIAN = auto() + QUANTILE = auto() + ARGMAX = auto() + ARGMIN = auto() + NUNIQUE = auto() + NTH_ELEMENT = auto() + RANK = auto() + COLLECT_LIST = auto() + COLLECT_SET = auto() + PTX = auto() + CUDA = auto() + CORRELATION = auto() + COVARIANCE = auto() + +class CorrelationType(IntEnum): + PEARSON = auto() + KENDALL = auto() + SPEARMAN = auto() + +class EWMHistory(IntEnum): + INFINITE = auto() + FINITE = auto() + +class RankMethod(IntEnum): + FIRST = auto() + AVERAGE = auto() + MIN = auto() + MAX = auto() + DENSE = auto() + +class RankPercentage(IntEnum): + NONE = auto() + ZERO_NORMALIZED = auto() + ONE_NORMALIZED = auto() + +class UdfType(IntEnum): + CUDA = auto() + PTX = auto() + +class Aggregation: + def kind(self) -> Kind: ... + +def sum() -> Aggregation: ... +def product() -> Aggregation: ... +def min() -> Aggregation: ... +def max() -> Aggregation: ... +def count(null_handling: NullPolicy = NullPolicy.INCLUDE) -> Aggregation: ... +def any() -> Aggregation: ... +def all() -> Aggregation: ... +def sum_of_squares() -> Aggregation: ... +def mean() -> Aggregation: ... +def variance(ddof: int = 1) -> Aggregation: ... +def std(ddof: int = 1) -> Aggregation: ... +def median() -> Aggregation: ... +def quantile( + quantiles: list[float], interp: Interpolation = Interpolation.LINEAR +) -> Aggregation: ... 
+def argmax() -> Aggregation: ... +def argmin() -> Aggregation: ... +def ewma(center_of_mass: float, history: EWMHistory) -> Aggregation: ... +def nunique(null_handling: NullPolicy = NullPolicy.EXCLUDE) -> Aggregation: ... +def nth_element( + n: int, null_handling: NullPolicy = NullPolicy.INCLUDE +) -> Aggregation: ... +def collect_list( + null_handling: NullPolicy = NullPolicy.INCLUDE, +) -> Aggregation: ... +def collect_set( + null_handling: NullPolicy = NullPolicy.INCLUDE, + nulls_equal: NullEquality = NullEquality.EQUAL, + nans_equal: NanEquality = NanEquality.ALL_EQUAL, +) -> Aggregation: ... +def udf(operation: str, output_type: DataType) -> Aggregation: ... +def correlation(type: CorrelationType, min_periods: int) -> Aggregation: ... +def covariance(min_periods: int, ddof: int) -> Aggregation: ... +def rank( + method: RankMethod, + column_order: Order = Order.ASCENDING, + null_handling: NullPolicy = NullPolicy.EXCLUDE, + null_precedence: NullOrder = NullOrder.AFTER, + percentage: RankPercentage = RankPercentage.NONE, +) -> Aggregation: ... diff --git a/python/pylibcudf/pylibcudf/binaryop.pyi b/python/pylibcudf/pylibcudf/binaryop.pyi new file mode 100644 index 00000000000..9cbaeb4549e --- /dev/null +++ b/python/pylibcudf/pylibcudf/binaryop.pyi @@ -0,0 +1,54 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from enum import IntEnum, auto + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.types import DataType + +class BinaryOperator(IntEnum): + ADD = auto() + SUB = auto() + MUL = auto() + DIV = auto() + TRUE_DIV = auto() + FLOOR_DIV = auto() + MOD = auto() + PMOD = auto() + PYMOD = auto() + POW = auto() + INT_POW = auto() + LOG_BASE = auto() + ATAN2 = auto() + SHIFT_LEFT = auto() + SHIFT_RIGHT = auto() + SHIFT_RIGHT_UNSIGNED = auto() + BITWISE_AND = auto() + BITWISE_OR = auto() + BITWISE_XOR = auto() + LOGICAL_AND = auto() + LOGICAL_OR = auto() + EQUAL = auto() + NOT_EQUAL = auto() + LESS = auto() + GREATER = auto() + LESS_EQUAL = auto() + GREATER_EQUAL = auto() + NULL_EQUALS = auto() + NULL_MAX = auto() + NULL_MIN = auto() + NULL_NOT_EQUALS = auto() + GENERIC_BINARY = auto() + NULL_LOGICAL_AND = auto() + NULL_LOGICAL_OR = auto() + INVALID_BINARY = auto() + +def binary_operation( + lhs: Column | Scalar, + rhs: Column | Scalar, + op: BinaryOperator, + output_type: DataType, +) -> Column: ... +def is_supported_operation( + out: DataType, lhs: DataType, rhs: DataType, op: BinaryOperator +) -> bool: ... diff --git a/python/pylibcudf/pylibcudf/column.pyi b/python/pylibcudf/pylibcudf/column.pyi new file mode 100644 index 00000000000..72b41a9be5e --- /dev/null +++ b/python/pylibcudf/pylibcudf/column.pyi @@ -0,0 +1,48 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from collections.abc import Sequence +from typing import Any + +from pylibcudf.gpumemoryview import gpumemoryview +from pylibcudf.scalar import Scalar +from pylibcudf.types import DataType + +class Column: + def __init__( + self, + data_type: DataType, + size: int, + data: gpumemoryview | None, + mask: gpumemoryview | None, + null_count: int, + offset: int, + children: list[Column], + ) -> None: ... + def type(self) -> DataType: ... + def child(self, index: int) -> Column: ... + def size(self) -> int: ... + def null_count(self) -> int: ... + def offset(self) -> int: ... 
+ def data(self) -> gpumemoryview | None: ... + def null_mask(self) -> gpumemoryview | None: ... + def children(self) -> list[Column]: ... + def copy(self) -> Column: ... + def with_mask( + self, mask: gpumemoryview | None, null_count: int + ) -> Column: ... + def list_view(self) -> ListColumnView: ... + @staticmethod + def from_scalar(scalar: Scalar, size: int) -> Column: ... + @staticmethod + def all_null_like(like: Column, size: int) -> Column: ... + @staticmethod + def from_cuda_array_interface_obj(obj: Any) -> Column: ... + +class ListColumnView: + def __init__(self, column: Column) -> None: ... + def child(self) -> Column: ... + def offsets(self) -> Column: ... + +def is_c_contiguous( + shape: Sequence[int], strides: Sequence[int], itemsize: int +) -> bool: ... diff --git a/python/pylibcudf/pylibcudf/column_factories.pyi b/python/pylibcudf/pylibcudf/column_factories.pyi new file mode 100644 index 00000000000..c87fe423acb --- /dev/null +++ b/python/pylibcudf/pylibcudf/column_factories.pyi @@ -0,0 +1,20 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.column import Column +from pylibcudf.types import DataType, MaskState, TypeId + +def make_empty_column(type_or_id: DataType | TypeId) -> Column: ... +def make_numeric_column( + type_: DataType, size: int, mstate: MaskState +) -> Column: ... +def make_fixed_point_column( + type_: DataType, size: int, mstate: MaskState +) -> Column: ... +def make_timestamp_column( + type_: DataType, size: int, mstate: MaskState +) -> Column: ... +def make_duration_column( + type_: DataType, size: int, mstate: MaskState +) -> Column: ... +def make_fixed_width_column( + type_: DataType, size: int, mstate: MaskState +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/concatenate.pyi b/python/pylibcudf/pylibcudf/concatenate.pyi new file mode 100644 index 00000000000..79076f509e0 --- /dev/null +++ b/python/pylibcudf/pylibcudf/concatenate.pyi @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from pylibcudf.column import Column +from pylibcudf.table import Table + +def concatenate[ColumnOrTable: (Column, Table)]( + objects: list[ColumnOrTable], +) -> ColumnOrTable: ... diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyi b/python/pylibcudf/pylibcudf/contiguous_split.pyi new file mode 100644 index 00000000000..66e6c5e50c5 --- /dev/null +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyi @@ -0,0 +1,13 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.gpumemoryview import gpumemoryview +from pylibcudf.table import Table + +class PackedColumns: + def release(self) -> tuple[memoryview, gpumemoryview]: ... + +def pack(input: Table) -> PackedColumns: ... +def unpack(input: PackedColumns) -> Table: ... +def unpack_from_memoryviews( + metadata: memoryview, gpu_data: gpumemoryview +) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/copying.pyi b/python/pylibcudf/pylibcudf/copying.pyi new file mode 100644 index 00000000000..07bfced4a55 --- /dev/null +++ b/python/pylibcudf/pylibcudf/copying.pyi @@ -0,0 +1,54 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from enum import IntEnum, auto +from typing import TypeVar + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.table import Table + +class MaskAllocationPolicy(IntEnum): + NEVER = auto() + RETAIN = auto() + ALWAYS = auto() + +class OutOfBoundsPolicy(IntEnum): + NULLIFY = auto() + DONT_CHECK = auto() + +ColumnOrTable = TypeVar("ColumnOrTable", Column, Table) + +def gather( + source_table: Table, gather_map: Column, bounds_policy: OutOfBoundsPolicy +) -> Table: ... +def scatter( + source: Table | list[Scalar], scatter_map: Column, target_table: Table +) -> Table: ... +def empty_like(input: ColumnOrTable) -> ColumnOrTable: ... +def allocate_like( + input_column: Column, policy: MaskAllocationPolicy, size: int | None = None +) -> Column: ... 
+def copy_range_in_place( + input_column: Column, + target_column: Column, + input_begin: int, + input_end: int, + target_begin: int, +) -> Column: ... +def copy_range( + input_column: Column, + target_column: Column, + input_begin: int, + input_end: int, + target_begin: int, +) -> Column: ... +def shift(input: Column, offset: int, fill_value: Scalar) -> Column: ... +def slice(input: ColumnOrTable, indices: list[int]) -> list[ColumnOrTable]: ... +def split(input: ColumnOrTable, splits: list[int]) -> list[ColumnOrTable]: ... +def copy_if_else( + lhs: Column | Scalar, rhs: Column | Scalar, boolean_mask: Column +) -> Column: ... +def boolean_mask_scatter( + input: Table | list[Scalar], target: Table, boolean_mask: Column +) -> Table: ... +def get_element(input_column: Column, index: int) -> Scalar: ... diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi new file mode 100644 index 00000000000..30ff3edf4fb --- /dev/null +++ b/python/pylibcudf/pylibcudf/datetime.pyi @@ -0,0 +1,45 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from enum import IntEnum, auto + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +class DatetimeComponent(IntEnum): + YEAR = auto() + MONTH = auto() + DAY = auto() + WEEKDAY = auto() + HOUR = auto() + MINUTE = auto() + SECOND = auto() + MILLISECOND = auto() + MICROSECOND = auto() + NANOSECOND = auto() + +class RoundingFrequency(IntEnum): + DAY = auto() + HOUR = auto() + MINUTE = auto() + SECOND = auto() + MILLISECOND = auto() + MICROSECOND = auto() + NANOSECOND = auto() + +def extract_millisecond_fraction(input: Column) -> Column: ... +def extract_microsecond_fraction(input: Column) -> Column: ... +def extract_nanosecond_fraction(input: Column) -> Column: ... +def extract_datetime_component( + input: Column, component: DatetimeComponent +) -> Column: ... +def ceil_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... 
+def floor_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... +def round_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... +def add_calendrical_months( + timestamps: Column, months: Column | Scalar +) -> Column: ... +def day_of_year(input: Column) -> Column: ... +def is_leap_year(input: Column) -> Column: ... +def last_day_of_month(input: Column) -> Column: ... +def extract_quarter(input: Column) -> Column: ... +def days_in_month(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/experimental.pyi b/python/pylibcudf/pylibcudf/experimental.pyi new file mode 100644 index 00000000000..bbfb86b0ff6 --- /dev/null +++ b/python/pylibcudf/pylibcudf/experimental.pyi @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +def enable_prefetching(key: str) -> None: ... +def disable_prefetching(key: str) -> None: ... +def prefetch_debugging(enable: bool) -> None: ... diff --git a/python/pylibcudf/pylibcudf/expressions.pyi b/python/pylibcudf/pylibcudf/expressions.pyi new file mode 100644 index 00000000000..c3769bbfb85 --- /dev/null +++ b/python/pylibcudf/pylibcudf/expressions.pyi @@ -0,0 +1,78 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+from enum import IntEnum, auto + +from pylibcudf.scalar import Scalar + +class TableReference(IntEnum): + LEFT = auto() + RIGHT = auto() + +class ASTOperator(IntEnum): + ADD = auto() + SUB = auto() + MUL = auto() + DIV = auto() + TRUE_DIV = auto() + FLOOR_DIV = auto() + MOD = auto() + PYMOD = auto() + POW = auto() + EQUAL = auto() + NULL_EQUAL = auto() + NOT_EQUAL = auto() + LESS = auto() + GREATER = auto() + LESS_EQUAL = auto() + GREATER_EQUAL = auto() + BITWISE_AND = auto() + BITWISE_OR = auto() + BITWISE_XOR = auto() + NULL_LOGICAL_AND = auto() + LOGICAL_AND = auto() + NULL_LOGICAL_OR = auto() + LOGICAL_OR = auto() + IDENTITY = auto() + IS_NULL = auto() + SIN = auto() + COS = auto() + TAN = auto() + ARCSIN = auto() + ARCCOS = auto() + ARCTAN = auto() + SINH = auto() + COSH = auto() + TANH = auto() + ARCSINH = auto() + ARCCOSH = auto() + ARCTANH = auto() + EXP = auto() + LOG = auto() + SQRT = auto() + CBRT = auto() + CEIL = auto() + FLOOR = auto() + ABS = auto() + RINT = auto() + BIT_INVERT = auto() + NOT = auto() + +class Expression: ... + +class Literal(Expression): + def __init__(self, value: Scalar) -> None: ... + +class ColumnReference(Expression): + def __init__( + self, index: int, table_source: TableReference = TableReference.LEFT + ) -> None: ... + +class ColumnNameReference(Expression): + def __init__(self, name: str) -> None: ... + +class Operation(Expression): + def __init__( + self, + op: ASTOperator, + left: Expression, + right: Expression | None = None, + ) -> None: ... diff --git a/python/pylibcudf/pylibcudf/filling.pyi b/python/pylibcudf/pylibcudf/filling.pyi new file mode 100644 index 00000000000..c0534f1344b --- /dev/null +++ b/python/pylibcudf/pylibcudf/filling.pyi @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.table import Table + +def fill( + destination: Column, begin: int, end: int, value: Scalar +) -> Column: ... 
+def fill_in_place( + destination: Column, c_begin: int, c_end: int, value: Scalar +) -> None: ... +def sequence(size: int, init: Scalar, step: Scalar) -> Column: ... +def repeat(input_table: Table, count: Column | int) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyi b/python/pylibcudf/pylibcudf/gpumemoryview.pyi new file mode 100644 index 00000000000..0491ba896e5 --- /dev/null +++ b/python/pylibcudf/pylibcudf/gpumemoryview.pyi @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from collections.abc import Mapping +from typing import Any + +class gpumemoryview: + def __init__(self, data: Any) -> None: ... + @property + def __cuda_array_interface__(self) -> Mapping[str, Any]: ... diff --git a/python/pylibcudf/pylibcudf/groupby.pyi b/python/pylibcudf/pylibcudf/groupby.pyi new file mode 100644 index 00000000000..e933cafdeef --- /dev/null +++ b/python/pylibcudf/pylibcudf/groupby.pyi @@ -0,0 +1,38 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.aggregation import Aggregation +from pylibcudf.column import Column +from pylibcudf.replace import ReplacePolicy +from pylibcudf.scalar import Scalar +from pylibcudf.table import Table +from pylibcudf.types import NullOrder, NullPolicy, Order, Sorted + +class GroupByRequest: + def __init__( + self, values: Column, aggregations: list[Aggregation] + ) -> None: ... + +class GroupBy: + def __init__( + self, + keys: Table, + null_handling: NullPolicy = NullPolicy.EXCLUDE, + keys_are_sorted: Sorted = Sorted.NO, + column_order: list[Order] | None = None, + null_precedence: list[NullOrder] | None = None, + ) -> None: ... + def aggregate( + self, requests: list[GroupByRequest] + ) -> tuple[Table, list[Table]]: ... + def scan( + self, requests: list[GroupByRequest] + ) -> tuple[Table, list[Table]]: ... + def shift( + self, values: Table, offset: list[int], fill_values: list[Scalar] + ) -> tuple[Table, Table]: ... 
+ def replace_nulls( + self, values: Table, replace_policies: list[ReplacePolicy] + ) -> tuple[Table, Table]: ... + def get_groups( + self, values: Table | None = None + ) -> tuple[list[int], Table, Table]: ... diff --git a/python/pylibcudf/pylibcudf/hashing.pyi b/python/pylibcudf/pylibcudf/hashing.pyi new file mode 100644 index 00000000000..69a72aa4783 --- /dev/null +++ b/python/pylibcudf/pylibcudf/hashing.pyi @@ -0,0 +1,22 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.table import Table + +LIBCUDF_DEFAULT_HASH_SEED: int + +def murmurhash3_x86_32( + input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED +) -> Column: ... +def murmurhash3_x64_128( + input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED +) -> Table: ... +def xxhash_64( + input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED +) -> Column: ... +def md5(input: Table) -> Column: ... +def sha1(input: Table) -> Column: ... +def sha224(input: Table) -> Column: ... +def sha256(input: Table) -> Column: ... +def sha384(input: Table) -> Column: ... +def sha512(input: Table) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/interop.pyi b/python/pylibcudf/pylibcudf/interop.pyi new file mode 100644 index 00000000000..cd8cb0c4a2c --- /dev/null +++ b/python/pylibcudf/pylibcudf/interop.pyi @@ -0,0 +1,50 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from collections.abc import Iterable, Mapping +from typing import Any, overload + +import pyarrow as pa + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.table import Table +from pylibcudf.types import DataType + +class ColumnMetadata: + name: str + children_meta: list[ColumnMetadata] + +@overload +def from_arrow(obj: pa.DataType) -> DataType: ... +@overload +def from_arrow( + obj: pa.Scalar[Any], *, data_type: DataType | None = None +) -> Scalar: ... +@overload +def from_arrow(obj: pa.Array[Any]) -> Column: ... +@overload +def from_arrow(obj: pa.Table) -> Table: ... 
+@overload +def to_arrow( + obj: DataType, + *, + precision: int | None = None, + fields: Iterable[pa.Field | tuple[str, pa.DataType]] + | Mapping[str, pa.DataType] + | None = None, + value_type: pa.DataType | None = None, +) -> pa.DataType: ... +@overload +def to_arrow( + obj: Table, metadata: list[ColumnMetadata | str] | None = None +) -> pa.Table: ... +@overload +def to_arrow( + obj: Column, metadata: ColumnMetadata | str | None = None +) -> pa.Array[Any]: ... +@overload +def to_arrow( + obj: Scalar, metadata: ColumnMetadata | str | None = None +) -> pa.Scalar[Any]: ... +def from_dlpack(managed_tensor: Any) -> Table: ... +def to_dlpack(input: Table) -> Any: ... diff --git a/python/pylibcudf/pylibcudf/io/__init__.pyi b/python/pylibcudf/pylibcudf/io/__init__.pyi new file mode 100644 index 00000000000..e1a93ce08e3 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/__init__.pyi @@ -0,0 +1,27 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.io import ( + avro, + csv, + datasource, + json, + orc, + parquet, + timezone, + types, +) +from pylibcudf.io.types import SinkInfo, SourceInfo, TableWithMetadata + +__all__ = [ + "avro", + "csv", + "datasource", + "json", + "orc", + "parquet", + "timezone", + "types", + "SinkInfo", + "SourceInfo", + "TableWithMetadata", +] diff --git a/python/pylibcudf/pylibcudf/io/avro.pyi b/python/pylibcudf/pylibcudf/io/avro.pyi new file mode 100644 index 00000000000..49c2f083702 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/avro.pyi @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.io.types import SourceInfo, TableWithMetadata + +__all__ = ["read_avro"] + +def read_avro( + source_info: SourceInfo, + columns: list[str] | None = None, + skip_rows: int = 0, + num_rows: int = -1, +) -> TableWithMetadata: ... 
diff --git a/python/pylibcudf/pylibcudf/io/csv.pyi b/python/pylibcudf/pylibcudf/io/csv.pyi new file mode 100644 index 00000000000..356825a927d --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/csv.pyi @@ -0,0 +1,54 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from collections.abc import Mapping + +from pylibcudf.io.types import ( + CompressionType, + QuoteStyle, + SourceInfo, + TableWithMetadata, +) +from pylibcudf.types import DataType + +def read_csv( + source_info: SourceInfo, + *, + compression: CompressionType = CompressionType.AUTO, + byte_range_offset: int = 0, + byte_range_size: int = 0, + col_names: list[str] | None = None, + prefix: str = "", + mangle_dupe_cols: bool = True, + usecols: list[int] | list[str] | None = None, + nrows: int = -1, + skiprows: int = 0, + skipfooter: int = 0, + header: int = 0, + lineterminator: str = "\n", + delimiter: str | None = None, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + delim_whitespace: bool = False, + skipinitialspace: bool = False, + skip_blank_lines: bool = True, + quoting: QuoteStyle = QuoteStyle.MINIMAL, + quotechar: str = '"', + doublequote: bool = True, + parse_dates: list[str] | list[int] | None = None, + parse_hex: list[str] | list[int] | None = None, + # Technically this should be dict/list + # but using a fused type prevents using None as default + dtypes: Mapping[str, DataType] | list[DataType] | None = None, + true_values: list[str] | None = None, + false_values: list[str] | None = None, + na_values: list[str] | None = None, + keep_default_na: bool = True, + na_filter: bool = True, + dayfirst: bool = False, + # Note: These options are supported by the libcudf reader + # but are not exposed here since there is no demand for them + # on the Python side yet. + # detect_whitespace_around_quotes: bool = False, + # timestamp_type: DataType = DataType(type_id.EMPTY), +) -> TableWithMetadata: ... 
diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyi b/python/pylibcudf/pylibcudf/io/datasource.pyi new file mode 100644 index 00000000000..c4184208b0c --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/datasource.pyi @@ -0,0 +1,4 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +class Datasource: + pass diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi new file mode 100644 index 00000000000..33794afb208 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/json.pyi @@ -0,0 +1,50 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from collections.abc import Mapping +from typing import TypeAlias + +from pylibcudf.column import Column +from pylibcudf.io.types import ( + CompressionType, + JSONRecoveryMode, + SinkInfo, + SourceInfo, + TableWithMetadata, +) +from pylibcudf.types import DataType + +ChildNameToTypeMap: TypeAlias = Mapping[str, ChildNameToTypeMap] + +NameAndType: TypeAlias = tuple[str, DataType, list[NameAndType]] + +def read_json( + source_info: SourceInfo, + dtypes: list[NameAndType] | None = None, + compression: CompressionType = CompressionType.AUTO, + lines: bool = False, + byte_range_offset: int = 0, + byte_range_size: int = 0, + keep_quotes: bool = False, + mixed_types_as_string: bool = False, + prune_columns: bool = False, + recovery_mode: JSONRecoveryMode = JSONRecoveryMode.FAIL, +) -> TableWithMetadata: ... +def write_json( + sink_info: SinkInfo, + tbl: TableWithMetadata, + na_rep: str = "", + include_nulls: bool = False, + lines: bool = False, + rows_per_chunk: int = 2**32 - 1, + true_value: str = "true", + false_value: str = "false", +) -> None: ... 
+def chunked_read_json( + source_info: SourceInfo, + dtypes: list[NameAndType] | None = None, + compression: CompressionType = CompressionType.AUTO, + keep_quotes: bool = False, + mixed_types_as_string: bool = False, + prune_columns: bool = False, + recovery_mode: JSONRecoveryMode = JSONRecoveryMode.FAIL, + chunk_size: int = 100_000_000, +) -> tuple[list[Column], list[str], ChildNameToTypeMap]: ... diff --git a/python/pylibcudf/pylibcudf/io/orc.pyi b/python/pylibcudf/pylibcudf/io/orc.pyi new file mode 100644 index 00000000000..87ea9088b44 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/orc.pyi @@ -0,0 +1,39 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from typing import Any + +from pylibcudf.io.types import SourceInfo, TableWithMetadata +from pylibcudf.types import DataType + +def read_orc( + source_info: SourceInfo, + columns: list[str] | None = None, + stripes: list[list[int]] | None = None, + skip_rows: int = 0, + nrows: int = -1, + use_index: bool = True, + use_np_dtypes: bool = True, + timestamp_type: DataType | None = None, + decimal128_columns: list[str] | None = None, +) -> TableWithMetadata: ... + +class OrcColumnStatistics: + @property + def number_of_values(self) -> int | None: ... + @property + def has_null(self) -> bool | None: ... + def __getitem__(self, item: str) -> Any: ... + def __contains__(self, item: str) -> bool: ... + def get[T](self, item: str, default: None | T = None) -> T | None: ... + +class ParsedOrcStatistics: + @property + def column_names(self) -> list[str]: ... + @property + def file_stats(self) -> list[OrcColumnStatistics]: ... + @property + def stripes_stats(self) -> list[OrcColumnStatistics]: ... + +def read_parsed_orc_statistics( + source_info: SourceInfo, +) -> ParsedOrcStatistics: ... 
diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyi b/python/pylibcudf/pylibcudf/io/parquet.pyi new file mode 100644 index 00000000000..bcf1d1cce09 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/parquet.pyi @@ -0,0 +1,36 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.expressions import Expression +from pylibcudf.io.types import SourceInfo, TableWithMetadata + +class ChunkedParquetReader: + def __init__( + self, + source_info: SourceInfo, + columns: list[str] | None = None, + row_groups: list[list[int]] | None = None, + use_pandas_metadata: bool = True, + convert_strings_to_categories: bool = False, + skip_rows: int = 0, + nrows: int = 0, + chunk_read_limit: int = 0, + pass_read_limit: int = 1024000000, + allow_mismatched_pq_schemas: bool = False, + ) -> None: ... + def has_next(self) -> bool: ... + def read_chunk(self) -> TableWithMetadata: ... + +def read_parquet( + source_info: SourceInfo, + columns: list[str] | None = None, + row_groups: list[list[int]] | None = None, + filters: Expression | None = None, + convert_strings_to_categories: bool = False, + use_pandas_metadata: bool = True, + skip_rows: int = 0, + nrows: int = -1, + allow_mismatched_pq_schemas: bool = False, + # disabled see comment in parquet.pyx for more + # reader_column_schema: ReaderColumnSchema = *, + # timestamp_type: DataType = * +) -> TableWithMetadata: ... diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyi b/python/pylibcudf/pylibcudf/io/timezone.pyi new file mode 100644 index 00000000000..0582800c4af --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/timezone.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.table import Table + +def make_timezone_transition_table( + tzif_dir: str, timezone_name: str +) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi new file mode 100644 index 00000000000..f668c07f940 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/types.pyi @@ -0,0 +1,97 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +import io +import os +from collections.abc import Mapping +from enum import IntEnum, auto +from typing import Literal, TypeAlias, overload + +from pylibcudf.column import Column +from pylibcudf.io.datasource import Datasource +from pylibcudf.table import Table + +class JSONRecoveryMode(IntEnum): + FAIL = auto() + RECOVER_WITH_NULL = auto() + +class CompressionType(IntEnum): + NONE = auto() + AUTO = auto() + SNAPPY = auto() + GZIP = auto() + BZIP2 = auto() + BROTLI = auto() + ZIP = auto() + XZ = auto() + ZLIB = auto() + LZ4 = auto() + LZO = auto() + ZSTD = auto() + +class ColumnEncoding(IntEnum): + USE_DEFAULT = auto() + DICTIONARY = auto() + PLAIN = auto() + DELTA_BINARY_PACKED = auto() + DELTA_LENGTH_BYTE_ARRAY = auto() + DELTA_BYTE_ARRAY = auto() + BYTE_STREAM_SPLIT = auto() + DIRECT = auto() + DIRECT_V2 = auto() + DICTIONARY_V2 = auto() + +class DictionaryPolicy(IntEnum): + NEVER = auto() + ADAPTIVE = auto() + ALWAYS = auto() + +class StatisticsFreq(IntEnum): + STATISTICS_NONE = auto() + STATISTICS_ROWGROUP = auto() + STATISTICS_PAGE = auto() + STATISTICS_COLUMN = auto() + +class QuoteStyle(IntEnum): + MINIMAL = auto() + ALL = auto() + NONNUMERIC = auto() + NONE = auto() + +ColumnNameSpec: TypeAlias = tuple[str, list[ColumnNameSpec]] +ChildNameSpec: TypeAlias = Mapping[str, ChildNameSpec] + +class TableWithMetadata: + tbl: Table + def __init__( + self, tbl: Table, column_names: list[ColumnNameSpec] + ) -> None: ... + @property + def columns(self) -> list[Column]: ... + @overload + def column_names(self, include_children: Literal[False]) -> list[str]: ... + @overload + def column_names( + self, include_children: Literal[True] + ) -> list[ColumnNameSpec]: ... 
+ @overload + def column_names( + self, include_children: bool = False + ) -> list[str] | list[ColumnNameSpec]: ... + @property + def child_names(self) -> ChildNameSpec: ... + @property + def per_file_user_data(self) -> list[Mapping[str, str]]: ... + +class SourceInfo: + def __init__( + self, sources: list[str] | list[os.PathLike] | list[Datasource] + ) -> None: ... + +class SinkInfo: + def __init__( + self, + sinks: list[os.PathLike] + | list[io.StringIO] + | list[io.BytesIO] + | list[io.TextIOBase] + | list[str], + ) -> None: ... diff --git a/python/pylibcudf/pylibcudf/join.pyi b/python/pylibcudf/pylibcudf/join.pyi new file mode 100644 index 00000000000..f34357baa67 --- /dev/null +++ b/python/pylibcudf/pylibcudf/join.pyi @@ -0,0 +1,78 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.expressions import Expression +from pylibcudf.table import Table +from pylibcudf.types import NullEquality + +def inner_join( + left_keys: Table, right_keys: Table, nulls_equal: NullEquality +) -> tuple[Column, Column]: ... +def left_join( + left_keys: Table, right_keys: Table, nulls_equal: NullEquality +) -> tuple[Column, Column]: ... +def full_join( + left_keys: Table, right_keys: Table, nulls_equal: NullEquality +) -> tuple[Column, Column]: ... +def left_semi_join( + left_keys: Table, right_keys: Table, nulls_equal: NullEquality +) -> Column: ... +def left_anti_join( + left_keys: Table, right_keys: Table, nulls_equal: NullEquality +) -> Column: ... +def cross_join(left: Table, right: Table) -> Table: ... +def conditional_inner_join( + left: Table, right: Table, binary_predicate: Expression +) -> tuple[Column, Column]: ... +def conditional_left_join( + left: Table, right: Table, binary_predicate: Expression +) -> tuple[Column, Column]: ... +def conditional_full_join( + left: Table, right: Table, binary_predicate: Expression +) -> tuple[Column, Column]: ... 
+def conditional_left_semi_join( + left: Table, right: Table, binary_predicate: Expression +) -> Column: ... +def conditional_left_anti_join( + left: Table, right: Table, binary_predicate: Expression +) -> Column: ... +def mixed_inner_join( + left_keys: Table, + right_keys: Table, + left_conditional: Table, + right_conditional: Table, + binary_predicate: Expression, + nulls_equal: NullEquality, +) -> tuple[Column, Column]: ... +def mixed_left_join( + left_keys: Table, + right_keys: Table, + left_conditional: Table, + right_conditional: Table, + binary_predicate: Expression, + nulls_equal: NullEquality, +) -> tuple[Column, Column]: ... +def mixed_full_join( + left_keys: Table, + right_keys: Table, + left_conditional: Table, + right_conditional: Table, + binary_predicate: Expression, + nulls_equal: NullEquality, +) -> tuple[Column, Column]: ... +def mixed_left_semi_join( + left_keys: Table, + right_keys: Table, + left_conditional: Table, + right_conditional: Table, + binary_predicate: Expression, + nulls_equal: NullEquality, +) -> Column: ... +def mixed_left_anti_join( + left_keys: Table, + right_keys: Table, + left_conditional: Table, + right_conditional: Table, + binary_predicate: Expression, + nulls_equal: NullEquality, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/json.pyi b/python/pylibcudf/pylibcudf/json.pyi new file mode 100644 index 00000000000..41872c037de --- /dev/null +++ b/python/pylibcudf/pylibcudf/json.pyi @@ -0,0 +1,23 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +class GetJsonObjectOptions: + def __init__( + self, + *, + allow_single_quotes: bool = False, + strip_quotes_from_single_strings: bool = True, + missing_fields_as_nulls: bool = False, + ) -> None: ... + def get_allow_single_quotes(self) -> bool: ... + def get_strip_quotes_from_single_strings(self) -> bool: ... + def get_missing_fields_as_nulls(self) -> bool: ... 
+ def set_allow_single_quotes(self, value: bool) -> None: ... + def set_strip_quotes_from_single_strings(self, value: bool) -> None: ... + def set_missing_fields_as_nulls(self, value: bool) -> None: ... + +def get_json_object( + col: Column, json_path: Scalar, options: GetJsonObjectOptions | None = None +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi new file mode 100644 index 00000000000..362a92ca7c1 --- /dev/null +++ b/python/pylibcudf/pylibcudf/labeling.pyi @@ -0,0 +1,10 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.column import Column + +def label_bins( + input: Column, + left_edges: Column, + left_inclusive: bool, + right_edges: Column, + right_inclusive: bool, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi new file mode 100644 index 00000000000..e5d186572cd --- /dev/null +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -0,0 +1,42 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.table import Table +from pylibcudf.types import NullOrder + +def explode_outer(input: Table, explode_column_idx: int) -> Table: ... +def concatenate_rows(input: Table) -> Column: ... +def concatenate_list_elements(input: Column, dropna: bool) -> Column: ... +def contains(input: Column, search_key: Column | Scalar) -> Column: ... +def contains_nulls(input: Column) -> Column: ... +def index_of( + input: Column, search_key: Column | Scalar, find_first_option: bool +) -> Column: ... +def reverse(input: Column) -> Column: ... +def segmented_gather(input: Column, gather_map_list: Column) -> Column: ... +def extract_list_element(input: Column, index: Column | int) -> Column: ... +def count_elements(input: Column) -> Column: ... +def sequences( + starts: Column, sizes: Column, steps: Column | None = None +) -> Column: ... 
+def sort_lists( + input: Column, + ascending: bool, + na_position: NullOrder, + stable: bool = False, +) -> Column: ... +def difference_distinct( + lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True +) -> Column: ... +def have_overlap( + lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True +) -> Column: ... +def intersect_distinct( + lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True +) -> Column: ... +def union_distinct( + lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True +) -> Column: ... +def apply_boolean_mask(input: Column, mask: Column) -> Column: ... +def distinct(input: Column, nulls_equal: bool, nans_equal: bool) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/merge.pyi b/python/pylibcudf/pylibcudf/merge.pyi new file mode 100644 index 00000000000..b18eb01f8a2 --- /dev/null +++ b/python/pylibcudf/pylibcudf/merge.pyi @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.table import Table +from pylibcudf.types import NullOrder, Order + +def merge( + tables_to_merge: list[Table], + key_cols: list[int], + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/null_mask.pyi b/python/pylibcudf/pylibcudf/null_mask.pyi new file mode 100644 index 00000000000..1a6d96a0822 --- /dev/null +++ b/python/pylibcudf/pylibcudf/null_mask.pyi @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from rmm.pylibrmm.device_buffer import DeviceBuffer + +from pylibcudf.column import Column +from pylibcudf.types import MaskState + +def copy_bitmask(col: Column) -> DeviceBuffer: ... +def bitmask_allocation_size_bytes(number_of_bits: int) -> int: ... +def create_null_mask( + size: int, state: MaskState = MaskState.UNINITIALIZED +) -> DeviceBuffer: ... +def bitmask_and(columns: list[Column]) -> tuple[DeviceBuffer, int]: ... 
+def bitmask_or(columns: list[Column]) -> tuple[DeviceBuffer, int]: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pyi b/python/pylibcudf/pylibcudf/nvtext/__init__.pyi new file mode 100644 index 00000000000..aa51eff6bf5 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pyi @@ -0,0 +1,29 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.nvtext import ( + byte_pair_encode, + edit_distance, + generate_ngrams, + jaccard, + minhash, + ngrams_tokenize, + normalize, + replace, + stemmer, + subword_tokenize, + tokenize, +) + +__all__ = [ + "byte_pair_encode", + "edit_distance", + "generate_ngrams", + "jaccard", + "minhash", + "ngrams_tokenize", + "normalize", + "replace", + "stemmer", + "subword_tokenize", + "tokenize", +] diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi new file mode 100644 index 00000000000..eb85acd56e5 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +class BPEMergePairs: + def __init__(self, merge_pairs: Column) -> None: ... + +def byte_pair_encoding( + input: Column, merge_pairs: BPEMergePairs, separator: Scalar | None = None +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi new file mode 100644 index 00000000000..85bbbb880ee --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi @@ -0,0 +1,6 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def edit_distance(input: Column, targets: Column) -> Column: ... +def edit_distance_matrix(input: Column) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi new file mode 100644 index 00000000000..2757518379d --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi @@ -0,0 +1,10 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def generate_ngrams( + input: Column, ngrams: int, separator: Scalar +) -> Column: ... +def generate_character_ngrams(input: Column, ngrams: int = 2) -> Column: ... +def hash_character_ngrams(input: Column, ngrams: int = 2) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi new file mode 100644 index 00000000000..18263c5c8fd --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def jaccard_index(input1: Column, input2: Column, width: int) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi new file mode 100644 index 00000000000..a2d9b6364f7 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi @@ -0,0 +1,13 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def minhash( + input: Column, seeds: Column | Scalar, width: int = 4 +) -> Column: ... +def minhash64( + input: Column, seeds: Column | Scalar, width: int = 4 +) -> Column: ... +def word_minhash(input: Column, seeds: Column) -> Column: ... +def word_minhash64(input: Column, seeds: Column) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi new file mode 100644 index 00000000000..224640ed44d --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def ngrams_tokenize( + input: Column, ngrams: int, delimiter: Scalar, separator: Scalar +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyi b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi new file mode 100644 index 00000000000..1d90a5a8960 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi @@ -0,0 +1,6 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def normalize_spaces(input: Column) -> Column: ... +def normalize_characters(input: Column, do_lower_case: bool) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyi b/python/pylibcudf/pylibcudf/nvtext/replace.pyi new file mode 100644 index 00000000000..1f1ac72ce7c --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyi @@ -0,0 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def replace_tokens( + input: Column, + targets: Column, + replacements: Column, + delimiter: Scalar | None = None, +) -> Column: ... +def filter_tokens( + input: Column, + min_token_length: int, + replacement: Scalar | None = None, + delimiter: Scalar | None = None, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi new file mode 100644 index 00000000000..d6ba1d189bd --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from pylibcudf.column import Column + +def is_letter( + input: Column, check_vowels: bool, indices: Column | int +) -> Column: ... +def porter_stemmer_measure(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi new file mode 100644 index 00000000000..996bd093eb4 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi @@ -0,0 +1,15 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +class HashedVocabulary: + def __init__(self, hash_file: str) -> None: ... + +def subword_tokenize( + input: Column, + vocabulary_table: HashedVocabulary, + max_sequence_length: int, + stride: int, + do_lower_case: bool, + do_truncate: bool, +) -> tuple[Column, Column, Column]: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi new file mode 100644 index 00000000000..516011eff61 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi @@ -0,0 +1,26 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +class TokenizeVocabulary: + def __init__(self, vocab: Column) -> None: ... + +def tokenize_scalar( + input: Column, delimiter: Scalar | None = None +) -> Column: ... +def tokenize_column(input: Column, delimiters: Column) -> Column: ... +def count_tokens_scalar( + input: Column, delimiter: Scalar | None = None +) -> Column: ... +def count_tokens_column(input: Column, delimiters: Column) -> Column: ... +def character_tokenize(input: Column) -> Column: ... +def detokenize( + input: Column, row_indices: Column, separator: Scalar | None = None +) -> Column: ... +def tokenize_with_vocabulary( + input: Column, + vocabulary: TokenizeVocabulary, + delimiter: Scalar, + default_id: int = -1, +) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyi b/python/pylibcudf/pylibcudf/partitioning.pyi new file mode 100644 index 00000000000..48a2ade23f1 --- /dev/null +++ b/python/pylibcudf/pylibcudf/partitioning.pyi @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.table import Table + +def hash_partition( + input: Table, columns_to_hash: list[int], num_partitions: int +) -> tuple[Table, list[int]]: ... +def partition( + t: Table, partition_map: Column, num_partitions: int +) -> tuple[Table, list[int]]: ... +def round_robin_partition( + input: Table, num_partitions: int, start_partition: int = 0 +) -> tuple[Table, list[int]]: ... diff --git a/python/pylibcudf/pylibcudf/py.typed b/python/pylibcudf/pylibcudf/py.typed new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/quantiles.pyi b/python/pylibcudf/pylibcudf/quantiles.pyi new file mode 100644 index 00000000000..dca6eed013a --- /dev/null +++ b/python/pylibcudf/pylibcudf/quantiles.pyi @@ -0,0 +1,23 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from collections.abc import Sequence + +from pylibcudf.column import Column +from pylibcudf.table import Table +from pylibcudf.types import Interpolation, NullOrder, Order, Sorted + +def quantile( + input: Column, + q: Sequence[float], + interp: Interpolation = Interpolation.LINEAR, + ordered_indices: Column | None = None, + exact: bool = True, +) -> Column: ... +def quantiles( + input: Table, + q: Sequence[float], + interp: Interpolation = Interpolation.NEAREST, + is_input_sorted: Sorted = Sorted.NO, + column_order: list[Order] | None = None, + null_precedence: list[NullOrder] | None = None, +) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/reduce.pyi b/python/pylibcudf/pylibcudf/reduce.pyi new file mode 100644 index 00000000000..03193d3d0d9 --- /dev/null +++ b/python/pylibcudf/pylibcudf/reduce.pyi @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from enum import IntEnum, auto + +from pylibcudf.aggregation import Aggregation +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.types import DataType + +class ScanType(IntEnum): + INCLUSIVE = auto() + EXCLUSIVE = auto() + +def reduce(col: Column, agg: Aggregation, data_type: DataType) -> Scalar: ... +def scan(col: Column, agg: Aggregation, inclusive: ScanType) -> Column: ... +def minmax(col: Column) -> tuple[Scalar, Scalar]: ... diff --git a/python/pylibcudf/pylibcudf/replace.pyi b/python/pylibcudf/pylibcudf/replace.pyi new file mode 100644 index 00000000000..b4d65e76f76 --- /dev/null +++ b/python/pylibcudf/pylibcudf/replace.pyi @@ -0,0 +1,29 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from enum import IntEnum, auto + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +class ReplacePolicy(IntEnum): + PRECEDING = auto() + FOLLOWING = auto() + +def replace_nulls( + source_column: Column, replacement: Column | Scalar | ReplacePolicy +) -> Column: ... +def find_and_replace_all( + source_column: Column, + values_to_replace: Column, + replacement_values: Column, +) -> Column: ... +def clamp( + source_column: Column, + lo: Scalar, + hi: Scalar, + lo_replace: Scalar | None = None, + hi_replace: Scalar | None = None, +) -> Column: ... +def normalize_nans_and_zeros( + source_column: Column, inplace: bool = False +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/reshape.pyi b/python/pylibcudf/pylibcudf/reshape.pyi new file mode 100644 index 00000000000..d8d0ffcc3e0 --- /dev/null +++ b/python/pylibcudf/pylibcudf/reshape.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.table import Table + +def interleave_columns(source_table: Table) -> Column: ... +def tile(source_table: Table, count: int) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/rolling.pyi b/python/pylibcudf/pylibcudf/rolling.pyi new file mode 100644 index 00000000000..ca0111e01ec --- /dev/null +++ b/python/pylibcudf/pylibcudf/rolling.pyi @@ -0,0 +1,12 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.aggregation import Aggregation +from pylibcudf.column import Column + +def rolling_window[WindowType: (Column, int)]( + source: Column, + preceding_window: WindowType, + following_window: WindowType, + min_periods: int, + agg: Aggregation, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/round.pyi b/python/pylibcudf/pylibcudf/round.pyi new file mode 100644 index 00000000000..0099ad3c510 --- /dev/null +++ b/python/pylibcudf/pylibcudf/round.pyi @@ -0,0 +1,15 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from enum import IntEnum, auto + +from pylibcudf.column import Column + +class RoundingMethod(IntEnum): + HALF_UP = auto() + HALF_EVEN = auto() + +def round( + source: Column, + decimal_places: int = 0, + round_method: RoundingMethod = RoundingMethod.HALF_UP, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/scalar.pyi b/python/pylibcudf/pylibcudf/scalar.pyi new file mode 100644 index 00000000000..0b72b10ef86 --- /dev/null +++ b/python/pylibcudf/pylibcudf/scalar.pyi @@ -0,0 +1,10 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.types import DataType + +class Scalar: + def type(self) -> DataType: ... + def is_valid(self) -> bool: ... + @staticmethod + def empty_like(column: Column) -> Scalar: ... diff --git a/python/pylibcudf/pylibcudf/search.pyi b/python/pylibcudf/pylibcudf/search.pyi new file mode 100644 index 00000000000..7f292b129b2 --- /dev/null +++ b/python/pylibcudf/pylibcudf/search.pyi @@ -0,0 +1,19 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from pylibcudf.column import Column +from pylibcudf.table import Table +from pylibcudf.types import NullOrder, Order + +def lower_bound( + haystack: Table, + needles: Table, + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Column: ... +def upper_bound( + haystack: Table, + needles: Table, + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Column: ... +def contains(haystack: Column, needles: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi new file mode 100644 index 00000000000..60a42023a6a --- /dev/null +++ b/python/pylibcudf/pylibcudf/sorting.pyi @@ -0,0 +1,62 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.aggregation import RankMethod +from pylibcudf.column import Column +from pylibcudf.table import Table +from pylibcudf.types import NullOrder, NullPolicy, Order + +def sorted_order( + source_table: Table, column_order: list, null_precedence: list +) -> Column: ... +def stable_sorted_order( + source_table: Table, + column_order: list, + null_precedence: list, +) -> Column: ... +def rank( + input_view: Column, + method: RankMethod, + column_order: Order, + null_handling: NullPolicy, + null_precedence: NullOrder, + percentage: bool, +) -> Column: ... +def is_sorted( + table: Table, column_order: list[Order], null_precedence: list[NullOrder] +) -> bool: ... +def segmented_sort_by_key( + values: Table, + keys: Table, + segment_offsets: Column, + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Table: ... +def stable_segmented_sort_by_key( + values: Table, + keys: Table, + segment_offsets: Column, + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Table: ... +def sort_by_key( + values: Table, + keys: Table, + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Table: ... 
+def stable_sort_by_key( + values: Table, + keys: Table, + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Table: ... +def sort( + source_table: Table, + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Table: ... +def stable_sort( + source_table: Table, + column_order: list[Order], + null_precedence: list[NullOrder], +) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi new file mode 100644 index 00000000000..fe1cf6ee4fc --- /dev/null +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi @@ -0,0 +1,53 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from enum import IntEnum, auto + +from pylibcudf.column import Column +from pylibcudf.table import Table +from pylibcudf.types import NanEquality, NanPolicy, NullEquality, NullPolicy + +class DuplicateKeepOption(IntEnum): + KEEP_ANY = auto() + KEEP_FIRST = auto() + KEEP_LAST = auto() + KEEP_NONE = auto() + +def drop_nulls( + source_table: Table, keys: list[int], keep_threshold: int +) -> Table: ... +def drop_nans( + source_table: Table, keys: list[int], keep_threshold: int +) -> Table: ... +def apply_boolean_mask(source_table: Table, boolean_mask: Column) -> Table: ... +def unique( + input: Table, + keys: list[int], + keep: DuplicateKeepOption, + nulls_equal: NullEquality, +) -> Table: ... +def distinct( + input: Table, + keys: list[int], + keep: DuplicateKeepOption, + nulls_equal: NullEquality, + nans_equal: NanEquality, +) -> Table: ... +def distinct_indices( + input: Table, + keep: DuplicateKeepOption, + nulls_equal: NullEquality, + nans_equal: NanEquality, +) -> Column: ... +def stable_distinct( + input: Table, + keys: list[int], + keep: DuplicateKeepOption, + nulls_equal: NullEquality, + nans_equal: NanEquality, +) -> Table: ... +def unique_count( + column: Column, null_handling: NullPolicy, nan_handling: NanPolicy +) -> int: ... 
+def distinct_count( + column: Column, null_handling: NullPolicy, nan_handling: NanPolicy +) -> int: ... diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pyi b/python/pylibcudf/pylibcudf/strings/__init__.pyi new file mode 100644 index 00000000000..492ed311c28 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/__init__.pyi @@ -0,0 +1,55 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.strings import ( + attributes, + capitalize, + case, + char_types, + combine, + contains, + convert, + extract, + find, + find_multiple, + findall, + padding, + regex_flags, + regex_program, + repeat, + replace, + replace_re, + side_type, + slice, + split, + strip, + translate, + wrap, +) +from pylibcudf.strings.side_type import SideType + +__all__ = [ + "SideType", + "attributes", + "capitalize", + "case", + "char_types", + "combine", + "contains", + "convert", + "extract", + "find", + "find_multiple", + "findall", + "padding", + "regex_flags", + "regex_program", + "repeat", + "replace", + "replace_re", + "side_type", + "slice", + "split", + "strip", + "translate", + "wrap", +] diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyi b/python/pylibcudf/pylibcudf/strings/attributes.pyi new file mode 100644 index 00000000000..7fd5c9773d4 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/attributes.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def count_characters(source_strings: Column) -> Column: ... +def count_bytes(source_strings: Column) -> Column: ... +def code_points(source_strings: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyi b/python/pylibcudf/pylibcudf/strings/capitalize.pyi new file mode 100644 index 00000000000..05bf8043727 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyi @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def capitalize(input: Column, delimiters: Scalar | None = None) -> Column: ... +def title(input: Column) -> Column: ... +def is_title(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/case.pyi b/python/pylibcudf/pylibcudf/strings/case.pyi new file mode 100644 index 00000000000..4e50db4d1da --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/case.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def to_lower(input: Column) -> Column: ... +def to_upper(input: Column) -> Column: ... +def swapcase(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyi b/python/pylibcudf/pylibcudf/strings/char_types.pyi new file mode 100644 index 00000000000..1e3f57082ef --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyi @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from enum import IntEnum, auto + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +class StringCharacterTypes(IntEnum): + DECIMAL = auto() + NUMERIC = auto() + DIGIT = auto() + ALPHA = auto() + SPACE = auto() + UPPER = auto() + LOWER = auto() + ALPHANUM = auto() + CASE_TYPES = auto() + ALL_TYPES = auto() + +def all_characters_of_type( + source_strings: Column, + types: StringCharacterTypes, + verify_types: StringCharacterTypes, +) -> Column: ... +def filter_characters_of_type( + source_strings: Column, + types_to_remove: StringCharacterTypes, + replacement: Scalar, + types_to_keep: StringCharacterTypes, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi new file mode 100644 index 00000000000..0833ac006c0 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/combine.pyi @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from enum import IntEnum, auto + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.table import Table + +class SeparatorOnNulls(IntEnum): + YES = auto() + NO = auto() + +class OutputIfEmptyList(IntEnum): + EMPTY_STRING = auto() + NULL_ELEMENT = auto() + +def concatenate( + strings_columns: Table, + separator: Column | Scalar, + narep: Scalar | None = None, + col_narep: Scalar | None = None, + separate_nulls: SeparatorOnNulls = SeparatorOnNulls.YES, +) -> Column: ... +def join_strings( + input: Column, separator: Scalar, narep: Scalar +) -> Column: ... +def join_list_elements( + source_strings: Column, + separator: Column | Scalar, + separator_narep: Scalar, + string_narep: Scalar, + separate_nulls: SeparatorOnNulls, + empty_list_policy: OutputIfEmptyList, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyi b/python/pylibcudf/pylibcudf/strings/contains.pyi new file mode 100644 index 00000000000..1f0620383b3 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/contains.pyi @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.strings.regex_program import RegexProgram + +def contains_re(input: Column, prog: RegexProgram) -> Column: ... +def count_re(input: Column, prog: RegexProgram) -> Column: ... +def matches_re(input: Column, prog: RegexProgram) -> Column: ... +def like( + input: Column, + pattern: Column | Scalar, + escape_character: Scalar | None = None, +) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py index aa27a7c8929..08b5034456e 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py @@ -10,3 +10,15 @@ convert_lists, convert_urls, ) + +__all__ = [ + "convert_booleans", + "convert_datetime", + "convert_durations", + "convert_fixed_point", + "convert_floats", + "convert_integers", + "convert_ipv4", + "convert_lists", + "convert_urls", +] diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi b/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi new file mode 100644 index 00000000000..edf615376c2 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi @@ -0,0 +1,24 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.strings.convert import ( + convert_booleans, + convert_datetime, + convert_durations, + convert_fixed_point, + convert_floats, + convert_integers, + convert_ipv4, + convert_lists, + convert_urls, +) + +__all__ = [ + "convert_booleans", + "convert_datetime", + "convert_durations", + "convert_fixed_point", + "convert_floats", + "convert_integers", + "convert_ipv4", + "convert_lists", + "convert_urls", +] diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi new file mode 100644 index 00000000000..77c09242e9a --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def to_booleans(input: Column, true_string: Scalar) -> Column: ... +def from_booleans( + booleans: Column, true_string: Scalar, false_string: Scalar +) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi new file mode 100644 index 00000000000..c6857169765 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi @@ -0,0 +1,12 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.types import DataType + +def to_timestamps( + input: Column, timestamp_type: DataType, format: str +) -> Column: ... +def from_timestamps( + timestamps: Column, format: str, input_strings_names: Column +) -> Column: ... +def is_timestamp(input: Column, format: str) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi new file mode 100644 index 00000000000..a5787a5fe49 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.types import DataType + +def to_durations( + input: Column, duration_type: DataType, format: str +) -> Column: ... +def from_durations(durations: Column, format: str | None = None) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi new file mode 100644 index 00000000000..1192d3dfcd6 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi @@ -0,0 +1,10 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.types import DataType + +def to_fixed_point(input: Column, output_type: DataType) -> Column: ... +def from_fixed_point(input: Column) -> Column: ... +def is_fixed_point( + input: Column, decimal_type: DataType | None = None +) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi new file mode 100644 index 00000000000..ddf4042e10d --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.types import DataType + +def to_floats(strings: Column, output_type: DataType) -> Column: ... +def from_floats(floats: Column) -> Column: ... +def is_float(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi new file mode 100644 index 00000000000..b96226fba90 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.types import DataType + +def to_integers(input: Column, output_type: DataType) -> Column: ... +def from_integers(integers: Column) -> Column: ... +def is_integer(input: Column, int_type: DataType | None = None) -> Column: ... +def hex_to_integers(input: Column, output_type: DataType) -> Column: ... +def is_hex(input: Column) -> Column: ... +def integers_to_hex(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi new file mode 100644 index 00000000000..b017b32598c --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def ipv4_to_integers(input: Column) -> Column: ... +def integers_to_ipv4(integers: Column) -> Column: ... +def is_ipv4(input: Column) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi new file mode 100644 index 00000000000..6ab3a4183e9 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi @@ -0,0 +1,10 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def format_list_column( + input: Column, + na_rep: Scalar | None = None, + separators: Column | None = None, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi new file mode 100644 index 00000000000..40321c3ae66 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi @@ -0,0 +1,6 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def url_encode(Input: Column) -> Column: ... +def url_decode(Input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyi b/python/pylibcudf/pylibcudf/strings/extract.pyi new file mode 100644 index 00000000000..4354bd3072d --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/extract.pyi @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.strings.regex_program import RegexProgram +from pylibcudf.table import Table + +def extract(input: Column, prog: RegexProgram) -> Table: ... +def extract_all_record(input: Column, prog: RegexProgram) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/find.pyi b/python/pylibcudf/pylibcudf/strings/find.pyi new file mode 100644 index 00000000000..3d04a9c3161 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/find.pyi @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def find( + input: Column, target: Column | Scalar, start: int = 0, stop: int = -1 +) -> Column: ... +def rfind( + input: Column, target: Scalar, start: int = 0, stop: int = -1 +) -> Column: ... +def contains(input: Column, target: Column | Scalar) -> Column: ... +def starts_with(input: Column, target: Column | Scalar) -> Column: ... +def ends_with(input: Column, target: Column | Scalar) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyi b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi new file mode 100644 index 00000000000..3d46fd2fa6d --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def find_multiple(input: Column, targets: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyi b/python/pylibcudf/pylibcudf/strings/findall.pyi new file mode 100644 index 00000000000..77e38581d22 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/findall.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.strings.regex_program import RegexProgram + +def find_re(input: Column, pattern: RegexProgram) -> Column: ... +def findall(input: Column, pattern: RegexProgram) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyi b/python/pylibcudf/pylibcudf/strings/padding.pyi new file mode 100644 index 00000000000..a991935e6e5 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/padding.pyi @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.strings.side_type import SideType + +def pad( + input: Column, width: int, side: SideType, fill_char: str +) -> Column: ... +def zfill(input: Column, width: int) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/regex_flags.pyi b/python/pylibcudf/pylibcudf/strings/regex_flags.pyi new file mode 100644 index 00000000000..2576b5575de --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from enum import IntEnum, auto + +class RegexFlags(IntEnum): + DEFAULT = auto() + MULTILINE = auto() + DOTALL = auto() diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyi b/python/pylibcudf/pylibcudf/strings/regex_program.pyi new file mode 100644 index 00000000000..6c853bcfc44 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.strings.regex_flags import RegexFlags + +class RegexProgram: + @staticmethod + def create(pattern: str, flags: RegexFlags) -> RegexProgram: ... diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyi b/python/pylibcudf/pylibcudf/strings/repeat.pyi new file mode 100644 index 00000000000..93a46b71caa --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/repeat.pyi @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def repeat_strings(input: Column, repeat_times: Column | int) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyi b/python/pylibcudf/pylibcudf/strings/replace.pyi new file mode 100644 index 00000000000..64df09ef7e8 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/replace.pyi @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def replace( + input: Column, target: Scalar, repl: Scalar, maxrepl: int = -1 +) -> Column: ... +def replace_multiple( + input: Column, target: Column, repl: Column, maxrepl: int = -1 +) -> Column: ... +def replace_slice( + input: Column, repl: Scalar | None = None, start: int = 0, stop: int = -1 +) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyi b/python/pylibcudf/pylibcudf/strings/replace_re.pyi new file mode 100644 index 00000000000..056bafbf7ef --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyi @@ -0,0 +1,27 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from typing import overload + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.strings.regex_flags import RegexFlags +from pylibcudf.strings.regex_program import RegexProgram + +@overload +def replace_re( + input: Column, + pattern: RegexProgram, + replacement: Scalar, + max_replace_count: int = -1, +) -> Column: ... +@overload +def replace_re( + input: Column, + patterns: list[str], + replacement: Column, + max_replace_count: int = -1, + flags: RegexFlags = RegexFlags.DEFAULT, +) -> Column: ... +def replace_with_backrefs( + input: Column, prog: RegexProgram, replacement: str +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/side_type.pyi b/python/pylibcudf/pylibcudf/strings/side_type.pyi new file mode 100644 index 00000000000..15083120be0 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/side_type.pyi @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from enum import IntEnum, auto + +class SideType(IntEnum): + LEFT = auto() + RIGHT = auto() + BOTH = auto() diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyi b/python/pylibcudf/pylibcudf/strings/slice.pyi new file mode 100644 index 00000000000..7bf9a7cb8c6 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/slice.pyi @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +def slice_strings( + input: Column, + start: Column | Scalar | None = None, + stop: Column | Scalar | None = None, + step: Scalar | None = None, +) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/split/__init__.py b/python/pylibcudf/pylibcudf/strings/split/__init__.py index 2033e5e275b..db2a597882e 100644 --- a/python/pylibcudf/pylibcudf/strings/split/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/split/__init__.py @@ -1,2 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from . import partition, split + +__all__ = ["partition", "split"] diff --git a/python/pylibcudf/pylibcudf/strings/split/__init__.pyi b/python/pylibcudf/pylibcudf/strings/split/__init__.pyi new file mode 100644 index 00000000000..c44bce048b6 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/split/__init__.pyi @@ -0,0 +1,4 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.strings.split import partition, split + +__all__ = ["partition", "split"] diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyi b/python/pylibcudf/pylibcudf/strings/split/partition.pyi new file mode 100644 index 00000000000..f19a463bd7e --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyi @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.table import Table + +def partition(input: Column, delimiter: Scalar | None = None) -> Table: ... +def rpartition(input: Column, delimiter: Scalar | None = None) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyi b/python/pylibcudf/pylibcudf/strings/split/split.pyi new file mode 100644 index 00000000000..3ccf0bc2a01 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/split/split.pyi @@ -0,0 +1,27 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.strings.regex_program import RegexProgram +from pylibcudf.table import Table + +def split( + strings_column: Column, delimiter: Scalar, maxsplit: int +) -> Table: ... 
+def rsplit( + strings_column: Column, delimiter: Scalar, maxsplit: int +) -> Table: ... +def split_record( + strings: Column, delimiter: Scalar, maxsplit: int +) -> Column: ... +def rsplit_record( + strings: Column, delimiter: Scalar, maxsplit: int +) -> Column: ... +def split_re(input: Column, prog: RegexProgram, maxsplit: int) -> Table: ... +def rsplit_re(input: Column, prog: RegexProgram, maxsplit: int) -> Table: ... +def split_record_re( + input: Column, prog: RegexProgram, maxsplit: int +) -> Column: ... +def rsplit_record_re( + input: Column, prog: RegexProgram, maxsplit: int +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyi b/python/pylibcudf/pylibcudf/strings/strip.pyi new file mode 100644 index 00000000000..680355fc88f --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/strip.pyi @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar +from pylibcudf.strings.side_type import SideType + +def strip( + input: Column, + side: SideType = SideType.BOTH, + to_strip: Scalar | None = None, +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyi b/python/pylibcudf/pylibcudf/strings/translate.pyi new file mode 100644 index 00000000000..adeafcc2641 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/translate.pyi @@ -0,0 +1,20 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from collections.abc import Mapping +from enum import IntEnum, auto + +from pylibcudf.column import Column +from pylibcudf.scalar import Scalar + +class FilterType(IntEnum): + KEEP = auto() + REMOVE = auto() + +def translate( + input: Column, chars_table: Mapping[int | str, int | str] +) -> Column: ... +def filter_characters( + input: Column, + characters_to_filter: Mapping[int | str, int | str], + keep_characters: FilterType, + replacement: Scalar, +) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyi b/python/pylibcudf/pylibcudf/strings/wrap.pyi new file mode 100644 index 00000000000..5658f279197 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/wrap.pyi @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +def wrap(input: Column, width: int) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi new file mode 100644 index 00000000000..ad4c9146feb --- /dev/null +++ b/python/pylibcudf/pylibcudf/table.pyi @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column import Column + +class Table: + def __init__(self, columns: list[Column]) -> None: ... + def num_columns(self) -> int: ... + def num_rows(self) -> int: ... + def columns(self) -> list[Column]: ... diff --git a/python/pylibcudf/pylibcudf/traits.pyi b/python/pylibcudf/pylibcudf/traits.pyi new file mode 100644 index 00000000000..4c76a07a95c --- /dev/null +++ b/python/pylibcudf/pylibcudf/traits.pyi @@ -0,0 +1,22 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.types import DataType + +def is_relationally_comparable(typ: DataType) -> bool: ... +def is_equality_comparable(typ: DataType) -> bool: ... +def is_numeric(typ: DataType) -> bool: ... +def is_index_type(typ: DataType) -> bool: ... +def is_unsigned(typ: DataType) -> bool: ... +def is_integral(typ: DataType) -> bool: ... +def is_integral_not_bool(typ: DataType) -> bool: ... +def is_floating_point(typ: DataType) -> bool: ... +def is_boolean(typ: DataType) -> bool: ... +def is_timestamp(typ: DataType) -> bool: ... +def is_fixed_point(typ: DataType) -> bool: ... +def is_duration(typ: DataType) -> bool: ... +def is_chrono(typ: DataType) -> bool: ... +def is_dictionary(typ: DataType) -> bool: ... +def is_fixed_width(typ: DataType) -> bool: ... +def is_compound(typ: DataType) -> bool: ... +def is_nested(typ: DataType) -> bool: ... 
+def is_bit_castable(source: DataType, target: DataType) -> bool: ... diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi new file mode 100644 index 00000000000..103b9ec36ab --- /dev/null +++ b/python/pylibcudf/pylibcudf/transform.pyi @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.column import Column +from pylibcudf.expressions import Expression +from pylibcudf.gpumemoryview import gpumemoryview +from pylibcudf.table import Table +from pylibcudf.types import DataType + +def nans_to_nulls(input: Column) -> tuple[gpumemoryview, int]: ... +def compute_column(input: Table, expr: Expression) -> Column: ... +def bools_to_mask(input: Column) -> tuple[gpumemoryview, int]: ... +def mask_to_bools(bitmask: int, begin_bit: int, end_bit: int) -> Column: ... +def transform( + input: Column, unary_udf: str, output_type: DataType, is_ptx: bool +) -> Column: ... +def encode(input: Table) -> tuple[Table, Column]: ... +def one_hot_encode(input_column: Column, categories: Column) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/transpose.pyi b/python/pylibcudf/pylibcudf/transpose.pyi new file mode 100644 index 00000000000..a84ab8a60ea --- /dev/null +++ b/python/pylibcudf/pylibcudf/transpose.pyi @@ -0,0 +1,4 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.table import Table + +def transpose(input_table: Table) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi new file mode 100644 index 00000000000..ce000cafe9d --- /dev/null +++ b/python/pylibcudf/pylibcudf/types.pyi @@ -0,0 +1,85 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+from enum import IntEnum, auto + +class Interpolation(IntEnum): + LINEAR = auto() + LOWER = auto() + HIGHER = auto() + MIDPOINT = auto() + NEAREST = auto() + +class MaskState(IntEnum): + UNALLOCATED = auto() + UNINITIALIZED = auto() + ALL_VALID = auto() + ALL_NULL = auto() + +class NanEquality(IntEnum): + ALL_EQUAL = auto() + UNEQUAL = auto() + +class NanPolicy(IntEnum): + NAN_IS_NULL = auto() + NAN_IS_VALID = auto() + +class NullEquality(IntEnum): + EQUAL = auto() + UNEQUAL = auto() + +class NullOrder(IntEnum): + AFTER = auto() + BEFORE = auto() + +class NullPolicy(IntEnum): + EXCLUDE = auto() + INCLUDE = auto() + +class Order(IntEnum): + ASCENDING = auto() + DESCENDING = auto() + +class Sorted(IntEnum): + NO = auto() + YES = auto() + +class TypeId(IntEnum): + EMPTY = auto() + INT8 = auto() + INT16 = auto() + INT32 = auto() + INT64 = auto() + UINT8 = auto() + UINT16 = auto() + UINT32 = auto() + UINT64 = auto() + FLOAT32 = auto() + FLOAT64 = auto() + BOOL8 = auto() + TIMESTAMP_DAYS = auto() + TIMESTAMP_SECONDS = auto() + TIMESTAMP_MILLISECONDS = auto() + TIMESTAMP_MICROSECONDS = auto() + TIMESTAMP_NANOSECONDS = auto() + DURATION_DAYS = auto() + DURATION_SECONDS = auto() + DURATION_MILLISECONDS = auto() + DURATION_MICROSECONDS = auto() + DURATION_NANOSECONDS = auto() + DICTIONARY32 = auto() + STRING = auto() + LIST = auto() + DECIMAL32 = auto() + DECIMAL64 = auto() + DECIMAL128 = auto() + STRUCT = auto() + NUM_TYPE_IDS = auto() + +class DataType: + def __init__(self, type_id: TypeId, scale: int = 0) -> None: ... + def id(self) -> TypeId: ... + def scale(self) -> int: ... + +def size_of(dtype: DataType) -> int: ... + +SIZE_TYPE: DataType +SIZE_TYPE_ID: TypeId diff --git a/python/pylibcudf/pylibcudf/unary.pyi b/python/pylibcudf/pylibcudf/unary.pyi new file mode 100644 index 00000000000..d3095e56528 --- /dev/null +++ b/python/pylibcudf/pylibcudf/unary.pyi @@ -0,0 +1,38 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from enum import IntEnum, auto + +from pylibcudf.column import Column +from pylibcudf.types import DataType + +class UnaryOperator(IntEnum): + SIN = auto() + COS = auto() + TAN = auto() + ARCSIN = auto() + ARCCOS = auto() + ARCTAN = auto() + SINH = auto() + COSH = auto() + TANH = auto() + ARCSINH = auto() + ARCCOSH = auto() + ARCTANH = auto() + EXP = auto() + LOG = auto() + SQRT = auto() + CBRT = auto() + CEIL = auto() + FLOOR = auto() + ABS = auto() + RINT = auto() + BIT_INVERT = auto() + NOT = auto() + +def unary_operation(input: Column, op: UnaryOperator) -> Column: ... +def is_null(input: Column) -> Column: ... +def is_valid(input: Column) -> Column: ... +def cast(input: Column, data_type: DataType) -> Column: ... +def is_nan(input: Column) -> Column: ... +def is_not_nan(input: Column) -> Column: ... +def is_supported_cast(from_: DataType, to: DataType) -> bool: ... From af3dc64f863cd561248de02a241a538af534dc50 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 6 Nov 2024 17:26:43 +0000 Subject: [PATCH 05/16] Minor adaptations in response to type annotations in pylibcudf --- python/cudf_polars/cudf_polars/containers/dataframe.py | 2 +- python/cudf_polars/cudf_polars/dsl/expressions/datetime.py | 4 +++- python/cudf_polars/cudf_polars/dsl/expressions/literal.py | 2 +- python/cudf_polars/cudf_polars/dsl/ir.py | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index 08bc9d0ea3f..7560a0f5a64 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -60,7 +60,7 @@ def to_polars(self) -> pl.DataFrame: # To guarantee we produce correct names, we therefore # serialise with names we control and rename with that map. 
name_map = {f"column_{i}": name for i, name in enumerate(self.column_map)} - table: pa.Table = plc.interop.to_arrow( + table = plc.interop.to_arrow( self.table, [plc.interop.ColumnMetadata(name=name) for name in name_map], ) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py index 65fa4bfa62f..cd8e5c6a4eb 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/datetime.py @@ -27,7 +27,9 @@ class TemporalFunction(Expr): __slots__ = ("name", "options") - _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = { + _COMPONENT_MAP: ClassVar[ + dict[pl_expr.TemporalFunction, plc.datetime.DatetimeComponent] + ] = { pl_expr.TemporalFunction.Year: plc.datetime.DatetimeComponent.YEAR, pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH, pl_expr.TemporalFunction.Day: plc.datetime.DatetimeComponent.DAY, diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py index c16313bf83c..7eba0c110ab 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/literal.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/literal.py @@ -58,7 +58,7 @@ def collect_agg(self, *, depth: int) -> AggInfo: class LiteralColumn(Expr): __slots__ = ("value",) _non_child = ("dtype", "value") - value: pa.Array[Any, Any] + value: pa.Array[Any] def __init__(self, dtype: plc.DataType, value: pl.Series) -> None: self.dtype = dtype diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index a242ff9300f..727e8ce7666 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -498,7 +498,7 @@ def do_evaluate( # Mask must have been applied. 
return df elif typ == "ndjson": - json_schema: list[tuple[str, str, list]] = [ + json_schema: list[plc.io.json.NameAndType] = [ (name, typ, []) for name, typ in schema.items() ] plc_tbl_w_meta = plc.io.json.read_json( From 5eb87fb8771993c827117d6433da513d3b3b3798 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 6 Nov 2024 18:14:49 +0000 Subject: [PATCH 06/16] Use typed enum for inclusive parameter in label_bins --- python/cudf/cudf/_lib/labeling.pyx | 4 ++-- python/pylibcudf/pylibcudf/labeling.pxd | 4 ++-- python/pylibcudf/pylibcudf/labeling.pyi | 11 +++++++++-- python/pylibcudf/pylibcudf/labeling.pyx | 23 ++++++----------------- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/python/cudf/cudf/_lib/labeling.pyx b/python/cudf/cudf/_lib/labeling.pyx index 3966cce8981..524bfd3b2e8 100644 --- a/python/cudf/cudf/_lib/labeling.pyx +++ b/python/cudf/cudf/_lib/labeling.pyx @@ -17,8 +17,8 @@ def label_bins(Column input, Column left_edges, cbool left_inclusive, plc_column = plc.labeling.label_bins( input.to_pylibcudf(mode="read"), left_edges.to_pylibcudf(mode="read"), - left_inclusive, + plc.labeling.Inclusive.YES if left_inclusive else plc.labeling.Inclusive.NO, right_edges.to_pylibcudf(mode="read"), - right_inclusive + plc.labeling.Inclusive.YES if right_inclusive else plc.labeling.Inclusive.NO, ) return Column.from_pylibcudf(plc_column) diff --git a/python/pylibcudf/pylibcudf/labeling.pxd b/python/pylibcudf/pylibcudf/labeling.pxd index 6f8797ae7d3..b1f9f2e806d 100644 --- a/python/pylibcudf/pylibcudf/labeling.pxd +++ b/python/pylibcudf/pylibcudf/labeling.pxd @@ -8,7 +8,7 @@ from .column cimport Column cpdef Column label_bins( Column input, Column left_edges, - bool left_inclusive, + inclusive left_inclusive, Column right_edges, - bool right_inclusive + inclusive right_inclusive ) diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi index 362a92ca7c1..0a6f2c13719 100644 --- 
a/python/pylibcudf/pylibcudf/labeling.pyi +++ b/python/pylibcudf/pylibcudf/labeling.pyi @@ -1,10 +1,17 @@ # Copyright (c) 2024, NVIDIA CORPORATION. + +from enum import IntEnum, auto + from pylibcudf.column import Column +class Inclusive(IntEnum): + YES = auto() + NO = auto() + def label_bins( input: Column, left_edges: Column, - left_inclusive: bool, + left_inclusive: Inclusive, right_edges: Column, - right_inclusive: bool, + right_inclusive: Inclusive, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx index 226a9e14172..84a7d42283b 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyx +++ b/python/pylibcudf/pylibcudf/labeling.pyx @@ -14,9 +14,9 @@ from .column cimport Column cpdef Column label_bins( Column input, Column left_edges, - bool left_inclusive, + inclusive left_inclusive, Column right_edges, - bool right_inclusive + inclusive right_inclusive ): """Labels elements based on membership in the specified bins. @@ -28,11 +28,11 @@ cpdef Column label_bins( Column of input elements to label according to the specified bins. left_edges : Column Column of the left edge of each bin. - left_inclusive : bool + left_inclusive : Inclusive Whether or not the left edge is inclusive. right_edges : Column Column of the right edge of each bin. - right_inclusive : bool + right_inclusive : Inclusive Whether or not the right edge is inclusive. Returns @@ -42,24 +42,13 @@ cpdef Column label_bins( according to the specified bins. 
""" cdef unique_ptr[column] c_result - cdef inclusive c_left_inclusive = ( - inclusive.YES - if left_inclusive - else inclusive.NO - ) - cdef inclusive c_right_inclusive = ( - inclusive.YES - if right_inclusive - else inclusive.NO - ) - with nogil: c_result = cpp_labeling.label_bins( input.view(), left_edges.view(), - c_left_inclusive, + left_inclusive, right_edges.view(), - c_right_inclusive, + right_inclusive, ) return Column.from_libcudf(move(c_result)) From 4ef56b8839d4357d6ea6e10f76f622a66da57322 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 6 Nov 2024 18:17:18 +0000 Subject: [PATCH 07/16] Used typed enums for null/nan equality in list methods --- python/cudf/cudf/_lib/lists.pyx | 18 +- .../pylibcudf/libcudf/CMakeLists.txt | 1 + .../pylibcudf/libcudf/lists/CMakeLists.txt | 23 +++ .../pylibcudf/libcudf/lists/combine.pxd | 8 +- .../pylibcudf/libcudf/lists/combine.pyx | 0 .../pylibcudf/libcudf/lists/contains.pyx | 0 python/pylibcudf/pylibcudf/lists.pxd | 30 +++- python/pylibcudf/pylibcudf/lists.pyi | 46 ++++- python/pylibcudf/pylibcudf/lists.pyx | 163 ++++++------------ 9 files changed, 154 insertions(+), 135 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/libcudf/lists/CMakeLists.txt create mode 100644 python/pylibcudf/pylibcudf/libcudf/lists/combine.pyx create mode 100644 python/pylibcudf/pylibcudf/libcudf/lists/contains.pyx diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 12432ac6d5d..f28afd10f86 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -4,7 +4,9 @@ from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from pylibcudf.libcudf.types cimport null_order, size_type +from pylibcudf.libcudf.types cimport ( + nan_equality, null_equality, null_order, order, size_type +) from cudf._lib.column cimport Column from cudf._lib.utils cimport columns_from_pylibcudf_table @@ -37,8 +39,8 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal): 
return Column.from_pylibcudf( plc.lists.distinct( col.to_pylibcudf(mode="read"), - nulls_equal, - nans_all_equal, + null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL, + nan_equality.ALL_EQUAL if nans_all_equal else nan_equality.UNEQUAL, ) ) @@ -48,7 +50,7 @@ def sort_lists(Column col, bool ascending, str na_position): return Column.from_pylibcudf( plc.lists.sort_lists( col.to_pylibcudf(mode="read"), - ascending, + order.ASCENDING if ascending else order.DESCENDING, null_order.BEFORE if na_position == "first" else null_order.AFTER, False, ) @@ -91,7 +93,7 @@ def index_of_scalar(Column col, object py_search_key): plc.lists.index_of( col.to_pylibcudf(mode="read"), py_search_key.device_value.c_value, - True, + plc.lists.DuplicateFindOption.FIND_FIRST, ) ) @@ -102,7 +104,7 @@ def index_of_column(Column col, Column search_keys): plc.lists.index_of( col.to_pylibcudf(mode="read"), search_keys.to_pylibcudf(mode="read"), - True, + plc.lists.DuplicateFindOption.FIND_FIRST, ) ) @@ -123,7 +125,9 @@ def concatenate_list_elements(Column input_column, dropna=False): return Column.from_pylibcudf( plc.lists.concatenate_list_elements( input_column.to_pylibcudf(mode="read"), - dropna, + plc.lists.ConcatenateNullPolicy.IGNORE + if dropna + else plc.lists.ConcatenateNullPolicy.NULLIFY_OUTPUT_ROW, ) ) diff --git a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt index 15beaee47d4..00669ff579a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt @@ -24,4 +24,5 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp ) add_subdirectory(io) +add_subdirectory(lists) add_subdirectory(strings) diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/lists/CMakeLists.txt new file mode 100644 index 00000000000..c896db2c85a --- /dev/null +++
b/python/pylibcudf/pylibcudf/libcudf/lists/CMakeLists.txt @@ -0,0 +1,23 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources combine.pyx contains.pyx) + +set(linked_libraries cudf::cudf) + +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp_lists +) diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd index d077958ce03..09a5d84c64f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. 
+from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -9,10 +10,9 @@ from pylibcudf.libcudf.table.table_view cimport table_view cdef extern from "cudf/lists/combine.hpp" namespace \ "cudf::lists" nogil: - ctypedef enum concatenate_null_policy: - IGNORE "cudf::lists::concatenate_null_policy::IGNORE" - NULLIFY_OUTPUT_ROW \ - "cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW" + cpdef enum class concatenate_null_policy(int32_t): + IGNORE + NULLIFY_OUTPUT_ROW cdef unique_ptr[column] concatenate_rows( const table_view input_table diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pyx b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pyx b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd index e7d006e6e2e..10c1c26e24e 100644 --- a/python/pylibcudf/pylibcudf/lists.pxd +++ b/python/pylibcudf/pylibcudf/lists.pxd @@ -1,7 +1,11 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
from libcpp cimport bool -from pylibcudf.libcudf.types cimport null_order, size_type +from pylibcudf.libcudf.types cimport ( + nan_equality, null_equality, null_order, order, size_type +) +from pylibcudf.libcudf.lists.combine cimport concatenate_null_policy +from pylibcudf.libcudf.lists.contains cimport duplicate_find_option from .column cimport Column from .scalar cimport Scalar @@ -19,13 +23,13 @@ cpdef Table explode_outer(Table, size_type explode_column_idx) cpdef Column concatenate_rows(Table) -cpdef Column concatenate_list_elements(Column, bool dropna) +cpdef Column concatenate_list_elements(Column, concatenate_null_policy null_policy) cpdef Column contains(Column, ColumnOrScalar) cpdef Column contains_nulls(Column) -cpdef Column index_of(Column, ColumnOrScalar, bool) +cpdef Column index_of(Column, ColumnOrScalar, duplicate_find_option) cpdef Column reverse(Column) @@ -37,16 +41,24 @@ cpdef Column count_elements(Column) cpdef Column sequences(Column, Column, Column steps = *) -cpdef Column sort_lists(Column, bool, null_order, bool stable = *) +cpdef Column sort_lists(Column, order, null_order, bool stable = *) -cpdef Column difference_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*) +cpdef Column difference_distinct( + Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=* +) -cpdef Column have_overlap(Column, Column, bool nulls_equal=*, bool nans_equal=*) +cpdef Column have_overlap( + Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=* +) -cpdef Column intersect_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*) +cpdef Column intersect_distinct( + Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=* +) -cpdef Column union_distinct(Column, Column, bool nulls_equal=*, bool nans_equal=*) +cpdef Column union_distinct( + Column, Column, null_equality nulls_equal=*, nan_equality nans_equal=* +) cpdef Column apply_boolean_mask(Column, Column) -cpdef Column distinct(Column, bool, bool) 
+cpdef Column distinct(Column, null_equality, nan_equality) diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi index e5d186572cd..4e8966ce98a 100644 --- a/python/pylibcudf/pylibcudf/lists.pyi +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -1,17 +1,31 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from enum import IntEnum, auto + from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table -from pylibcudf.types import NullOrder +from pylibcudf.types import NanEquality, NullEquality, NullOrder, Order + +class ConcatenateNullPolicy(IntEnum): + IGNORE = auto() + NULLIFY_OUTPUT_ROW = auto() + +class DuplicateFindOption(IntEnum): + FIND_FIRST = auto() + FIND_LAST = auto() def explode_outer(input: Table, explode_column_idx: int) -> Table: ... def concatenate_rows(input: Table) -> Column: ... -def concatenate_list_elements(input: Column, dropna: bool) -> Column: ... +def concatenate_list_elements( + input: Column, null_policy: ConcatenateNullPolicy +) -> Column: ... def contains(input: Column, search_key: Column | Scalar) -> Column: ... def contains_nulls(input: Column) -> Column: ... def index_of( - input: Column, search_key: Column | Scalar, find_first_option: bool + input: Column, + search_key: Column | Scalar, + find_option: DuplicateFindOption, ) -> Column: ... def reverse(input: Column) -> Column: ... def segmented_gather(input: Column, gather_map_list: Column) -> Column: ... @@ -22,21 +36,35 @@ def sequences( ) -> Column: ... def sort_lists( input: Column, - ascending: bool, + sort_order: Order, na_position: NullOrder, stable: bool = False, ) -> Column: ... def difference_distinct( - lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True + lhs: Column, + rhs: Column, + nulls_equal: NullEquality = NullEquality.EQUAL, + nans_equal: NanEquality = NanEquality.ALL_EQUAL, ) -> Column: ... 
def have_overlap( - lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True + lhs: Column, + rhs: Column, + nulls_equal: NullEquality = NullEquality.EQUAL, + nans_equal: NanEquality = NanEquality.ALL_EQUAL, ) -> Column: ... def intersect_distinct( - lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True + lhs: Column, + rhs: Column, + nulls_equal: NullEquality = NullEquality.EQUAL, + nans_equal: NanEquality = NanEquality.ALL_EQUAL, ) -> Column: ... def union_distinct( - lhs: Column, rhs: Column, nulls_equal: bool = True, nans_equal: bool = True + lhs: Column, + rhs: Column, + nulls_equal: NullEquality = NullEquality.EQUAL, + nans_equal: NanEquality = NanEquality.ALL_EQUAL, ) -> Column: ... def apply_boolean_mask(input: Column, mask: Column) -> Column: ... -def distinct(input: Column, nulls_equal: bool, nans_equal: bool) -> Column: ... +def distinct( + input: Column, nulls_equal: NullEquality, nans_equal: NanEquality +) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index ecaf62d6895..f6ffb1874b9 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -42,6 +42,9 @@ from pylibcudf.libcudf.types cimport ( ) from pylibcudf.lists cimport ColumnOrScalar, ColumnOrSizeType +from pylibcudf.libcudf.lists.combine import concatenate_null_policy as ConcatenateNullPolicy # no-cython-lint +from pylibcudf.libcudf.lists.contains import duplicate_find_option as DuplicateFindOption # no-cython-lint + from .column cimport Column, ListColumnView from .scalar cimport Scalar from .table cimport Table @@ -97,7 +100,9 @@ cpdef Column concatenate_rows(Table input): return Column.from_libcudf(move(c_result)) -cpdef Column concatenate_list_elements(Column input, bool dropna): +cpdef Column concatenate_list_elements( + Column input, concatenate_null_policy null_policy +): """Concatenate multiple lists on the same row into a single list. 
For details, see :cpp:func:`concatenate_list_elements`. @@ -106,20 +111,14 @@ cpdef Column concatenate_list_elements(Column input, bool dropna): ---------- input : Column The input column - dropna : bool - If true, null list elements will be ignored - from concatenation. Otherwise any input null values will result in - the corresponding output row being set to null. + null_policy : ConcatenateNullPolicy + How to treat null list elements. Returns ------- Column A new Column of concatenated list elements """ - cdef concatenate_null_policy null_policy = ( - concatenate_null_policy.IGNORE if dropna - else concatenate_null_policy.NULLIFY_OUTPUT_ROW - ) cdef unique_ptr[column] c_result with nogil: @@ -191,7 +190,9 @@ cpdef Column contains_nulls(Column input): return Column.from_libcudf(move(c_result)) -cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_option): +cpdef Column index_of( + Column input, ColumnOrScalar search_key, duplicate_find_option find_option +): """Create a column of index values indicating the position of a search key row within the corresponding list row in the lists column. @@ -207,9 +208,8 @@ cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_o The input column. search_key : Union[Column, Scalar] The search key. - find_first_option : bool - If true, index_of returns the first match. - Otherwise the last match is returned. + find_option : DuplicateFindOption + Which match to return if there are duplicates. 
Returns ------- @@ -220,11 +220,6 @@ cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_o """ cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() - cdef cpp_contains.duplicate_find_option find_option = ( - cpp_contains.duplicate_find_option.FIND_FIRST if find_first_option - else cpp_contains.duplicate_find_option.FIND_LAST - ) - with nogil: c_result = cpp_contains.index_of( list_view.view(), @@ -380,7 +375,7 @@ cpdef Column sequences(Column starts, Column sizes, Column steps = None): cpdef Column sort_lists( Column input, - bool ascending, + order sort_order, null_order na_position, bool stable = False ): @@ -392,8 +387,8 @@ cpdef Column sort_lists( ---------- input : Column The input column. - ascending : bool - If true, the sort order is ascending. Otherwise, the sort order is descending. + sort_order : Order + Sort order in the list. na_position : NullOrder If na_position equals NullOrder.FIRST, then the null values in the output column are placed first. Otherwise, they are be placed after. @@ -409,21 +404,17 @@ cpdef Column sort_lists( cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() - cdef order c_sort_order = ( - order.ASCENDING if ascending else order.DESCENDING - ) - with nogil: if stable: c_result = cpp_stable_sort_lists( list_view.view(), - c_sort_order, + sort_order, na_position, ) else: c_result = cpp_sort_lists( list_view.view(), - c_sort_order, + sort_order, na_position, ) return Column.from_libcudf(move(c_result)) @@ -432,8 +423,8 @@ cpdef Column sort_lists( cpdef Column difference_distinct( Column lhs, Column rhs, - bool nulls_equal=True, - bool nans_equal=True + null_equality nulls_equal=null_equality.EQUAL, + nan_equality nans_equal=nan_equality.ALL_EQUAL, ): """Create a column of index values indicating the position of a search key row within the corresponding list row in the lists column.
@@ -446,11 +437,10 @@ cpdef Column difference_distinct( The input lists column of elements that may be included. rhs : Column The input lists column of elements to exclude. - nulls_equal : bool, default True - If true, null elements are considered equal. Otherwise, unequal. - nans_equal : bool, default True - If true, libcudf will treat nan elements from {-nan, +nan} - as equal. Otherwise, unequal. Otherwise, unequal. + nulls_equal : NullEquality, default EQUAL + Are nulls considered equal. + nans_equal : NanEquality, default ALL_EQUAL + Are nans considered equal. Returns ------- @@ -461,19 +451,12 @@ cpdef Column difference_distinct( cdef ListColumnView lhs_view = lhs.list_view() cdef ListColumnView rhs_view = rhs.list_view() - cdef null_equality c_nulls_equal = ( - null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL - ) - cdef nan_equality c_nans_equal = ( - nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL - ) - with nogil: c_result = cpp_set_operations.difference_distinct( lhs_view.view(), rhs_view.view(), - c_nulls_equal, - c_nans_equal, + nulls_equal, + nans_equal, ) return Column.from_libcudf(move(c_result)) @@ -481,8 +464,8 @@ cpdef Column difference_distinct( cpdef Column have_overlap( Column lhs, Column rhs, - bool nulls_equal=True, - bool nans_equal=True + null_equality nulls_equal=null_equality.EQUAL, + nan_equality nans_equal=nan_equality.ALL_EQUAL, ): """Check if lists at each row of the given lists columns overlap. @@ -494,11 +477,10 @@ cpdef Column have_overlap( The input lists column for one side. rhs : Column The input lists column for the other side. - nulls_equal : bool, default True - If true, null elements are considered equal. Otherwise, unequal. - nans_equal : bool, default True - If true, libcudf will treat nan elements from {-nan, +nan} - as equal. Otherwise, unequal. Otherwise, unequal. + nulls_equal : NullEquality, default EQUAL + Are nulls considered equal. 
+ nans_equal : NanEquality, default ALL_EQUAL + Are nans considered equal. Returns ------- @@ -509,19 +491,12 @@ cpdef Column have_overlap( cdef ListColumnView lhs_view = lhs.list_view() cdef ListColumnView rhs_view = rhs.list_view() - cdef null_equality c_nulls_equal = ( - null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL - ) - cdef nan_equality c_nans_equal = ( - nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL - ) - with nogil: c_result = cpp_set_operations.have_overlap( lhs_view.view(), rhs_view.view(), - c_nulls_equal, - c_nans_equal, + nulls_equal, + nans_equal, ) return Column.from_libcudf(move(c_result)) @@ -529,8 +504,8 @@ cpdef Column have_overlap( cpdef Column intersect_distinct( Column lhs, Column rhs, - bool nulls_equal=True, - bool nans_equal=True + null_equality nulls_equal=null_equality.EQUAL, + nan_equality nans_equal=nan_equality.ALL_EQUAL, ): """Create a lists column of distinct elements common to two input lists columns. @@ -542,11 +517,10 @@ cpdef Column intersect_distinct( The input lists column of elements that may be included. rhs : Column The input lists column of elements to exclude. - nulls_equal : bool, default True - If true, null elements are considered equal. Otherwise, unequal. - nans_equal : bool, default True - If true, libcudf will treat nan elements from {-nan, +nan} - as equal. Otherwise, unequal. Otherwise, unequal. + nulls_equal : NullEquality, default EQUAL + Are nulls considered equal. + nans_equal : NanEquality, default ALL_EQUAL + Are nans considered equal. 
Returns ------- @@ -557,19 +531,12 @@ cpdef Column intersect_distinct( cdef ListColumnView lhs_view = lhs.list_view() cdef ListColumnView rhs_view = rhs.list_view() - cdef null_equality c_nulls_equal = ( - null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL - ) - cdef nan_equality c_nans_equal = ( - nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL - ) - with nogil: c_result = cpp_set_operations.intersect_distinct( lhs_view.view(), rhs_view.view(), - c_nulls_equal, - c_nans_equal, + nulls_equal, + nans_equal, ) return Column.from_libcudf(move(c_result)) @@ -577,8 +544,8 @@ cpdef Column intersect_distinct( cpdef Column union_distinct( Column lhs, Column rhs, - bool nulls_equal=True, - bool nans_equal=True + null_equality nulls_equal=null_equality.EQUAL, + nan_equality nans_equal=nan_equality.ALL_EQUAL, ): """Create a lists column of distinct elements found in either of two input lists columns. @@ -591,11 +558,10 @@ cpdef Column union_distinct( The input lists column of elements that may be included. rhs : Column The input lists column of elements to exclude. - nulls_equal : bool, default True - If true, null elements are considered equal. Otherwise, unequal. - nans_equal : bool, default True - If true, libcudf will treat nan elements from {-nan, +nan} - as equal. Otherwise, unequal. Otherwise, unequal. + nulls_equal : NullEquality, default EQUAL + Are nulls considered equal. + nans_equal : NanEquality, default ALL_EQUAL + Are nans considered equal. 
Returns ------- @@ -606,19 +572,12 @@ cpdef Column union_distinct( cdef ListColumnView lhs_view = lhs.list_view() cdef ListColumnView rhs_view = rhs.list_view() - cdef null_equality c_nulls_equal = ( - null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL - ) - cdef nan_equality c_nans_equal = ( - nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL - ) - with nogil: c_result = cpp_set_operations.union_distinct( lhs_view.view(), rhs_view.view(), - c_nulls_equal, - c_nans_equal, + nulls_equal, + nans_equal, ) return Column.from_libcudf(move(c_result)) @@ -651,7 +610,7 @@ cpdef Column apply_boolean_mask(Column input, Column boolean_mask): return Column.from_libcudf(move(c_result)) -cpdef Column distinct(Column input, bool nulls_equal, bool nans_equal): +cpdef Column distinct(Column input, null_equality nulls_equal, nan_equality nans_equal): """Create a new list column without duplicate elements in each list. For details, see :cpp:func:`distinct`. @@ -660,11 +619,10 @@ cpdef Column distinct(Column input, bool nulls_equal, bool nans_equal): ---------- input : Column The input column. - nulls_equal : bool - If true, null elements are considered equal. Otherwise, unequal. - nans_equal : bool - If true, libcudf will treat nan elements from {-nan, +nan} - as equal. Otherwise, unequal. Otherwise, unequal. + nulls_equal : NullEquality + Are nulls considered equal. + nans_equal : NanEquality + Are nans considered equal. 
Returns ------- @@ -674,17 +632,10 @@ cpdef Column distinct(Column input, bool nulls_equal, bool nans_equal): cdef unique_ptr[column] c_result cdef ListColumnView list_view = input.list_view() - cdef null_equality c_nulls_equal = ( - null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL - ) - cdef nan_equality c_nans_equal = ( - nan_equality.ALL_EQUAL if nans_equal else nan_equality.UNEQUAL - ) - with nogil: c_result = cpp_distinct( list_view.view(), - c_nulls_equal, - c_nans_equal, + nulls_equal, + nans_equal, ) return Column.from_libcudf(move(c_result)) From bd9f6f8f9727f930b48d1294ddfcdde08e8646c4 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 6 Nov 2024 18:40:24 +0000 Subject: [PATCH 08/16] Add some guidance about type stubs --- docs/cudf/source/developer_guide/pylibcudf.md | 73 ++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/docs/cudf/source/developer_guide/pylibcudf.md b/docs/cudf/source/developer_guide/pylibcudf.md index 39840e72e21..1ee828e7c4e 100644 --- a/docs/cudf/source/developer_guide/pylibcudf.md +++ b/docs/cudf/source/developer_guide/pylibcudf.md @@ -15,7 +15,8 @@ To satisfy the goals of pylibcudf, we impose the following set of design princip - All typing in code should be written using Cython syntax, not PEP 484 Python typing syntax. Not only does this ensure compatibility with Cython < 3, but even with Cython 3 PEP 484 support remains incomplete as of this writing. - All cudf code should interact only with pylibcudf, never with libcudf directly. This is not currently the case, but is the direction that the library is moving towards. - Ideally, pylibcudf should depend on no RAPIDS component other than rmm, and should in general have minimal runtime dependencies. - +- Type stubs are provided and generated manually. When adding new + functionality, ensure that the matching type stub is appropriately updated. 
## Relationship to libcudf @@ -249,3 +250,73 @@ In the event that libcudf provides multiple overloads for the same function with and set arguments not shared between overloads to `None`. If a user tries to pass in an unsupported argument for a specific overload type, you should raise `ValueError`. Finally, consider making an libcudf issue if you think this inconsistency can be addressed on the libcudf side. + +### Type stubs + +Since static type checkers like `mypy` and `pyright` cannot parse +Cython code, we provide type stubs for the pylibcudf package. These +are currently maintained manually, alongside the matching pylibcudf +files. + +Every `pyx` file should have a matching `pyi` file that provides the +type stubs. Most functions can be exposed straightforwardly. Some +guiding principles: + +- For typed integer arguments in libcudf, use `int` as a type + annotation. +- For functions which are annotated as a `list` in Cython, but the + function body does more detailed checking, try and encode the + detailed information in the type. +- For Cython fused types there are two options: + 1. If the fused type appears only once in the function signature, + use a `Union` type; + 2. If the fused type appears more than once (or as both an input + and output type), use a `TypeVar` with + the variants in the fused type provided as constraints. + + +As an example, `pylibcudf.copying.split` is typed in Cython as: + +```cython +ctypedef fused ColumnOrTable: + Table + Column + +cpdef list split(ColumnOrTable input, list splits): ... +``` + +Here we only have a single use of the fused type, and the `list` +arguments do not specify their values. Here, if we provide a `Column` +as input, we receive a `list[Column]` as output, and if we provide a +`Table` we receive `list[Table]` as output. 
+ +In the type stub, we can encode this with a `TypeVar`, we can also +provide typing for the `splits` argument that indicates that the split +values must be integers: + +```python +ColumnOrTable = TypeVar("ColumnOrTable", Column, Table) + +def split(input: ColumnOrTable, splits: list[int]) -> list[ColumnOrTable]: ... +``` + +Conversely, `pylibcudf.copying.scatter` uses a fused type only once in +its input: + +```cython +ctypedef fused TableOrListOfScalars: + Table + list + +cpdef Table scatter( + TableOrListOfScalars source, Column scatter_map, Table target +) +``` + +In the type stub, we can use a normal union in this case + +```python +def scatter( + source: Table | list[Scalar], scatter_map: Column, target: Table +) -> Table: ... +``` From 1801379712beb8f59c44276b3cfb7ada644b0a28 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Thu, 7 Nov 2024 14:38:46 +0000 Subject: [PATCH 09/16] Add __all__ to all pylibcudf modules --- python/pylibcudf/pylibcudf/aggregation.pyx | 34 +++++++++++++++++++ python/pylibcudf/pylibcudf/binaryop.pyx | 1 + python/pylibcudf/pylibcudf/column.pyx | 1 + .../pylibcudf/pylibcudf/column_factories.pyx | 9 +++++ python/pylibcudf/pylibcudf/concatenate.pyx | 1 + .../pylibcudf/pylibcudf/contiguous_split.pyx | 7 ++++ python/pylibcudf/pylibcudf/copying.pyx | 17 ++++++++++ python/pylibcudf/pylibcudf/datetime.pyx | 18 ++++++++++ python/pylibcudf/pylibcudf/experimental.pyx | 2 ++ python/pylibcudf/pylibcudf/expressions.pyx | 10 ++++++ python/pylibcudf/pylibcudf/filling.pyx | 2 ++ python/pylibcudf/pylibcudf/gpumemoryview.pyx | 1 + python/pylibcudf/pylibcudf/groupby.pyx | 2 ++ python/pylibcudf/pylibcudf/hashing.pyx | 13 +++++++ python/pylibcudf/pylibcudf/interop.pyx | 8 +++++ python/pylibcudf/pylibcudf/io/__init__.py | 14 ++++++++ python/pylibcudf/pylibcudf/io/avro.pyx | 2 ++ python/pylibcudf/pylibcudf/io/csv.pyx | 2 ++ python/pylibcudf/pylibcudf/io/datasource.pyx | 1 + python/pylibcudf/pylibcudf/io/json.pyx | 1 + 
python/pylibcudf/pylibcudf/io/orc.pyx | 6 ++++ python/pylibcudf/pylibcudf/io/parquet.pyx | 2 ++ python/pylibcudf/pylibcudf/io/timezone.pyx | 1 + python/pylibcudf/pylibcudf/io/types.pyx | 11 ++++++ python/pylibcudf/pylibcudf/join.pyx | 18 ++++++++++ python/pylibcudf/pylibcudf/json.pyx | 1 + python/pylibcudf/pylibcudf/labeling.pyx | 1 + python/pylibcudf/pylibcudf/lists.pyx | 22 ++++++++++++ python/pylibcudf/pylibcudf/merge.pyx | 1 + python/pylibcudf/pylibcudf/null_mask.pyx | 7 ++++ .../pylibcudf/nvtext/byte_pair_encode.pyx | 1 + .../pylibcudf/nvtext/edit_distance.pyx | 1 + .../pylibcudf/nvtext/generate_ngrams.pyx | 5 +++ python/pylibcudf/pylibcudf/nvtext/jaccard.pyx | 1 + python/pylibcudf/pylibcudf/nvtext/minhash.pyx | 6 ++++ .../pylibcudf/nvtext/ngrams_tokenize.pyx | 1 + .../pylibcudf/pylibcudf/nvtext/normalize.pyx | 1 + python/pylibcudf/pylibcudf/nvtext/replace.pyx | 1 + python/pylibcudf/pylibcudf/nvtext/stemmer.pyx | 1 + .../pylibcudf/nvtext/subword_tokenize.pyx | 1 + .../pylibcudf/pylibcudf/nvtext/tokenize.pyx | 10 ++++++ python/pylibcudf/pylibcudf/partitioning.pyx | 5 +++ python/pylibcudf/pylibcudf/quantiles.pyx | 1 + python/pylibcudf/pylibcudf/reduce.pyx | 1 + python/pylibcudf/pylibcudf/replace.pyx | 8 +++++ python/pylibcudf/pylibcudf/reshape.pyx | 1 + python/pylibcudf/pylibcudf/rolling.pyx | 1 + python/pylibcudf/pylibcudf/round.pyx | 1 + python/pylibcudf/pylibcudf/scalar.pyx | 2 ++ python/pylibcudf/pylibcudf/search.pyx | 1 + python/pylibcudf/pylibcudf/sorting.pyx | 12 +++++++ .../pylibcudf/pylibcudf/stream_compaction.pyx | 12 +++++++ .../pylibcudf/pylibcudf/strings/__init__.py | 4 +-- .../pylibcudf/strings/attributes.pyx | 1 + .../pylibcudf/strings/capitalize.pyx | 1 + python/pylibcudf/pylibcudf/strings/case.pyx | 1 + .../pylibcudf/strings/char_types.pyx | 5 +++ .../pylibcudf/pylibcudf/strings/combine.pyx | 7 ++++ .../pylibcudf/pylibcudf/strings/contains.pyx | 1 + .../strings/convert/convert_booleans.pyx | 1 + .../strings/convert/convert_datetime.pyx | 1 + 
.../strings/convert/convert_durations.pyx | 1 + .../strings/convert/convert_fixed_point.pyx | 2 ++ .../strings/convert/convert_floats.pyx | 1 + .../strings/convert/convert_integers.pyx | 8 +++++ .../strings/convert/convert_ipv4.pyx | 1 + .../strings/convert/convert_lists.pyx | 1 + .../strings/convert/convert_urls.pyx | 1 + .../pylibcudf/pylibcudf/strings/extract.pyx | 1 + python/pylibcudf/pylibcudf/strings/find.pyx | 1 + .../pylibcudf/strings/find_multiple.pyx | 1 + .../pylibcudf/pylibcudf/strings/findall.pyx | 1 + .../pylibcudf/pylibcudf/strings/padding.pyx | 1 + .../pylibcudf/strings/regex_flags.pyx | 2 ++ .../pylibcudf/strings/regex_program.pyx | 1 + python/pylibcudf/pylibcudf/strings/repeat.pyx | 1 + .../pylibcudf/pylibcudf/strings/replace.pyx | 1 + .../pylibcudf/strings/replace_re.pyx | 1 + .../pylibcudf/pylibcudf/strings/side_type.pyx | 2 ++ python/pylibcudf/pylibcudf/strings/slice.pyx | 1 + .../pylibcudf/strings/split/partition.pyx | 1 + .../pylibcudf/strings/split/split.pyx | 10 ++++++ python/pylibcudf/pylibcudf/strings/strip.pyx | 1 + .../pylibcudf/pylibcudf/strings/translate.pyx | 1 + python/pylibcudf/pylibcudf/strings/wrap.pyx | 1 + python/pylibcudf/pylibcudf/table.pyx | 1 + python/pylibcudf/pylibcudf/traits.pyx | 20 +++++++++++ python/pylibcudf/pylibcudf/transform.pyx | 9 +++++ python/pylibcudf/pylibcudf/transpose.pyx | 1 + python/pylibcudf/pylibcudf/types.pyx | 16 +++++++++ python/pylibcudf/pylibcudf/unary.pyx | 10 ++++++ 91 files changed, 410 insertions(+), 2 deletions(-) diff --git a/python/pylibcudf/pylibcudf/aggregation.pyx b/python/pylibcudf/pylibcudf/aggregation.pyx index e510b738f70..662f76d5c8e 100644 --- a/python/pylibcudf/pylibcudf/aggregation.pyx +++ b/python/pylibcudf/pylibcudf/aggregation.pyx @@ -64,6 +64,40 @@ from pylibcudf.libcudf.aggregation import udf_type as UdfType # no-cython-lint from .types cimport DataType +__all__ = [ + "Aggregation", + "CorrelationType", + "EWMHistory", + "Kind", + "RankMethod", + "RankPercentage", + 
"UdfType", + "all", + "any", + "argmax", + "argmin", + "collect_list", + "collect_set", + "correlation", + "count", + "covariance", + "ewma", + "max", + "mean", + "median", + "min", + "nth_element", + "nunique", + "product", + "quantile", + "rank", + "std", + "sum", + "sum_of_squares", + "udf", + "variance", +] + cdef class Aggregation: """A type of aggregation to perform. diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx index eef73bf4e9d..b7b4ecc6e83 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -16,6 +16,7 @@ from .column cimport Column from .scalar cimport Scalar from .types cimport DataType +__all__ = ["BinaryOperator", "binary_operation", "is_supported_operation"] cpdef Column binary_operation( LeftBinaryOperand lhs, diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx index 4e5698566d0..794c76438f3 100644 --- a/python/pylibcudf/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -17,6 +17,7 @@ from .utils cimport int_to_bitmask_ptr, int_to_void_ptr import functools +__all__ = ["Column", "ListColumnView", "is_c_contiguous"] cdef class Column: """A container of nullable device data as a column of elements. diff --git a/python/pylibcudf/pylibcudf/column_factories.pyx b/python/pylibcudf/pylibcudf/column_factories.pyx index ac942a620b5..c4969a7f502 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pyx +++ b/python/pylibcudf/pylibcudf/column_factories.pyx @@ -17,6 +17,15 @@ from .types cimport DataType, type_id from .types import MaskState, TypeId +__all__ = [ + "make_duration_column", + "make_empty_column", + "make_fixed_point_column", + "make_fixed_width_column", + "make_numeric_column", + "make_timestamp_column", +] + cpdef Column make_empty_column(MakeEmptyColumnOperand type_or_id): """Creates an empty column of the specified type. 
diff --git a/python/pylibcudf/pylibcudf/concatenate.pyx b/python/pylibcudf/pylibcudf/concatenate.pyx index 10c860d97bb..42c5f34cf3e 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pyx +++ b/python/pylibcudf/pylibcudf/concatenate.pyx @@ -12,6 +12,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from .column cimport Column from .table cimport Table +__all__ = ["concatenate"] cpdef concatenate(list objects): """Concatenate columns or tables. diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx index ed926a3fcc0..451757eea10 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pyx +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx @@ -20,6 +20,13 @@ from .table cimport Table from .utils cimport int_to_void_ptr +__all__ = [ + "PackedColumns", + "pack", + "unpack", + "unpack_from_memoryviews", +] + cdef class HostBuffer: """Owning host buffer that implements the buffer protocol""" cdef unique_ptr[vector[uint8_t]] c_obj diff --git a/python/pylibcudf/pylibcudf/copying.pyx b/python/pylibcudf/pylibcudf/copying.pyx index 4938f1a3dda..fb8b6f9890e 100644 --- a/python/pylibcudf/pylibcudf/copying.pyx +++ b/python/pylibcudf/pylibcudf/copying.pyx @@ -36,6 +36,23 @@ from .table cimport Table from .utils cimport _as_vector +__all__ = [ + "MaskAllocationPolicy", + "OutOfBoundsPolicy", + "allocate_like", + "boolean_mask_scatter", + "copy_if_else", + "copy_range", + "copy_range_in_place", + "empty_like", + "gather", + "get_element", + "scatter", + "shift", + "slice", + "split", +] + cpdef Table gather( Table source_table, Column gather_map, diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index 9e5e709d81d..b100e3e22d0 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -29,6 +29,24 @@ from cython.operator cimport dereference from .column cimport Column +__all__ = [ + "DatetimeComponent", + 
"RoundingFrequency", + "add_calendrical_months", + "ceil_datetimes", + "day_of_year", + "days_in_month", + "extract_datetime_component", + "extract_microsecond_fraction", + "extract_millisecond_fraction", + "extract_nanosecond_fraction", + "extract_quarter", + "floor_datetimes", + "is_leap_year", + "last_day_of_month", + "round_datetimes", +] + cpdef Column extract_millisecond_fraction( Column input ): diff --git a/python/pylibcudf/pylibcudf/experimental.pyx b/python/pylibcudf/pylibcudf/experimental.pyx index b25a53e13b2..d94d6d087ac 100644 --- a/python/pylibcudf/pylibcudf/experimental.pyx +++ b/python/pylibcudf/pylibcudf/experimental.pyx @@ -5,6 +5,8 @@ from libcpp.string cimport string from pylibcudf.libcudf cimport experimental as cpp_experimental +__all__ = ["disable_prefetching", "enable_prefetching", "prefetch_debugging"] + cpdef enable_prefetching(str key): """Turn on prefetch instructions for the given key. diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index 1535f68366b..b0db533dba9 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -49,6 +49,16 @@ from .types cimport DataType # Aliases for simplicity ctypedef unique_ptr[libcudf_exp.expression] expression_ptr +__all__ = [ + "ASTOperator", + "ColumnNameReference", + "ColumnReference", + "Expression", + "Literal", + "Operation", + "TableReference", +] + # Define this class just to have a docstring for it cdef class Expression: """ diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx index a47004a1e42..883dd49280b 100644 --- a/python/pylibcudf/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -18,6 +18,8 @@ from .scalar cimport Scalar from .table cimport Table +__all__ = ["fill", "fill_in_place", "repeat", "sequence"] + cpdef Column fill( Column destination, size_type begin, diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyx 
b/python/pylibcudf/pylibcudf/gpumemoryview.pyx index 0904022a944..605a19ea0de 100644 --- a/python/pylibcudf/pylibcudf/gpumemoryview.pyx +++ b/python/pylibcudf/pylibcudf/gpumemoryview.pyx @@ -1,5 +1,6 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. +__all__ = ["gpumemoryview"] cdef class gpumemoryview: """Minimal representation of a memory buffer. diff --git a/python/pylibcudf/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx index 71f9ecb0453..2760516b316 100644 --- a/python/pylibcudf/pylibcudf/groupby.pyx +++ b/python/pylibcudf/pylibcudf/groupby.pyx @@ -25,6 +25,8 @@ from .types cimport null_order, null_policy, order, sorted from .utils cimport _as_vector +__all__ = ["GroupBy", "GroupByRequest"] + cdef class GroupByRequest: """A request for a groupby aggregation or scan. diff --git a/python/pylibcudf/pylibcudf/hashing.pyx b/python/pylibcudf/pylibcudf/hashing.pyx index 9ea3d4d1bda..548cffc0ce8 100644 --- a/python/pylibcudf/pylibcudf/hashing.pyx +++ b/python/pylibcudf/pylibcudf/hashing.pyx @@ -20,6 +20,19 @@ from pylibcudf.libcudf.table.table cimport table from .column cimport Column from .table cimport Table +__all__ = [ + "LIBCUDF_DEFAULT_HASH_SEED", + "md5", + "murmurhash3_x64_128", + "murmurhash3_x86_32", + "sha1", + "sha224", + "sha256", + "sha384", + "sha512", + "xxhash_64", +] + LIBCUDF_DEFAULT_HASH_SEED = DEFAULT_HASH_SEED cpdef Column murmurhash3_x86_32( diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx index 61e812353b7..bd5397ac328 100644 --- a/python/pylibcudf/pylibcudf/interop.pyx +++ b/python/pylibcudf/pylibcudf/interop.pyx @@ -38,6 +38,14 @@ from .scalar cimport Scalar from .table cimport Table from .types cimport DataType, type_id +__all__ = [ + "ColumnMetadata", + "from_arrow", + "from_dlpack", + "to_arrow", + "to_dlpack", +] + ARROW_TO_PYLIBCUDF_TYPES = { pa.int8(): type_id.INT8, pa.int16(): type_id.INT16, diff --git a/python/pylibcudf/pylibcudf/io/__init__.py 
b/python/pylibcudf/pylibcudf/io/__init__.py index 2e4f215b12c..08891e454cd 100644 --- a/python/pylibcudf/pylibcudf/io/__init__.py +++ b/python/pylibcudf/pylibcudf/io/__init__.py @@ -2,3 +2,17 @@ from . import avro, csv, datasource, json, orc, parquet, timezone, types from .types import SinkInfo, SourceInfo, TableWithMetadata + +__all__ = [ + "SinkInfo", + "SourceInfo", + "TableWithMetadata", + "avro", + "csv", + "datasource", + "json", + "orc", + "parquet", + "timezone", + "types", +] diff --git a/python/pylibcudf/pylibcudf/io/avro.pyx b/python/pylibcudf/pylibcudf/io/avro.pyx index fe765b34f82..4271333511a 100644 --- a/python/pylibcudf/pylibcudf/io/avro.pyx +++ b/python/pylibcudf/pylibcudf/io/avro.pyx @@ -10,6 +10,8 @@ from pylibcudf.libcudf.io.avro cimport ( ) from pylibcudf.libcudf.types cimport size_type +__all__ = ["read_avro"] + cpdef TableWithMetadata read_avro( SourceInfo source_info, diff --git a/python/pylibcudf/pylibcudf/io/csv.pyx b/python/pylibcudf/pylibcudf/io/csv.pyx index 2c61cc42d82..858e580ab34 100644 --- a/python/pylibcudf/pylibcudf/io/csv.pyx +++ b/python/pylibcudf/pylibcudf/io/csv.pyx @@ -19,6 +19,8 @@ from pylibcudf.libcudf.types cimport data_type, size_type from pylibcudf.types cimport DataType +__all__ = ["read_csv"] + cdef tuple _process_parse_dates_hex(list cols): cdef vector[string] str_cols cdef vector[int] int_cols diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyx b/python/pylibcudf/pylibcudf/io/datasource.pyx index 02418444caa..4e7c9c8e385 100644 --- a/python/pylibcudf/pylibcudf/io/datasource.pyx +++ b/python/pylibcudf/pylibcudf/io/datasource.pyx @@ -2,6 +2,7 @@ from pylibcudf.libcudf.io.datasource cimport datasource +__all__ = ["Datasource"] cdef class Datasource: cdef datasource* get_datasource(self) except * nogil: diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx index 65f78f830f1..ad2989925c9 100644 --- a/python/pylibcudf/pylibcudf/io/json.pyx +++ 
b/python/pylibcudf/pylibcudf/io/json.pyx @@ -23,6 +23,7 @@ from pylibcudf.libcudf.io.types cimport ( from pylibcudf.libcudf.types cimport data_type, size_type from pylibcudf.types cimport DataType +__all__ = ["chunked_read_json", "read_json", "write_json"] cdef map[string, schema_element] _generate_schema_map(list dtypes): cdef map[string, schema_element] schema_map diff --git a/python/pylibcudf/pylibcudf/io/orc.pyx b/python/pylibcudf/pylibcudf/io/orc.pyx index 70e0a7995a2..f2f644a32f9 100644 --- a/python/pylibcudf/pylibcudf/io/orc.pyx +++ b/python/pylibcudf/pylibcudf/io/orc.pyx @@ -30,6 +30,12 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.types cimport DataType from pylibcudf.variant cimport get_if, holds_alternative +__all__ = [ + "OrcColumnStatistics", + "ParsedOrcStatistics", + "read_orc", + "read_parsed_orc_statistics", +] cdef class OrcColumnStatistics: def __init__(self): diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx index 981ca7b8159..7f93c763298 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyx +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -16,6 +16,8 @@ from pylibcudf.libcudf.io.parquet cimport ( from pylibcudf.libcudf.io.types cimport table_with_metadata from pylibcudf.libcudf.types cimport size_type +__all__ = ["ChunkedParquetReader", "read_parquet"] + cdef parquet_reader_options _setup_parquet_reader_options( SourceInfo source_info, diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyx b/python/pylibcudf/pylibcudf/io/timezone.pyx index f120b65fb2c..af7cf8a4ee5 100644 --- a/python/pylibcudf/pylibcudf/io/timezone.pyx +++ b/python/pylibcudf/pylibcudf/io/timezone.pyx @@ -11,6 +11,7 @@ from pylibcudf.libcudf.table.table cimport table from ..table cimport Table +__all__ = ["make_timezone_transition_table"] cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name): """ diff --git a/python/pylibcudf/pylibcudf/io/types.pyx 
b/python/pylibcudf/pylibcudf/io/types.pyx index e2439fbad88..8bc226d83e5 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -31,6 +31,17 @@ from pylibcudf.libcudf.io.types import ( statistics_freq as StatisticsFreq, # no-cython-lint ) +__all__ = [ + "ColumnEncoding", + "CompressionType", + "DictionaryPolicy", + "JSONRecoveryMode", + "QuoteStyle", + "SinkInfo", + "SourceInfo", + "StatisticsFreq", + "TableWithMetadata", +] cdef class TableWithMetadata: """A container holding a table and its associated metadata diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx index 0d841eee194..c2efe05ffc4 100644 --- a/python/pylibcudf/pylibcudf/join.pyx +++ b/python/pylibcudf/pylibcudf/join.pyx @@ -15,6 +15,24 @@ from .column cimport Column from .expressions cimport Expression from .table cimport Table +__all__ = [ + "conditional_full_join", + "conditional_inner_join", + "conditional_left_anti_join", + "conditional_left_join", + "conditional_left_semi_join", + "cross_join", + "full_join", + "inner_join", + "left_anti_join", + "left_join", + "left_semi_join", + "mixed_full_join", + "mixed_inner_join", + "mixed_left_anti_join", + "mixed_left_join", + "mixed_left_semi_join", +] cdef Column _column_from_gather_map(cpp_join.gather_map_type gather_map): # helper to convert a gather map to a Column diff --git a/python/pylibcudf/pylibcudf/json.pyx b/python/pylibcudf/pylibcudf/json.pyx index ebb82f80408..9c5dd023509 100644 --- a/python/pylibcudf/pylibcudf/json.pyx +++ b/python/pylibcudf/pylibcudf/json.pyx @@ -10,6 +10,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.scalar cimport Scalar +__all__ = ["GetJsonObjectOptions", "get_json_object"] cdef class GetJsonObjectOptions: """Settings for ``get_json_object()``""" diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx index 
84a7d42283b..cae1830f6b9 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyx +++ b/python/pylibcudf/pylibcudf/labeling.pyx @@ -10,6 +10,7 @@ from pylibcudf.libcudf.labeling import inclusive as Inclusive # no-cython-lint from .column cimport Column +__all__ = ["Inclusive", "label_bins"] cpdef Column label_bins( Column input, diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index f6ffb1874b9..ccc56eaa520 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -49,6 +49,28 @@ from .column cimport Column, ListColumnView from .scalar cimport Scalar from .table cimport Table +__all__ = [ + "ConcatenateNullPolicy", + "DuplicateFindOption", + "apply_boolean_mask", + "concatenate_list_elements", + "concatenate_rows", + "contains", + "contains_nulls", + "count_elements", + "difference_distinct", + "distinct", + "explode_outer", + "extract_list_element", + "have_overlap", + "index_of", + "intersect_distinct", + "reverse", + "segmented_gather", + "sequences", + "sort_lists", + "union_distinct", +] cpdef Table explode_outer(Table input, size_type explode_column_idx): """Explode a column of lists into rows. 
diff --git a/python/pylibcudf/pylibcudf/merge.pyx b/python/pylibcudf/pylibcudf/merge.pyx index 61a21aafdb2..c051cdc0c66 100644 --- a/python/pylibcudf/pylibcudf/merge.pyx +++ b/python/pylibcudf/pylibcudf/merge.pyx @@ -10,6 +10,7 @@ from pylibcudf.libcudf.types cimport null_order, order, size_type from .table cimport Table +__all__ = ["merge"] cpdef Table merge ( list tables_to_merge, diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx index 74180951562..adc264e9af6 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyx +++ b/python/pylibcudf/pylibcudf/null_mask.pyx @@ -14,6 +14,13 @@ from pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint from .column cimport Column from .table cimport Table +__all__ = [ + "bitmask_allocation_size_bytes", + "bitmask_and", + "bitmask_or", + "copy_bitmask", + "create_null_mask", +] cdef DeviceBuffer buffer_to_python(device_buffer buf): return DeviceBuffer.c_from_unique_ptr(make_unique[device_buffer](move(buf))) diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx index 76caad276d4..c63b92328f3 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx @@ -16,6 +16,7 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( ) from pylibcudf.scalar cimport Scalar +__all__ = ["BPEMergePairs", "byte_pair_encoding"] cdef class BPEMergePairs: """The table of merge pairs for the BPE encoder. 
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx index dcacb2e1267..eceeaff24e3 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx @@ -9,6 +9,7 @@ from pylibcudf.libcudf.nvtext.edit_distance cimport ( edit_distance_matrix as cpp_edit_distance_matrix, ) +__all__ = ["edit_distance", "edit_distance_matrix"] cpdef Column edit_distance(Column input, Column targets): """ diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx index 09859d09e9e..521bc0ef4a4 100644 --- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx @@ -14,6 +14,11 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar +__all__ = [ + "generate_ngrams", + "generate_character_ngrams", + "hash_character_ngrams", +] cpdef Column generate_ngrams(Column input, size_type ngrams, Scalar separator): """ diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx index 3d8669865d9..90cace088f7 100644 --- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx @@ -10,6 +10,7 @@ from pylibcudf.libcudf.nvtext.jaccard cimport ( ) from pylibcudf.libcudf.types cimport size_type +__all__ = ["jaccard_index"] cpdef Column jaccard_index(Column input1, Column input2, size_type width): """ diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx index f1e012e60e5..96b849b6b48 100644 --- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx @@ -17,6 +17,12 @@ from pylibcudf.scalar cimport Scalar from cython.operator import dereference +__all__ = [ + "minhash", + "minhash64", + 
"word_minhash", + "word_minhash64", +] cpdef Column minhash(Column input, ColumnOrScalar seeds, size_type width=4): """ diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx index 8a1854c5f0d..771c7c019fc 100644 --- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx @@ -12,6 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar +__all__ = ["ngrams_tokenize"] cpdef Column ngrams_tokenize( Column input, diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx index 637d900b659..b259ccaefa6 100644 --- a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx @@ -10,6 +10,7 @@ from pylibcudf.libcudf.nvtext.normalize cimport ( normalize_spaces as cpp_normalize_spaces, ) +__all__ = ["normalize_characters", "normalize_spaces"] cpdef Column normalize_spaces(Column input): """ diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyx b/python/pylibcudf/pylibcudf/nvtext/replace.pyx index b65348ce14d..a27592fb434 100644 --- a/python/pylibcudf/pylibcudf/nvtext/replace.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyx @@ -16,6 +16,7 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar +__all__ = ["filter_tokens", "replace_tokens"] cpdef Column replace_tokens( Column input, diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx index 854d1053624..c9e4f1274e4 100644 --- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx @@ -12,6 +12,7 @@ from pylibcudf.libcudf.nvtext.stemmer cimport ( ) from pylibcudf.libcudf.types cimport size_type +__all__ = ["is_letter", 
"porter_stemmer_measure"] cpdef Column is_letter( Column input, diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx index 04643d3bd84..a346eef4619 100644 --- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx @@ -13,6 +13,7 @@ from pylibcudf.libcudf.nvtext.subword_tokenize cimport ( tokenizer_result as cpp_tokenizer_result, ) +__all__ = ["HashedVocabulary", "subword_tokenize"] cdef class HashedVocabulary: """The vocabulary data for use with the subword_tokenize function. diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx index ec02e8ebf4e..26b055fd5ae 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx @@ -20,6 +20,16 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( ) from pylibcudf.libcudf.types cimport size_type +__all__ = [ + "TokenizeVocabulary", + "character_tokenize", + "count_tokens_column", + "count_tokens_scalar", + "detokenize", + "tokenize_column", + "tokenize_scalar", + "tokenize_with_vocabulary", +] cdef class TokenizeVocabulary: """The Vocabulary object to be used with ``tokenize_with_vocabulary``. 
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx index 3cff4843735..1dacabceb06 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pyx +++ b/python/pylibcudf/pylibcudf/partitioning.pyx @@ -11,6 +11,11 @@ from pylibcudf.libcudf.table.table cimport table from .column cimport Column from .table cimport Table +__all__ = [ + "hash_partition", + "partition", + "round_robin_partition", +] cpdef tuple[Table, list] hash_partition( Table input, diff --git a/python/pylibcudf/pylibcudf/quantiles.pyx b/python/pylibcudf/pylibcudf/quantiles.pyx index 7d92b598bd0..634218586ac 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pyx +++ b/python/pylibcudf/pylibcudf/quantiles.pyx @@ -17,6 +17,7 @@ from .column cimport Column from .table cimport Table from .types cimport interpolation +__all__ = ["quantile", "quantiles"] cpdef Column quantile( Column input, diff --git a/python/pylibcudf/pylibcudf/reduce.pyx b/python/pylibcudf/pylibcudf/reduce.pyx index d9ec3a9bdc4..1d6ffd9de10 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyx +++ b/python/pylibcudf/pylibcudf/reduce.pyx @@ -16,6 +16,7 @@ from .types cimport DataType from pylibcudf.libcudf.reduce import scan_type as ScanType # no-cython-lint +__all__ = ["ScanType", "minmax", "reduce", "scan"] cpdef Scalar reduce(Column col, Aggregation agg, DataType data_type): """Perform a reduction on a column diff --git a/python/pylibcudf/pylibcudf/replace.pyx b/python/pylibcudf/pylibcudf/replace.pyx index f77eba7ace5..51be2b29277 100644 --- a/python/pylibcudf/pylibcudf/replace.pyx +++ b/python/pylibcudf/pylibcudf/replace.pyx @@ -15,6 +15,14 @@ from pylibcudf.libcudf.replace import \ from .column cimport Column from .scalar cimport Scalar +__all__ = [ + "ReplacePolicy", + "clamp", + "find_and_replace_all", + "normalize_nans_and_zeros", + "replace_nulls", +] + cpdef Column replace_nulls(Column source_column, ReplacementType replacement): """Replace nulls in source_column. 
diff --git a/python/pylibcudf/pylibcudf/reshape.pyx b/python/pylibcudf/pylibcudf/reshape.pyx index 6540b5198ab..bdc212a1985 100644 --- a/python/pylibcudf/pylibcudf/reshape.pyx +++ b/python/pylibcudf/pylibcudf/reshape.pyx @@ -13,6 +13,7 @@ from pylibcudf.libcudf.types cimport size_type from .column cimport Column from .table cimport Table +__all__ = ["interleave_columns", "tile"] cpdef Column interleave_columns(Table source_table): """Interleave columns of a table into a single column. diff --git a/python/pylibcudf/pylibcudf/rolling.pyx b/python/pylibcudf/pylibcudf/rolling.pyx index 4fd0b005431..11acf57ccf4 100644 --- a/python/pylibcudf/pylibcudf/rolling.pyx +++ b/python/pylibcudf/pylibcudf/rolling.pyx @@ -11,6 +11,7 @@ from pylibcudf.libcudf.types cimport size_type from .aggregation cimport Aggregation from .column cimport Column +__all__ = ["rolling_window"] cpdef Column rolling_window( Column source, diff --git a/python/pylibcudf/pylibcudf/round.pyx b/python/pylibcudf/pylibcudf/round.pyx index 689363e652d..09e5a9cc3bc 100644 --- a/python/pylibcudf/pylibcudf/round.pyx +++ b/python/pylibcudf/pylibcudf/round.pyx @@ -11,6 +11,7 @@ from pylibcudf.libcudf.column.column cimport column from .column cimport Column +__all__ = ["RoundingMethod", "round"] cpdef Column round( Column source, diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index d4888a62ad1..e522a2a4670 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -11,6 +11,8 @@ from rmm.pylibrmm.memory_resource cimport get_current_device_resource from .column cimport Column from .types cimport DataType +__all__ = ["Scalar"] + # The DeviceMemoryResource attribute could be released prematurely # by the gc if the Scalar is in a reference cycle. 
Removing the tp_clear diff --git a/python/pylibcudf/pylibcudf/search.pyx b/python/pylibcudf/pylibcudf/search.pyx index 1a870248046..50353fcd0cc 100644 --- a/python/pylibcudf/pylibcudf/search.pyx +++ b/python/pylibcudf/pylibcudf/search.pyx @@ -10,6 +10,7 @@ from pylibcudf.libcudf.types cimport null_order, order from .column cimport Column from .table cimport Table +__all__ = ["contains", "lower_bound", "upper_bound"] cpdef Column lower_bound( Table haystack, diff --git a/python/pylibcudf/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx index fc40f03e1fd..fb29ef8c571 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyx +++ b/python/pylibcudf/pylibcudf/sorting.pyx @@ -12,6 +12,18 @@ from pylibcudf.libcudf.types cimport null_order, null_policy, order from .column cimport Column from .table cimport Table +__all__ = [ + "is_sorted", + "rank", + "segmented_sort_by_key", + "sort", + "sort_by_key", + "sorted_order", + "stable_segmented_sort_by_key", + "stable_sort", + "stable_sort_by_key", + "stable_sorted_order", +] cpdef Column sorted_order(Table source_table, list column_order, list null_precedence): """Computes the row indices required to sort the table. diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx index 2145398a191..6e403ca1b07 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -21,6 +21,18 @@ from pylibcudf.libcudf.stream_compaction import \ from .column cimport Column from .table cimport Table +__all__ = [ + "DuplicateKeepOption", + "apply_boolean_mask", + "distinct", + "distinct_count", + "distinct_indices", + "drop_nans", + "drop_nulls", + "stable_distinct", + "unique", + "unique_count", +] cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold): """Filters out rows from the input table based on the presence of nulls. 
diff --git a/python/pylibcudf/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py index fa7294c7dbd..67054f0b447 100644 --- a/python/pylibcudf/pylibcudf/strings/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/__init__.py @@ -28,6 +28,7 @@ from .side_type import SideType __all__ = [ + "SideType", "attributes", "capitalize", "case", @@ -46,9 +47,8 @@ "replace", "replace_re", "slice", - "strip", "split", - "SideType", + "strip", "translate", "wrap", ] diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyx b/python/pylibcudf/pylibcudf/strings/attributes.pyx index 8e46a32835d..f1eb09b4965 100644 --- a/python/pylibcudf/pylibcudf/strings/attributes.pyx +++ b/python/pylibcudf/pylibcudf/strings/attributes.pyx @@ -6,6 +6,7 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings cimport attributes as cpp_attributes +__all__ = ["code_points", "count_bytes", "count_characters"] cpdef Column count_characters(Column source_strings): """ diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyx b/python/pylibcudf/pylibcudf/strings/capitalize.pyx index 06b991c3cf1..a54480b8e4a 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pyx +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyx @@ -14,6 +14,7 @@ from pylibcudf.strings.char_types cimport string_character_types from cython.operator import dereference +__all__ = ["capitalize", "is_title", "title"] cpdef Column capitalize( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/case.pyx b/python/pylibcudf/pylibcudf/strings/case.pyx index 9e6cd7717d3..d0e054bef72 100644 --- a/python/pylibcudf/pylibcudf/strings/case.pyx +++ b/python/pylibcudf/pylibcudf/strings/case.pyx @@ -6,6 +6,7 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings cimport case as cpp_case +__all__ = ["swapcase", "to_lower", "to_upper"] cpdef Column to_lower(Column 
input): cdef unique_ptr[column] c_result diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/strings/char_types.pyx index cb04efe5e8f..0af4a1f9c37 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pyx +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyx @@ -12,6 +12,11 @@ from cython.operator import dereference from pylibcudf.libcudf.strings.char_types import \ string_character_types as StringCharacterTypes # no-cython-lint +__all__ = [ + "StringCharacterTypes", + "all_characters_of_type", + "filter_characters_of_type", +] cpdef Column all_characters_of_type( Column source_strings, diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyx b/python/pylibcudf/pylibcudf/strings/combine.pyx index f17d5265ab4..dc1e72c799b 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyx +++ b/python/pylibcudf/pylibcudf/strings/combine.pyx @@ -17,6 +17,13 @@ from pylibcudf.libcudf.strings.combine import \ from pylibcudf.libcudf.strings.combine import \ separator_on_nulls as SeparatorOnNulls # no-cython-lint +__all__ = [ + "OutputIfEmptyList", + "SeparatorOnNulls", + "concatenate", + "join_list_elements", + "join_strings", +] cpdef Column concatenate( Table strings_columns, diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyx b/python/pylibcudf/pylibcudf/strings/contains.pyx index d4b1130241d..7b4c53ed853 100644 --- a/python/pylibcudf/pylibcudf/strings/contains.pyx +++ b/python/pylibcudf/pylibcudf/strings/contains.pyx @@ -12,6 +12,7 @@ from pylibcudf.libcudf.scalar.scalar_factories cimport ( from pylibcudf.libcudf.strings cimport contains as cpp_contains from pylibcudf.strings.regex_program cimport RegexProgram +__all__ = ["contains_re", "count_re", "like", "matches_re"] cpdef Column contains_re( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx index dc12b291b11..1899a3b27cc 100644 --- 
a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx @@ -12,6 +12,7 @@ from pylibcudf.scalar cimport Scalar from cython.operator import dereference +__all__ = ["from_booleans", "to_booleans"] cpdef Column to_booleans(Column input, Scalar true_string): """ diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx index 0ee60812e00..f1cd684166c 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx @@ -11,6 +11,7 @@ from pylibcudf.libcudf.strings.convert cimport ( from pylibcudf.types import DataType +__all__ = ["from_timestamps", "is_timestamp", "to_timestamps"] cpdef Column to_timestamps( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx index 31980ace418..a9654afd00a 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx @@ -11,6 +11,7 @@ from pylibcudf.libcudf.strings.convert cimport ( from pylibcudf.types import DataType +__all__ = ["from_durations", "to_durations"] cpdef Column to_durations( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx index 962a47dfadf..00cbc822f36 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx @@ -9,6 +9,8 @@ from pylibcudf.libcudf.strings.convert cimport ( ) from pylibcudf.types cimport DataType, type_id +__all__ = ["from_fixed_point", "is_fixed_point", "to_fixed_point"] + cpdef Column to_fixed_point(Column input, DataType output_type): """ diff --git 
a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx index 1296f4f9db5..b5199aac577 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx @@ -9,6 +9,7 @@ from pylibcudf.libcudf.strings.convert cimport ( ) from pylibcudf.types cimport DataType +__all__ = ["from_floats", "is_float", "to_floats"] cpdef Column to_floats(Column strings, DataType output_type): """ diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx index 5558683a502..12984e15ce9 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx @@ -9,6 +9,14 @@ from pylibcudf.libcudf.strings.convert cimport ( ) from pylibcudf.types cimport DataType +__all__ = [ + "from_integers", + "hex_to_integers", + "integers_to_hex", + "is_hex", + "is_integer", + "to_integers" +] cpdef Column to_integers(Column input, DataType output_type): """ diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx index 834781f95f3..e7c6aae4fa8 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx @@ -6,6 +6,7 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4 +__all__ = ["integers_to_ipv4", "ipv4_to_integers", "is_ipv4"] cpdef Column ipv4_to_integers(Column input): """ diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx index cbfe5f5aa8b..518f72f6644 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx +++ 
b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx @@ -17,6 +17,7 @@ from pylibcudf.types cimport type_id from cython.operator import dereference +__all__ = ["format_list_column"] cpdef Column format_list_column( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx index 82f8a75f1d9..bd5e23bca43 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx @@ -6,6 +6,7 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings.convert cimport convert_urls as cpp_convert_urls +__all__ = ["url_decode", "url_encode"] cpdef Column url_encode(Column input): """ diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyx b/python/pylibcudf/pylibcudf/strings/extract.pyx index b56eccc8287..0ce70666e92 100644 --- a/python/pylibcudf/pylibcudf/strings/extract.pyx +++ b/python/pylibcudf/pylibcudf/strings/extract.pyx @@ -9,6 +9,7 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.table cimport Table +__all__ = ["extract", "extract_all_record"] cpdef Table extract(Column input, RegexProgram prog): """ diff --git a/python/pylibcudf/pylibcudf/strings/find.pyx b/python/pylibcudf/pylibcudf/strings/find.pyx index 6fc6dca24fd..f0af339ff08 100644 --- a/python/pylibcudf/pylibcudf/strings/find.pyx +++ b/python/pylibcudf/pylibcudf/strings/find.pyx @@ -10,6 +10,7 @@ from cython.operator import dereference from pylibcudf.libcudf.scalar.scalar cimport string_scalar +__all__ = ["contains", "ends_with", "find", "rfind", "starts_with"] cpdef Column find( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx index 672aa606bd0..c9ce734b4be 100644 --- 
a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx +++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx @@ -6,6 +6,7 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings cimport find_multiple as cpp_find_multiple +__all__ = ["find_multiple"] cpdef Column find_multiple(Column input, Column targets): """ diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 89fa4302824..23c84675a16 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -7,6 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings cimport findall as cpp_findall from pylibcudf.strings.regex_program cimport RegexProgram +__all__ = ["findall", "find_re"] cpdef Column findall(Column input, RegexProgram pattern): """ diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyx b/python/pylibcudf/pylibcudf/strings/padding.pyx index f6950eecf60..0e349a7be47 100644 --- a/python/pylibcudf/pylibcudf/strings/padding.pyx +++ b/python/pylibcudf/pylibcudf/strings/padding.pyx @@ -6,6 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings cimport padding as cpp_padding from pylibcudf.libcudf.strings.side_type cimport side_type +__all__ = ["pad", "zfill"] cpdef Column pad(Column input, size_type width, side_type side, str fill_char): """ diff --git a/python/pylibcudf/pylibcudf/strings/regex_flags.pyx b/python/pylibcudf/pylibcudf/strings/regex_flags.pyx index ce3b6b10a42..65b504e0dc7 100644 --- a/python/pylibcudf/pylibcudf/strings/regex_flags.pyx +++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pyx @@ -2,3 +2,5 @@ from pylibcudf.libcudf.strings.regex_flags import \ regex_flags as RegexFlags # no-cython-lint + +__all__ = ["RegexFlags"] diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyx 
b/python/pylibcudf/pylibcudf/strings/regex_program.pyx index 91f585cd637..cf278d7039d 100644 --- a/python/pylibcudf/pylibcudf/strings/regex_program.pyx +++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyx @@ -11,6 +11,7 @@ from pylibcudf.strings.regex_flags import RegexFlags from pylibcudf.strings.regex_flags cimport regex_flags +__all__ = ["RegexProgram"] cdef class RegexProgram: """Regex program class. diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyx b/python/pylibcudf/pylibcudf/strings/repeat.pyx index fb2bb13c666..a497b1f438e 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pyx +++ b/python/pylibcudf/pylibcudf/strings/repeat.pyx @@ -6,6 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings cimport repeat as cpp_repeat from pylibcudf.libcudf.types cimport size_type +__all__ = ["repeat_strings"] cpdef Column repeat_strings(Column input, ColumnorSizeType repeat_times): """ diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx index 2b94f5e3fee..3ba6c1b5530 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace.pyx @@ -16,6 +16,7 @@ from pylibcudf.libcudf.strings.replace cimport ( from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar +__all__ = ["replace", "replace_multiple", "replace_slice"] cpdef Column replace( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyx b/python/pylibcudf/pylibcudf/strings/replace_re.pyx index ccc33fd4425..bdabc779ddf 100644 --- a/python/pylibcudf/pylibcudf/strings/replace_re.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyx @@ -16,6 +16,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.strings.regex_flags cimport regex_flags from pylibcudf.strings.regex_program cimport RegexProgram +__all__ = ["replace_re", "replace_with_backrefs"] cpdef Column replace_re( Column input, diff --git 
a/python/pylibcudf/pylibcudf/strings/side_type.pyx b/python/pylibcudf/pylibcudf/strings/side_type.pyx index cf0c770cc11..87db4206a9c 100644 --- a/python/pylibcudf/pylibcudf/strings/side_type.pyx +++ b/python/pylibcudf/pylibcudf/strings/side_type.pyx @@ -1,3 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from pylibcudf.libcudf.strings.side_type import \ side_type as SideType # no-cython-lint + +__all__ = ["SideType"] diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyx b/python/pylibcudf/pylibcudf/strings/slice.pyx index 70d10cab36c..d32de7c50e0 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pyx +++ b/python/pylibcudf/pylibcudf/strings/slice.pyx @@ -14,6 +14,7 @@ from pylibcudf.scalar cimport Scalar from cython.operator import dereference +__all__ = ["slice_strings"] cpdef Column slice_strings( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyx b/python/pylibcudf/pylibcudf/strings/split/partition.pyx index 0fb4f186c41..75537ea46d3 100644 --- a/python/pylibcudf/pylibcudf/strings/split/partition.pyx +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyx @@ -13,6 +13,7 @@ from pylibcudf.table cimport Table from cython.operator import dereference +__all__ = ["partition", "rpartition"] cpdef Table partition(Column input, Scalar delimiter=None): """ diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyx b/python/pylibcudf/pylibcudf/strings/split/split.pyx index e3827f6645e..90087f996f0 100644 --- a/python/pylibcudf/pylibcudf/strings/split/split.pyx +++ b/python/pylibcudf/pylibcudf/strings/split/split.pyx @@ -13,6 +13,16 @@ from pylibcudf.table cimport Table from cython.operator import dereference +__all__ = [ + "rsplit", + "rsplit_re", + "rsplit_record", + "rsplit_record_re", + "split", + "split_re", + "split_record", + "split_record_re", +] cpdef Table split(Column strings_column, Scalar delimiter, size_type maxsplit): """ diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyx 
b/python/pylibcudf/pylibcudf/strings/strip.pyx index 429a23c3cdf..805d959891b 100644 --- a/python/pylibcudf/pylibcudf/strings/strip.pyx +++ b/python/pylibcudf/pylibcudf/strings/strip.pyx @@ -13,6 +13,7 @@ from pylibcudf.libcudf.strings cimport strip as cpp_strip from pylibcudf.scalar cimport Scalar from pylibcudf.strings.side_type cimport side_type +__all__ = ["strip"] cpdef Column strip( Column input, diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyx b/python/pylibcudf/pylibcudf/strings/translate.pyx index d85da8e6cdd..ba1e8dc5d27 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pyx +++ b/python/pylibcudf/pylibcudf/strings/translate.pyx @@ -14,6 +14,7 @@ from cython.operator import dereference from pylibcudf.libcudf.strings.translate import \ filter_type as FilterType # no-cython-lint +__all__ = ["FilterType", "filter_characters", "translate"] cdef vector[pair[char_utf8, char_utf8]] _table_to_c_table(dict table): """ diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyx b/python/pylibcudf/pylibcudf/strings/wrap.pyx index 2ced250f837..b696eb48e47 100644 --- a/python/pylibcudf/pylibcudf/strings/wrap.pyx +++ b/python/pylibcudf/pylibcudf/strings/wrap.pyx @@ -7,6 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.strings cimport wrap as cpp_wrap from pylibcudf.libcudf.types cimport size_type +__all__ = ["wrap"] cpdef Column wrap(Column input, size_type width): """ diff --git a/python/pylibcudf/pylibcudf/table.pyx b/python/pylibcudf/pylibcudf/table.pyx index d0d6f2343d0..97955aa0ae6 100644 --- a/python/pylibcudf/pylibcudf/table.pyx +++ b/python/pylibcudf/pylibcudf/table.pyx @@ -10,6 +10,7 @@ from pylibcudf.libcudf.table.table cimport table from .column cimport Column +__all__ = ["Table"] cdef class Table: """A list of columns of the same size. 
diff --git a/python/pylibcudf/pylibcudf/traits.pyx b/python/pylibcudf/pylibcudf/traits.pyx index 5a1c67e1f6c..c191a1fa88d 100644 --- a/python/pylibcudf/pylibcudf/traits.pyx +++ b/python/pylibcudf/pylibcudf/traits.pyx @@ -5,6 +5,26 @@ from pylibcudf.libcudf.utilities cimport traits from .types cimport DataType +__all__ = [ + "is_bit_castable", + "is_boolean", + "is_chrono", + "is_compound", + "is_dictionary", + "is_duration", + "is_equality_comparable", + "is_fixed_point", + "is_fixed_width", + "is_floating_point", + "is_index_type", + "is_integral", + "is_integral_not_bool", + "is_nested", + "is_numeric", + "is_relationally_comparable", + "is_timestamp", + "is_unsigned", +] cpdef bool is_relationally_comparable(DataType typ): """Checks if the given data type supports relational comparisons. diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index e8d95cadb0c..9700bcff221 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -18,6 +18,15 @@ from .gpumemoryview cimport gpumemoryview from .types cimport DataType from .utils cimport int_to_bitmask_ptr +__all__ = [ + "bools_to_mask", + "compute_column", + "encode", + "mask_to_bools", + "nans_to_nulls", + "one_hot_encode", + "transform", +] cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input): """Create a null mask preserving existing nulls and converting nans to null. diff --git a/python/pylibcudf/pylibcudf/transpose.pyx b/python/pylibcudf/pylibcudf/transpose.pyx index a24f937ced3..5eb3e58cebc 100644 --- a/python/pylibcudf/pylibcudf/transpose.pyx +++ b/python/pylibcudf/pylibcudf/transpose.pyx @@ -9,6 +9,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from .column cimport Column from .table cimport Table +__all__ = ["transpose"] cpdef Table transpose(Table input_table): """Transpose a Table. 
diff --git a/python/pylibcudf/pylibcudf/types.pyx b/python/pylibcudf/pylibcudf/types.pyx index a0c31f994a3..afa1b56f38a 100644 --- a/python/pylibcudf/pylibcudf/types.pyx +++ b/python/pylibcudf/pylibcudf/types.pyx @@ -20,6 +20,22 @@ from pylibcudf.libcudf.types import null_order as NullOrder # no-cython-lint, i from pylibcudf.libcudf.types import order as Order # no-cython-lint, isort:skip from pylibcudf.libcudf.types import sorted as Sorted # no-cython-lint, isort:skip +__all__ = [ + "DataType", + "Interpolation", + "MaskState", + "NanEquality", + "NanPolicy", + "NullEquality", + "NullOrder", + "NullPolicy", + "Order", + "SIZE_TYPE", + "SIZE_TYPE_ID", + "Sorted", + "TypeId", + "size_of" +] cdef class DataType: """Indicator for the logical data type of an element in a column. diff --git a/python/pylibcudf/pylibcudf/unary.pyx b/python/pylibcudf/pylibcudf/unary.pyx index 53e8c382b5e..b738ab53d1b 100644 --- a/python/pylibcudf/pylibcudf/unary.pyx +++ b/python/pylibcudf/pylibcudf/unary.pyx @@ -13,6 +13,16 @@ from pylibcudf.libcudf.unary import \ from .column cimport Column from .types cimport DataType +__all__ = [ + "UnaryOperator", + "cast", + "is_nan", + "is_not_nan", + "is_null", + "is_supported_cast", + "is_valid", + "unary_operation", +] cpdef Column unary_operation(Column input, unary_operator op): """Perform a unary operation on a column. 
From 7493605016d75bba0374aeb049e040a73a0f14b6 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 8 Nov 2024 10:38:25 +0000 Subject: [PATCH 10/16] Pylibcudf classes are typically not hashable --- python/pylibcudf/pylibcudf/column.pyx | 4 ++++ python/pylibcudf/pylibcudf/contiguous_split.pyx | 4 ++++ python/pylibcudf/pylibcudf/expressions.pyx | 2 +- python/pylibcudf/pylibcudf/gpumemoryview.pyx | 2 ++ python/pylibcudf/pylibcudf/groupby.pyx | 4 ++++ python/pylibcudf/pylibcudf/io/datasource.pyx | 1 + python/pylibcudf/pylibcudf/io/orc.pyx | 4 ++++ python/pylibcudf/pylibcudf/io/parquet.pyx | 2 ++ python/pylibcudf/pylibcudf/io/types.pyx | 6 ++++++ python/pylibcudf/pylibcudf/json.pyx | 2 ++ python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx | 2 ++ python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx | 2 ++ python/pylibcudf/pylibcudf/nvtext/tokenize.pyx | 2 ++ python/pylibcudf/pylibcudf/scalar.pyx | 2 ++ python/pylibcudf/pylibcudf/strings/regex_program.pyx | 2 ++ python/pylibcudf/pylibcudf/table.pyx | 2 ++ 16 files changed, 42 insertions(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx index 794c76438f3..9bb5574608e 100644 --- a/python/pylibcudf/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -62,6 +62,8 @@ cdef class Column: self._children = children self._num_children = len(children) + __hash__ = None + cdef column_view view(self) nogil: """Generate a libcudf column_view to pass to libcudf algorithms. 
@@ -385,6 +387,8 @@ cdef class ListColumnView: raise TypeError("Column is not a list type") self._column = col + __hash__ = None + cpdef child(self): """The data column of the underlying list column.""" return self._column.child(1) diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx index 451757eea10..94873e079c9 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pyx +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx @@ -45,6 +45,8 @@ cdef class HostBuffer: out.strides[0] = 1 return out + __hash__ = None + def __getbuffer__(self, Py_buffer *buffer, int flags): buffer.buf = dereference(self.c_obj).data() buffer.format = NULL # byte @@ -76,6 +78,8 @@ cdef class PackedColumns: "Use one of the factories." ) + __hash__ = None + @staticmethod cdef PackedColumns from_libcudf(unique_ptr[packed_columns] data): """Create a Python PackedColumns from a libcudf packed_columns.""" diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index b0db533dba9..0f12cfe313c 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -68,7 +68,7 @@ cdef class Expression: For details, see :cpp:class:`cudf::ast::expression`. 
""" - pass + __hash__ = None cdef class Literal(Expression): """ diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyx b/python/pylibcudf/pylibcudf/gpumemoryview.pyx index 605a19ea0de..41316eddb60 100644 --- a/python/pylibcudf/pylibcudf/gpumemoryview.pyx +++ b/python/pylibcudf/pylibcudf/gpumemoryview.pyx @@ -26,3 +26,5 @@ cdef class gpumemoryview: @property def __cuda_array_interface__(self): return self.obj.__cuda_array_interface__ + + __hash__ = None diff --git a/python/pylibcudf/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx index 2760516b316..e6cb3ac81a7 100644 --- a/python/pylibcudf/pylibcudf/groupby.pyx +++ b/python/pylibcudf/pylibcudf/groupby.pyx @@ -47,6 +47,8 @@ cdef class GroupByRequest: self._values = values self._aggregations = aggregations + __hash__ = None + cdef aggregation_request _to_libcudf_agg_request(self) except *: """Convert to a libcudf aggregation_request object. @@ -129,6 +131,8 @@ cdef class GroupBy: # deallocated from under us: self._keys = keys + __hash__ = None + @staticmethod cdef tuple _parse_outputs( pair[unique_ptr[table], vector[aggregation_result]] c_res diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyx b/python/pylibcudf/pylibcudf/io/datasource.pyx index 4e7c9c8e385..aac1c0d1014 100644 --- a/python/pylibcudf/pylibcudf/io/datasource.pyx +++ b/python/pylibcudf/pylibcudf/io/datasource.pyx @@ -5,6 +5,7 @@ from pylibcudf.libcudf.io.datasource cimport datasource __all__ = ["Datasource"] cdef class Datasource: + __hash__ = None cdef datasource* get_datasource(self) except * nogil: with gil: raise NotImplementedError("get_datasource() should not " diff --git a/python/pylibcudf/pylibcudf/io/orc.pyx b/python/pylibcudf/pylibcudf/io/orc.pyx index f2f644a32f9..4270f5b4f95 100644 --- a/python/pylibcudf/pylibcudf/io/orc.pyx +++ b/python/pylibcudf/pylibcudf/io/orc.pyx @@ -45,6 +45,8 @@ cdef class OrcColumnStatistics: "use `OrcColumnStatistics.from_libcudf` instead." 
) + __hash__ = None + @property def number_of_values(self): if self.number_of_values_c.has_value(): @@ -189,6 +191,8 @@ cdef class OrcColumnStatistics: cdef class ParsedOrcStatistics: + __hash__ = None + @property def column_names(self): return [name.decode() for name in self.c_obj.column_names] diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx index 7f93c763298..b76a352d633 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyx +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -125,6 +125,8 @@ cdef class ChunkedParquetReader: ) ) + __hash__ = None + cpdef bool has_next(self): """ Returns True if there is another chunk in the Parquet file diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 8bc226d83e5..85eb03fe3b9 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -65,6 +65,8 @@ cdef class TableWithMetadata: self.metadata.schema_info = self._make_column_info(column_names) + __hash__ = None + cdef vector[column_name_info] _make_column_info(self, list column_names): cdef vector[column_name_info] col_name_infos cdef column_name_info info @@ -229,6 +231,8 @@ cdef class SourceInfo: self.c_obj = source_info(c_host_buffers) + __hash__ = None + # Adapts a python io.IOBase object as a libcudf IO data_sink. 
This lets you # write from cudf to any python file-like object (File/BytesIO/SocketIO etc) @@ -311,3 +315,5 @@ cdef class SinkInfo: else: # we don't have sinks so we must have paths to sinks self.c_obj = sink_info(paths) + + __hash__ = None diff --git a/python/pylibcudf/pylibcudf/json.pyx b/python/pylibcudf/pylibcudf/json.pyx index 9c5dd023509..5ec1e1be971 100644 --- a/python/pylibcudf/pylibcudf/json.pyx +++ b/python/pylibcudf/pylibcudf/json.pyx @@ -27,6 +27,8 @@ cdef class GetJsonObjectOptions: ) self.set_missing_fields_as_nulls(missing_fields_as_nulls) + __hash__ = None + def get_allow_single_quotes(self): """ Returns true/false depending on whether single-quotes for representing strings diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx index c63b92328f3..7565b21084f 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx @@ -28,6 +28,8 @@ cdef class BPEMergePairs: with nogil: self.c_obj = move(cpp_load_merge_pairs(c_pairs)) + __hash__ = None + cpdef Column byte_pair_encoding( Column input, BPEMergePairs merge_pairs, diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx index a346eef4619..14fb6f5fe1e 100644 --- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyx @@ -25,6 +25,8 @@ cdef class HashedVocabulary: with nogil: self.c_obj = move(cpp_load_vocabulary_file(c_hash_file)) + __hash__ = None + cpdef tuple[Column, Column, Column] subword_tokenize( Column input, HashedVocabulary vocabulary_table, diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx index 26b055fd5ae..43d426489b4 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx @@ -41,6 +41,8 @@ cdef class 
TokenizeVocabulary: with nogil: self.c_obj = move(cpp_load_vocabulary(c_vocab)) + __hash__ = None + cpdef Column tokenize_scalar(Column input, Scalar delimiter=None): """ Returns a single column of strings by tokenizing the input diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index e522a2a4670..1ac014e891e 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -39,6 +39,8 @@ cdef class Scalar: # DeviceScalar. raise ValueError("Scalar should be constructed with a factory") + __hash__ = None + cdef const scalar* get(self) noexcept nogil: return self.c_obj.get() diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyx b/python/pylibcudf/pylibcudf/strings/regex_program.pyx index cf278d7039d..46bfde074d2 100644 --- a/python/pylibcudf/pylibcudf/strings/regex_program.pyx +++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyx @@ -25,6 +25,8 @@ cdef class RegexProgram: def __init__(self, *args, **kwargs): raise ValueError("Do not instantiate RegexProgram directly, use create") + __hash__ = None + @staticmethod def create(str pattern, int flags): """Create a program from a pattern. diff --git a/python/pylibcudf/pylibcudf/table.pyx b/python/pylibcudf/pylibcudf/table.pyx index 97955aa0ae6..0c1e88a927c 100644 --- a/python/pylibcudf/pylibcudf/table.pyx +++ b/python/pylibcudf/pylibcudf/table.pyx @@ -25,6 +25,8 @@ cdef class Table: raise ValueError("All columns must be pylibcudf Column objects") self._columns = columns + __hash__ = None + cdef table_view view(self) nogil: """Generate a libcudf table_view to pass to libcudf algorithms. 
From a4e8617b807a066b3d54cc90127594453d47f5c1 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 8 Nov 2024 10:38:54 +0000 Subject: [PATCH 11/16] Minor fixes in response to review --- python/pylibcudf/pylibcudf/aggregation.pyi | 82 ++++++------- python/pylibcudf/pylibcudf/binaryop.pyi | 72 ++++++------ python/pylibcudf/pylibcudf/column.pyi | 2 +- python/pylibcudf/pylibcudf/copying.pyi | 12 +- python/pylibcudf/pylibcudf/datetime.pyi | 36 +++--- python/pylibcudf/pylibcudf/expressions.pyi | 108 ++++++++--------- python/pylibcudf/pylibcudf/gpumemoryview.pyi | 2 +- python/pylibcudf/pylibcudf/io/types.pyi | 72 ++++++------ python/pylibcudf/pylibcudf/labeling.pyi | 6 +- python/pylibcudf/pylibcudf/lists.pyi | 10 +- .../pylibcudf/nvtext/byte_pair_encode.pyi | 4 +- .../pylibcudf/nvtext/subword_tokenize.pyi | 2 +- .../pylibcudf/pylibcudf/nvtext/tokenize.pyi | 2 +- python/pylibcudf/pylibcudf/reduce.pyi | 6 +- python/pylibcudf/pylibcudf/replace.pyi | 6 +- python/pylibcudf/pylibcudf/round.pyi | 6 +- .../pylibcudf/pylibcudf/stream_compaction.pyi | 10 +- .../pylibcudf/strings/char_types.pyi | 22 ++-- .../pylibcudf/pylibcudf/strings/combine.pyi | 10 +- .../pylibcudf/strings/regex_flags.pyi | 8 +- .../pylibcudf/pylibcudf/strings/side_type.pyi | 8 +- .../pylibcudf/pylibcudf/strings/translate.pyi | 6 +- python/pylibcudf/pylibcudf/table.pyi | 2 +- python/pylibcudf/pylibcudf/types.pyi | 110 +++++++++--------- python/pylibcudf/pylibcudf/unary.pyi | 46 ++++---- 25 files changed, 325 insertions(+), 325 deletions(-) diff --git a/python/pylibcudf/pylibcudf/aggregation.pyi b/python/pylibcudf/pylibcudf/aggregation.pyi index f6fdf0273ae..230249995a5 100644 --- a/python/pylibcudf/pylibcudf/aggregation.pyi +++ b/python/pylibcudf/pylibcudf/aggregation.pyi @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.types import ( DataType, @@ -13,56 +13,56 @@ from pylibcudf.types import ( ) class Kind(IntEnum): - SUM = auto() - PRODUCT = auto() - MIN = auto() - MAX = auto() - COUNT_VALID = auto() - COUNT_ALL = auto() - ANY = auto() - ALL = auto() - SUM_OF_SQUARES = auto() - MEAN = auto() - VARIANCE = auto() - STD = auto() - MEDIAN = auto() - QUANTILE = auto() - ARGMAX = auto() - ARGMIN = auto() - NUNIQUE = auto() - NTH_ELEMENT = auto() - RANK = auto() - COLLECT_LIST = auto() - COLLECT_SET = auto() - PTX = auto() - CUDA = auto() - CORRELATION = auto() - COVARIANCE = auto() + SUM = ... + PRODUCT = ... + MIN = ... + MAX = ... + COUNT_VALID = ... + COUNT_ALL = ... + ANY = ... + ALL = ... + SUM_OF_SQUARES = ... + MEAN = ... + VARIANCE = ... + STD = ... + MEDIAN = ... + QUANTILE = ... + ARGMAX = ... + ARGMIN = ... + NUNIQUE = ... + NTH_ELEMENT = ... + RANK = ... + COLLECT_LIST = ... + COLLECT_SET = ... + PTX = ... + CUDA = ... + CORRELATION = ... + COVARIANCE = ... class CorrelationType(IntEnum): - PEARSON = auto() - KENDALL = auto() - SPEARMAN = auto() + PEARSON = ... + KENDALL = ... + SPEARMAN = ... class EWMHistory(IntEnum): - INFINITE = auto() - FINITE = auto() + INFINITE = ... + FINITE = ... class RankMethod(IntEnum): - FIRST = auto() - AVERAGE = auto() - MIN = auto() - MAX = auto() - DENSE = auto() + FIRST = ... + AVERAGE = ... + MIN = ... + MAX = ... + DENSE = ... class RankPercentage(IntEnum): - NONE = auto() - ZERO_NORMALIZED = auto() - ONE_NORMALIZED = auto() + NONE = ... + ZERO_NORMALIZED = ... + ONE_NORMALIZED = ... class UdfType(IntEnum): - CUDA = auto() - PTX = auto() + CUDA = ... + PTX = ... class Aggregation: def kind(self) -> Kind: ... 
diff --git a/python/pylibcudf/pylibcudf/binaryop.pyi b/python/pylibcudf/pylibcudf/binaryop.pyi index 9cbaeb4549e..f745e6c6854 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyi +++ b/python/pylibcudf/pylibcudf/binaryop.pyi @@ -1,47 +1,47 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.types import DataType class BinaryOperator(IntEnum): - ADD = auto() - SUB = auto() - MUL = auto() - DIV = auto() - TRUE_DIV = auto() - FLOOR_DIV = auto() - MOD = auto() - PMOD = auto() - PYMOD = auto() - POW = auto() - INT_POW = auto() - LOG_BASE = auto() - ATAN2 = auto() - SHIFT_LEFT = auto() - SHIFT_RIGHT = auto() - SHIFT_RIGHT_UNSIGNED = auto() - BITWISE_AND = auto() - BITWISE_OR = auto() - BITWISE_XOR = auto() - LOGICAL_AND = auto() - LOGICAL_OR = auto() - EQUAL = auto() - NOT_EQUAL = auto() - LESS = auto() - GREATER = auto() - LESS_EQUAL = auto() - GREATER_EQUAL = auto() - NULL_EQUALS = auto() - NULL_MAX = auto() - NULL_MIN = auto() - NULL_NOT_EQUALS = auto() - GENERIC_BINARY = auto() - NULL_LOGICAL_AND = auto() - NULL_LOGICAL_OR = auto() - INVALID_BINARY = auto() + ADD = ... + SUB = ... + MUL = ... + DIV = ... + TRUE_DIV = ... + FLOOR_DIV = ... + MOD = ... + PMOD = ... + PYMOD = ... + POW = ... + INT_POW = ... + LOG_BASE = ... + ATAN2 = ... + SHIFT_LEFT = ... + SHIFT_RIGHT = ... + SHIFT_RIGHT_UNSIGNED = ... + BITWISE_AND = ... + BITWISE_OR = ... + BITWISE_XOR = ... + LOGICAL_AND = ... + LOGICAL_OR = ... + EQUAL = ... + NOT_EQUAL = ... + LESS = ... + GREATER = ... + LESS_EQUAL = ... + GREATER_EQUAL = ... + NULL_EQUALS = ... + NULL_MAX = ... + NULL_MIN = ... + NULL_NOT_EQUALS = ... + GENERIC_BINARY = ... + NULL_LOGICAL_AND = ... + NULL_LOGICAL_OR = ... + INVALID_BINARY = ... 
def binary_operation( lhs: Column | Scalar, diff --git a/python/pylibcudf/pylibcudf/column.pyi b/python/pylibcudf/pylibcudf/column.pyi index 72b41a9be5e..c9f70de3dbf 100644 --- a/python/pylibcudf/pylibcudf/column.pyi +++ b/python/pylibcudf/pylibcudf/column.pyi @@ -39,7 +39,7 @@ class Column: def from_cuda_array_interface_obj(obj: Any) -> Column: ... class ListColumnView: - def __init__(self, column: Column) -> None: ... + def __init__(self, column: Column): ... def child(self) -> Column: ... def offsets(self) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/copying.pyi b/python/pylibcudf/pylibcudf/copying.pyi index 07bfced4a55..6cf4ed48724 100644 --- a/python/pylibcudf/pylibcudf/copying.pyi +++ b/python/pylibcudf/pylibcudf/copying.pyi @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from typing import TypeVar from pylibcudf.column import Column @@ -8,13 +8,13 @@ from pylibcudf.scalar import Scalar from pylibcudf.table import Table class MaskAllocationPolicy(IntEnum): - NEVER = auto() - RETAIN = auto() - ALWAYS = auto() + NEVER = ... + RETAIN = ... + ALWAYS = ... class OutOfBoundsPolicy(IntEnum): - NULLIFY = auto() - DONT_CHECK = auto() + NULLIFY = ... + DONT_CHECK = ... ColumnOrTable = TypeVar("ColumnOrTable", Column, Table) diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi index 30ff3edf4fb..91df1bfb92b 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyi +++ b/python/pylibcudf/pylibcudf/datetime.pyi @@ -1,30 +1,30 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.scalar import Scalar class DatetimeComponent(IntEnum): - YEAR = auto() - MONTH = auto() - DAY = auto() - WEEKDAY = auto() - HOUR = auto() - MINUTE = auto() - SECOND = auto() - MILLISECOND = auto() - MICROSECOND = auto() - NANOSECOND = auto() + YEAR = ... + MONTH = ... + DAY = ... 
+ WEEKDAY = ... + HOUR = ... + MINUTE = ... + SECOND = ... + MILLISECOND = ... + MICROSECOND = ... + NANOSECOND = ... class RoundingFrequency(IntEnum): - DAY = auto() - HOUR = auto() - MINUTE = auto() - SECOND = auto() - MILLISECOND = auto() - MICROSECOND = auto() - NANOSECOND = auto() + DAY = ... + HOUR = ... + MINUTE = ... + SECOND = ... + MILLISECOND = ... + MICROSECOND = ... + NANOSECOND = ... def extract_millisecond_fraction(input: Column) -> Column: ... def extract_microsecond_fraction(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/expressions.pyi b/python/pylibcudf/pylibcudf/expressions.pyi index c3769bbfb85..5b5c6755392 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyi +++ b/python/pylibcudf/pylibcudf/expressions.pyi @@ -1,73 +1,73 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.scalar import Scalar class TableReference(IntEnum): - LEFT = auto() - RIGHT = auto() + LEFT = ... + RIGHT = ... class ASTOperator(IntEnum): - ADD = auto() - SUB = auto() - MUL = auto() - DIV = auto() - TRUE_DIV = auto() - FLOOR_DIV = auto() - MOD = auto() - PYMOD = auto() - POW = auto() - EQUAL = auto() - NULL_EQUAL = auto() - NOT_EQUAL = auto() - LESS = auto() - GREATER = auto() - LESS_EQUAL = auto() - GREATER_EQUAL = auto() - BITWISE_AND = auto() - BITWISE_OR = auto() - BITWISE_XOR = auto() - NULL_LOGICAL_AND = auto() - LOGICAL_AND = auto() - NULL_LOGICAL_OR = auto() - LOGICAL_OR = auto() - IDENTITY = auto() - IS_NULL = auto() - SIN = auto() - COS = auto() - TAN = auto() - ARCSIN = auto() - ARCCOS = auto() - ARCTAN = auto() - SINH = auto() - COSH = auto() - TANH = auto() - ARCSINH = auto() - ARCCOSH = auto() - ARCTANH = auto() - EXP = auto() - LOG = auto() - SQRT = auto() - CBRT = auto() - CEIL = auto() - FLOOR = auto() - ABS = auto() - RINT = auto() - BIT_INVERT = auto() - NOT = auto() + ADD = ... + SUB = ... + MUL = ... + DIV = ... + TRUE_DIV = ... + FLOOR_DIV = ... + MOD = ... 
+ PYMOD = ... + POW = ... + EQUAL = ... + NULL_EQUAL = ... + NOT_EQUAL = ... + LESS = ... + GREATER = ... + LESS_EQUAL = ... + GREATER_EQUAL = ... + BITWISE_AND = ... + BITWISE_OR = ... + BITWISE_XOR = ... + NULL_LOGICAL_AND = ... + LOGICAL_AND = ... + NULL_LOGICAL_OR = ... + LOGICAL_OR = ... + IDENTITY = ... + IS_NULL = ... + SIN = ... + COS = ... + TAN = ... + ARCSIN = ... + ARCCOS = ... + ARCTAN = ... + SINH = ... + COSH = ... + TANH = ... + ARCSINH = ... + ARCCOSH = ... + ARCTANH = ... + EXP = ... + LOG = ... + SQRT = ... + CBRT = ... + CEIL = ... + FLOOR = ... + ABS = ... + RINT = ... + BIT_INVERT = ... + NOT = ... class Expression: ... class Literal(Expression): - def __init__(self, value: Scalar) -> None: ... + def __init__(self, value: Scalar): ... class ColumnReference(Expression): def __init__( self, index: int, table_source: TableReference = TableReference.LEFT - ) -> None: ... + ): ... class ColumnNameReference(Expression): - def __init__(self, name: str) -> None: ... + def __init__(self, name: str): ... class Operation(Expression): def __init__( @@ -75,4 +75,4 @@ class Operation(Expression): op: ASTOperator, left: Expression, right: Expression | None = None, - ) -> None: ... + ): ... diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyi b/python/pylibcudf/pylibcudf/gpumemoryview.pyi index 0491ba896e5..50f1f39a515 100644 --- a/python/pylibcudf/pylibcudf/gpumemoryview.pyi +++ b/python/pylibcudf/pylibcudf/gpumemoryview.pyi @@ -4,6 +4,6 @@ from collections.abc import Mapping from typing import Any class gpumemoryview: - def __init__(self, data: Any) -> None: ... + def __init__(self, data: Any): ... @property def __cuda_array_interface__(self) -> Mapping[str, Any]: ... 
diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi index f668c07f940..3ddf21f84dd 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyi +++ b/python/pylibcudf/pylibcudf/io/types.pyi @@ -2,7 +2,7 @@ import io import os from collections.abc import Mapping -from enum import IntEnum, auto +from enum import IntEnum from typing import Literal, TypeAlias, overload from pylibcudf.column import Column @@ -10,51 +10,51 @@ from pylibcudf.io.datasource import Datasource from pylibcudf.table import Table class JSONRecoveryMode(IntEnum): - FAIL = auto() - RECOVER_WITH_NULL = auto() + FAIL = ... + RECOVER_WITH_NULL = ... class CompressionType(IntEnum): - NONE = auto() - AUTO = auto() - SNAPPY = auto() - GZIP = auto() - BZIP2 = auto() - BROTLI = auto() - ZIP = auto() - XZ = auto() - ZLIB = auto() - LZ4 = auto() - LZO = auto() - ZSTD = auto() + NONE = ... + AUTO = ... + SNAPPY = ... + GZIP = ... + BZIP2 = ... + BROTLI = ... + ZIP = ... + XZ = ... + ZLIB = ... + LZ4 = ... + LZO = ... + ZSTD = ... class ColumnEncoding(IntEnum): - USE_DEFAULT = auto() - DICTIONARY = auto() - PLAIN = auto() - DELTA_BINARY_PACKED = auto() - DELTA_LENGTH_BYTE_ARRAY = auto() - DELTA_BYTE_ARRAY = auto() - BYTE_STREAM_SPLIT = auto() - DIRECT = auto() - DIRECT_V2 = auto() - DICTIONARY_V2 = auto() + USE_DEFAULT = ... + DICTIONARY = ... + PLAIN = ... + DELTA_BINARY_PACKED = ... + DELTA_LENGTH_BYTE_ARRAY = ... + DELTA_BYTE_ARRAY = ... + BYTE_STREAM_SPLIT = ... + DIRECT = ... + DIRECT_V2 = ... + DICTIONARY_V2 = ... class DictionaryPolicy(IntEnum): - NEVER = auto() - ADAPTIVE = auto() - ALWAYS = auto() + NEVER = ... + ADAPTIVE = ... + ALWAYS = ... class StatisticsFreq(IntEnum): - STATISTICS_NONE = auto() - STATISTICS_ROWGROUP = auto() - STATISTICS_PAGE = auto() - STATISTICS_COLUMN = auto() + STATISTICS_NONE = ... + STATISTICS_ROWGROUP = ... + STATISTICS_PAGE = ... + STATISTICS_COLUMN = ... 
class QuoteStyle(IntEnum): - MINIMAL = auto() - ALL = auto() - NONNUMERIC = auto() - NONE = auto() + MINIMAL = ... + ALL = ... + NONNUMERIC = ... + NONE = ... ColumnNameSpec: TypeAlias = tuple[str, list[ColumnNameSpec]] ChildNameSpec: TypeAlias = Mapping[str, ChildNameSpec] diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi index 0a6f2c13719..c3a75d10baf 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyi +++ b/python/pylibcudf/pylibcudf/labeling.pyi @@ -1,12 +1,12 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column class Inclusive(IntEnum): - YES = auto() - NO = auto() + YES = ... + NO = ... def label_bins( input: Column, diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi index 4e8966ce98a..6e86aca40aa 100644 --- a/python/pylibcudf/pylibcudf/lists.pyi +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.scalar import Scalar @@ -8,12 +8,12 @@ from pylibcudf.table import Table from pylibcudf.types import NanEquality, NullEquality, NullOrder, Order class ConcatenateNullPolicy(IntEnum): - IGNORE = auto() - NULLIFY_OUTPUT_ROW = auto() + IGNORE = ... + NULLIFY_OUTPUT_ROW = ... class DuplicateFindOption(IntEnum): - FIND_FIRST = auto() - FIND_LAST = auto() + FIND_FIRST = ... + FIND_LAST = ... def explode_outer(input: Table, explode_column_idx: int) -> Table: ... def concatenate_rows(input: Table) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi index eb85acd56e5..a75714d9648 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi @@ -4,8 +4,8 @@ from pylibcudf.column import Column from pylibcudf.scalar import Scalar class BPEMergePairs: - def __init__(self, merge_pairs: Column) -> None: ... + def __init__(self, merge_pairs: Column): ... def byte_pair_encoding( input: Column, merge_pairs: BPEMergePairs, separator: Scalar | None = None -) -> Column: ... +): ... diff --git a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi index 996bd093eb4..f6618e296b1 100644 --- a/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/subword_tokenize.pyi @@ -3,7 +3,7 @@ from pylibcudf.column import Column class HashedVocabulary: - def __init__(self, hash_file: str) -> None: ... + def __init__(self, hash_file: str): ... def subword_tokenize( input: Column, diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi index 516011eff61..b9aa2393514 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi @@ -4,7 +4,7 @@ from pylibcudf.column import Column from pylibcudf.scalar import Scalar class TokenizeVocabulary: - def __init__(self, vocab: Column) -> None: ... + def __init__(self, vocab: Column): ... def tokenize_scalar( input: Column, delimiter: Scalar | None = None diff --git a/python/pylibcudf/pylibcudf/reduce.pyi b/python/pylibcudf/pylibcudf/reduce.pyi index 03193d3d0d9..a09949b7b30 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyi +++ b/python/pylibcudf/pylibcudf/reduce.pyi @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column @@ -8,8 +8,8 @@ from pylibcudf.scalar import Scalar from pylibcudf.types import DataType class ScanType(IntEnum): - INCLUSIVE = auto() - EXCLUSIVE = auto() + INCLUSIVE = ... + EXCLUSIVE = ... def reduce(col: Column, agg: Aggregation, data_type: DataType) -> Scalar: ... def scan(col: Column, agg: Aggregation, inclusive: ScanType) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/replace.pyi b/python/pylibcudf/pylibcudf/replace.pyi index b4d65e76f76..eed7a2a6c52 100644 --- a/python/pylibcudf/pylibcudf/replace.pyi +++ b/python/pylibcudf/pylibcudf/replace.pyi @@ -1,13 +1,13 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.scalar import Scalar class ReplacePolicy(IntEnum): - PRECEDING = auto() - FOLLOWING = auto() + PRECEDING = ... + FOLLOWING = ... def replace_nulls( source_column: Column, replacement: Column | Scalar | ReplacePolicy diff --git a/python/pylibcudf/pylibcudf/round.pyi b/python/pylibcudf/pylibcudf/round.pyi index 0099ad3c510..410cf5de586 100644 --- a/python/pylibcudf/pylibcudf/round.pyi +++ b/python/pylibcudf/pylibcudf/round.pyi @@ -1,12 +1,12 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column class RoundingMethod(IntEnum): - HALF_UP = auto() - HALF_EVEN = auto() + HALF_UP = ... + HALF_EVEN = ... def round( source: Column, diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi index fe1cf6ee4fc..5db6875b7c0 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyi +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi @@ -1,16 +1,16 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import NanEquality, NanPolicy, NullEquality, NullPolicy class DuplicateKeepOption(IntEnum): - KEEP_ANY = auto() - KEEP_FIRST = auto() - KEEP_LAST = auto() - KEEP_NONE = auto() + KEEP_ANY = ... + KEEP_FIRST = ... + KEEP_LAST = ... + KEEP_NONE = ... def drop_nulls( source_table: Table, keys: list[int], keep_threshold: int diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyi b/python/pylibcudf/pylibcudf/strings/char_types.pyi index 1e3f57082ef..daa36cbb68d 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pyi +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyi @@ -1,21 +1,21 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.scalar import Scalar class StringCharacterTypes(IntEnum): - DECIMAL = auto() - NUMERIC = auto() - DIGIT = auto() - ALPHA = auto() - SPACE = auto() - UPPER = auto() - LOWER = auto() - ALPHANUM = auto() - CASE_TYPES = auto() - ALL_TYPES = auto() + DECIMAL = ... + NUMERIC = ... + DIGIT = ... + ALPHA = ... + SPACE = ... + UPPER = ... + LOWER = ... + ALPHANUM = ... + CASE_TYPES = ... + ALL_TYPES = ... def all_characters_of_type( source_strings: Column, diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi index 0833ac006c0..d5780c0a923 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyi +++ b/python/pylibcudf/pylibcudf/strings/combine.pyi @@ -1,18 +1,18 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table class SeparatorOnNulls(IntEnum): - YES = auto() - NO = auto() + YES = ... + NO = ... 
class OutputIfEmptyList(IntEnum): - EMPTY_STRING = auto() - NULL_ELEMENT = auto() + EMPTY_STRING = ... + NULL_ELEMENT = ... def concatenate( strings_columns: Table, diff --git a/python/pylibcudf/pylibcudf/strings/regex_flags.pyi b/python/pylibcudf/pylibcudf/strings/regex_flags.pyi index 2576b5575de..c551cebf181 100644 --- a/python/pylibcudf/pylibcudf/strings/regex_flags.pyi +++ b/python/pylibcudf/pylibcudf/strings/regex_flags.pyi @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum class RegexFlags(IntEnum): - DEFAULT = auto() - MULTILINE = auto() - DOTALL = auto() + DEFAULT = ... + MULTILINE = ... + DOTALL = ... diff --git a/python/pylibcudf/pylibcudf/strings/side_type.pyi b/python/pylibcudf/pylibcudf/strings/side_type.pyi index 15083120be0..532edd60077 100644 --- a/python/pylibcudf/pylibcudf/strings/side_type.pyi +++ b/python/pylibcudf/pylibcudf/strings/side_type.pyi @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum class SideType(IntEnum): - LEFT = auto() - RIGHT = auto() - BOTH = auto() + LEFT = ... + RIGHT = ... + BOTH = ... diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyi b/python/pylibcudf/pylibcudf/strings/translate.pyi index adeafcc2641..7158b6eb05c 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pyi +++ b/python/pylibcudf/pylibcudf/strings/translate.pyi @@ -1,13 +1,13 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from collections.abc import Mapping -from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.scalar import Scalar class FilterType(IntEnum): - KEEP = auto() - REMOVE = auto() + KEEP = ... + REMOVE = ... 
def translate( input: Column, chars_table: Mapping[int | str, int | str] diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi index ad4c9146feb..e84075215c3 100644 --- a/python/pylibcudf/pylibcudf/table.pyi +++ b/python/pylibcudf/pylibcudf/table.pyi @@ -3,7 +3,7 @@ from pylibcudf.column import Column class Table: - def __init__(self, columns: list[Column]) -> None: ... + def __init__(self, columns: list[Column]): ... def num_columns(self) -> int: ... def num_rows(self) -> int: ... def columns(self) -> list[Column]: ... diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi index ce000cafe9d..5ff5f9ac273 100644 --- a/python/pylibcudf/pylibcudf/types.pyi +++ b/python/pylibcudf/pylibcudf/types.pyi @@ -1,81 +1,81 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from enum import IntEnum, auto +from enum import IntEnum class Interpolation(IntEnum): - LINEAR = auto() - LOWER = auto() - HIGHER = auto() - MIDPOINT = auto() - NEAREST = auto() + LINEAR = ... + LOWER = ... + HIGHER = ... + MIDPOINT = ... + NEAREST = ... class MaskState(IntEnum): - UNALLOCATED = auto() - UNINITIALIZED = auto() - ALL_VALID = auto() - ALL_NULL = auto() + UNALLOCATED = ... + UNINITIALIZED = ... + ALL_VALID = ... + ALL_NULL = ... class NanEquality(IntEnum): - ALL_EQUAL = auto() - UNEQUAL = auto() + ALL_EQUAL = ... + UNEQUAL = ... class NanPolicy(IntEnum): - NAN_IS_NULL = auto() - NAN_IS_VALID = auto() + NAN_IS_NULL = ... + NAN_IS_VALID = ... class NullEquality(IntEnum): - EQUAL = auto() - UNEQUAL = auto() + EQUAL = ... + UNEQUAL = ... class NullOrder(IntEnum): - AFTER = auto() - BEFORE = auto() + AFTER = ... + BEFORE = ... class NullPolicy(IntEnum): - EXCLUDE = auto() - INCLUDE = auto() + EXCLUDE = ... + INCLUDE = ... class Order(IntEnum): - ASCENDING = auto() - DESCENDING = auto() + ASCENDING = ... + DESCENDING = ... class Sorted(IntEnum): - NO = auto() - YES = auto() + NO = ... + YES = ... 
class TypeId(IntEnum): - EMPTY = auto() - INT8 = auto() - INT16 = auto() - INT32 = auto() - INT64 = auto() - UINT8 = auto() - UINT16 = auto() - UINT32 = auto() - UINT64 = auto() - FLOAT32 = auto() - FLOAT64 = auto() - BOOL8 = auto() - TIMESTAMP_DAYS = auto() - TIMESTAMP_SECONDS = auto() - TIMESTAMP_MILLISECONDS = auto() - TIMESTAMP_MICROSECONDS = auto() - TIMESTAMP_NANOSECONDS = auto() - DURATION_DAYS = auto() - DURATION_SECONDS = auto() - DURATION_MILLISECONDS = auto() - DURATION_MICROSECONDS = auto() - DURATION_NANOSECONDS = auto() - DICTIONARY32 = auto() - STRING = auto() - LIST = auto() - DECIMAL32 = auto() - DECIMAL64 = auto() - DECIMAL128 = auto() - STRUCT = auto() - NUM_TYPE_IDS = auto() + EMPTY = ... + INT8 = ... + INT16 = ... + INT32 = ... + INT64 = ... + UINT8 = ... + UINT16 = ... + UINT32 = ... + UINT64 = ... + FLOAT32 = ... + FLOAT64 = ... + BOOL8 = ... + TIMESTAMP_DAYS = ... + TIMESTAMP_SECONDS = ... + TIMESTAMP_MILLISECONDS = ... + TIMESTAMP_MICROSECONDS = ... + TIMESTAMP_NANOSECONDS = ... + DURATION_DAYS = ... + DURATION_SECONDS = ... + DURATION_MILLISECONDS = ... + DURATION_MICROSECONDS = ... + DURATION_NANOSECONDS = ... + DICTIONARY32 = ... + STRING = ... + LIST = ... + DECIMAL32 = ... + DECIMAL64 = ... + DECIMAL128 = ... + STRUCT = ... + NUM_TYPE_IDS = ... class DataType: - def __init__(self, type_id: TypeId, scale: int = 0) -> None: ... + def __init__(self, type_id: TypeId, scale: int = 0): ... def id(self) -> TypeId: ... def scale(self) -> int: ... diff --git a/python/pylibcudf/pylibcudf/unary.pyi b/python/pylibcudf/pylibcudf/unary.pyi index d3095e56528..7aa23b618f4 100644 --- a/python/pylibcudf/pylibcudf/unary.pyi +++ b/python/pylibcudf/pylibcudf/unary.pyi @@ -1,33 +1,33 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
-from enum import IntEnum, auto +from enum import IntEnum from pylibcudf.column import Column from pylibcudf.types import DataType class UnaryOperator(IntEnum): - SIN = auto() - COS = auto() - TAN = auto() - ARCSIN = auto() - ARCCOS = auto() - ARCTAN = auto() - SINH = auto() - COSH = auto() - TANH = auto() - ARCSINH = auto() - ARCCOSH = auto() - ARCTANH = auto() - EXP = auto() - LOG = auto() - SQRT = auto() - CBRT = auto() - CEIL = auto() - FLOOR = auto() - ABS = auto() - RINT = auto() - BIT_INVERT = auto() - NOT = auto() + SIN = ... + COS = ... + TAN = ... + ARCSIN = ... + ARCCOS = ... + ARCTAN = ... + SINH = ... + COSH = ... + TANH = ... + ARCSINH = ... + ARCCOSH = ... + ARCTANH = ... + EXP = ... + LOG = ... + SQRT = ... + CBRT = ... + CEIL = ... + FLOOR = ... + ABS = ... + RINT = ... + BIT_INVERT = ... + NOT = ... def unary_operation(input: Column, op: UnaryOperator) -> Column: ... def is_null(input: Column) -> Column: ... From 635da9ee9e349f1980bb6fba24a30811e642bfa4 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 8 Nov 2024 10:47:06 +0000 Subject: [PATCH 12/16] pylibcudf: enable flake8-tidy/type-checking rules --- python/pylibcudf/pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 7ddb70b180e..d4b7eff1149 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -56,6 +56,12 @@ Documentation = "https://docs.rapids.ai/api/cudf/stable/" [tool.ruff] extend = "../../pyproject.toml" +[tool.ruff.lint] +extend-select = [ + "TCH", # flake8-type-checking + "TID", # flake8-tidy-imports +] + [tool.ruff.lint.isort] combine-as-imports = true known-first-party = ["pylibcudf"] From 1b68bfebf67acfa42d9aaa54d9197c429422bd3c Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 8 Nov 2024 11:22:29 +0000 Subject: [PATCH 13/16] Catch some missing bits --- python/pylibcudf/pylibcudf/hashing.pyi | 16 ++++++---------- 
python/pylibcudf/pylibcudf/interop.pyi | 4 +++- python/pylibcudf/pylibcudf/io/types.pyi | 6 +++--- .../pylibcudf/nvtext/byte_pair_encode.pyi | 2 +- python/pylibcudf/pylibcudf/sorting.pyi | 8 +++++--- python/pylibcudf/pylibcudf/types.pyi | 5 +++-- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/python/pylibcudf/pylibcudf/hashing.pyi b/python/pylibcudf/pylibcudf/hashing.pyi index 69a72aa4783..a849f5d0729 100644 --- a/python/pylibcudf/pylibcudf/hashing.pyi +++ b/python/pylibcudf/pylibcudf/hashing.pyi @@ -1,19 +1,15 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from typing import Final + from pylibcudf.column import Column from pylibcudf.table import Table -LIBCUDF_DEFAULT_HASH_SEED: int +LIBCUDF_DEFAULT_HASH_SEED: Final[int] -def murmurhash3_x86_32( - input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED -) -> Column: ... -def murmurhash3_x64_128( - input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED -) -> Table: ... -def xxhash_64( - input: Table, seed: int = LIBCUDF_DEFAULT_HASH_SEED -) -> Column: ... +def murmurhash3_x86_32(input: Table, seed: int = ...) -> Column: ... +def murmurhash3_x64_128(input: Table, seed: int = ...) -> Table: ... +def xxhash_64(input: Table, seed: int = ...) -> Column: ... def md5(input: Table) -> Column: ... def sha1(input: Table) -> Column: ... def sha224(input: Table) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/interop.pyi b/python/pylibcudf/pylibcudf/interop.pyi index cd8cb0c4a2c..5d1696c4f4f 100644 --- a/python/pylibcudf/pylibcudf/interop.pyi +++ b/python/pylibcudf/pylibcudf/interop.pyi @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
from collections.abc import Iterable, Mapping +from dataclasses import dataclass from typing import Any, overload import pyarrow as pa @@ -10,6 +11,7 @@ from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import DataType +@dataclass class ColumnMetadata: name: str children_meta: list[ColumnMetadata] @@ -29,7 +31,7 @@ def to_arrow( obj: DataType, *, precision: int | None = None, - fields: Iterable[pa.Field | tuple[str, pa.DataType]] + fields: Iterable[pa.Field[pa.DataType] | tuple[str, pa.DataType]] | Mapping[str, pa.DataType] | None = None, value_type: pa.DataType | None = None, diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi index 3ddf21f84dd..a4f4fc13bdc 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyi +++ b/python/pylibcudf/pylibcudf/io/types.pyi @@ -3,7 +3,7 @@ import io import os from collections.abc import Mapping from enum import IntEnum -from typing import Literal, TypeAlias, overload +from typing import Any, Literal, TypeAlias, overload from pylibcudf.column import Column from pylibcudf.io.datasource import Datasource @@ -83,13 +83,13 @@ class TableWithMetadata: class SourceInfo: def __init__( - self, sources: list[str] | list[os.PathLike] | list[Datasource] + self, sources: list[str] | list[os.PathLike[Any]] | list[Datasource] ) -> None: ... class SinkInfo: def __init__( self, - sinks: list[os.PathLike] + sinks: list[os.PathLike[Any]] | list[io.StringIO] | list[io.BytesIO] | list[io.TextIOBase] diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi index a75714d9648..ca39aa16d7e 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi @@ -8,4 +8,4 @@ class BPEMergePairs: def byte_pair_encoding( input: Column, merge_pairs: BPEMergePairs, separator: Scalar | None = None -): ... +) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi index 60a42023a6a..4ba0f07e986 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyi +++ b/python/pylibcudf/pylibcudf/sorting.pyi @@ -6,12 +6,14 @@ from pylibcudf.table import Table from pylibcudf.types import NullOrder, NullPolicy, Order def sorted_order( - source_table: Table, column_order: list, null_precedence: list + source_table: Table, + column_order: list[Order], + null_precedence: list[NullOrder], ) -> Column: ... def stable_sorted_order( source_table: Table, - column_order: list, - null_precedence: list, + column_order: list[Order], + null_precedence: list[NullOrder], ) -> Column: ... def rank( input_view: Column, diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi index 5ff5f9ac273..c67555dfeb4 100644 --- a/python/pylibcudf/pylibcudf/types.pyi +++ b/python/pylibcudf/pylibcudf/types.pyi @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from enum import IntEnum +from typing import Final class Interpolation(IntEnum): LINEAR = ... @@ -81,5 +82,5 @@ class DataType: def size_of(dtype: DataType) -> int: ... 
-SIZE_TYPE: DataType -SIZE_TYPE_ID: TypeId +SIZE_TYPE: Final[DataType] +SIZE_TYPE_ID: Final[TypeId] From 42be70bae2ccb59d79e1c4c69fa6c1574f2c574d Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 8 Nov 2024 11:30:41 +0000 Subject: [PATCH 14/16] Oops --- python/cudf/cudf/_lib/lists.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index f28afd10f86..9a2aa4a6130 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -127,7 +127,7 @@ def concatenate_list_elements(Column input_column, dropna=False): input_column.to_pylibcudf(mode="read"), plc.lists.ConcatenateNullPolicy.IGNORE if dropna - else plc.lists.ConcatenateNullPolicy.NULLIFTY_OUTPUT_ROW, + else plc.lists.ConcatenateNullPolicy.NULLIFY_OUTPUT_ROW, ) ) From 9f3f5fbaa0dc46c85f94a960b59d593a9630d51a Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 8 Nov 2024 11:45:04 +0000 Subject: [PATCH 15/16] No need for __init__.pyi --- python/pylibcudf/pylibcudf/__init__.pyi | 95 ------------------- python/pylibcudf/pylibcudf/io/__init__.pyi | 27 ------ .../pylibcudf/pylibcudf/nvtext/__init__.pyi | 29 ------ .../pylibcudf/pylibcudf/strings/__init__.pyi | 55 ----------- .../pylibcudf/strings/convert/__init__.pyi | 24 ----- .../pylibcudf/strings/split/__init__.pyi | 4 - 6 files changed, 234 deletions(-) delete mode 100644 python/pylibcudf/pylibcudf/__init__.pyi delete mode 100644 python/pylibcudf/pylibcudf/io/__init__.pyi delete mode 100644 python/pylibcudf/pylibcudf/nvtext/__init__.pyi delete mode 100644 python/pylibcudf/pylibcudf/strings/__init__.pyi delete mode 100644 python/pylibcudf/pylibcudf/strings/convert/__init__.pyi delete mode 100644 python/pylibcudf/pylibcudf/strings/split/__init__.pyi diff --git a/python/pylibcudf/pylibcudf/__init__.pyi b/python/pylibcudf/pylibcudf/__init__.pyi deleted file mode 100644 index a728647f82f..00000000000 --- a/python/pylibcudf/pylibcudf/__init__.pyi +++ 
/dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -# If libcudf was installed as a wheel, we must request it to load the library symbols. -# Otherwise, we assume that the library was installed in a system path that ld can find. -from pylibcudf import ( - aggregation, - binaryop, - column_factories, - concatenate, - contiguous_split, - copying, - datetime, - experimental, - expressions, - filling, - groupby, - hashing, - interop, - io, - join, - json, - labeling, - lists, - merge, - null_mask, - nvtext, - partitioning, - quantiles, - reduce, - replace, - reshape, - rolling, - round, - search, - sorting, - stream_compaction, - strings, - traits, - transform, - transpose, - types, - unary, -) -from pylibcudf.column import Column -from pylibcudf.gpumemoryview import gpumemoryview -from pylibcudf.scalar import Scalar -from pylibcudf.table import Table -from pylibcudf.types import DataType, MaskState, TypeId - -__all__ = [ - "Column", - "DataType", - "MaskState", - "Scalar", - "Table", - "TypeId", - "aggregation", - "binaryop", - "column_factories", - "contiguous_split", - "concatenate", - "copying", - "datetime", - "experimental", - "expressions", - "filling", - "gpumemoryview", - "groupby", - "hashing", - "interop", - "io", - "join", - "json", - "labeling", - "lists", - "merge", - "null_mask", - "partitioning", - "quantiles", - "reduce", - "replace", - "reshape", - "rolling", - "round", - "search", - "stream_compaction", - "strings", - "sorting", - "traits", - "transform", - "transpose", - "types", - "unary", - "nvtext", -] diff --git a/python/pylibcudf/pylibcudf/io/__init__.pyi b/python/pylibcudf/pylibcudf/io/__init__.pyi deleted file mode 100644 index e1a93ce08e3..00000000000 --- a/python/pylibcudf/pylibcudf/io/__init__.pyi +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
- -from pylibcudf.io import ( - avro, - csv, - datasource, - json, - orc, - parquet, - timezone, - types, -) -from pylibcudf.io.types import SinkInfo, SourceInfo, TableWithMetadata - -__all__ = [ - "avro", - "csv", - "datasource", - "json", - "orc", - "parquet", - "timezone", - "types", - "SinkInfo", - "SourceInfo", - "TableWithMetadata", -] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pyi b/python/pylibcudf/pylibcudf/nvtext/__init__.pyi deleted file mode 100644 index aa51eff6bf5..00000000000 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -from pylibcudf.nvtext import ( - byte_pair_encode, - edit_distance, - generate_ngrams, - jaccard, - minhash, - ngrams_tokenize, - normalize, - replace, - stemmer, - subword_tokenize, - tokenize, -) - -__all__ = [ - "byte_pair_encode", - "edit_distance", - "generate_ngrams", - "jaccard", - "minhash", - "ngrams_tokenize", - "normalize", - "replace", - "stemmer", - "subword_tokenize", - "tokenize", -] diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pyi b/python/pylibcudf/pylibcudf/strings/__init__.pyi deleted file mode 100644 index 492ed311c28..00000000000 --- a/python/pylibcudf/pylibcudf/strings/__init__.pyi +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
- -from pylibcudf.strings import ( - attributes, - capitalize, - case, - char_types, - combine, - contains, - convert, - extract, - find, - find_multiple, - findall, - padding, - regex_flags, - regex_program, - repeat, - replace, - replace_re, - side_type, - slice, - split, - strip, - translate, - wrap, -) -from pylibcudf.strings.side_type import SideType - -__all__ = [ - "SideType", - "attributes", - "capitalize", - "case", - "char_types", - "combine", - "contains", - "convert", - "extract", - "find", - "find_multiple", - "findall", - "padding", - "regex_flags", - "regex_program", - "repeat", - "replace", - "replace_re", - "side_type", - "slice", - "split", - "strip", - "translate", - "wrap", -] diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi b/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi deleted file mode 100644 index edf615376c2..00000000000 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -from pylibcudf.strings.convert import ( - convert_booleans, - convert_datetime, - convert_durations, - convert_fixed_point, - convert_floats, - convert_integers, - convert_ipv4, - convert_lists, - convert_urls, -) - -__all__ = [ - "convert_booleans", - "convert_datetime", - "convert_durations", - "convert_fixed_point", - "convert_floats", - "convert_integers", - "convert_ipv4", - "convert_lists", - "convert_urls", -] diff --git a/python/pylibcudf/pylibcudf/strings/split/__init__.pyi b/python/pylibcudf/pylibcudf/strings/split/__init__.pyi deleted file mode 100644 index c44bce048b6..00000000000 --- a/python/pylibcudf/pylibcudf/strings/split/__init__.pyi +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
-from pylibcudf.strings.split import partition, split - -__all__ = ["partition", "split"] From 1dcf8ec54bae8b017436fc7e43116b3917698f35 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 8 Nov 2024 18:39:42 +0000 Subject: [PATCH 16/16] Minor fixes from work in progress checker --- python/pylibcudf/pylibcudf/aggregation.pyi | 1 + python/pylibcudf/pylibcudf/contiguous_split.pyi | 1 + python/pylibcudf/pylibcudf/datetime.pyi | 2 +- python/pylibcudf/pylibcudf/expressions.pyi | 3 ++- python/pylibcudf/pylibcudf/filling.pyi | 2 +- python/pylibcudf/pylibcudf/groupby.pyi | 2 +- python/pylibcudf/pylibcudf/io/datasource.pyi | 2 +- python/pylibcudf/pylibcudf/io/json.pyi | 2 +- python/pylibcudf/pylibcudf/io/orc.pyi | 2 ++ python/pylibcudf/pylibcudf/json.pyi | 6 +++--- python/pylibcudf/pylibcudf/lists.pyi | 2 +- python/pylibcudf/pylibcudf/sorting.pyi | 2 +- python/pylibcudf/pylibcudf/stream_compaction.pyi | 4 ++-- python/pylibcudf/pylibcudf/strings/capitalize.pyi | 6 +++++- python/pylibcudf/pylibcudf/strings/combine.pyi | 2 +- python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi | 4 ++-- python/pylibcudf/pylibcudf/strings/regex_program.pyi | 1 + python/pylibcudf/pylibcudf/table.pyi | 2 +- python/pylibcudf/pylibcudf/transform.pyi | 2 +- python/pylibcudf/pylibcudf/types.pyi | 2 +- 20 files changed, 30 insertions(+), 20 deletions(-) diff --git a/python/pylibcudf/pylibcudf/aggregation.pyi b/python/pylibcudf/pylibcudf/aggregation.pyi index 230249995a5..a59e2a9dc93 100644 --- a/python/pylibcudf/pylibcudf/aggregation.pyi +++ b/python/pylibcudf/pylibcudf/aggregation.pyi @@ -65,6 +65,7 @@ class UdfType(IntEnum): PTX = ... class Aggregation: + def __init__(self): ... def kind(self) -> Kind: ... def sum() -> Aggregation: ... 
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyi b/python/pylibcudf/pylibcudf/contiguous_split.pyi index 66e6c5e50c5..dd6328fbf23 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pyi +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyi @@ -4,6 +4,7 @@ from pylibcudf.gpumemoryview import gpumemoryview from pylibcudf.table import Table class PackedColumns: + def __init__(self): ... def release(self) -> tuple[memoryview, gpumemoryview]: ... def pack(input: Table) -> PackedColumns: ... diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi index 91df1bfb92b..6a3ae7953d9 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyi +++ b/python/pylibcudf/pylibcudf/datetime.pyi @@ -36,7 +36,7 @@ def ceil_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... def floor_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... def round_datetimes(input: Column, freq: RoundingFrequency) -> Column: ... def add_calendrical_months( - timestamps: Column, months: Column | Scalar + input: Column, months: Column | Scalar ) -> Column: ... def day_of_year(input: Column) -> Column: ... def is_leap_year(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/expressions.pyi b/python/pylibcudf/pylibcudf/expressions.pyi index 5b5c6755392..12b473d8605 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyi +++ b/python/pylibcudf/pylibcudf/expressions.pyi @@ -56,7 +56,8 @@ class ASTOperator(IntEnum): BIT_INVERT = ... NOT = ... -class Expression: ... +class Expression: + def __init__(self): ... class Literal(Expression): def __init__(self, value: Scalar): ... diff --git a/python/pylibcudf/pylibcudf/filling.pyi b/python/pylibcudf/pylibcudf/filling.pyi index c0534f1344b..c88cdd8b16f 100644 --- a/python/pylibcudf/pylibcudf/filling.pyi +++ b/python/pylibcudf/pylibcudf/filling.pyi @@ -8,7 +8,7 @@ def fill( destination: Column, begin: int, end: int, value: Scalar ) -> Column: ... 
def fill_in_place( - destination: Column, c_begin: int, c_end: int, value: Scalar + destination: Column, begin: int, end: int, value: Scalar ) -> None: ... def sequence(size: int, init: Scalar, step: Scalar) -> Column: ... def repeat(input_table: Table, count: Column | int) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/groupby.pyi b/python/pylibcudf/pylibcudf/groupby.pyi index e933cafdeef..883ad6e34cf 100644 --- a/python/pylibcudf/pylibcudf/groupby.pyi +++ b/python/pylibcudf/pylibcudf/groupby.pyi @@ -31,7 +31,7 @@ class GroupBy: self, values: Table, offset: list[int], fill_values: list[Scalar] ) -> tuple[Table, Table]: ... def replace_nulls( - self, values: Table, replace_policies: list[ReplacePolicy] + self, value: Table, replace_policies: list[ReplacePolicy] ) -> tuple[Table, Table]: ... def get_groups( self, values: Table | None = None diff --git a/python/pylibcudf/pylibcudf/io/datasource.pyi b/python/pylibcudf/pylibcudf/io/datasource.pyi index c4184208b0c..e52197f793b 100644 --- a/python/pylibcudf/pylibcudf/io/datasource.pyi +++ b/python/pylibcudf/pylibcudf/io/datasource.pyi @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. class Datasource: - pass + def __init__(self): ... diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi index 33794afb208..b2bc6a43700 100644 --- a/python/pylibcudf/pylibcudf/io/json.pyi +++ b/python/pylibcudf/pylibcudf/io/json.pyi @@ -30,7 +30,7 @@ def read_json( ) -> TableWithMetadata: ... def write_json( sink_info: SinkInfo, - tbl: TableWithMetadata, + table_w_meta: TableWithMetadata, na_rep: str = "", include_nulls: bool = False, lines: bool = False, diff --git a/python/pylibcudf/pylibcudf/io/orc.pyi b/python/pylibcudf/pylibcudf/io/orc.pyi index 87ea9088b44..4cf87f1a832 100644 --- a/python/pylibcudf/pylibcudf/io/orc.pyi +++ b/python/pylibcudf/pylibcudf/io/orc.pyi @@ -18,6 +18,7 @@ def read_orc( ) -> TableWithMetadata: ... class OrcColumnStatistics: + def __init__(self): ... 
@property def number_of_values(self) -> int | None: ... @property @@ -27,6 +28,7 @@ class OrcColumnStatistics: def get[T](self, item: str, default: None | T = None) -> T | None: ... class ParsedOrcStatistics: + def __init__(self): ... @property def column_names(self) -> list[str]: ... @property diff --git a/python/pylibcudf/pylibcudf/json.pyi b/python/pylibcudf/pylibcudf/json.pyi index 41872c037de..b93d4876dab 100644 --- a/python/pylibcudf/pylibcudf/json.pyi +++ b/python/pylibcudf/pylibcudf/json.pyi @@ -14,9 +14,9 @@ class GetJsonObjectOptions: def get_allow_single_quotes(self) -> bool: ... def get_strip_quotes_from_single_strings(self) -> bool: ... def get_missing_fields_as_nulls(self) -> bool: ... - def set_allow_single_quotes(self, value: bool) -> None: ... - def set_strip_quotes_from_single_strings(self, value: bool) -> None: ... - def set_missing_fields_as_nulls(self, value: bool) -> None: ... + def set_allow_single_quotes(self, val: bool) -> None: ... + def set_strip_quotes_from_single_strings(self, val: bool) -> None: ... + def set_missing_fields_as_nulls(self, val: bool) -> None: ... def get_json_object( col: Column, json_path: Scalar, options: GetJsonObjectOptions | None = None diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi index 6e86aca40aa..dff6c400638 100644 --- a/python/pylibcudf/pylibcudf/lists.pyi +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -64,7 +64,7 @@ def union_distinct( nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, ) -> Column: ... -def apply_boolean_mask(input: Column, mask: Column) -> Column: ... +def apply_boolean_mask(input: Column, boolean_mask: Column) -> Column: ... def distinct( input: Column, nulls_equal: NullEquality, nans_equal: NanEquality ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi index 4ba0f07e986..5255d869a4d 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyi +++ b/python/pylibcudf/pylibcudf/sorting.pyi @@ -24,7 +24,7 @@ def rank( percentage: bool, ) -> Column: ... def is_sorted( - table: Table, column_order: list[Order], null_precedence: list[NullOrder] + tbl: Table, column_order: list[Order], null_precedence: list[NullOrder] ) -> bool: ... def segmented_sort_by_key( values: Table, diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi index 5db6875b7c0..99cade48309 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyi +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi @@ -46,8 +46,8 @@ def stable_distinct( nans_equal: NanEquality, ) -> Table: ... def unique_count( - column: Column, null_handling: NullPolicy, nan_handling: NanPolicy + source: Column, null_handling: NullPolicy, nan_handling: NanPolicy ) -> int: ... def distinct_count( - column: Column, null_handling: NullPolicy, nan_handling: NanPolicy + source: Column, null_handling: NullPolicy, nan_handling: NanPolicy ) -> int: ... diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyi b/python/pylibcudf/pylibcudf/strings/capitalize.pyi index 05bf8043727..5c6689418e2 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pyi +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyi @@ -2,7 +2,11 @@ from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.strings.char_types import StringCharacterTypes def capitalize(input: Column, delimiters: Scalar | None = None) -> Column: ... -def title(input: Column) -> Column: ... +def title( + input: Column, + sequence_type: StringCharacterTypes = StringCharacterTypes.ALPHA, +) -> Column: ... def is_title(input: Column) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi index d5780c0a923..3094b20f141 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyi +++ b/python/pylibcudf/pylibcudf/strings/combine.pyi @@ -25,7 +25,7 @@ def join_strings( input: Column, separator: Scalar, narep: Scalar ) -> Column: ... def join_list_elements( - source_strings: Column, + lists_strings_column: Column, separator: Column | Scalar, separator_narep: Scalar, string_narep: Scalar, diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi index 40321c3ae66..49b8468957c 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi @@ -2,5 +2,5 @@ from pylibcudf.column import Column -def url_encode(Input: Column) -> Column: ... -def url_decode(Input: Column) -> Column: ... +def url_encode(input: Column) -> Column: ... +def url_decode(input: Column) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/regex_program.pyi b/python/pylibcudf/pylibcudf/strings/regex_program.pyi index 6c853bcfc44..9abd6fa7802 100644 --- a/python/pylibcudf/pylibcudf/strings/regex_program.pyi +++ b/python/pylibcudf/pylibcudf/strings/regex_program.pyi @@ -3,5 +3,6 @@ from pylibcudf.strings.regex_flags import RegexFlags class RegexProgram: + def __init__(self): ... @staticmethod def create(pattern: str, flags: RegexFlags) -> RegexProgram: ... diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi index e84075215c3..5aef7e009c8 100644 --- a/python/pylibcudf/pylibcudf/table.pyi +++ b/python/pylibcudf/pylibcudf/table.pyi @@ -3,7 +3,7 @@ from pylibcudf.column import Column class Table: - def __init__(self, columns: list[Column]): ... + def __init__(self, column: list[Column]): ... def num_columns(self) -> int: ... def num_rows(self) -> int: ... 
def columns(self) -> list[Column]: ... diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi index 103b9ec36ab..5cbd2e635f0 100644 --- a/python/pylibcudf/pylibcudf/transform.pyi +++ b/python/pylibcudf/pylibcudf/transform.pyi @@ -13,4 +13,4 @@ def transform( input: Column, unary_udf: str, output_type: DataType, is_ptx: bool ) -> Column: ... def encode(input: Table) -> tuple[Table, Column]: ... -def one_hot_encode(input_column: Column, categories: Column) -> Table: ... +def one_hot_encode(input: Column, categories: Column) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/types.pyi b/python/pylibcudf/pylibcudf/types.pyi index c67555dfeb4..c91a95414bd 100644 --- a/python/pylibcudf/pylibcudf/types.pyi +++ b/python/pylibcudf/pylibcudf/types.pyi @@ -80,7 +80,7 @@ class DataType: def id(self) -> TypeId: ... def scale(self) -> int: ... -def size_of(dtype: DataType) -> int: ... +def size_of(t: DataType) -> int: ... SIZE_TYPE: Final[DataType] SIZE_TYPE_ID: Final[TypeId]