diff --git a/src/dask_awkward/__init__.py b/src/dask_awkward/__init__.py
index 267fdf693..eaf3ab4d5 100644
--- a/src/dask_awkward/__init__.py
+++ b/src/dask_awkward/__init__.py
@@ -6,6 +6,7 @@
 import dask_awkward.lib.operations as operations
 import dask_awkward.lib.optimize as optimize
 import dask_awkward.lib.reducers as reducers
+import dask_awkward.lib.str as str
 import dask_awkward.lib.structure as structure
 from dask_awkward.lib.core import Array, PartitionCompatibility, Record, Scalar
 from dask_awkward.lib.core import _type as type
diff --git a/src/dask_awkward/lib/__init__.py b/src/dask_awkward/lib/__init__.py
index 197d141ca..62303eb6b 100644
--- a/src/dask_awkward/lib/__init__.py
+++ b/src/dask_awkward/lib/__init__.py
@@ -1,3 +1,4 @@
+import dask_awkward.lib.str as str
 from dask_awkward.lib.core import Array, PartitionCompatibility, Record, Scalar
 from dask_awkward.lib.core import _type as type
 from dask_awkward.lib.core import (
diff --git a/src/dask_awkward/lib/str.py b/src/dask_awkward/lib/str.py
new file mode 100644
--- /dev/null
+++ b/src/dask_awkward/lib/str.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import awkward as ak
+
+from dask_awkward.lib.core import Array, map_partitions
+
+
+def split_whitespace(
+    array: Array,
+    *,
+    max_splits: int | None = None,
+    reverse: bool = False,
+    highlevel: bool = True,
+    behavior: dict | None = None,
+):
+    """Split the strings of a lazy array on whitespace, one partition at a time.
+
+    Thin wrapper that hands ``ak.str.split_whitespace`` to ``map_partitions``.
+
+    Parameters
+    ----------
+    array : Array
+        Collection whose partitions are passed to ``ak.str.split_whitespace``.
+    max_splits : int, optional
+        Forwarded unchanged to ``ak.str.split_whitespace``.
+    reverse : bool
+        Forwarded unchanged to ``ak.str.split_whitespace``.
+    highlevel : bool
+        Must be true; any false value raises instead of being ignored.
+    behavior : dict, optional
+        Forwarded unchanged to ``ak.str.split_whitespace``.
+
+    Returns
+    -------
+    The collection produced by ``map_partitions``.
+
+    Raises
+    ------
+    ValueError
+        If ``highlevel`` is false.
+    """
+    # ``highlevel`` is not forwarded below, so a false value would otherwise
+    # be silently ignored; reject it explicitly.
+    if not highlevel:
+        raise ValueError("Only highlevel=True is supported")
+    return map_partitions(
+        ak.str.split_whitespace,
+        array,
+        max_splits=max_splits,
+        reverse=reverse,
+        behavior=behavior,
+    )
diff --git a/tests/test_str.py b/tests/test_str.py
new file mode 100644
index 000000000..983d0d89c
--- /dev/null
+++ b/tests/test_str.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+import awkward as ak
+
+import dask_awkward as dak
+from dask_awkward.lib.testutils import assert_eq
+
+
+def test_split_whitespace():
+    a = ak.Array(
+        [
+            ["abc 123", "fooo ooo", "123"],
+            ["hij\tj"],
+            ["lmn op", ""],
+            ["123 456 789", "98765 43210"],
+        ]
+    )
+    b = dak.from_awkward(a, npartitions=2)
+    a2 = ak.str.split_whitespace(a)
+    b2 = dak.str.split_whitespace(b)
+    assert_eq(a2, b2)
+    assert_eq(ak.num(a2, axis=2), ak.num(b2, axis=2))