From f499f275c437333e15fc0d2fb3321e8534bd50d8 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 16:22:23 -0500
Subject: [PATCH 01/73] First function is working: is_alnum.

---
 pyproject.toml                             |  3 +-
 src/awkward/operations/__init__.py         |  2 +-
 src/awkward/operations/str/__init__.py     | 21 +++++++++
 src/awkward/operations/str/ak_is_alnum.py  | 54 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 32 +++++++++++++
 5 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 src/awkward/operations/str/__init__.py
 create mode 100644 src/awkward/operations/str/ak_is_alnum.py
 create mode 100644 tests/test_2616_use_pyarrow_for_strings.py

diff --git a/pyproject.toml b/pyproject.toml
index 87a1b29507..d4b6ce3d3e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -309,7 +309,8 @@ mccabe.max-complexity = 100
 "src/awkward/_connect/*" = ["TID251"]
 "src/awkward/__init__.py" = ["E402", "F401", "F403", "I001"]
 "src/awkward/_ext.py" = ["F401"]
-"src/awkward/operations/__init__.py" = ["F403"]
+"src/awkward/operations/__init__.py" = ["F401", "F403"]
+"src/awkward/operations/str/__init__.py" = ["F401", "F403"]
 "src/awkward/_nplikes/*" = ["TID251"]
 "src/awkward/_operators.py" = ["TID251"]
 "tests*/*" = ["T20", "TID251"]
diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py
index 450e4679de..f378a12dc7 100644
--- a/src/awkward/operations/__init__.py
+++ b/src/awkward/operations/__init__.py
@@ -1,6 +1,6 @@
 # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
-# ruff: noqa: F401
 
+import awkward.operations.str
 from awkward.operations.ak_all import *
 from awkward.operations.ak_almost_equal import *
 from awkward.operations.ak_any import *
diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
new file mode 100644
index 0000000000..1529f7411f
--- /dev/null
+++ b/src/awkward/operations/str/__init__.py
@@ -0,0 +1,21 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+from awkward.operations.str.ak_is_alnum import *
+
+
+def get_action(utf8_function, ascii_function):
+    from awkward.operations.ak_from_arrow import from_arrow
+    from awkward.operations.ak_to_arrow import to_arrow
+
+    def action(layout, **kwargs):
+        if layout.is_list and layout.parameter("__array__") == "string":
+            return from_arrow(
+                utf8_function(to_arrow(layout, extensionarray=False)), highlevel=False
+            )
+
+        elif layout.is_list and layout.parameter("__array__") == "bytestring":
+            return from_arrow(
+                ascii_function(to_arrow(layout, extensionarray=False)), highlevel=False
+            )
+
+    return action
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
new file mode 100644
index 0000000000..3c1f667ed6
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -0,0 +1,54 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_alnum",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_alnum(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with booleans indicating whether they are alphanumeric.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_isalnum](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alnum.html)
+    or
+    [pyarrow.compute.ascii_isalnum](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_alnum.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str.get_action(
+            pyarrow.compute.utf8_is_alnum, pyarrow.compute.ascii_is_alnum
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
new file mode 100644
index 0000000000..0be962bc7f
--- /dev/null
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -0,0 +1,32 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+import awkward as ak
+
+string = ak.Array(
+    [
+        ["\u03b1\u03b2\u03b3", ""],
+        [],
+        ["\u2192\u03b4\u03b5\u2190", "\u03b6z z\u03b6", "abc"],
+    ]
+)
+bytestring = ak.Array(
+    [
+        ["\u03b1\u03b2\u03b3".encode(), b""],
+        [],
+        ["\u2192\u03b4\u03b5\u2190".encode(), "\u03b6z z\u03b6".encode(), b"abc"],
+    ]
+)
+
+
+def test_is_alnum():
+    assert ak.str.is_alnum(string).tolist() == [
+        [True, False],
+        [],
+        [False, False, True],
+    ]
+    # ArrowNotImplementedError
+    # assert ak.str.is_alnum(bytestring).tolist() == [
+    #     [False, False],
+    #     [],
+    #     [False, False, True],
+    # ]

From 018b8e3351fc90f842588cd4ae2c3c3cc50c2cb2 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 16:51:23 -0500
Subject: [PATCH 02/73] is_alpha

---
 src/awkward/operations/str/__init__.py     | 26 +++++++++--
 src/awkward/operations/str/ak_is_alnum.py  |  6 +--
 src/awkward/operations/str/ak_is_alpha.py  | 54 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 28 ++++++++---
 4 files changed, 101 insertions(+), 13 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_is_alpha.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 1529f7411f..0bf724411c 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -1,9 +1,10 @@
 # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
 
 from awkward.operations.str.ak_is_alnum import *
+from awkward.operations.str.ak_is_alpha import *
 
 
-def get_action(utf8_function, ascii_function):
+def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
     from awkward.operations.ak_from_arrow import from_arrow
     from awkward.operations.ak_to_arrow import to_arrow
 
@@ -14,8 +15,25 @@ def action(layout, **kwargs):
             )
 
         elif layout.is_list and layout.parameter("__array__") == "bytestring":
-            return from_arrow(
-                ascii_function(to_arrow(layout, extensionarray=False)), highlevel=False
-            )
+            if bytestring_to_string:
+                return from_arrow(
+                    ascii_function(
+                        to_arrow(
+                            layout.copy(
+                                content=layout.content.copy(
+                                    parameters={"__array__": "char"}
+                                ),
+                                parameters={"__array__": "string"},
+                            ),
+                            extensionarray=False,
+                        )
+                    ),
+                    highlevel=False,
+                )
+            else:
+                return from_arrow(
+                    ascii_function(to_arrow(layout, extensionarray=False)),
+                    highlevel=False,
+                )
 
     return action
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index 3c1f667ed6..880894df3f 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -39,14 +39,14 @@ def is_alnum(array, *, highlevel=True, behavior=None):
 def _impl(array, highlevel, behavior):
     import awkward._connect.pyarrow  # noqa: F401, I001
 
-    import pyarrow.compute
+    import pyarrow.compute as pc
 
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str.get_action(
-            pyarrow.compute.utf8_is_alnum, pyarrow.compute.ascii_is_alnum
+        ak.operations.str._get_action(
+            pc.utf8_is_alnum, pc.ascii_is_alnum, bytestring_to_string=True
         ),
         behavior,
     )
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
new file mode 100644
index 0000000000..94e26aa142
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -0,0 +1,54 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_alpha",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_alpha(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with booleans indicating whether they are alphanumeric.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alpha.html)
+    or
+    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_alpha.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_alpha, pc.ascii_is_alpha, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 0be962bc7f..dd533d03ce 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -1,7 +1,11 @@
 # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
 
+import pytest
+
 import awkward as ak
 
+pytest.importorskip("pyarrow")
+
 string = ak.Array(
     [
         ["\u03b1\u03b2\u03b3", ""],
@@ -24,9 +28,21 @@ def test_is_alnum():
         [],
         [False, False, True],
     ]
-    # ArrowNotImplementedError
-    # assert ak.str.is_alnum(bytestring).tolist() == [
-    #     [False, False],
-    #     [],
-    #     [False, False, True],
-    # ]
+    assert ak.str.is_alnum(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, False, True],
+    ]
+
+
+def test_is_alpha():
+    assert ak.str.is_alpha(string).tolist() == [
+        [True, False],
+        [],
+        [False, False, True],
+    ]
+    assert ak.str.is_alpha(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, False, True],
+    ]

From 1d97c326c3d1dda330a5a96dbfaa646bbffedb6b Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 16:53:06 -0500
Subject: [PATCH 03/73] is_decimal

---
 src/awkward/operations/str/__init__.py      |  1 +
 src/awkward/operations/str/ak_is_decimal.py | 54 +++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py  | 13 +++++
 3 files changed, 68 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_is_decimal.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 0bf724411c..550df65c11 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -2,6 +2,7 @@
 
 from awkward.operations.str.ak_is_alnum import *
 from awkward.operations.str.ak_is_alpha import *
+from awkward.operations.str.ak_is_decimal import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
new file mode 100644
index 0000000000..15ab147eee
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -0,0 +1,54 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_decimal",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_decimal(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with booleans indicating whether they are alphanumeric.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_decimal.html)
+    or
+    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_decimal.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_decimal, pc.ascii_is_decimal, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index dd533d03ce..b04b299679 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -46,3 +46,16 @@ def test_is_alpha():
         [],
         [False, False, True],
     ]
+
+
+def test_is_decimal():
+    assert ak.str.is_decimal(string).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]
+    assert ak.str.is_decimal(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]

From f3d20750021e31197695e0da01717b4270db9355 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:03:29 -0500
Subject: [PATCH 04/73] is_lower

---
 src/awkward/operations/str/__init__.py      |  1 +
 src/awkward/operations/str/ak_is_alnum.py   |  6 ++-
 src/awkward/operations/str/ak_is_alpha.py   |  6 ++-
 src/awkward/operations/str/ak_is_decimal.py |  6 ++-
 src/awkward/operations/str/ak_is_lower.py   | 56 +++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py  | 13 +++++
 6 files changed, 82 insertions(+), 6 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_is_lower.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 550df65c11..572ff911ac 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -3,6 +3,7 @@
 from awkward.operations.str.ak_is_alnum import *
 from awkward.operations.str.ak_is_alpha import *
 from awkward.operations.str.ak_is_decimal import *
+from awkward.operations.str.ak_is_lower import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index 880894df3f..6ced0234c6 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -18,10 +18,12 @@ def is_alnum(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with booleans indicating whether they are alphanumeric.
+    Replaces any string-valued data with True iff the string is non-empty and consists only of alphanumeric Unicode characters.
+
+    Replaces any bytestring-valued data with True iff the string is non-empty and consists only of alphanumeric ASCII characters.
 
     Note: this function does not raise an error if the `array` does
-    not contain any string data.
+    not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_isalnum](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alnum.html)
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
index 94e26aa142..1910a51e90 100644
--- a/src/awkward/operations/str/ak_is_alpha.py
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -18,10 +18,12 @@ def is_alpha(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with booleans indicating whether they are alphanumeric.
+    Replaces any string-valued data with True iff the string is non-empty and consists only of alphabetic Unicode characters.
+
+    Replaces any bytestring-valued data with True iff the string is non-empty and consists only of alphabetic ASCII characters.
 
     Note: this function does not raise an error if the `array` does
-    not contain any string data.
+    not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alpha.html)
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index 15ab147eee..0ea853d3e1 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -18,10 +18,12 @@ def is_decimal(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with booleans indicating whether they are alphanumeric.
+    Replaces any string-valued data with True iff the string is non-empty and consists only of decimal Unicode characters.
+
+    Replaces any bytestring-valued data with True iff the string is non-empty and consists only of decimal ASCII characters.
 
     Note: this function does not raise an error if the `array` does
-    not contain any string data.
+    not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_decimal.html)
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
new file mode 100644
index 0000000000..30e1b206d0
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_lower",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_lower(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with True iff the string is non-empty and consists only of lowercase Unicode characters.
+
+    Replaces any bytestring-valued data with True iff the string is non-empty and consists only of lowercase ASCII characters.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_is_lower](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_lower.html)
+    or
+    [pyarrow.compute.ascii_is_lower](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_lower.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_lower, pc.ascii_is_lower, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index b04b299679..2023ed0481 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -59,3 +59,16 @@ def test_is_decimal():
         [],
         [False, False, False],
     ]
+
+
+def test_is_lower():
+    assert ak.str.is_lower(string).tolist() == [
+        [True, False],
+        [],
+        [True, True, True],
+    ]
+    assert ak.str.is_lower(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, True, True],
+    ]

From 784dc689cfc37c78834c606d4f6501938d294a4d Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:07:22 -0500
Subject: [PATCH 05/73] is_digit

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_is_digit.py  | 58 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 13 +++++
 3 files changed, 72 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_is_digit.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 572ff911ac..97378a31fb 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -3,6 +3,7 @@
 from awkward.operations.str.ak_is_alnum import *
 from awkward.operations.str.ak_is_alpha import *
 from awkward.operations.str.ak_is_decimal import *
+from awkward.operations.str.ak_is_digit import *
 from awkward.operations.str.ak_is_lower import *
 
 
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
new file mode 100644
index 0000000000..91d0113155
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -0,0 +1,58 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_digit",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_digit(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with True iff the string is non-empty and consists only of Unicode digits.
+
+    Replaces any bytestring-valued data with True iff the string is non-empty and consists only of Unicode digits.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_is_digit](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_digit.html)
+    or
+    [pyarrow.compute.utf8_is_digit](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_digit.html)
+    on strings and bytestrings, respectively.
+
+    (Arrow's compute module does not have an `ascii_is_digit`.)
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_digit, pc.utf8_is_digit, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 2023ed0481..e5ab8f98a0 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -61,6 +61,19 @@ def test_is_decimal():
     ]
 
 
+def test_is_digit():
+    assert ak.str.is_digit(string).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]
+    assert ak.str.is_digit(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]
+
+
 def test_is_lower():
     assert ak.str.is_lower(string).tolist() == [
         [True, False],

From 73b346dbf1cd24ff7bc1d657f6ae87db8e02d98c Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:10:43 -0500
Subject: [PATCH 06/73] is_numeric

---
 src/awkward/operations/str/__init__.py      |  1 +
 src/awkward/operations/str/ak_is_numeric.py | 58 +++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py  | 13 +++++
 3 files changed, 72 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_is_numeric.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 97378a31fb..579ac11230 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -5,6 +5,7 @@
 from awkward.operations.str.ak_is_decimal import *
 from awkward.operations.str.ak_is_digit import *
 from awkward.operations.str.ak_is_lower import *
+from awkward.operations.str.ak_is_numeric import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
new file mode 100644
index 0000000000..cdb250411b
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -0,0 +1,58 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_numeric",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_numeric(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with True iff the string is non-empty and consists only of numeric Unicode characters.
+
+    Replaces any bytestring-valued data with True iff the string is non-empty and consists only of numeric Unicode characters.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_is_numeric](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_numeric.html)
+    or
+    [pyarrow.compute.utf8_is_numeric](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_numeric.html)
+    on strings and bytestrings, respectively.
+
+    (Arrow's compute module does not have an `ascii_is_numeric`.)
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_numeric, pc.utf8_is_numeric, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index e5ab8f98a0..0889d43146 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -85,3 +85,16 @@ def test_is_lower():
         [],
         [False, True, True],
     ]
+
+
+def test_is_numeric():
+    assert ak.str.is_numeric(string).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]
+    assert ak.str.is_numeric(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]

From eff2dfebc4b99432fc42d8aea61fdfd53d0f8703 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:13:14 -0500
Subject: [PATCH 07/73] is_printable

---
 src/awkward/operations/str/__init__.py        |  1 +
 src/awkward/operations/str/ak_is_printable.py | 56 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 13 +++++
 3 files changed, 70 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_is_printable.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 579ac11230..a7c6257368 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -6,6 +6,7 @@
 from awkward.operations.str.ak_is_digit import *
 from awkward.operations.str.ak_is_lower import *
 from awkward.operations.str.ak_is_numeric import *
+from awkward.operations.str.ak_is_printable import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
new file mode 100644
index 0000000000..108f8d6fc7
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_printable",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_printable(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data True iff the string is non-empty and consists only of printable Unicode characters.
+
+    Replaces any bytestring-valued data True iff the string is non-empty and consists only of printable ASCII characters.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_is_printable](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_printable.html)
+    or
+    [pyarrow.compute.ascii_is_printable](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_printable.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_printable, pc.ascii_is_printable, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 0889d43146..8dcee8c4c6 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -98,3 +98,16 @@ def test_is_numeric():
         [],
         [False, False, False],
     ]
+
+
+def test_is_printable():
+    assert ak.str.is_printable(string).tolist() == [
+        [True, True],
+        [],
+        [True, True, True],
+    ]
+    assert ak.str.is_printable(bytestring).tolist() == [
+        [False, True],
+        [],
+        [False, False, True],
+    ]

From 82b5a7bc9c8f8b3ec52f4d529c46e2cfec787e83 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:16:18 -0500
Subject: [PATCH 08/73] is_space

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_is_space.py  | 56 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 13 +++++
 3 files changed, 70 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_is_space.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index a7c6257368..ca5cea673c 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -7,6 +7,7 @@
 from awkward.operations.str.ak_is_lower import *
 from awkward.operations.str.ak_is_numeric import *
 from awkward.operations.str.ak_is_printable import *
+from awkward.operations.str.ak_is_space import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
new file mode 100644
index 0000000000..fbcd54ce74
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_space",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_space(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data True iff the string is non-empty and consists only of whitespace Unicode characters.
+
+    Replaces any bytestring-valued data True iff the string is non-empty and consists only of whitespace ASCII characters.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_is_space](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_space.html)
+    or
+    [pyarrow.compute.ascii_is_space](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_space.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_space, pc.ascii_is_space, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 8dcee8c4c6..d2dba86457 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -111,3 +111,16 @@ def test_is_printable():
         [],
         [False, False, True],
     ]
+
+
+def test_is_space():
+    assert ak.str.is_space(string).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]
+    assert ak.str.is_space(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]

From c8c669c81f777affcaa1b4d53fc9fbc2dbc5c81c Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:25:38 -0500
Subject: [PATCH 09/73] is_upper

---
 src/awkward/operations/str/__init__.py        |  4 ++
 src/awkward/operations/str/ak_is_alnum.py     |  4 +-
 src/awkward/operations/str/ak_is_alpha.py     |  4 +-
 src/awkward/operations/str/ak_is_decimal.py   |  4 +-
 src/awkward/operations/str/ak_is_digit.py     |  4 +-
 src/awkward/operations/str/ak_is_lower.py     |  4 +-
 src/awkward/operations/str/ak_is_numeric.py   |  4 +-
 src/awkward/operations/str/ak_is_printable.py |  4 +-
 src/awkward/operations/str/ak_is_space.py     |  4 +-
 src/awkward/operations/str/ak_is_upper.py     | 59 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 13 ++++
 11 files changed, 92 insertions(+), 16 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_is_upper.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index ca5cea673c..fe4e7c65f7 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -1,5 +1,8 @@
 # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
 
+# https://arrow.apache.org/docs/python/api/compute.html#string-predicates
+
+# string predicates
 from awkward.operations.str.ak_is_alnum import *
 from awkward.operations.str.ak_is_alpha import *
 from awkward.operations.str.ak_is_decimal import *
@@ -8,6 +11,7 @@
 from awkward.operations.str.ak_is_numeric import *
 from awkward.operations.str.ak_is_printable import *
 from awkward.operations.str.ak_is_space import *
+from awkward.operations.str.ak_is_upper import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index 6ced0234c6..fd21786918 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -18,9 +18,9 @@ def is_alnum(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True iff the string is non-empty and consists only of alphanumeric Unicode characters.
+    Replaces any string-valued data with True if the string is non-empty and consists only of alphanumeric Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True iff the string is non-empty and consists only of alphanumeric ASCII characters.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of alphanumeric ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
index 1910a51e90..1ac2860624 100644
--- a/src/awkward/operations/str/ak_is_alpha.py
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -18,9 +18,9 @@ def is_alpha(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True iff the string is non-empty and consists only of alphabetic Unicode characters.
+    Replaces any string-valued data with True if the string is non-empty and consists only of alphabetic Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True iff the string is non-empty and consists only of alphabetic ASCII characters.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of alphabetic ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index 0ea853d3e1..b1a7a51e32 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -18,9 +18,9 @@ def is_decimal(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True iff the string is non-empty and consists only of decimal Unicode characters.
+    Replaces any string-valued data True if the string is non-empty and consists only of decimal Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True iff the string is non-empty and consists only of decimal ASCII characters.
+    Replaces any bytestring-valued data True if the string is non-empty and consists only of decimal ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
index 91d0113155..100ab33b07 100644
--- a/src/awkward/operations/str/ak_is_digit.py
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -18,9 +18,9 @@ def is_digit(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True iff the string is non-empty and consists only of Unicode digits.
+    Replaces any string-valued data True if the string is non-empty and consists only of Unicode digits, False otherwise.
 
-    Replaces any bytestring-valued data True iff the string is non-empty and consists only of Unicode digits.
+    Replaces any bytestring-valued data True if the string is non-empty and consists only of Unicode digits, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
index 30e1b206d0..6fd261c822 100644
--- a/src/awkward/operations/str/ak_is_lower.py
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -18,9 +18,9 @@ def is_lower(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True iff the string is non-empty and consists only of lowercase Unicode characters.
+    Replaces any string-valued data True if the string is non-empty and consists only of lowercase Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True iff the string is non-empty and consists only of lowercase ASCII characters.
+    Replaces any bytestring-valued data True if the string is non-empty and consists only of lowercase ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
index cdb250411b..01f9d6fccc 100644
--- a/src/awkward/operations/str/ak_is_numeric.py
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -18,9 +18,9 @@ def is_numeric(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data true iff the string is non-empty and consists only of numeric Unicode characters.
+    Replaces any string-valued data true if the string is non-empty and consists only of numeric Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data true iff the string is non-empty and consists only of numeric Unicode characters.
+    Replaces any bytestring-valued data true if the string is non-empty and consists only of numeric Unicode characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
index 108f8d6fc7..c4dbc1d96f 100644
--- a/src/awkward/operations/str/ak_is_printable.py
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -18,9 +18,9 @@ def is_printable(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True iff the string is non-empty and consists only of printable Unicode characters.
+    Replaces any string-valued data True if the string is non-empty and consists only of printable Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True iff the string is non-empty and consists only of printable ASCII characters.
+    Replaces any bytestring-valued data True if the string is non-empty and consists only of printable ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
index fbcd54ce74..13217936cd 100644
--- a/src/awkward/operations/str/ak_is_space.py
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -18,9 +18,9 @@ def is_space(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True iff the string is non-empty and consists only of whitespace Unicode characters.
+    Replaces any string-valued data True if the string is non-empty and consists only of whitespace Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True iff the string is non-empty and consists only of whitespace ASCII characters.
+    Replaces any bytestring-valued data True if the string is non-empty and consists only of whitespace ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/ak_is_upper.py
new file mode 100644
index 0000000000..bb35d2e6d1
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_upper.py
@@ -0,0 +1,59 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_upper",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_upper(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data True if the string is non-empty and consists only of uppercase Unicode characters, False otherwise.
+
+    Replaces any bytestring-valued data True if the string is non-empty and consists only of uppercase ASCII characters, False otherwise.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_is_upper](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_upper.html)
+    or
+    [pyarrow.compute.ascii_is_upper](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_upper.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_upper,
+            pc.ascii_is_upper,
+            # pc.ascii_is_upper is defined on binary, but for consistency with lower...
+            bytestring_to_string=True,
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index d2dba86457..b0928e1c18 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -124,3 +124,16 @@ def test_is_space():
         [],
         [False, False, False],
     ]
+
+
+def test_is_upper():
+    assert ak.str.is_space(string).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]
+    assert ak.str.is_space(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]

From b9f986844b2309508b0721866fb06bf4f2dc77d9 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:36:45 -0500
Subject: [PATCH 10/73] is_title

---
 src/awkward/operations/str/__init__.py        |  1 +
 src/awkward/operations/str/ak_is_decimal.py   |  4 +-
 src/awkward/operations/str/ak_is_digit.py     |  4 +-
 src/awkward/operations/str/ak_is_lower.py     |  4 +-
 src/awkward/operations/str/ak_is_numeric.py   |  4 +-
 src/awkward/operations/str/ak_is_printable.py |  4 +-
 src/awkward/operations/str/ak_is_space.py     |  4 +-
 src/awkward/operations/str/ak_is_title.py     | 56 +++++++++++++++++++
 src/awkward/operations/str/ak_is_upper.py     |  6 +-
 tests/test_2616_use_pyarrow_for_strings.py    | 17 +++++-
 10 files changed, 87 insertions(+), 17 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_is_title.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index fe4e7c65f7..73793a8626 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -11,6 +11,7 @@
 from awkward.operations.str.ak_is_numeric import *
 from awkward.operations.str.ak_is_printable import *
 from awkward.operations.str.ak_is_space import *
+from awkward.operations.str.ak_is_title import *
 from awkward.operations.str.ak_is_upper import *
 
 
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index b1a7a51e32..c367086875 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -18,9 +18,9 @@ def is_decimal(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True if the string is non-empty and consists only of decimal Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and consists only of decimal Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True if the string is non-empty and consists only of decimal ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of decimal ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
index 100ab33b07..8797cb9f51 100644
--- a/src/awkward/operations/str/ak_is_digit.py
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -18,9 +18,9 @@ def is_digit(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True if the string is non-empty and consists only of Unicode digits, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and consists only of Unicode digits, False otherwise.
 
-    Replaces any bytestring-valued data True if the string is non-empty and consists only of Unicode digits, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of Unicode digits, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
index 6fd261c822..f244d0740b 100644
--- a/src/awkward/operations/str/ak_is_lower.py
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -18,9 +18,9 @@ def is_lower(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True if the string is non-empty and consists only of lowercase Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and consists only of lowercase Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True if the string is non-empty and consists only of lowercase ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of lowercase ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
index 01f9d6fccc..a6ac673580 100644
--- a/src/awkward/operations/str/ak_is_numeric.py
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -18,9 +18,9 @@ def is_numeric(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data true if the string is non-empty and consists only of numeric Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and consists only of numeric Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data true if the string is non-empty and consists only of numeric Unicode characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of numeric Unicode characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
index c4dbc1d96f..a6d78f98d8 100644
--- a/src/awkward/operations/str/ak_is_printable.py
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -18,9 +18,9 @@ def is_printable(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True if the string is non-empty and consists only of printable Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and consists only of printable Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True if the string is non-empty and consists only of printable ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of printable ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
index 13217936cd..8db50c6151 100644
--- a/src/awkward/operations/str/ak_is_space.py
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -18,9 +18,9 @@ def is_space(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True if the string is non-empty and consists only of whitespace Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and consists only of whitespace Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True if the string is non-empty and consists only of whitespace ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of whitespace ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_is_title.py b/src/awkward/operations/str/ak_is_title.py
new file mode 100644
index 0000000000..5d318dff2f
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_title.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_title",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_title(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with True if the string is title-cased, i.e. it has at least one cased character, each uppercase character follows an uncased character, and each lowercase character follows an uppercase character, otherwise False.
+
+    Replaces any bytestring-valued data with True if the string is title-cased, i.e. it has at least one cased character, each uppercase character follows an uncased character, and each lowercase character follows an uppercase character, otherwise False.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_is_title](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_title.html)
+    or
+    [pyarrow.compute.ascii_is_title](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_title.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_is_title, pc.ascii_is_title, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/ak_is_upper.py
index bb35d2e6d1..a3ae3a9082 100644
--- a/src/awkward/operations/str/ak_is_upper.py
+++ b/src/awkward/operations/str/ak_is_upper.py
@@ -18,9 +18,9 @@ def is_upper(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data True if the string is non-empty and consists only of uppercase Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and consists only of uppercase Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data True if the string is non-empty and consists only of uppercase ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty and consists only of uppercase ASCII characters, False otherwise.
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
@@ -50,7 +50,7 @@ def _impl(array, highlevel, behavior):
         ak.operations.str._get_action(
             pc.utf8_is_upper,
             pc.ascii_is_upper,
-            # pc.ascii_is_upper is defined on binary, but for consistency with lower...
+            # pc.ascii_is_upper is defined on binary, but for consistency with is_lower and is_title...
             bytestring_to_string=True,
         ),
         behavior,
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index b0928e1c18..43029fde77 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -127,12 +127,25 @@ def test_is_space():
 
 
 def test_is_upper():
-    assert ak.str.is_space(string).tolist() == [
+    assert ak.str.is_upper(string).tolist() == [
         [False, False],
         [],
         [False, False, False],
     ]
-    assert ak.str.is_space(bytestring).tolist() == [
+    assert ak.str.is_upper(bytestring).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]
+
+
+def test_is_title():
+    assert ak.str.is_title(string).tolist() == [
+        [False, False],
+        [],
+        [False, False, False],
+    ]
+    assert ak.str.is_title(bytestring).tolist() == [
         [False, False],
         [],
         [False, False, False],

From 88709b24bf7a0d870c8fd1776fc2fc6e25c37c96 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:45:22 -0500
Subject: [PATCH 11/73] is_ascii; done with string predicates

---
 src/awkward/operations/str/__init__.py        |  1 +
 src/awkward/operations/str/ak_is_alnum.py     |  4 +-
 src/awkward/operations/str/ak_is_alpha.py     |  4 +-
 src/awkward/operations/str/ak_is_ascii.py     | 56 +++++++++++++++++++
 src/awkward/operations/str/ak_is_decimal.py   |  4 +-
 src/awkward/operations/str/ak_is_digit.py     |  4 +-
 src/awkward/operations/str/ak_is_lower.py     |  4 +-
 src/awkward/operations/str/ak_is_numeric.py   |  4 +-
 src/awkward/operations/str/ak_is_printable.py |  4 +-
 src/awkward/operations/str/ak_is_space.py     |  4 +-
 src/awkward/operations/str/ak_is_title.py     |  4 +-
 src/awkward/operations/str/ak_is_upper.py     |  4 +-
 tests/test_2616_use_pyarrow_for_strings.py    | 13 +++++
 13 files changed, 90 insertions(+), 20 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_is_ascii.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 73793a8626..96a0b36e7b 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -5,6 +5,7 @@
 # string predicates
 from awkward.operations.str.ak_is_alnum import *
 from awkward.operations.str.ak_is_alpha import *
+from awkward.operations.str.ak_is_ascii import *
 from awkward.operations.str.ak_is_decimal import *
 from awkward.operations.str.ak_is_digit import *
 from awkward.operations.str.ak_is_lower import *
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index fd21786918..ac28e085b8 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -26,9 +26,9 @@ def is_alnum(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalnum](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alnum.html)
+    [pyarrow.compute.utf8_is_alnum](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alnum.html)
     or
-    [pyarrow.compute.ascii_isalnum](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_alnum.html)
+    [pyarrow.compute.ascii_is_alnum](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_alnum.html)
     on strings and bytestrings, respectively.
     """
     # Dispatch
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
index 1ac2860624..283ad5a4c6 100644
--- a/src/awkward/operations/str/ak_is_alpha.py
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -26,9 +26,9 @@ def is_alpha(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alpha.html)
+    [pyarrow.compute.utf8_is_alpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alpha.html)
     or
-    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_alpha.html)
+    [pyarrow.compute.ascii_is_alpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_alpha.html)
     on strings and bytestrings, respectively.
     """
     # Dispatch
diff --git a/src/awkward/operations/str/ak_is_ascii.py b/src/awkward/operations/str/ak_is_ascii.py
new file mode 100644
index 0000000000..c00d349048
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_ascii.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_ascii",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_ascii(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with True iff the string consists only of ASCII characters, False otherwise.
+
+    Replaces any bytestring-valued data with True iff the string consists only of ASCII characters, False otherwise.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.string_is_ascii](https://arrow.apache.org/docs/python/generated/pyarrow.compute.string_is_ascii.html)
+    or
+    [pyarrow.compute.string_is_ascii](https://arrow.apache.org/docs/python/generated/pyarrow.compute.string_is_ascii.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.string_is_ascii, pc.string_is_ascii, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index c367086875..8a2f4b0fe7 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -26,9 +26,9 @@ def is_decimal(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_decimal.html)
+    [pyarrow.compute.utf8_is_decimal](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_decimal.html)
     or
-    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_decimal.html)
+    [pyarrow.compute.ascii_is_decimal](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_decimal.html)
     on strings and bytestrings, respectively.
     """
     # Dispatch
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
index 8797cb9f51..3cd5f343ae 100644
--- a/src/awkward/operations/str/ak_is_digit.py
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -26,9 +26,9 @@ def is_digit(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_digit.html)
+    [pyarrow.compute.utf8_is_digit](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_digit.html)
     or
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_digit.html)
+    [pyarrow.compute.utf8_is_digit](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_digit.html)
     on strings and bytestrings, respectively.
 
     (Arrow's compute module does not have an `ascii_is_digit`.)
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
index f244d0740b..74c832ba77 100644
--- a/src/awkward/operations/str/ak_is_lower.py
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -26,9 +26,9 @@ def is_lower(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_lower.html)
+    [pyarrow.compute.utf8_is_lower](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_lower.html)
     or
-    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_lower.html)
+    [pyarrow.compute.ascii_is_lower](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_lower.html)
     on strings and bytestrings, respectively.
     """
     # Dispatch
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
index a6ac673580..9bf89c814a 100644
--- a/src/awkward/operations/str/ak_is_numeric.py
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -26,9 +26,9 @@ def is_numeric(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_numeric.html)
+    [pyarrow.compute.utf8_is_numeric](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_numeric.html)
     or
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_numeric.html)
+    [pyarrow.compute.utf8_is_numeric](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_numeric.html)
     on strings and bytestrings, respectively.
 
     (Arrow's compute module does not have an `ascii_is_numeric`.)
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
index a6d78f98d8..cf42bfcc97 100644
--- a/src/awkward/operations/str/ak_is_printable.py
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -26,9 +26,9 @@ def is_printable(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_printable.html)
+    [pyarrow.compute.utf8_is_printable](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_printable.html)
     or
-    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_printable.html)
+    [pyarrow.compute.ascii_is_printable](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_printable.html)
     on strings and bytestrings, respectively.
     """
     # Dispatch
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
index 8db50c6151..00ace2eb51 100644
--- a/src/awkward/operations/str/ak_is_space.py
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -26,9 +26,9 @@ def is_space(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_space.html)
+    [pyarrow.compute.utf8_is_space](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_space.html)
     or
-    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_space.html)
+    [pyarrow.compute.ascii_is_space](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_space.html)
     on strings and bytestrings, respectively.
     """
     # Dispatch
diff --git a/src/awkward/operations/str/ak_is_title.py b/src/awkward/operations/str/ak_is_title.py
index 5d318dff2f..e463d00685 100644
--- a/src/awkward/operations/str/ak_is_title.py
+++ b/src/awkward/operations/str/ak_is_title.py
@@ -26,9 +26,9 @@ def is_title(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_title.html)
+    [pyarrow.compute.utf8_is_title](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_title.html)
     or
-    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_title.html)
+    [pyarrow.compute.ascii_is_title](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_title.html)
     on strings and bytestrings, respectively.
     """
     # Dispatch
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/ak_is_upper.py
index a3ae3a9082..8cff3a78bc 100644
--- a/src/awkward/operations/str/ak_is_upper.py
+++ b/src/awkward/operations/str/ak_is_upper.py
@@ -26,9 +26,9 @@ def is_upper(array, *, highlevel=True, behavior=None):
     not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.utf8_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_upper.html)
+    [pyarrow.compute.utf8_is_upper](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_upper.html)
     or
-    [pyarrow.compute.ascii_isalpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_upper.html)
+    [pyarrow.compute.ascii_is_upper](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_is_upper.html)
     on strings and bytestrings, respectively.
     """
     # Dispatch
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 43029fde77..7cbfc0a6ed 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -150,3 +150,16 @@ def test_is_title():
         [],
         [False, False, False],
     ]
+
+
+def test_is_ascii():
+    assert ak.str.is_ascii(string).tolist() == [
+        [False, True],
+        [],
+        [False, False, True],
+    ]
+    assert ak.str.is_ascii(bytestring).tolist() == [
+        [False, True],
+        [],
+        [False, False, True],
+    ]

From 7a5463a107fe05fc7f5c0352626a3f1b6fb7fc26 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 17:59:39 -0500
Subject: [PATCH 12/73] capitalize

---
 pyproject.toml                              |  2 +-
 src/awkward/operations/str/__init__.py      |  7 ++-
 src/awkward/operations/str/ak_capitalize.py | 56 +++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py  | 15 ++++++
 4 files changed, 77 insertions(+), 3 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_capitalize.py

diff --git a/pyproject.toml b/pyproject.toml
index d4b6ce3d3e..24a6fed90a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -310,7 +310,7 @@ mccabe.max-complexity = 100
 "src/awkward/__init__.py" = ["E402", "F401", "F403", "I001"]
 "src/awkward/_ext.py" = ["F401"]
 "src/awkward/operations/__init__.py" = ["F401", "F403"]
-"src/awkward/operations/str/__init__.py" = ["F401", "F403"]
+"src/awkward/operations/str/__init__.py" = ["F401", "F403", "I001"]
 "src/awkward/_nplikes/*" = ["TID251"]
 "src/awkward/_operators.py" = ["TID251"]
 "tests*/*" = ["T20", "TID251"]
diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 96a0b36e7b..11044a3ac3 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -5,15 +5,18 @@
 # string predicates
 from awkward.operations.str.ak_is_alnum import *
 from awkward.operations.str.ak_is_alpha import *
-from awkward.operations.str.ak_is_ascii import *
 from awkward.operations.str.ak_is_decimal import *
 from awkward.operations.str.ak_is_digit import *
 from awkward.operations.str.ak_is_lower import *
 from awkward.operations.str.ak_is_numeric import *
 from awkward.operations.str.ak_is_printable import *
 from awkward.operations.str.ak_is_space import *
-from awkward.operations.str.ak_is_title import *
 from awkward.operations.str.ak_is_upper import *
+from awkward.operations.str.ak_is_title import *
+from awkward.operations.str.ak_is_ascii import *
+
+# string transforms
+from awkward.operations.str.ak_capitalize import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_capitalize.py b/src/awkward/operations/str/ak_capitalize.py
new file mode 100644
index 0000000000..f86009622e
--- /dev/null
+++ b/src/awkward/operations/str/ak_capitalize.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("capitalize",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def capitalize(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with a capitalized version, with the first character uppercased and the others lowercased.
+
+    Replaces any bytestring-valued data with a capitalized version (of all ASCII characters).
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_capitalize](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_capitalize.html)
+    or
+    [pyarrow.compute.ascii_capitalize](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_capitalize.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_capitalize, pc.ascii_capitalize, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 7cbfc0a6ed..1c4f172240 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -163,3 +163,18 @@ def test_is_ascii():
         [],
         [False, False, True],
     ]
+
+
+def test_capitalize():
+    print(ak.str.capitalize(string))
+
+    assert ak.str.capitalize(string).tolist() == [
+        ["Αβγ", ""],
+        [],
+        ["→δε←", "Ζz zζ", "Abc"],  # noqa: RUF001, RUF003 (we care about Ζ vs Z)
+    ]
+    assert ak.str.capitalize(bytestring).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζz zζ", "Abc"],
+    ]

From 56cb0b19612c559b8b3ad26efe45209b5dfde096 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 18:08:12 -0500
Subject: [PATCH 13/73] lower

---
 src/awkward/operations/str/__init__.py      |  2 +
 src/awkward/operations/str/ak_capitalize.py |  4 +-
 src/awkward/operations/str/ak_length.py     | 56 +++++++++++++++++++++
 src/awkward/operations/str/ak_lower.py      | 56 +++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py  | 28 ++++++++++-
 5 files changed, 142 insertions(+), 4 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_length.py
 create mode 100644 src/awkward/operations/str/ak_lower.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 11044a3ac3..e7b32bb2e5 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -17,6 +17,8 @@
 
 # string transforms
 from awkward.operations.str.ak_capitalize import *
+from awkward.operations.str.ak_length import *
+from awkward.operations.str.ak_lower import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_capitalize.py b/src/awkward/operations/str/ak_capitalize.py
index f86009622e..84e2843e00 100644
--- a/src/awkward/operations/str/ak_capitalize.py
+++ b/src/awkward/operations/str/ak_capitalize.py
@@ -18,9 +18,9 @@ def capitalize(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with a capitalized version, with the first character uppercased and the others lowercased.
+    Replaces any string-valued data with a capitalized version (correctly transforming Unicode characters), with the first character uppercased and the others lowercased.
 
-    Replaces any bytestring-valued data with a capitalized version (of all ASCII characters).
+    Replaces any bytestring-valued data with a capitalized version (transforming ASCII characters only).
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_length.py b/src/awkward/operations/str/ak_length.py
new file mode 100644
index 0000000000..f77ce22f76
--- /dev/null
+++ b/src/awkward/operations/str/ak_length.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("length",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def length(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with its length in Unicode characters (not its length in bytes).
+
+    Replaces any bytestring-valued data with its length of bytes.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_length](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_length.html)
+    or
+    [pyarrow.compute.binary_length](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_length.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_length, pc.binary_length, bytestring_to_string=False
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/src/awkward/operations/str/ak_lower.py b/src/awkward/operations/str/ak_lower.py
new file mode 100644
index 0000000000..92766b8f48
--- /dev/null
+++ b/src/awkward/operations/str/ak_lower.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("lower",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def lower(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with a lowercase version (correctly transforming Unicode characters).
+
+    Replaces any bytestring-valued data with a lowercase version (transforming ASCII characters only).
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_lower](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_lower.html)
+    or
+    [pyarrow.compute.ascii_lower](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_lower.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_lower, pc.ascii_lower, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 1c4f172240..40fbb2859a 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -166,8 +166,6 @@ def test_is_ascii():
 
 
 def test_capitalize():
-    print(ak.str.capitalize(string))
-
     assert ak.str.capitalize(string).tolist() == [
         ["Αβγ", ""],
         [],
@@ -178,3 +176,29 @@ def test_capitalize():
         [],
         ["→δε←", "ζz zζ", "Abc"],
     ]
+
+
+def test_length():
+    assert ak.str.length(string).tolist() == [
+        [3, 0],
+        [],
+        [4, 5, 3],
+    ]
+    assert ak.str.length(bytestring).tolist() == [
+        [6, 0],
+        [],
+        [10, 7, 3],
+    ]
+
+
+def test_lower():
+    assert ak.str.lower(string).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζz zζ", "abc"],
+    ]
+    assert ak.str.lower(bytestring).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζz zζ", "abc"],
+    ]

From 2c1fe11b284a30e1a8cc41283c79d233ef0f86ae Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 18:11:39 -0500
Subject: [PATCH 14/73] upper

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_upper.py     | 56 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 13 +++++
 3 files changed, 70 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_upper.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index e7b32bb2e5..cd3b1420a2 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -19,6 +19,7 @@
 from awkward.operations.str.ak_capitalize import *
 from awkward.operations.str.ak_length import *
 from awkward.operations.str.ak_lower import *
+from awkward.operations.str.ak_upper import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_upper.py b/src/awkward/operations/str/ak_upper.py
new file mode 100644
index 0000000000..f4ae131af7
--- /dev/null
+++ b/src/awkward/operations/str/ak_upper.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("upper",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def upper(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with a uppercase version (correctly transforming Unicode characters).
+
+    Replaces any bytestring-valued data with a uppercase version (transforming ASCII characters only).
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_upper](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_upper.html)
+    or
+    [pyarrow.compute.ascii_upper](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_upper.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_upper, pc.ascii_upper, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 40fbb2859a..969b95ac8b 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -202,3 +202,16 @@ def test_lower():
         [],
         ["→δε←", "ζz zζ", "abc"],
     ]
+
+
+def test_upper():
+    assert ak.str.upper(string).tolist() == [
+        ["ΑΒΓ", ""],
+        [],
+        ["→ΔΕ←", "ΖZ ZΖ", "ABC"],  # noqa: RUF001, RUF003 (we care about Ζ vs Z)
+    ]
+    assert ak.str.upper(bytestring).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζZ Zζ", "ABC"],
+    ]

From d7db0423e449acea7a5144cf755e3b8454eb566b Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 18:13:08 -0500
Subject: [PATCH 15/73] swapcase

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_swapcase.py  | 56 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 13 +++++
 3 files changed, 70 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_swapcase.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index cd3b1420a2..c7b21ad1c6 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -19,6 +19,7 @@
 from awkward.operations.str.ak_capitalize import *
 from awkward.operations.str.ak_length import *
 from awkward.operations.str.ak_lower import *
+from awkward.operations.str.ak_swapcase import *
 from awkward.operations.str.ak_upper import *
 
 
diff --git a/src/awkward/operations/str/ak_swapcase.py b/src/awkward/operations/str/ak_swapcase.py
new file mode 100644
index 0000000000..cc984af7c1
--- /dev/null
+++ b/src/awkward/operations/str/ak_swapcase.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("swapcase",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def swapcase(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with a swapcase version (correctly transforming Unicode characters).
+
+    Replaces any bytestring-valued data with a swapcase version (transforming ASCII characters only).
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_swapcase](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_swapcase.html)
+    or
+    [pyarrow.compute.ascii_swapcase](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_swapcase.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch: `array` is yielded first (presumably for @high_level_function).
+    yield (array,)
+
+    # Implementation: the work, including the pyarrow import, happens in _impl.
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+    # Behavior for the result: taken from `array` and the `behavior` argument.
+    behavior = behavior_of(array, behavior=behavior)
+    # Strings -> pc.utf8_swapcase; bytestrings (as strings) -> pc.ascii_swapcase.
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_swapcase, pc.ascii_swapcase, bytestring_to_string=True
+        ),
+        behavior,
+    )
+    # `highlevel` decides whether the layout is wrapped into a high-level array.
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 969b95ac8b..1bfd483864 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -204,6 +204,19 @@ def test_lower():
     ]
 
 
+def test_swapcase():
+    assert ak.str.swapcase(string).tolist() == [  # lowercase fixture: same as upper()
+        ["ΑΒΓ", ""],
+        [],
+        ["→ΔΕ←", "ΖZ ZΖ", "ABC"],  # noqa: RUF001, RUF003 (we care about Ζ vs Z)
+    ]
+    assert ak.str.swapcase(bytestring).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζZ Zζ", "ABC"],
+    ]
+
+
 def test_upper():
     assert ak.str.upper(string).tolist() == [
         ["ΑΒΓ", ""],

From 951f9b9dab320ddc9697ef3cc8f35e4e14c313dd Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 18:18:31 -0500
Subject: [PATCH 16/73] title

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_swapcase.py  |  4 +-
 src/awkward/operations/str/ak_title.py     | 56 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 13 +++++
 4 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_title.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index c7b21ad1c6..4c6c54b1b0 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -20,6 +20,7 @@
 from awkward.operations.str.ak_length import *
 from awkward.operations.str.ak_lower import *
 from awkward.operations.str.ak_swapcase import *
+from awkward.operations.str.ak_title import *
 from awkward.operations.str.ak_upper import *
 
 
diff --git a/src/awkward/operations/str/ak_swapcase.py b/src/awkward/operations/str/ak_swapcase.py
index cc984af7c1..1ff02dabad 100644
--- a/src/awkward/operations/str/ak_swapcase.py
+++ b/src/awkward/operations/str/ak_swapcase.py
@@ -18,9 +18,9 @@ def swapcase(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with a swapcase version (correctly transforming Unicode characters).
+    Replaces any string-valued data with uppercase characters transformed to lowercase and vice-versa (correctly transforming Unicode characters).
 
-    Replaces any bytestring-valued data with a swapcase version (transforming ASCII characters only).
+    Replaces any bytestring-valued data with uppercase characters transformed to lowercase and vice-versa (transforming ASCII characters only).
 
     Note: this function does not raise an error if the `array` does
     not contain any string or bytestring data.
diff --git a/src/awkward/operations/str/ak_title.py b/src/awkward/operations/str/ak_title.py
new file mode 100644
index 0000000000..8314002311
--- /dev/null
+++ b/src/awkward/operations/str/ak_title.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("title",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def title(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued data with a titlecase version (correctly transforming Unicode characters). Each word in the output will start with an uppercase character and its remaining characters will be lowercase.
+
+    Replaces any bytestring-valued data with a titlecase version (transforming ASCII characters only). Each word in the output will start with an uppercase character and its remaining characters will be lowercase. Only ASCII letters are recognized, so an ASCII letter that directly follows non-ASCII bytes is uppercased as if it started a word (the tests expect `ζz` to become `ζZ`).
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_title](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_title.html)
+    or
+    [pyarrow.compute.ascii_title](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_title.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch: `array` is yielded first (presumably for @high_level_function).
+    yield (array,)
+
+    # Implementation: the work, including the pyarrow import, happens in _impl.
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+    # Behavior for the result: taken from `array` and the `behavior` argument.
+    behavior = behavior_of(array, behavior=behavior)
+    # Strings -> pc.utf8_title; bytestrings (handed over as strings) -> pc.ascii_title.
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_title, pc.ascii_title, bytestring_to_string=True
+        ),
+        behavior,
+    )
+    # `highlevel` decides whether the layout is wrapped into a high-level array.
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 1bfd483864..01b241b442 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -217,6 +217,19 @@ def test_swapcase():
     ]
 
 
+def test_title():
+    assert ak.str.title(string).tolist() == [  # Unicode-aware word capitalization
+        ["Αβγ", ""],
+        [],
+        ["→Δε←", "Ζz Zζ", "Abc"],  # noqa: RUF001, RUF003 (we care about Ζ vs Z)
+    ]
+    assert ak.str.title(bytestring).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζZ Zζ", "Abc"],
+    ]
+
+
 def test_upper():
     assert ak.str.upper(string).tolist() == [
         ["ΑΒΓ", ""],

From adab5998505c3d94c09efb69c92443f51d97ef68 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 18:32:51 -0500
Subject: [PATCH 17/73] T -> T operations on bytestrings should return
 bytestrings.

---
 src/awkward/operations/str/__init__.py     |  9 ++++++++-
 tests/test_2616_use_pyarrow_for_strings.py | 20 ++++++++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 4c6c54b1b0..1d0e380ece 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -36,7 +36,7 @@ def action(layout, **kwargs):
 
         elif layout.is_list and layout.parameter("__array__") == "bytestring":
             if bytestring_to_string:
-                return from_arrow(
+                out = from_arrow(
                     ascii_function(
                         to_arrow(
                             layout.copy(
@@ -50,6 +50,13 @@ def action(layout, **kwargs):
                     ),
                     highlevel=False,
                 )
+                if out.is_list and out.parameter("__array__") == "string":
+                    out = out.copy(
+                        content=out.content.copy(parameters={"__array__": "byte"}),
+                        parameters={"__array__": "bytestring"},
+                    )
+                return out
+
             else:
                 return from_arrow(
                     ascii_function(to_arrow(layout, extensionarray=False)),
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 01b241b442..f034657503 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -172,9 +172,9 @@ def test_capitalize():
         ["→δε←", "Ζz zζ", "Abc"],  # noqa: RUF001, RUF003 (we care about Ζ vs Z)
     ]
     assert ak.str.capitalize(bytestring).tolist() == [
-        ["αβγ", ""],
+        ["αβγ".encode(), b""],
         [],
-        ["→δε←", "ζz zζ", "Abc"],
+        ["→δε←".encode(), "ζz zζ".encode(), b"Abc"],
     ]
 
 
@@ -198,9 +198,9 @@ def test_lower():
         ["→δε←", "ζz zζ", "abc"],
     ]
     assert ak.str.lower(bytestring).tolist() == [
-        ["αβγ", ""],
+        ["αβγ".encode(), b""],
         [],
-        ["→δε←", "ζz zζ", "abc"],
+        ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
     ]
 
 
@@ -211,9 +211,9 @@ def test_swapcase():
         ["→ΔΕ←", "ΖZ ZΖ", "ABC"],  # noqa: RUF001, RUF003 (we care about Ζ vs Z)
     ]
     assert ak.str.swapcase(bytestring).tolist() == [
-        ["αβγ", ""],
+        ["αβγ".encode(), b""],
         [],
-        ["→δε←", "ζZ Zζ", "ABC"],
+        ["→δε←".encode(), "ζZ Zζ".encode(), b"ABC"],
     ]
 
 
@@ -224,9 +224,9 @@ def test_title():
         ["→Δε←", "Ζz Zζ", "Abc"],  # noqa: RUF001, RUF003 (we care about Ζ vs Z)
     ]
     assert ak.str.title(bytestring).tolist() == [
-        ["αβγ", ""],
+        ["αβγ".encode(), b""],
         [],
-        ["→δε←", "ζZ Zζ", "Abc"],
+        ["→δε←".encode(), "ζZ Zζ".encode(), b"Abc"],
     ]
 
 
@@ -237,7 +237,7 @@ def test_upper():
         ["→ΔΕ←", "ΖZ ZΖ", "ABC"],  # noqa: RUF001, RUF003 (we care about Ζ vs Z)
     ]
     assert ak.str.upper(bytestring).tolist() == [
-        ["αβγ", ""],
+        ["αβγ".encode(), b""],
         [],
-        ["→δε←", "ζZ Zζ", "ABC"],
+        ["→δε←".encode(), "ζZ Zζ".encode(), b"ABC"],
     ]

From 8279fded3627cb0c32950e0c1ee4764e531ffbb5 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 19:14:38 -0500
Subject: [PATCH 18/73] repeat (the first that needs a broadcastable argument)

---
 src/awkward/operations/str/__init__.py     |   1 +
 src/awkward/operations/str/ak_repeat.py    | 107 +++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py |  24 +++++
 3 files changed, 132 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_repeat.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 1d0e380ece..2ed7b8d7eb 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -22,6 +22,7 @@
 from awkward.operations.str.ak_swapcase import *
 from awkward.operations.str.ak_title import *
 from awkward.operations.str.ak_upper import *
+from awkward.operations.str.ak_repeat import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/ak_repeat.py
new file mode 100644
index 0000000000..0bd722acc9
--- /dev/null
+++ b/src/awkward/operations/str/ak_repeat.py
@@ -0,0 +1,107 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("repeat",)
+
+import numbers
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+from awkward._nplikes.numpylike import NumpyMetadata
+
+np = NumpyMetadata.instance()
+
+
+@high_level_function
+def repeat(array, num_repeats, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        num_repeats (int or array-like of ints): Number of times to repeat each
+            string/bytestring: a single integer, or data (anything #ak.to_layout
+            recognizes) that broadcasts against `array`.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string-valued or bytestring-valued data with the same value repeated `num_repeats` times, which can be a scalar integer or a (broadcasted) array of integers.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.binary_repeat](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_repeat.html)
+    on both strings and bytestrings.
+    """
+    # Dispatch (NOTE(review): `num_repeats` may be an array too, yet is not yielded)
+    yield (array,)
+
+    # Implementation: the work, including the pyarrow import, happens in _impl.
+    return _impl(array, num_repeats, highlevel, behavior)
+
+
+def _impl(array, num_repeats, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+    from awkward.operations.ak_from_arrow import from_arrow
+    from awkward.operations.ak_to_arrow import to_arrow
+
+    import pyarrow.compute as pc
+
+    layout = ak.operations.to_layout(array)
+    behavior = behavior_of(array, behavior=behavior)
+    # With allow_other=True the result need not be a Content (checked just below).
+    num_repeats_layout = ak.operations.to_layout(num_repeats, allow_other=True)
+    # Scalar count: validate it once, then repeat every string by the same amount.
+    if not isinstance(num_repeats_layout, ak.contents.Content):
+        if not isinstance(num_repeats, numbers.Integral):
+            raise TypeError(
+                "num_repeats must be an integer or broadcastable to integers"
+            )
+        num_repeats = int(num_repeats)
+
+        def action(layout, **kwargs):
+            if layout.is_list and layout.parameter("__array__") in (
+                "string",
+                "bytestring",
+            ):
+                return from_arrow(
+                    pc.binary_repeat(
+                        to_arrow(layout, extensionarray=False), num_repeats
+                    ),
+                    highlevel=False,
+                )
+        # `action` returns None for non-string nodes (presumably: keep descending).
+        out = ak._do.recursively_apply(layout, action, behavior)
+
+    else:
+        # Array of counts: broadcast it against the strings and repeat pairwise.
+        def action(inputs, **kwargs):
+            if inputs[0].is_list and inputs[0].parameter("__array__") in (
+                "string",
+                "bytestring",
+            ):
+                if not inputs[1].is_numpy or not issubclass(
+                    inputs[1].dtype.type, np.integer
+                ):
+                    raise TypeError(
+                        "num_repeats must be an integer or broadcastable to integers"
+                    )
+                return (
+                    from_arrow(
+                        pc.binary_repeat(
+                            to_arrow(inputs[0], extensionarray=False),
+                            to_arrow(inputs[1], extensionarray=False),
+                        ),
+                        highlevel=False,
+                    ),
+                )
+        # The action returns a 1-tuple, so the broadcast result is a 1-tuple too.
+        out = ak._broadcasting.broadcast_and_apply(
+            (layout, num_repeats_layout), action, behavior
+        )
+        assert isinstance(out, tuple) and len(out) == 1
+        out = out[0]
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index f034657503..a97902715e 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -241,3 +241,27 @@ def test_upper():
         [],
         ["→δε←".encode(), "ζZ Zζ".encode(), b"ABC"],
     ]
+
+
+def test_repeat():
+    assert ak.str.repeat(string, 3).tolist() == [
+        ["αβγαβγαβγ", ""],
+        [],
+        ["→δε←→δε←→δε←", "ζz zζζz zζζz zζ", "abcabcabc"],
+    ]
+    assert ak.str.repeat(bytestring, 3).tolist() == [
+        ["αβγαβγαβγ".encode(), b""],
+        [],
+        ["→δε←→δε←→δε←".encode(), "ζz zζζz zζζz zζ".encode(), b"abcabcabc"],
+    ]
+    # Nested counts are matched element by element; a count of 0 gives "".
+    assert ak.str.repeat(string, [[3, 3], [], [2, 0, 1]]).tolist() == [
+        ["αβγαβγαβγ", ""],
+        [],
+        ["→δε←→δε←", "", "abc"],
+    ]
+    assert ak.str.repeat(bytestring, [[3, 3], [], [2, 0, 1]]).tolist() == [
+        ["αβγαβγαβγ".encode(), b""],
+        [],
+        ["→δε←→δε←".encode(), b"", b"abc"],
+    ]

From 4c41240eaed3bf235e083f4cea7957720fba51b4 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 19:24:45 -0500
Subject: [PATCH 19/73] reverse (because it's easy)

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_reverse.py   | 56 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 13 +++++
 3 files changed, 70 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_reverse.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 2ed7b8d7eb..dc99b99689 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -23,6 +23,7 @@
 from awkward.operations.str.ak_title import *
 from awkward.operations.str.ak_upper import *
 from awkward.operations.str.ak_repeat import *
+from awkward.operations.str.ak_reverse import *
 
 
 def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
diff --git a/src/awkward/operations/str/ak_reverse.py b/src/awkward/operations/str/ak_reverse.py
new file mode 100644
index 0000000000..627f8a95cf
--- /dev/null
+++ b/src/awkward/operations/str/ak_reverse.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("reverse",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def reverse(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Reverses the order of Unicode characters in any string-valued data. (This function operates on Unicode codepoints, not grapheme clusters. Hence, it will not correctly reverse grapheme clusters composed of multiple codepoints.)
+
+    Reverses the order of bytes in any bytestring-valued data. (The reversal is purely byte-wise, so a bytestring holding multi-byte UTF-8 characters does not come back as the reversed text.)
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_reverse](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_reverse.html)
+    or
+    [pyarrow.compute.binary_reverse](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_reverse.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch: `array` is yielded first (presumably for @high_level_function).
+    yield (array,)
+
+    # Implementation: the work, including the pyarrow import, happens in _impl.
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+    # Behavior for the result: taken from `array` and the `behavior` argument.
+    behavior = behavior_of(array, behavior=behavior)
+    # Strings -> pc.utf8_reverse; bytestrings stay bytestrings -> pc.binary_reverse.
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_reverse, pc.binary_reverse, bytestring_to_string=False
+        ),
+        behavior,
+    )
+    # `highlevel` decides whether the layout is wrapped into a high-level array.
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index a97902715e..e20cb7205a 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -265,3 +265,16 @@ def test_repeat():
         [],
         ["→δε←→δε←".encode(), b"", b"abc"],
     ]
+
+
+def test_reverse():
+    assert ak.str.reverse(string).tolist() == [
+        ["αβγ"[::-1], ""],
+        [],
+        ["→δε←"[::-1], "ζz zζ"[::-1], "abc"[::-1]],
+    ]
+    assert ak.str.reverse(bytestring).tolist() == [  # byte-wise, not per character
+        ["αβγ".encode()[::-1], b""],
+        [],
+        ["→δε←".encode()[::-1], "ζz zζ".encode()[::-1], b"abc"[::-1]],
+    ]

From 42604f0d74b2196767e05382550dda2c6d96f648 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 19:46:09 -0500
Subject: [PATCH 20/73] replace_slice

---
 src/awkward/operations/str/__init__.py        | 11 ++--
 src/awkward/operations/str/ak_repeat.py       |  1 -
 .../operations/str/ak_replace_slice.py        | 59 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 13 ++++
 4 files changed, 79 insertions(+), 5 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_replace_slice.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index dc99b99689..9ad840747a 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -23,17 +23,19 @@
 from awkward.operations.str.ak_title import *
 from awkward.operations.str.ak_upper import *
 from awkward.operations.str.ak_repeat import *
+from awkward.operations.str.ak_replace_slice import *
 from awkward.operations.str.ak_reverse import *
 
 
-def _get_action(utf8_function, ascii_function, *, bytestring_to_string=False):
+def _get_action(utf8_function, ascii_function, *args, bytestring_to_string=False):
     from awkward.operations.ak_from_arrow import from_arrow
     from awkward.operations.ak_to_arrow import to_arrow
 
     def action(layout, **kwargs):
         if layout.is_list and layout.parameter("__array__") == "string":
             return from_arrow(
-                utf8_function(to_arrow(layout, extensionarray=False)), highlevel=False
+                utf8_function(to_arrow(layout, extensionarray=False), *args),
+                highlevel=False,
             )
 
         elif layout.is_list and layout.parameter("__array__") == "bytestring":
@@ -48,7 +50,8 @@ def action(layout, **kwargs):
                                 parameters={"__array__": "string"},
                             ),
                             extensionarray=False,
-                        )
+                        ),
+                        *args,
                     ),
                     highlevel=False,
                 )
@@ -61,7 +64,7 @@ def action(layout, **kwargs):
 
             else:
                 return from_arrow(
-                    ascii_function(to_arrow(layout, extensionarray=False)),
+                    ascii_function(to_arrow(layout, extensionarray=False), *args),
                     highlevel=False,
                 )
 
diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/ak_repeat.py
index 0bd722acc9..4419eed4c2 100644
--- a/src/awkward/operations/str/ak_repeat.py
+++ b/src/awkward/operations/str/ak_repeat.py
@@ -59,7 +59,6 @@ def _impl(array, num_repeats, highlevel, behavior):
             raise TypeError(
                 "num_repeats must be an integer or broadcastable to integers"
             )
-        num_repeats = int(num_repeats)
 
         def action(layout, **kwargs):
             if layout.is_list and layout.parameter("__array__") in (
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
new file mode 100644
index 0000000000..8bde0e2d0c
--- /dev/null
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -0,0 +1,59 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("replace_slice",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces slices of any string or bytestring-valued data with `replacement` between `start` and `stop` indexes; `start` is inclusive and `stop` is exclusive and both are 0-indexed.
+
+    For strings, `start` and `stop` are measured in Unicode characters; for bytestrings, `start` and `stop` are measured in bytes, so a slice boundary can fall inside a multi-byte UTF-8 character.
+
+    The `start`, `stop`, and `replacement` are scalars; they cannot be different for each string/bytestring in the sample.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_replace_slice](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_replace_slice.html)
+    or
+    [pyarrow.compute.binary_replace_slice](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_replace_slice.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch: `array` is yielded first (presumably for @high_level_function).
+    yield (array,)
+
+    # Implementation: the work, including the pyarrow import, happens in _impl.
+    return _impl(array, start, stop, replacement, highlevel, behavior)
+
+
+def _impl(array, start, stop, replacement, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+    # Behavior for the result: taken from `array` and the `behavior` argument.
+    behavior = behavior_of(array, behavior=behavior)
+    # start/stop/replacement are forwarded to whichever pyarrow function is called.
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_replace_slice, pc.binary_replace_slice, start, stop, replacement
+        ),
+        behavior,
+    )
+    # `highlevel` decides whether the layout is wrapped into a high-level array.
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index e20cb7205a..bffb042efb 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -267,6 +267,19 @@ def test_repeat():
     ]
 
 
+def test_replace_slice():
+    assert ak.str.replace_slice(string[:, :1], 1, 2, "qj").tolist() == [  # by char
+        ["αqjγ"],  # noqa: RUF001
+        [],
+        ["→qjε←"],
+    ]
+    assert ak.str.replace_slice(bytestring[:, :1], 1, 2, b"qj").tolist() == [  # by byte
+        [b"\xceqj\xce\xb2\xce\xb3"],
+        [],
+        [b"\xe2qj\x92\xce\xb4\xce\xb5\xe2\x86\x90"],
+    ]
+
+
 def test_reverse():
     assert ak.str.reverse(string).tolist() == [
         ["αβγ"[::-1], ""],

From b69d7a24dcee32af4124d1c15fd7ae2c29ce7222 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 19:59:29 -0500
Subject: [PATCH 21/73] replace_substring

---
 src/awkward/operations/str/__init__.py        | 14 ++--
 .../operations/str/ak_replace_slice.py        |  3 +
 .../operations/str/ak_replace_substring.py    | 68 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 13 ++++
 4 files changed, 94 insertions(+), 4 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_replace_substring.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 9ad840747a..6aec5c824c 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -25,16 +25,19 @@
 from awkward.operations.str.ak_repeat import *
 from awkward.operations.str.ak_replace_slice import *
 from awkward.operations.str.ak_reverse import *
+from awkward.operations.str.ak_replace_substring import *
 
 
-def _get_action(utf8_function, ascii_function, *args, bytestring_to_string=False):
+def _get_action(
+    utf8_function, ascii_function, *args, bytestring_to_string=False, **kwargs
+):
     from awkward.operations.ak_from_arrow import from_arrow
     from awkward.operations.ak_to_arrow import to_arrow
 
-    def action(layout, **kwargs):
+    def action(layout, **absorb):
         if layout.is_list and layout.parameter("__array__") == "string":
             return from_arrow(
-                utf8_function(to_arrow(layout, extensionarray=False), *args),
+                utf8_function(to_arrow(layout, extensionarray=False), *args, **kwargs),
                 highlevel=False,
             )
 
@@ -52,6 +55,7 @@ def action(layout, **kwargs):
                             extensionarray=False,
                         ),
                         *args,
+                        **kwargs,
                     ),
                     highlevel=False,
                 )
@@ -64,7 +68,9 @@ def action(layout, **kwargs):
 
             else:
                 return from_arrow(
-                    ascii_function(to_arrow(layout, extensionarray=False), *args),
+                    ascii_function(
+                        to_arrow(layout, extensionarray=False), *args, **kwargs
+                    ),
                     highlevel=False,
                 )
 
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index 8bde0e2d0c..72653128bd 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -14,6 +14,9 @@ def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=N
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
+        start (int): Index to start slicing at (inclusive).
+        stop (int): Index to stop slicing at (exclusive).
+        replacement (str): What to replace the slice with.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
new file mode 100644
index 0000000000..380750da2f
--- /dev/null
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -0,0 +1,68 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("replace_substring",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def replace_substring(
+    array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str): Substring pattern to look for inside input values.
+        replacement (str): What to replace the pattern with.
+        max_replacements (None or int): If not None and not -1, limits the
+            maximum number of replacements per string/bytestring, counting from
+            the left.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces non-overlapping subsequences of any string or bytestring-valued data that match a literal `pattern` with `replacement`.
+
+    The `pattern` and `replacement` are scalars; they cannot be different for each string/bytestring in the sample.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.replace_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.replace_substring.html)
+    on both strings and bytestrings. (There is no separate `utf8_`/`ascii_`
+    pair of functions for this operation: the same pyarrow function
+    handles both kinds of data.)
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, replacement, max_replacements, highlevel, behavior)
+
+
+def _impl(array, pattern, replacement, max_replacements, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.replace_substring,
+            pc.replace_substring,
+            pattern,
+            replacement,
+            max_replacements=max_replacements,
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index bffb042efb..c22e0b83a7 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -291,3 +291,16 @@ def test_reverse():
         [],
         ["→δε←".encode()[::-1], "ζz zζ".encode()[::-1], b"abc"[::-1]],
     ]
+
+
+def test_replace_substring():
+    assert ak.str.replace_substring(string, "βγ", "HELLO").tolist() == [
+        ["αHELLO", ""],  # noqa: RUF001
+        [],
+        ["→δε←", "ζz zζ", "abc"],
+    ]
+    assert ak.str.replace_substring(bytestring, "βγ".encode(), b"HELLO").tolist() == [
+        ["αHELLO".encode(), b""],  # noqa: RUF001
+        [],
+        ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
+    ]

From 3d825aa1007975cf5a9067a23c7cddb20a9886e4 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:01:39 -0500
Subject: [PATCH 22/73] Also test 'max_replacements' in replace_substring.

---
 tests/test_2616_use_pyarrow_for_strings.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index c22e0b83a7..6efead56a7 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -304,3 +304,18 @@ def test_replace_substring():
         [],
         ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
     ]
+
+    assert ak.str.replace_substring(
+        string, "βγ", "HELLO", max_replacements=0
+    ).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζz zζ", "abc"],
+    ]
+    assert ak.str.replace_substring(
+        bytestring, "βγ".encode(), b"HELLO", max_replacements=0
+    ).tolist() == [
+        ["αβγ".encode(), b""],
+        [],
+        ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
+    ]

From 983c3ba798a0e10a44898f840589d7dbc3111ea6 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:05:09 -0500
Subject: [PATCH 23/73] replace_substring_regex: done with string transforms

---
 src/awkward/operations/str/__init__.py        |  1 +
 .../str/ak_replace_substring_regex.py         | 68 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 30 ++++++++
 3 files changed, 99 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_replace_substring_regex.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 6aec5c824c..00d3d8e0de 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -26,6 +26,7 @@
 from awkward.operations.str.ak_replace_slice import *
 from awkward.operations.str.ak_reverse import *
 from awkward.operations.str.ak_replace_substring import *
+from awkward.operations.str.ak_replace_substring_regex import *
 
 
 def _get_action(
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
new file mode 100644
index 0000000000..02a77c2b0a
--- /dev/null
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -0,0 +1,68 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("replace_substring_regex",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def replace_substring_regex(
+    array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str): Regular expression pattern to look for inside input values.
+        replacement (str): What to replace the pattern with.
+        max_replacements (None or int): If not None and not -1, limits the
+            maximum number of replacements per string/bytestring, counting from
+            the left.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces non-overlapping subsequences of any string or bytestring-valued data that match a regular expression `pattern` with `replacement`.
+
+    The `pattern` and `replacement` are scalars; they cannot be different for each string/bytestring in the sample.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.replace_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.replace_substring_regex.html)
+    on both strings and bytestrings. (There is no separate `utf8_`/`ascii_`
+    pair of functions for this operation: the same pyarrow function
+    handles both kinds of data.)
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, replacement, max_replacements, highlevel, behavior)
+
+
+def _impl(array, pattern, replacement, max_replacements, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.replace_substring_regex,
+            pc.replace_substring_regex,
+            pattern,
+            replacement,
+            max_replacements=max_replacements,
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 6efead56a7..30ea6d6ae8 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -319,3 +319,33 @@ def test_replace_substring():
         [],
         ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
     ]
+
+
+def test_replace_substring_regex():
+    assert ak.str.replace_substring_regex(string, "βγ", "HELLO").tolist() == [
+        ["αHELLO", ""],  # noqa: RUF001
+        [],
+        ["→δε←", "ζz zζ", "abc"],
+    ]
+    assert ak.str.replace_substring_regex(
+        bytestring, "βγ".encode(), b"HELLO"
+    ).tolist() == [
+        ["αHELLO".encode(), b""],  # noqa: RUF001
+        [],
+        ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
+    ]
+
+    assert ak.str.replace_substring_regex(
+        string, "βγ", "HELLO", max_replacements=0
+    ).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζz zζ", "abc"],
+    ]
+    assert ak.str.replace_substring_regex(
+        bytestring, "βγ".encode(), b"HELLO", max_replacements=0
+    ).tolist() == [
+        ["αβγ".encode(), b""],
+        [],
+        ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
+    ]

From bb8e8d74e407dd6d55d426aeb685df2da13151a6 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:16:21 -0500
Subject: [PATCH 24/73] center

---
 src/awkward/operations/str/__init__.py        |  3 +
 src/awkward/operations/str/ak_center.py       | 61 +++++++++++++++++++
 .../operations/str/ak_replace_slice.py        |  2 +-
 .../operations/str/ak_replace_substring.py    |  2 +-
 .../str/ak_replace_substring_regex.py         |  2 +-
 tests/test_2616_use_pyarrow_for_strings.py    | 20 ++++++
 6 files changed, 87 insertions(+), 3 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_center.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 00d3d8e0de..772f320cd9 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -28,6 +28,9 @@
 from awkward.operations.str.ak_replace_substring import *
 from awkward.operations.str.ak_replace_substring_regex import *
 
+# string padding
+from awkward.operations.str.ak_center import *
+
 
 def _get_action(
     utf8_function, ascii_function, *args, bytestring_to_string=False, **kwargs
diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
new file mode 100644
index 0000000000..0b8d16e3be
--- /dev/null
+++ b/src/awkward/operations/str/ak_center.py
@@ -0,0 +1,61 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("center",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def center(array, width, padding=" ", *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        width (int): Desired string length.
+        padding (str or bytes): What to pad the string with. Should be one codepoint or byte.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string or bytestring-valued data with strings/bytestrings of a given `width`, padding both sides with the given `padding` codepoint or byte.
+
+    If the data are strings, `width` is measured in codepoints and `padding` must be one codepoint.
+
+    If the data are bytestrings, `width` is measured in bytes and `padding` must be one byte.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_center](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_center.html)
+    or
+    [pyarrow.compute.ascii_center](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_center.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, width, padding, highlevel, behavior)
+
+
+def _impl(array, width, padding, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_center, pc.ascii_center, width, padding, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index 72653128bd..e569458b66 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -16,7 +16,7 @@ def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=N
         array: Array-like data (anything #ak.to_layout recognizes).
         start (int): Index to start slicing at (inclusive).
         stop (int): Index to stop slicing at (exclusive).
-        replacement (str): What to replace the slice with.
+        replacement (str or bytes): What to replace the slice with.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index 380750da2f..a589afe136 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -17,7 +17,7 @@ def replace_substring(
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
         pattern (str): Substring pattern to look for inside input values.
-        replacement (str): What to replace the pattern with.
+        replacement (str or bytes): What to replace the pattern with.
         max_replacements (None or int): If not None and not -1, limits the
             maximum number of replacements per string/bytestring, counting from
             the left.
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index 02a77c2b0a..be63772e61 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -17,7 +17,7 @@ def replace_substring_regex(
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
         pattern (str): Regular expression pattern to look for inside input values.
-        replacement (str): What to replace the pattern with.
+        replacement (str or bytes): What to replace the pattern with.
         max_replacements (None or int): If not None and not -1, limits the
             maximum number of replacements per string/bytestring, counting from
             the left.
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 30ea6d6ae8..88e8063d5a 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -349,3 +349,23 @@ def test_replace_substring_regex():
         [],
         ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
     ]
+
+
+def test_center():
+    assert ak.str.center(string, 15, " ").tolist() == [
+        ["      αβγ      ", "               "],
+        [],
+        ["     →δε←      ", "     ζz zζ     ", "      abc      "],
+    ]
+
+    print(ak.str.center(bytestring, 15, " ").tolist())
+
+    assert ak.str.center(bytestring, 15, b" ").tolist() == [
+        [b"    \xce\xb1\xce\xb2\xce\xb3     ", b"               "],
+        [],
+        [
+            b"  \xe2\x86\x92\xce\xb4\xce\xb5\xe2\x86\x90   ",
+            b"    \xce\xb6z z\xce\xb6    ",
+            b"      abc      ",
+        ],
+    ]

From fa5d0bc58ba42c896a273f973e73c2e7a5ec5955 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:21:12 -0500
Subject: [PATCH 25/73] lpad and rpad

---
 src/awkward/operations/str/__init__.py     |  2 +
 src/awkward/operations/str/ak_center.py    |  2 +-
 src/awkward/operations/str/ak_lpad.py      | 61 ++++++++++++++++++++++
 src/awkward/operations/str/ak_rpad.py      | 61 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 40 ++++++++++++++
 5 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 src/awkward/operations/str/ak_lpad.py
 create mode 100644 src/awkward/operations/str/ak_rpad.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 772f320cd9..b936b0de8a 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -30,6 +30,8 @@
 
 # string padding
 from awkward.operations.str.ak_center import *
+from awkward.operations.str.ak_lpad import *
+from awkward.operations.str.ak_rpad import *
 
 
 def _get_action(
diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
index 0b8d16e3be..284e6595c3 100644
--- a/src/awkward/operations/str/ak_center.py
+++ b/src/awkward/operations/str/ak_center.py
@@ -21,7 +21,7 @@ def center(array, width, padding=" ", *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string or bytestring-valued data with strings/bytestrings of a given `width`, padding both sides with the given `padding` codepoint or byte.
+    Replaces any string or bytestring-valued data with centered strings/bytestrings of a given `width`, padding both sides with the given `padding` codepoint or byte.
 
     If the data are strings, `width` is measured in codepoints and `padding` must be one codepoint.
 
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/ak_lpad.py
new file mode 100644
index 0000000000..2aad079ea7
--- /dev/null
+++ b/src/awkward/operations/str/ak_lpad.py
@@ -0,0 +1,61 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("lpad",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def lpad(array, width, padding=" ", *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        width (int): Desired string length.
+        padding (str or bytes): What to pad the string with. Should be one codepoint or byte.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string or bytestring-valued data with right-aligned strings/bytestrings of a given `width`, padding the left side with the given `padding` codepoint or byte.
+
+    If the data are strings, `width` is measured in codepoints and `padding` must be one codepoint.
+
+    If the data are bytestrings, `width` is measured in bytes and `padding` must be one byte.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_lpad](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_lpad.html)
+    or
+    [pyarrow.compute.ascii_lpad](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_lpad.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, width, padding, highlevel, behavior)
+
+
+def _impl(array, width, padding, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_lpad, pc.ascii_lpad, width, padding, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/ak_rpad.py
new file mode 100644
index 0000000000..5146abb6bb
--- /dev/null
+++ b/src/awkward/operations/str/ak_rpad.py
@@ -0,0 +1,61 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("rpad",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def rpad(array, width, padding=" ", *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        width (int): Desired string length.
+        padding (str or bytes): What to pad the string with. Should be one codepoint or byte.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string or bytestring-valued data with left-aligned strings/bytestrings of a given `width`, padding the right side with the given `padding` codepoint or byte.
+
+    If the data are strings, `width` is measured in codepoints and `padding` must be one codepoint.
+
+    If the data are bytestrings, `width` is measured in bytes and `padding` must be one byte.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_rpad](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_rpad.html)
+    or
+    [pyarrow.compute.ascii_rpad](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_rpad.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, width, padding, highlevel, behavior)
+
+
+def _impl(array, width, padding, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_rpad, pc.ascii_rpad, width, padding, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 88e8063d5a..36aabb7d4b 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -369,3 +369,43 @@ def test_center():
             b"      abc      ",
         ],
     ]
+
+
+def test_lpad():
+    assert ak.str.lpad(string, 15, " ").tolist() == [
+        ["            αβγ", "               "],
+        [],
+        ["           →δε←", "          ζz zζ", "            abc"],
+    ]
+
+    print(ak.str.lpad(bytestring, 15, " ").tolist())
+
+    assert ak.str.lpad(bytestring, 15, b" ").tolist() == [
+        [b"         \xce\xb1\xce\xb2\xce\xb3", b"               "],
+        [],
+        [
+            b"     \xe2\x86\x92\xce\xb4\xce\xb5\xe2\x86\x90",
+            b"        \xce\xb6z z\xce\xb6",
+            b"            abc",
+        ],
+    ]
+
+
+def test_rpad():
+    assert ak.str.rpad(string, 15, " ").tolist() == [
+        ["αβγ            ", "               "],
+        [],
+        ["→δε←           ", "ζz zζ          ", "abc            "],
+    ]
+
+    print(ak.str.rpad(bytestring, 15, " ").tolist())
+
+    assert ak.str.rpad(bytestring, 15, b" ").tolist() == [
+        [b"\xce\xb1\xce\xb2\xce\xb3         ", b"               "],
+        [],
+        [
+            b"\xe2\x86\x92\xce\xb4\xce\xb5\xe2\x86\x90     ",
+            b"\xce\xb6z z\xce\xb6        ",
+            b"abc            ",
+        ],
+    ]

From 99c4ce0bbb4354958360398bf15175b8503f7be8 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:30:50 -0500
Subject: [PATCH 26/73] trim

---
 src/awkward/operations/str/__init__.py     |  3 ++
 src/awkward/operations/str/ak_trim.py      | 60 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 32 ++++++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_trim.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index b936b0de8a..8f0a419840 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -33,6 +33,9 @@
 from awkward.operations.str.ak_lpad import *
 from awkward.operations.str.ak_rpad import *
 
+# string trimming
+from awkward.operations.str.ak_trim import *
+
 
 def _get_action(
     utf8_function, ascii_function, *args, bytestring_to_string=False, **kwargs
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
new file mode 100644
index 0000000000..e51b638666
--- /dev/null
+++ b/src/awkward/operations/str/ak_trim.py
@@ -0,0 +1,60 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("trim",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def trim(array, characters, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        characters (str or bytes): Individual characters to be trimmed from the string.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Removes any leading or trailing characters of `characters` of any string or bytestring-valued data.
+
+    If the data are strings, `characters` are interpreted as unordered, individual codepoints.
+
+    If the data are bytestrings, `characters` are interpreted as unordered, individual bytes.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_trim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_trim.html)
+    or
+    [pyarrow.compute.ascii_trim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_trim.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, characters, highlevel, behavior)
+
+
+def _impl(array, characters, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_trim, pc.ascii_trim, characters, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 36aabb7d4b..69579d584f 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -21,6 +21,25 @@
     ]
 )
 
+string_padded = ak.Array(
+    [
+        ["      αβγ      ", "               "],
+        [],
+        ["     →δε←      ", "     ζz zζ     ", "      abc      "],
+    ]
+)
+bytestring_padded = ak.Array(
+    [
+        [b"    \xce\xb1\xce\xb2\xce\xb3     ", b"               "],
+        [],
+        [
+            b"  \xe2\x86\x92\xce\xb4\xce\xb5\xe2\x86\x90   ",
+            b"    \xce\xb6z z\xce\xb6    ",
+            b"      abc      ",
+        ],
+    ]
+)
+
 
 def test_is_alnum():
     assert ak.str.is_alnum(string).tolist() == [
@@ -409,3 +428,16 @@ def test_rpad():
             b"abc            ",
         ],
     ]
+
+
+def test_trim():
+    assert ak.str.trim(string_padded, " ").tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζz zζ", "abc"],
+    ]
+    assert ak.str.trim(bytestring_padded, b" ").tolist() == [
+        ["αβγ".encode(), b""],
+        [],
+        ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
+    ]

From d71367048262566b3e234d7beeced0840854f9d5 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:33:19 -0500
Subject: [PATCH 27/73] trim_whitespace

---
 src/awkward/operations/str/__init__.py        |  1 +
 src/awkward/operations/str/ak_trim.py         |  2 +-
 .../operations/str/ak_trim_whitespace.py      | 55 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 13 +++++
 4 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 src/awkward/operations/str/ak_trim_whitespace.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 8f0a419840..75d9ca92d5 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -35,6 +35,7 @@
 
 # string trimming
 from awkward.operations.str.ak_trim import *
+from awkward.operations.str.ak_trim_whitespace import *
 
 
 def _get_action(
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index e51b638666..d932016b3f 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -20,7 +20,7 @@ def trim(array, characters, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Removes any leading or trailing characters of `characters` of any string or bytestring-valued data.
+    Removes any leading or trailing characters of `characters` from any string or bytestring-valued data.
 
     If the data are strings, `characters` are interpreted as unordered, individual codepoints.
 
diff --git a/src/awkward/operations/str/ak_trim_whitespace.py b/src/awkward/operations/str/ak_trim_whitespace.py
new file mode 100644
index 0000000000..891c6d706e
--- /dev/null
+++ b/src/awkward/operations/str/ak_trim_whitespace.py
@@ -0,0 +1,55 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("trim_whitespace",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def trim_whitespace(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Removes any leading or trailing whitespace from any string or bytestring-valued data.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_trim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_trim_whitespace.html)
+    or
+    [pyarrow.compute.ascii_trim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_trim_whitespace.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_trim_whitespace, pc.ascii_trim_whitespace, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 69579d584f..38bc25b11d 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -441,3 +441,16 @@ def test_trim():
         [],
         ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
     ]
+
+
+def test_trim_whitespace():
+    assert ak.str.trim_whitespace(string_padded).tolist() == [
+        ["αβγ", ""],
+        [],
+        ["→δε←", "ζz zζ", "abc"],
+    ]
+    assert ak.str.trim_whitespace(bytestring_padded).tolist() == [
+        ["αβγ".encode(), b""],
+        [],
+        ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
+    ]

From e63bd3eb9efbd96a712325ad04555cac7be04430 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:36:08 -0500
Subject: [PATCH 28/73] ltrim

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_ltrim.py     | 60 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 33 ++++++++++++
 3 files changed, 94 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_ltrim.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 75d9ca92d5..dc236b8ba5 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -34,6 +34,7 @@
 from awkward.operations.str.ak_rpad import *
 
 # string trimming
+from awkward.operations.str.ak_ltrim import *
 from awkward.operations.str.ak_trim import *
 from awkward.operations.str.ak_trim_whitespace import *
 
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
new file mode 100644
index 0000000000..1a1959bdb6
--- /dev/null
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -0,0 +1,60 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("ltrim",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def ltrim(array, characters, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        characters (str or bytes): Individual characters to be trimmed from the string.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Removes any leading characters of `characters` from any string or bytestring-valued data.
+
+    If the data are strings, `characters` are interpreted as unordered, individual codepoints.
+
+    If the data are bytestrings, `characters` are interpreted as unordered, individual bytes.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_ltrim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_ltrim.html)
+    or
+    [pyarrow.compute.ascii_ltrim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_ltrim.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, characters, highlevel, behavior)
+
+
+def _impl(array, characters, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_ltrim, pc.ascii_ltrim, characters, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 38bc25b11d..3d256e8e9f 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -430,6 +430,39 @@ def test_rpad():
     ]
 
 
+# string_padded = ak.Array(
+#     [
+#         ["      αβγ      ", "               "],
+#         [],
+#         ["     →δε←      ", "     ζz zζ     ", "      abc      "],
+#     ]
+# )
+# bytestring_padded = ak.Array(
+#     [
+#         [b"    \xce\xb1\xce\xb2\xce\xb3     ", b"               "],
+#         [],
+#         [
+#             b"  \xe2\x86\x92\xce\xb4\xce\xb5\xe2\x86\x90   ",
+#             b"    \xce\xb6z z\xce\xb6    ",
+#             b"      abc      ",
+#         ],
+#     ]
+# )
+
+
+def test_ltrim():
+    assert ak.str.ltrim(string_padded, " ").tolist() == [
+        ["αβγ      ", ""],
+        [],
+        ["→δε←      ", "ζz zζ     ", "abc      "],
+    ]
+    assert ak.str.ltrim(bytestring_padded, b" ").tolist() == [
+        ["αβγ     ".encode(), b""],
+        [],
+        ["→δε←   ".encode(), "ζz zζ    ".encode(), b"abc      "],
+    ]
+
+
 def test_trim():
     assert ak.str.trim(string_padded, " ").tolist() == [
         ["αβγ", ""],

From 3040c4eea9838a17bac77eae1c975bec5c3f6011 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:39:19 -0500
Subject: [PATCH 29/73] rtrim

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_rtrim.py     | 60 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 33 +++++-------
 3 files changed, 74 insertions(+), 20 deletions(-)
 create mode 100644 src/awkward/operations/str/ak_rtrim.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index dc236b8ba5..f4e3a8b15a 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -35,6 +35,7 @@
 
 # string trimming
 from awkward.operations.str.ak_ltrim import *
+from awkward.operations.str.ak_rtrim import *
 from awkward.operations.str.ak_trim import *
 from awkward.operations.str.ak_trim_whitespace import *
 
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
new file mode 100644
index 0000000000..db5f8f7344
--- /dev/null
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -0,0 +1,60 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("rtrim",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def rtrim(array, characters, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        characters (str or bytes): Individual characters to be trimmed from the string.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Removes any trailing characters of `characters` from any string or bytestring-valued data.
+
+    If the data are strings, `characters` are interpreted as unordered, individual codepoints.
+
+    If the data are bytestrings, `characters` are interpreted as unordered, individual bytes.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_rtrim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_rtrim.html)
+    or
+    [pyarrow.compute.ascii_rtrim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_rtrim.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, characters, highlevel, behavior)
+
+
+def _impl(array, characters, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_rtrim, pc.ascii_rtrim, characters, bytestring_to_string=True
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 3d256e8e9f..b170ffc9c4 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -430,26 +430,6 @@ def test_rpad():
     ]
 
 
-# string_padded = ak.Array(
-#     [
-#         ["      αβγ      ", "               "],
-#         [],
-#         ["     →δε←      ", "     ζz zζ     ", "      abc      "],
-#     ]
-# )
-# bytestring_padded = ak.Array(
-#     [
-#         [b"    \xce\xb1\xce\xb2\xce\xb3     ", b"               "],
-#         [],
-#         [
-#             b"  \xe2\x86\x92\xce\xb4\xce\xb5\xe2\x86\x90   ",
-#             b"    \xce\xb6z z\xce\xb6    ",
-#             b"      abc      ",
-#         ],
-#     ]
-# )
-
-
 def test_ltrim():
     assert ak.str.ltrim(string_padded, " ").tolist() == [
         ["αβγ      ", ""],
@@ -463,6 +443,19 @@ def test_ltrim():
     ]
 
 
+def test_rtrim():
+    assert ak.str.rtrim(string_padded, " ").tolist() == [
+        ["      αβγ", ""],
+        [],
+        ["     →δε←", "     ζz zζ", "      abc"],
+    ]
+    assert ak.str.rtrim(bytestring_padded, b" ").tolist() == [
+        ["    αβγ".encode(), b""],
+        [],
+        ["  →δε←".encode(), "    ζz zζ".encode(), b"      abc"],
+    ]
+
+
 def test_trim():
     assert ak.str.trim(string_padded, " ").tolist() == [
         ["αβγ", ""],

From 6320f2e7c3e51f0af829129aa158776e6d0fd6b5 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:40:56 -0500
Subject: [PATCH 30/73] rtrim_whitespace

---
 src/awkward/operations/str/__init__.py        |  1 +
 .../operations/str/ak_rtrim_whitespace.py     | 57 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 13 +++++
 3 files changed, 71 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_rtrim_whitespace.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index f4e3a8b15a..ecf688a55b 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -36,6 +36,7 @@
 # string trimming
 from awkward.operations.str.ak_ltrim import *
 from awkward.operations.str.ak_rtrim import *
+from awkward.operations.str.ak_rtrim_whitespace import *
 from awkward.operations.str.ak_trim import *
 from awkward.operations.str.ak_trim_whitespace import *
 
diff --git a/src/awkward/operations/str/ak_rtrim_whitespace.py b/src/awkward/operations/str/ak_rtrim_whitespace.py
new file mode 100644
index 0000000000..17df969275
--- /dev/null
+++ b/src/awkward/operations/str/ak_rtrim_whitespace.py
@@ -0,0 +1,57 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("rtrim_whitespace",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def rtrim_whitespace(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Removes any trailing whitespace from any string or bytestring-valued data.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_rtrim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_rtrim_whitespace.html)
+    or
+    [pyarrow.compute.ascii_rtrim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_rtrim_whitespace.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_rtrim_whitespace,
+            pc.ascii_rtrim_whitespace,
+            bytestring_to_string=True,
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index b170ffc9c4..f96891eac8 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -456,6 +456,19 @@ def test_rtrim():
     ]
 
 
+def test_rtrim_whitespace():
+    assert ak.str.rtrim_whitespace(string_padded).tolist() == [
+        ["      αβγ", ""],
+        [],
+        ["     →δε←", "     ζz zζ", "      abc"],
+    ]
+    assert ak.str.rtrim_whitespace(bytestring_padded).tolist() == [
+        ["    αβγ".encode(), b""],
+        [],
+        ["  →δε←".encode(), "    ζz zζ".encode(), b"      abc"],
+    ]
+
+
 def test_trim():
     assert ak.str.trim(string_padded, " ").tolist() == [
         ["αβγ", ""],

From 3d0998b087ee0b66df1eaac6ae0798233ed01f8a Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 20:42:48 -0500
Subject: [PATCH 31/73] ltrim_whitespace

---
 src/awkward/operations/str/__init__.py        |  1 +
 .../operations/str/ak_ltrim_whitespace.py     | 57 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 13 +++++
 3 files changed, 71 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_ltrim_whitespace.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index ecf688a55b..6a67aee697 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -35,6 +35,7 @@
 
 # string trimming
 from awkward.operations.str.ak_ltrim import *
+from awkward.operations.str.ak_ltrim_whitespace import *
 from awkward.operations.str.ak_rtrim import *
 from awkward.operations.str.ak_rtrim_whitespace import *
 from awkward.operations.str.ak_trim import *
diff --git a/src/awkward/operations/str/ak_ltrim_whitespace.py b/src/awkward/operations/str/ak_ltrim_whitespace.py
new file mode 100644
index 0000000000..f465f81e13
--- /dev/null
+++ b/src/awkward/operations/str/ak_ltrim_whitespace.py
@@ -0,0 +1,57 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("ltrim_whitespace",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def ltrim_whitespace(array, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Removes any leading whitespace from any string or bytestring-valued data.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_ltrim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_ltrim_whitespace.html)
+    or
+    [pyarrow.compute.ascii_ltrim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_ltrim_whitespace.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, highlevel, behavior)
+
+
+def _impl(array, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_action(
+            pc.utf8_ltrim_whitespace,
+            pc.ascii_ltrim_whitespace,
+            bytestring_to_string=True,
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index f96891eac8..d5604b37a8 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -443,6 +443,19 @@ def test_ltrim():
     ]
 
 
+def test_ltrim_whitespace():
+    assert ak.str.ltrim_whitespace(string_padded).tolist() == [
+        ["αβγ      ", ""],
+        [],
+        ["→δε←      ", "ζz zζ     ", "abc      "],
+    ]
+    assert ak.str.ltrim_whitespace(bytestring_padded).tolist() == [
+        ["αβγ     ".encode(), b""],
+        [],
+        ["→δε←   ".encode(), "ζz zζ    ".encode(), b"abc      "],
+    ]
+
+
 def test_rtrim():
     assert ak.str.rtrim(string_padded, " ").tolist() == [
         ["      αβγ", ""],

From e624ee3bd05982f069239156079089b80b532208 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Fri, 4 Aug 2023 21:09:41 -0500
Subject: [PATCH 32/73] slice

---
 src/awkward/operations/str/__init__.py     | 12 ++++
 src/awkward/operations/str/ak_slice.py     | 69 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 25 ++++++++
 3 files changed, 106 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_slice.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 6a67aee697..7bb5fce6db 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -41,6 +41,18 @@
 from awkward.operations.str.ak_trim import *
 from awkward.operations.str.ak_trim_whitespace import *
 
+# string splitting
+
+# string component extraction
+
+# string joining
+
+# string slicing
+
+from awkward.operations.str.ak_slice import *
+
+# containment tests
+
 
 def _get_action(
     utf8_function, ascii_function, *args, bytestring_to_string=False, **kwargs
diff --git a/src/awkward/operations/str/ak_slice.py b/src/awkward/operations/str/ak_slice.py
new file mode 100644
index 0000000000..7afaab7d93
--- /dev/null
+++ b/src/awkward/operations/str/ak_slice.py
@@ -0,0 +1,69 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("slice",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        start (int): Index to start slicing at (inclusive).
+        stop (None or int): Index to stop slicing at (exclusive). If not given,
+            slicing will stop at the end.
+        step (int): Slice step.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string or bytestring-valued data with a slice between `start` and `stop` indexes; `start` is inclusive and `stop` is exclusive and both are 0-indexed.
+
+    For strings, `start` and `stop` are measured in Unicode characters; for bytestrings, `start` and `stop` are measured in bytes.
+
+    The `start`, `stop`, and `step` are scalars; they cannot be different for each string/bytestring in the sample.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_slice_codeunits](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_slice_codeunits.html)
+    or performs a literal slice on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, start, stop, step, highlevel, behavior)
+
+
+def _impl(array, start, stop, step, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+    from awkward.operations.ak_from_arrow import from_arrow
+    from awkward.operations.ak_to_arrow import to_arrow
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    def action(layout, **absorb):
+        if layout.is_list and layout.parameter("__array__") == "string":
+            return from_arrow(
+                pc.utf8_slice_codeunits(
+                    to_arrow(layout, extensionarray=False), start, stop, step
+                ),
+                highlevel=False,
+            )
+
+        elif layout.is_list and layout.parameter("__array__") == "bytestring":
+            return layout[:, start:stop:step]
+
+    out = ak._do.recursively_apply(ak.operations.to_layout(array), action, behavior)
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index d5604b37a8..51f822d92c 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -506,3 +506,28 @@ def test_trim_whitespace():
         [],
         ["→δε←".encode(), "ζz zζ".encode(), b"abc"],
     ]
+
+
+def test_slice():
+    assert ak.str.slice(string, 1, 3).tolist() == [
+        ["αβγ"[1:3], ""[1:3]],
+        [],
+        ["→δε←"[1:3], "ζz zζ"[1:3], "abc"[1:3]],
+    ]
+    assert ak.str.slice(bytestring, 1, 3).tolist() == [
+        ["αβγ".encode()[1:3], b""[1:3]],
+        [],
+        ["→δε←".encode()[1:3], "ζz zζ".encode()[1:3], b"abc"[1:3]],
+    ]
+
+    # ArrowInvalid: Negative buffer resize: -40 (looks like an Arrow bug)
+    # assert ak.str.slice(string, 1).tolist() == [
+    #     ["αβγ"[1:], ""[1:]],
+    #     [],
+    #     ["→δε←"[1:], "ζz zζ"[1:], "abc"[1:]],
+    # ]
+    assert ak.str.slice(bytestring, 1).tolist() == [
+        ["αβγ".encode()[1:], b""[1:]],
+        [],
+        ["→δε←".encode()[1:], "ζz zζ".encode()[1:], b"abc"[1:]],
+    ]

From 766c9df23b8092c3e02340a47e7250953bc082b9 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 10:13:16 +0100
Subject: [PATCH 33/73] feat: add `split_whitespace`

---
 src/awkward/operations/str/__init__.py        |  1 +
 .../operations/str/ak_split_whitespace.py     | 65 +++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_split_whitespace.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 7bb5fce6db..5224995c87 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -42,6 +42,7 @@
 from awkward.operations.str.ak_trim_whitespace import *
 
 # string splitting
+from awkward.operations.str.ak_split_whitespace import *
 
 # string component extraction
 
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
new file mode 100644
index 0000000000..198aa09ac6
--- /dev/null
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -0,0 +1,65 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("split_whitespace",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def split_whitespace(
+    array, *, max_splits=None, reverse=False, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        max_splits (None or int): Maximum number of splits for each input value. If None, unlimited.
+        reverse (bool): If True, start splitting from the end of each input value; otherwise, start splitting
+            from the beginning of each value. This flag only has an effect if `max_splits` is not None.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Splits any string or bytestring-valued data into a list of substrings according to any non-zero length sequence of
+    whitespace characters.
+
+    For strings, a split is performed for every sequence of Unicode whitespace characters; for bytestrings, splitting
+    is performed for sequences of ASCII whitespace characters.
+
+    The `max_splits` and `reverse` arguments are scalars; they cannot be different for each string/bytestring in the
+    sample.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.utf8_split_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_split_whitespace.html)
+    or [pyarrow.compute.ascii_split_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_split_whitespace.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, max_splits, reverse, highlevel, behavior)
+
+
+def _impl(array, max_splits, reverse, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+    action = ak.operations.str._get_action(
+        pc.utf8_split_whitespace,
+        pc.ascii_split_whitespace,
+        max_splits=max_splits,
+        reverse=reverse,
+        bytestring_to_string=True,
+    )
+    out = ak._do.recursively_apply(ak.operations.to_layout(array), action, behavior)
+
+    return wrap_layout(out, behavior, highlevel)

From c25a5584a33a2faeea81d6aef2d3a82c94b3d292 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 10:17:57 +0100
Subject: [PATCH 34/73] test: add test for `split_whitespace`

---
 tests/test_2616_use_pyarrow_for_strings.py | 39 ++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 51f822d92c..d56b0413dc 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -531,3 +531,42 @@ def test_slice():
         [],
         ["→δε←".encode()[1:], "ζz zζ".encode()[1:], b"abc"[1:]],
     ]
+
+
+def test_split_whitespace():
+    assert ak.str.split_whitespace(string_padded, max_splits=1).tolist() == [
+        [["", "αβγ      "], ["", " "]],
+        [],
+        [["", "→δε←      "], ["", "ζz zζ     "], ["", "abc      "]],
+    ]
+    assert ak.str.split_whitespace(
+        string_padded, max_splits=1, reverse=True
+    ).tolist() == [
+        [["      αβγ", ""], [" ", ""]],
+        [],
+        [["     →δε←", ""], ["     ζz zζ", ""], ["      abc", ""]],
+    ]
+    assert ak.str.split_whitespace(string_padded, max_splits=None).tolist() == [
+        [["", "αβγ", "", ""], ["", "", ""]],
+        [],
+        [["", "→δε←", "", ""], ["", "ζz", "zζ", "", ""], ["", "abc", "", ""]],
+    ]
+
+    # Bytestrings
+    assert ak.str.split_whitespace(bytestring_padded, max_splits=1).tolist() == [
+        [["", "αβγ     "], ["", ""]],
+        [],
+        [["", "→δε←   "], ["", "ζz zζ    "], ["", "abc      "]],
+    ]
+    assert ak.str.split_whitespace(
+        bytestring_padded, max_splits=1, reverse=True
+    ).tolist() == [
+        [["    αβγ", ""], ["", ""]],
+        [],
+        [["  →δε←", ""], ["    ζz zζ", ""], ["      abc", ""]],
+    ]
+    assert ak.str.split_whitespace(bytestring_padded, max_splits=None).tolist() == [
+        [["", "αβγ", ""], ["", ""]],
+        [],
+        [["", "→δε←", ""], ["", "ζz", "zζ", ""], ["", "abc", ""]],
+    ]

From ddc9bc724bf1331c4d125cf8a3a2a76c1c76f2a1 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 11:20:22 +0100
Subject: [PATCH 35/73] test: correct test

---
 tests/test_2616_use_pyarrow_for_strings.py | 24 ++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index d56b0413dc..f4eb716d18 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -554,19 +554,31 @@ def test_split_whitespace():
 
     # Bytestrings
     assert ak.str.split_whitespace(bytestring_padded, max_splits=1).tolist() == [
-        [["", "αβγ     "], ["", ""]],
+        [[b"", "αβγ     ".encode()], [b"", b""]],
         [],
-        [["", "→δε←   "], ["", "ζz zζ    "], ["", "abc      "]],
+        [
+            [b"", "→δε←   ".encode()],
+            [b"", "ζz zζ    ".encode()],
+            [b"", b"abc      "],
+        ],
     ]
     assert ak.str.split_whitespace(
         bytestring_padded, max_splits=1, reverse=True
     ).tolist() == [
-        [["    αβγ", ""], ["", ""]],
+        [["    αβγ".encode(), b""], [b"", b""]],
         [],
-        [["  →δε←", ""], ["    ζz zζ", ""], ["      abc", ""]],
+        [
+            ["  →δε←".encode(), b""],
+            ["    ζz zζ".encode(), b""],
+            [b"      abc", b""],
+        ],
     ]
     assert ak.str.split_whitespace(bytestring_padded, max_splits=None).tolist() == [
-        [["", "αβγ", ""], ["", ""]],
+        [[b"", "αβγ".encode(), b""], [b"", b""]],
         [],
-        [["", "→δε←", ""], ["", "ζz", "zζ", ""], ["", "abc", ""]],
+        [
+            [b"", "→δε←".encode(), b""],
+            [b"", "ζz".encode(), "zζ".encode(), b""],
+            [b"", b"abc", b""],
+        ],
     ]

From 5638a79b12b18d4d28f6fdf1af16c14c9695c4ee Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 11:20:31 +0100
Subject: [PATCH 36/73] feat: add `split_pattern`

---
 src/awkward/operations/str/__init__.py        |  1 +
 .../operations/str/ak_split_pattern.py        | 58 +++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_split_pattern.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 5224995c87..cb70649438 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -43,6 +43,7 @@
 
 # string splitting
 from awkward.operations.str.ak_split_whitespace import *
+from awkward.operations.str.ak_split_pattern import *
 
 # string component extraction
 
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
new file mode 100644
index 0000000000..d8d952db25
--- /dev/null
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -0,0 +1,58 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("split_pattern",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def split_pattern(
+    array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str or bytes): Individual characters to be trimmed from the string.
+        max_splits (None or int): Maximum number of splits for each input value. If None, unlimited.
+        reverse (bool): If True, start splitting from the end of each input value; otherwise, start splitting
+            from the beginning of each value. This flag only has an effect if `max_splits` is not None.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Splits any string or bytestring-valued data into a list of substrings according to the given separator.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, max_splits, reverse, highlevel, behavior)
+
+
+def _impl(array, pattern, max_splits, reverse, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+    action = ak.operations.str._get_action(
+        pc.split_pattern,
+        pc.split_pattern,
+        pattern=pattern,
+        max_splits=max_splits,
+        reverse=reverse,
+        bytestring_to_string=True,
+    )
+    out = ak._do.recursively_apply(ak.operations.to_layout(array), action, behavior)
+
+    return wrap_layout(out, behavior, highlevel)

From 3ef7ded2ea94a9e822667f143ebf5a83f1299ed1 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 12:43:02 +0100
Subject: [PATCH 37/73] refactor: rename `_get_action`

---
 src/awkward/contents/unmaskedarray.py         |  2 +-
 src/awkward/operations/str/__init__.py        | 87 ++++++++++++++++++-
 src/awkward/operations/str/ak_capitalize.py   |  2 +-
 src/awkward/operations/str/ak_center.py       |  2 +-
 src/awkward/operations/str/ak_is_alnum.py     |  2 +-
 src/awkward/operations/str/ak_is_alpha.py     |  2 +-
 src/awkward/operations/str/ak_is_ascii.py     |  2 +-
 src/awkward/operations/str/ak_is_decimal.py   |  2 +-
 src/awkward/operations/str/ak_is_digit.py     |  2 +-
 src/awkward/operations/str/ak_is_lower.py     |  2 +-
 src/awkward/operations/str/ak_is_numeric.py   |  2 +-
 src/awkward/operations/str/ak_is_printable.py |  2 +-
 src/awkward/operations/str/ak_is_space.py     |  2 +-
 src/awkward/operations/str/ak_is_title.py     |  2 +-
 src/awkward/operations/str/ak_is_upper.py     |  2 +-
 src/awkward/operations/str/ak_length.py       |  2 +-
 src/awkward/operations/str/ak_lower.py        |  2 +-
 src/awkward/operations/str/ak_lpad.py         |  2 +-
 src/awkward/operations/str/ak_ltrim.py        |  2 +-
 .../operations/str/ak_ltrim_whitespace.py     |  2 +-
 .../operations/str/ak_replace_slice.py        |  2 +-
 .../operations/str/ak_replace_substring.py    |  2 +-
 .../str/ak_replace_substring_regex.py         |  2 +-
 src/awkward/operations/str/ak_reverse.py      |  2 +-
 src/awkward/operations/str/ak_rpad.py         |  2 +-
 src/awkward/operations/str/ak_rtrim.py        |  2 +-
 .../operations/str/ak_rtrim_whitespace.py     |  2 +-
 .../operations/str/ak_split_pattern.py        |  6 +-
 .../operations/str/ak_split_whitespace.py     |  3 +-
 src/awkward/operations/str/ak_swapcase.py     |  2 +-
 src/awkward/operations/str/ak_title.py        |  2 +-
 src/awkward/operations/str/ak_trim.py         |  2 +-
 .../operations/str/ak_trim_whitespace.py      |  2 +-
 src/awkward/operations/str/ak_upper.py        |  2 +-
 34 files changed, 120 insertions(+), 38 deletions(-)

diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py
index 804bf02c7b..12c31e4bc0 100644
--- a/src/awkward/contents/unmaskedarray.py
+++ b/src/awkward/contents/unmaskedarray.py
@@ -491,7 +491,7 @@ def _remove_structure(self, backend, options):
             return [self]
 
     def _drop_none(self) -> Content:
-        return self.to_ByteMaskedArray(True)._drop_none()
+        return self.content[:0]
 
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options
diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index cb70649438..260cb9c1b3 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -56,8 +56,13 @@
 # containment tests
 
 
-def _get_action(
-    utf8_function, ascii_function, *args, bytestring_to_string=False, **kwargs
+def _get_ufunc_action(
+    utf8_function,
+    ascii_function,
+    *args,
+    bytestring_to_string=False,
+    drop_unmasked_option=False,
+    **kwargs,
 ):
     from awkward.operations.ak_from_arrow import from_arrow
     from awkward.operations.ak_to_arrow import to_arrow
@@ -103,3 +108,81 @@ def action(layout, **absorb):
                 )
 
     return action
+
+
+def _erase_list_option(layout):
+    from awkward.contents.unmaskedarray import UnmaskedArray
+
+    assert layout.is_list
+    if layout.content.is_option:
+        assert isinstance(layout.content, UnmaskedArray)
+        return layout.copy(content=layout.content.content)
+    else:
+        return layout
+
+
+def _get_split_action(
+    utf8_function, ascii_function, *args, bytestring_to_string=False, **kwargs
+):
+    from awkward.operations.ak_from_arrow import from_arrow
+    from awkward.operations.ak_to_arrow import to_arrow
+
+    def action(layout, **absorb):
+        if layout.is_list and layout.parameter("__array__") == "string":
+            return _erase_list_option(
+                from_arrow(
+                    utf8_function(
+                        to_arrow(layout, extensionarray=False),
+                        *args,
+                        **kwargs,
+                    ),
+                    highlevel=False,
+                )
+            )
+
+        elif layout.is_list and layout.parameter("__array__") == "bytestring":
+            if bytestring_to_string:
+                out = _erase_list_option(
+                    from_arrow(
+                        ascii_function(
+                            to_arrow(
+                                layout.copy(
+                                    content=layout.content.copy(
+                                        parameters={"__array__": "char"}
+                                    ),
+                                    parameters={"__array__": "string"},
+                                ),
+                                extensionarray=False,
+                            ),
+                            *args,
+                            **kwargs,
+                        ),
+                        highlevel=False,
+                    )
+                )
+                assert out.is_list
+
+                assert (
+                    out.content.is_list
+                    and out.content.parameter("__array__") == "string"
+                )
+                return out.copy(
+                    content=out.content.copy(
+                        content=out.content.content.copy(
+                            parameters={"__array__": "byte"}
+                        ),
+                        parameters={"__array__": "bytestring"},
+                    ),
+                )
+
+            else:
+                return _erase_list_option(
+                    from_arrow(
+                        ascii_function(
+                            to_arrow(layout, extensionarray=False), *args, **kwargs
+                        ),
+                        highlevel=False,
+                    )
+                )
+
+    return action
diff --git a/src/awkward/operations/str/ak_capitalize.py b/src/awkward/operations/str/ak_capitalize.py
index 84e2843e00..9400c21c9e 100644
--- a/src/awkward/operations/str/ak_capitalize.py
+++ b/src/awkward/operations/str/ak_capitalize.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_capitalize, pc.ascii_capitalize, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
index 284e6595c3..9bd2246673 100644
--- a/src/awkward/operations/str/ak_center.py
+++ b/src/awkward/operations/str/ak_center.py
@@ -52,7 +52,7 @@ def _impl(array, width, padding, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_center, pc.ascii_center, width, padding, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index ac28e085b8..2f93d87982 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_alnum, pc.ascii_is_alnum, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
index 283ad5a4c6..c40f612e75 100644
--- a/src/awkward/operations/str/ak_is_alpha.py
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_alpha, pc.ascii_is_alpha, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_ascii.py b/src/awkward/operations/str/ak_is_ascii.py
index c00d349048..bc588f2888 100644
--- a/src/awkward/operations/str/ak_is_ascii.py
+++ b/src/awkward/operations/str/ak_is_ascii.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.string_is_ascii, pc.string_is_ascii, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index 8a2f4b0fe7..26ff606bd0 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_decimal, pc.ascii_is_decimal, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
index 3cd5f343ae..338b86d30a 100644
--- a/src/awkward/operations/str/ak_is_digit.py
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -49,7 +49,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_digit, pc.utf8_is_digit, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
index 74c832ba77..87dd3462a6 100644
--- a/src/awkward/operations/str/ak_is_lower.py
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_lower, pc.ascii_is_lower, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
index 9bf89c814a..437ff31b47 100644
--- a/src/awkward/operations/str/ak_is_numeric.py
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -49,7 +49,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_numeric, pc.utf8_is_numeric, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
index cf42bfcc97..24c5184fde 100644
--- a/src/awkward/operations/str/ak_is_printable.py
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_printable, pc.ascii_is_printable, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
index 00ace2eb51..5b69031d1f 100644
--- a/src/awkward/operations/str/ak_is_space.py
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_space, pc.ascii_is_space, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_title.py b/src/awkward/operations/str/ak_is_title.py
index e463d00685..5275a1df0e 100644
--- a/src/awkward/operations/str/ak_is_title.py
+++ b/src/awkward/operations/str/ak_is_title.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_title, pc.ascii_is_title, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/ak_is_upper.py
index 8cff3a78bc..fa20f04fe6 100644
--- a/src/awkward/operations/str/ak_is_upper.py
+++ b/src/awkward/operations/str/ak_is_upper.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_is_upper,
             pc.ascii_is_upper,
             # pc.ascii_is_upper is defined on binary, but for consistency with is_lower and is_title...
diff --git a/src/awkward/operations/str/ak_length.py b/src/awkward/operations/str/ak_length.py
index f77ce22f76..e5ef1c7b84 100644
--- a/src/awkward/operations/str/ak_length.py
+++ b/src/awkward/operations/str/ak_length.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_length, pc.binary_length, bytestring_to_string=False
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_lower.py b/src/awkward/operations/str/ak_lower.py
index 92766b8f48..971ffe043e 100644
--- a/src/awkward/operations/str/ak_lower.py
+++ b/src/awkward/operations/str/ak_lower.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_lower, pc.ascii_lower, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/ak_lpad.py
index 2aad079ea7..909f1663d9 100644
--- a/src/awkward/operations/str/ak_lpad.py
+++ b/src/awkward/operations/str/ak_lpad.py
@@ -52,7 +52,7 @@ def _impl(array, width, padding, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_lpad, pc.ascii_lpad, width, padding, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
index 1a1959bdb6..0180270067 100644
--- a/src/awkward/operations/str/ak_ltrim.py
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -51,7 +51,7 @@ def _impl(array, characters, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_ltrim, pc.ascii_ltrim, characters, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_ltrim_whitespace.py b/src/awkward/operations/str/ak_ltrim_whitespace.py
index f465f81e13..e415a1400f 100644
--- a/src/awkward/operations/str/ak_ltrim_whitespace.py
+++ b/src/awkward/operations/str/ak_ltrim_whitespace.py
@@ -46,7 +46,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_ltrim_whitespace,
             pc.ascii_ltrim_whitespace,
             bytestring_to_string=True,
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index e569458b66..cd80f111aa 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -53,7 +53,7 @@ def _impl(array, start, stop, replacement, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_replace_slice, pc.binary_replace_slice, start, stop, replacement
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index a589afe136..691e9fd3e7 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -55,7 +55,7 @@ def _impl(array, pattern, replacement, max_replacements, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.replace_substring,
             pc.replace_substring,
             pattern,
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index be63772e61..77dc2c12b2 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -55,7 +55,7 @@ def _impl(array, pattern, replacement, max_replacements, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.replace_substring_regex,
             pc.replace_substring_regex,
             pattern,
diff --git a/src/awkward/operations/str/ak_reverse.py b/src/awkward/operations/str/ak_reverse.py
index 627f8a95cf..6f15db9df8 100644
--- a/src/awkward/operations/str/ak_reverse.py
+++ b/src/awkward/operations/str/ak_reverse.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_reverse, pc.binary_reverse, bytestring_to_string=False
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/ak_rpad.py
index 5146abb6bb..da0cf61fb6 100644
--- a/src/awkward/operations/str/ak_rpad.py
+++ b/src/awkward/operations/str/ak_rpad.py
@@ -52,7 +52,7 @@ def _impl(array, width, padding, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_rpad, pc.ascii_rpad, width, padding, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
index db5f8f7344..3d1d518754 100644
--- a/src/awkward/operations/str/ak_rtrim.py
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -51,7 +51,7 @@ def _impl(array, characters, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_rtrim, pc.ascii_rtrim, characters, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_rtrim_whitespace.py b/src/awkward/operations/str/ak_rtrim_whitespace.py
index 17df969275..e2064bc412 100644
--- a/src/awkward/operations/str/ak_rtrim_whitespace.py
+++ b/src/awkward/operations/str/ak_rtrim_whitespace.py
@@ -46,7 +46,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_rtrim_whitespace,
             pc.ascii_rtrim_whitespace,
             bytestring_to_string=True,
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
index d8d952db25..b94187d9fe 100644
--- a/src/awkward/operations/str/ak_split_pattern.py
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -16,7 +16,7 @@ def split_pattern(
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str or bytes): Individual characters to be trimmed from the string.
+        pattern (str or bytes): Pattern of characters/bytes to split on.
         max_splits (None or int): Maximum number of splits for each input value. If None, unlimited.
         reverse (bool): If True, start splitting from the end of each input value; otherwise, start splitting
             from the beginning of each value. This flag only has an effect if `max_splits` is not None.
@@ -45,13 +45,13 @@ def _impl(array, pattern, max_splits, reverse, highlevel, behavior):
     import pyarrow.compute as pc
 
     behavior = behavior_of(array, behavior=behavior)
-    action = ak.operations.str._get_action(
+    action = ak.operations.str._get_split_action(
         pc.split_pattern,
         pc.split_pattern,
         pattern=pattern,
         max_splits=max_splits,
         reverse=reverse,
-        bytestring_to_string=True,
+        bytestring_to_string=False,
     )
     out = ak._do.recursively_apply(ak.operations.to_layout(array), action, behavior)
 
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
index 198aa09ac6..07be7a0e5c 100644
--- a/src/awkward/operations/str/ak_split_whitespace.py
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -49,11 +49,10 @@ def split_whitespace(
 
 def _impl(array, max_splits, reverse, highlevel, behavior):
     import awkward._connect.pyarrow  # noqa: F401, I001
-
     import pyarrow.compute as pc
 
     behavior = behavior_of(array, behavior=behavior)
-    action = ak.operations.str._get_action(
+    action = ak.operations.str._get_split_action(
         pc.utf8_split_whitespace,
         pc.ascii_split_whitespace,
         max_splits=max_splits,
diff --git a/src/awkward/operations/str/ak_swapcase.py b/src/awkward/operations/str/ak_swapcase.py
index 1ff02dabad..36d6d53e11 100644
--- a/src/awkward/operations/str/ak_swapcase.py
+++ b/src/awkward/operations/str/ak_swapcase.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_swapcase, pc.ascii_swapcase, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_title.py b/src/awkward/operations/str/ak_title.py
index 8314002311..cdd147c012 100644
--- a/src/awkward/operations/str/ak_title.py
+++ b/src/awkward/operations/str/ak_title.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_title, pc.ascii_title, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index d932016b3f..c43df209be 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -51,7 +51,7 @@ def _impl(array, characters, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_trim, pc.ascii_trim, characters, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_trim_whitespace.py b/src/awkward/operations/str/ak_trim_whitespace.py
index 891c6d706e..197aa777cd 100644
--- a/src/awkward/operations/str/ak_trim_whitespace.py
+++ b/src/awkward/operations/str/ak_trim_whitespace.py
@@ -46,7 +46,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_trim_whitespace, pc.ascii_trim_whitespace, bytestring_to_string=True
         ),
         behavior,
diff --git a/src/awkward/operations/str/ak_upper.py b/src/awkward/operations/str/ak_upper.py
index f4ae131af7..776b0526c0 100644
--- a/src/awkward/operations/str/ak_upper.py
+++ b/src/awkward/operations/str/ak_upper.py
@@ -47,7 +47,7 @@ def _impl(array, highlevel, behavior):
 
     out = ak._do.recursively_apply(
         ak.operations.to_layout(array),
-        ak.operations.str._get_action(
+        ak.operations.str._get_ufunc_action(
             pc.utf8_upper, pc.ascii_upper, bytestring_to_string=True
         ),
         behavior,

From 65d216696eae1157bd2dac0a4ab58c6b6db435c1 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 12:43:15 +0100
Subject: [PATCH 38/73] feat: add `ak_split_pattern_regex`

---
 src/awkward/operations/str/__init__.py        |  1 +
 .../operations/str/ak_split_pattern_regex.py  | 58 +++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_split_pattern_regex.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 260cb9c1b3..434d9150da 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -44,6 +44,7 @@
 # string splitting
 from awkward.operations.str.ak_split_whitespace import *
 from awkward.operations.str.ak_split_pattern import *
+from awkward.operations.str.ak_split_pattern_regex import *
 
 # string component extraction
 
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/ak_split_pattern_regex.py
new file mode 100644
index 0000000000..56a7876efd
--- /dev/null
+++ b/src/awkward/operations/str/ak_split_pattern_regex.py
@@ -0,0 +1,58 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("split_pattern_regex",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def split_pattern_regex(
+    array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str or bytes): Regular expression of characters/bytes to split on.
+        max_splits (None or int): Maximum number of splits for each input value. If None, unlimited.
+        reverse (bool): If True, start splitting from the end of each input value; otherwise, start splitting
+            from the beginning of each value. This flag only has an effect if `max_splits` is not None. Note: pyarrow may raise ArrowNotImplementedError for reverse splitting with a regular expression.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Splits any string or bytestring-valued data into a list of substrings according to the given regular expression.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls `pyarrow.compute.split_pattern_regex`, the regular-expression variant of
+    [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, max_splits, reverse, highlevel, behavior)
+
+
+def _impl(array, pattern, max_splits, reverse, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+    action = ak.operations.str._get_split_action(
+        pc.split_pattern_regex,
+        pc.split_pattern_regex,
+        pattern=pattern,
+        max_splits=max_splits,
+        reverse=reverse,
+        bytestring_to_string=False,
+    )
+    out = ak._do.recursively_apply(ak.operations.to_layout(array), action, behavior)
+
+    return wrap_layout(out, behavior, highlevel)

From 0e267980f789d857d06d5e9228ba98a92dc76508 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 13:09:38 +0100
Subject: [PATCH 39/73] test: update tests for new features

---
 tests/test_2616_use_pyarrow_for_strings.py | 104 ++++++++++++++++++++-
 1 file changed, 103 insertions(+), 1 deletion(-)

diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index f4eb716d18..8af0023a0a 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -4,7 +4,7 @@
 
 import awkward as ak
 
-pytest.importorskip("pyarrow")
+pyarrow = pytest.importorskip("pyarrow")
 
 string = ak.Array(
     [
@@ -40,6 +40,14 @@
     ]
 )
 
+string_repeats = ak.Array(
+    [["foo123bar123baz", "foo", "bar"], ["123foo", "456bar", "foo123456bar"], []]
+)
+
+bytestring_repeats = ak.Array(
+    [[b"foo123bar123baz", b"foo", b"bar"], [b"123foo", b"456bar", b"foo123456bar"], []]
+)
+
 
 def test_is_alnum():
     assert ak.str.is_alnum(string).tolist() == [
@@ -582,3 +590,97 @@ def test_split_whitespace():
             [b"", b"abc", b""],
         ],
     ]
+
+
+def test_split_pattern():
+    assert ak.str.split_pattern(string_repeats, "123", max_splits=1).tolist() == [
+        [["foo", "bar123baz"], ["foo"], ["bar"]],
+        [["", "foo"], ["456bar"], ["foo", "456bar"]],
+        [],
+    ]
+    assert ak.str.split_pattern(
+        string_repeats, "123", max_splits=1, reverse=True
+    ).tolist() == [
+        [["foo123bar", "baz"], ["foo"], ["bar"]],
+        [["", "foo"], ["456bar"], ["foo", "456bar"]],
+        [],
+    ]
+    assert ak.str.split_pattern(string_repeats, "123", max_splits=None).tolist() == [
+        [["foo", "bar", "baz"], ["foo"], ["bar"]],
+        [["", "foo"], ["456bar"], ["foo", "456bar"]],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.split_pattern(bytestring_repeats, b"123", max_splits=1).tolist() == [
+        [[b"foo", b"bar123baz"], [b"foo"], [b"bar"]],
+        [[b"", b"foo"], [b"456bar"], [b"foo", b"456bar"]],
+        [],
+    ]
+    assert ak.str.split_pattern(
+        bytestring_repeats, b"123", max_splits=1, reverse=True
+    ).tolist() == [
+        [[b"foo123bar", b"baz"], [b"foo"], [b"bar"]],
+        [[b"", b"foo"], [b"456bar"], [b"foo", b"456bar"]],
+        [],
+    ]
+    assert ak.str.split_pattern(
+        bytestring_repeats, b"123", max_splits=None
+    ).tolist() == [
+        [[b"foo", b"bar", b"baz"], [b"foo"], [b"bar"]],
+        [[b"", b"foo"], [b"456bar"], [b"foo", b"456bar"]],
+        [],
+    ]
+
+
+def test_split_pattern_regex():
+    assert ak.str.split_pattern_regex(
+        string_repeats, r"\d{3}", max_splits=1
+    ).tolist() == [
+        [["foo", "bar123baz"], ["foo"], ["bar"]],
+        [["", "foo"], ["", "bar"], ["foo", "456bar"]],
+        [],
+    ]
+    with pytest.raises(
+        pyarrow.ArrowNotImplementedError, match=r"split in reverse with regex"
+    ):
+        assert ak.str.split_pattern_regex(
+            string_repeats, r"\d{3}", max_splits=1, reverse=True
+        ).tolist() == [
+            [["foo123bar", "baz"], ["foo"], ["bar"]],
+            [["", "foo"], ["", "bar"], ["foo", "456bar"]],
+            [],
+        ]
+    assert ak.str.split_pattern_regex(
+        string_repeats, r"\d{3}", max_splits=None
+    ).tolist() == [
+        [["foo", "bar", "baz"], ["foo"], ["bar"]],
+        [["", "foo"], ["", "bar"], ["foo", "", "bar"]],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.split_pattern_regex(
+        bytestring_repeats, rb"\d{3}", max_splits=1
+    ).tolist() == [
+        [[b"foo", b"bar123baz"], [b"foo"], [b"bar"]],
+        [[b"", b"foo"], [b"", b"bar"], [b"foo", b"456bar"]],
+        [],
+    ]
+    with pytest.raises(
+        pyarrow.ArrowNotImplementedError, match=r"split in reverse with regex"
+    ):
+        assert ak.str.split_pattern_regex(
+            bytestring_repeats, rb"\d{3}", max_splits=1, reverse=True
+        ).tolist() == [
+            [[b"foo123bar", b"baz"], [b"foo"], [b"bar"]],
+            [[b"", b"foo"], [b"", b"bar"], [b"foo", b"456bar"]],
+            [],
+        ]
+    assert ak.str.split_pattern_regex(
+        bytestring_repeats, rb"\d{3}", max_splits=None
+    ).tolist() == [
+        [[b"foo", b"bar", b"baz"], [b"foo"], [b"bar"]],
+        [[b"", b"foo"], [b"", b"bar"], [b"foo", b"", b"bar"]],
+        [],
+    ]

From 5ec706cabadb8d91151b183d0f5b69f436f6d27e Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Mon, 7 Aug 2023 13:04:30 -0500
Subject: [PATCH 40/73] Fixed UnmaskedArray._drop_none.

---
 src/awkward/contents/unmaskedarray.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py
index 12c31e4bc0..4431eb6cb6 100644
--- a/src/awkward/contents/unmaskedarray.py
+++ b/src/awkward/contents/unmaskedarray.py
@@ -491,7 +491,7 @@ def _remove_structure(self, backend, options):
             return [self]
 
     def _drop_none(self) -> Content:
-        return self.content[:0]
+        return self.content
 
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options

From bd8e2e6fca5000fa0643baba9ded846fc6ce220f Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@users.noreply.github.com>
Date: Mon, 7 Aug 2023 13:42:26 -0500
Subject: [PATCH 41/73] fix: adjust for numexpr 2.8.5, which hid getContext's
 frame_depth argument (#2617)


From 73c81217c92e021d1f9c78ca8fd1d29650c6cf91 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Mon, 7 Aug 2023 15:07:25 -0500
Subject: [PATCH 42/73] extract_regex.

---
 src/awkward/operations/str/__init__.py        |  2 +
 .../operations/str/ak_extract_regex.py        | 78 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 28 +++++++
 3 files changed, 108 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_extract_regex.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 434d9150da..76eca3363a 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -48,6 +48,8 @@
 
 # string component extraction
 
+from awkward.operations.str.ak_extract_regex import *
+
 # string joining
 
 # string slicing
diff --git a/src/awkward/operations/str/ak_extract_regex.py b/src/awkward/operations/str/ak_extract_regex.py
new file mode 100644
index 0000000000..9a4aecd038
--- /dev/null
+++ b/src/awkward/operations/str/ak_extract_regex.py
@@ -0,0 +1,78 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("extract_regex",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def extract_regex(array, pattern, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str or bytes): Regular expression with named capture fields.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Replaces any string or bytestring-valued data with a record whose fields are the named capture groups of `pattern` and whose values are the substrings they captured, or with None where `pattern` does not match.
+
+    Uses [Google RE2](https://github.com/google/re2/wiki/Syntax), and `pattern` must
+    contain named groups. The syntax for a named group is `(?P<...>...)` in which
+    the first `...` is a name and the last `...` is a regular expression.
+
+    For example,
+
+        >>> array = ak.Array([["one1", "two2", "three3"], [], ["four4", "five5"]])
+        >>> result = ak.str.extract_regex(array, "(?P<vowel>[aeiou])(?P<number>[0-9]+)")
+        >>> result.show(type=True)
+        type: 3 * var * ?{
+            vowel: ?string,
+            number: ?string
+        }
+        [[{vowel: 'e', number: '1'}, {vowel: 'o', number: '2'}, {vowel: 'e', number: '3'}],
+         [],
+         [None, {vowel: 'e', number: '5'}]]
+
+    (The string `"four4"` does not match because the vowel is not immediately before
+    the number.)
+
+    Regular expressions with unnamed groups or features not implemented by RE2 raise an error.
+
+    Note: this function does not raise an error if the `array` does
+    not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.extract_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.extract_regex.html)
+    or
+    [pyarrow.compute.extract_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.extract_regex.html)
+    on strings and bytestrings, respectively.
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, highlevel, behavior)
+
+
+def _impl(array, pattern, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    behavior = behavior_of(array, behavior=behavior)
+
+    out = ak._do.recursively_apply(
+        ak.operations.to_layout(array),
+        ak.operations.str._get_ufunc_action(
+            pc.extract_regex, pc.extract_regex, pattern, bytestring_to_string=False
+        ),
+        behavior,
+    )
+
+    return wrap_layout(out, behavior, highlevel)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 8af0023a0a..d276bbd579 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -684,3 +684,31 @@ def test_split_pattern_regex():
         [[b"", b"foo"], [b"", b"bar"], [b"foo", b"", b"bar"]],
         [],
     ]
+
+
+def test_extract_regex():
+    assert ak.str.extract_regex(
+        ak.Array([["one1", "two2", "three3"], [], ["four4", "five5"]]),
+        "(?P<vowel>[aeiou])(?P<number>[0-9]+)",
+    ).tolist() == [
+        [
+            {"vowel": "e", "number": "1"},
+            {"vowel": "o", "number": "2"},
+            {"vowel": "e", "number": "3"},
+        ],
+        [],
+        [None, {"vowel": "e", "number": "5"}],
+    ]
+
+    assert ak.str.extract_regex(
+        ak.Array([[b"one1", b"two2", b"three3"], [], [b"four4", b"five5"]]),
+        b"(?P<vowel>[aeiou])(?P<number>[0-9]+)",
+    ).tolist() == [
+        [
+            {"vowel": b"e", "number": b"1"},
+            {"vowel": b"o", "number": b"2"},
+            {"vowel": b"e", "number": b"3"},
+        ],
+        [],
+        [None, {"vowel": b"e", "number": b"5"}],
+    ]

From dc0746ca747282b30f717f6b2de4b0992c0cd932 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Mon, 7 Aug 2023 15:23:25 -0500
Subject: [PATCH 43/73] join (almost entirely from
 https://gist.github.com/agoose77/28e5bb0250678e454356a85861a16368)

---
 src/awkward/operations/str/__init__.py     |   2 +
 src/awkward/operations/str/ak_join.py      | 122 +++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py |  30 +++++
 3 files changed, 154 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_join.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 76eca3363a..762a90a0be 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -52,6 +52,8 @@
 
 # string joining
 
+from awkward.operations.str.ak_join import *
+
 # string slicing
 
 from awkward.operations.str.ak_slice import *
diff --git a/src/awkward/operations/str/ak_join.py b/src/awkward/operations/str/ak_join.py
new file mode 100644
index 0000000000..622c483c22
--- /dev/null
+++ b/src/awkward/operations/str/ak_join.py
@@ -0,0 +1,122 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("join",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def join(array, separator, *, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        separator (str, bytes, or array of them to broadcast): separator to insert
+            between strings. If array-like, `separator` is broadcast against `array`
+            which permits a unique separator for each list of strings.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Concatenate the strings in `array`. The separator is inserted between each string.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.binary_join](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_join.html).
+    """
+    # Dispatch
+    yield (array, separator)
+
+    # Implementation
+    return _impl(array, separator, highlevel, behavior)
+
+
+def _is_maybe_optional_list_of_string(layout):
+    if layout.is_list and layout.parameter("__array__") in {"string", "bytestring"}:
+        return True
+    elif layout.is_option or layout.is_indexed:
+        return _is_maybe_optional_list_of_string(layout.content)
+    else:
+        return False
+
+
+def _impl(array, separator, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+    from awkward.operations.ak_from_arrow import from_arrow
+    from awkward.operations.ak_to_arrow import to_arrow
+
+    import pyarrow.compute as pc
+
+    def apply_unary(layout, **kwargs):
+        if not (layout.is_list and layout.purelist_depth == 2):
+            return
+
+        if not _is_maybe_optional_list_of_string(layout.content):
+            return
+
+        # We have (maybe option/indexed type wrapping) strings
+
+        arrow_array = to_arrow(
+            # Arrow needs an option type here
+            layout.copy(content=ak.contents.UnmaskedArray.simplified(layout.content)),
+            extensionarray=False,
+            # This kernel requires non-large string/bytestrings
+            string_to32=True,
+            bytestring_to32=True,
+        )
+        return from_arrow(
+            pc.binary_join(arrow_array, separator),
+            highlevel=False,
+        )
+
+    def apply_binary(layouts, **kwargs):
+        layout, separator_layout = layouts
+        if not (layout.is_list and layout.purelist_depth == 2):
+            return
+
+        if not _is_maybe_optional_list_of_string(layout.content):
+            return
+
+        if not _is_maybe_optional_list_of_string(separator_layout):
+            raise TypeError(
+                f"separator must be a list of strings, not {type(separator_layout)}"
+            )
+
+        # We have (maybe option/indexed type wrapping) strings
+        layout_arrow = to_arrow(
+            # Arrow needs an option type here
+            layout.copy(content=ak.contents.UnmaskedArray.simplified(layout.content)),
+            extensionarray=False,
+            # This kernel requires non-large string/bytestrings
+            string_to32=True,
+            bytestring_to32=True,
+        )
+        separator_arrow = to_arrow(
+            separator_layout,
+            extensionarray=False,
+            # This kernel requires non-large string/bytestrings
+            string_to32=True,
+            bytestring_to32=True,
+        )
+        return (
+            from_arrow(
+                pc.binary_join(layout_arrow, separator_arrow),
+                highlevel=False,
+            ),
+        )
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, separator, behavior=behavior)
+    if isinstance(separator, (bytes, str)):
+        out = ak._do.recursively_apply(layout, apply_unary, behavior=behavior)
+    else:
+        separator_layout = ak.to_layout(separator, allow_record=False, allow_other=True)
+        (out,) = ak._broadcasting.broadcast_and_apply(
+            (layout, separator_layout), apply_binary, behavior
+        )
+
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index d276bbd579..cd5437cdb3 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -712,3 +712,33 @@ def test_extract_regex():
         [],
         [None, {"vowel": b"e", "number": b"5"}],
     ]
+
+
+def test_join():
+    array1 = ak.Array(
+        [
+            ["this", "that"],
+            [],
+            ["foo", "bar", "baz"],
+        ]
+    )
+    assert ak.str.join(array1, "-").tolist() == ["this-that", "", "foo-bar-baz"]
+
+    separator = ak.Array(["→", "↑", "←"])
+    assert ak.str.join(array1, separator).tolist() == ["this→that", "", "foo←bar←baz"]
+
+    array2 = ak.Array(
+        [
+            [b"this", b"that"],
+            [],
+            [b"foo", b"bar", b"baz"],
+        ]
+    )
+    assert ak.str.join(array2, b"-").tolist() == [b"this-that", b"", b"foo-bar-baz"]
+
+    separator = ak.Array(["→".encode(), "↑".encode(), "←".encode()])
+    assert ak.str.join(array2, separator).tolist() == [
+        "this→that".encode(),
+        b"",
+        "foo←bar←baz".encode(),
+    ]

From 43aa272e21e683aff4547720644f067f78f35059 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Mon, 7 Aug 2023 15:29:49 -0500
Subject: [PATCH 44/73] use dispatch correctly

---
 src/awkward/operations/str/ak_center.py                  | 2 +-
 src/awkward/operations/str/ak_extract_regex.py           | 2 +-
 src/awkward/operations/str/ak_lpad.py                    | 2 +-
 src/awkward/operations/str/ak_ltrim.py                   | 2 +-
 src/awkward/operations/str/ak_repeat.py                  | 2 +-
 src/awkward/operations/str/ak_replace_slice.py           | 2 +-
 src/awkward/operations/str/ak_replace_substring.py       | 4 ++--
 src/awkward/operations/str/ak_replace_substring_regex.py | 4 ++--
 src/awkward/operations/str/ak_rpad.py                    | 2 +-
 src/awkward/operations/str/ak_rtrim.py                   | 2 +-
 src/awkward/operations/str/ak_slice.py                   | 2 +-
 src/awkward/operations/str/ak_split_pattern.py           | 4 ++--
 src/awkward/operations/str/ak_split_pattern_regex.py     | 4 ++--
 src/awkward/operations/str/ak_split_whitespace.py        | 4 ++--
 src/awkward/operations/str/ak_trim.py                    | 2 +-
 15 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
index 9bd2246673..3d0da1893b 100644
--- a/src/awkward/operations/str/ak_center.py
+++ b/src/awkward/operations/str/ak_center.py
@@ -37,7 +37,7 @@ def center(array, width, padding=" ", *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, width, padding)
 
     # Implementation
     return _impl(array, width, padding, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_extract_regex.py b/src/awkward/operations/str/ak_extract_regex.py
index 9a4aecd038..c3bcdc1d49 100644
--- a/src/awkward/operations/str/ak_extract_regex.py
+++ b/src/awkward/operations/str/ak_extract_regex.py
@@ -54,7 +54,7 @@ def extract_regex(array, pattern, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, pattern)
 
     # Implementation
     return _impl(array, pattern, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/ak_lpad.py
index 909f1663d9..5a869f2a92 100644
--- a/src/awkward/operations/str/ak_lpad.py
+++ b/src/awkward/operations/str/ak_lpad.py
@@ -37,7 +37,7 @@ def lpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, width, padding)
 
     # Implementation
     return _impl(array, width, padding, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
index 0180270067..2399d7ed9a 100644
--- a/src/awkward/operations/str/ak_ltrim.py
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -36,7 +36,7 @@ def ltrim(array, characters, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, characters)
 
     # Implementation
     return _impl(array, characters, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/ak_repeat.py
index 4419eed4c2..75324de63a 100644
--- a/src/awkward/operations/str/ak_repeat.py
+++ b/src/awkward/operations/str/ak_repeat.py
@@ -36,7 +36,7 @@ def repeat(array, num_repeats, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, num_repeats)
 
     # Implementation
     return _impl(array, num_repeats, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index cd80f111aa..ce6ab2fbab 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -38,7 +38,7 @@ def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=N
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, start, stop, replacement)
 
     # Implementation
     return _impl(array, start, stop, replacement, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index 691e9fd3e7..bc5dae8b96 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def replace_substring(
-    array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
+    array, pattern, replacement, max_replacements=None, *, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -40,7 +40,7 @@ def replace_substring(
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, pattern, replacement, max_replacements)
 
     # Implementation
     return _impl(array, pattern, replacement, max_replacements, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index 77dc2c12b2..fc737da163 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def replace_substring_regex(
-    array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
+    array, pattern, replacement, max_replacements=None, *, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -40,7 +40,7 @@ def replace_substring_regex(
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, pattern, replacement, max_replacements)
 
     # Implementation
     return _impl(array, pattern, replacement, max_replacements, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/ak_rpad.py
index da0cf61fb6..02fb2f4fcc 100644
--- a/src/awkward/operations/str/ak_rpad.py
+++ b/src/awkward/operations/str/ak_rpad.py
@@ -37,7 +37,7 @@ def rpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, width, padding)
 
     # Implementation
     return _impl(array, width, padding, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
index 3d1d518754..00be21f2ba 100644
--- a/src/awkward/operations/str/ak_rtrim.py
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -36,7 +36,7 @@ def rtrim(array, characters, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, characters)
 
     # Implementation
     return _impl(array, characters, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_slice.py b/src/awkward/operations/str/ak_slice.py
index 7afaab7d93..cba2775a37 100644
--- a/src/awkward/operations/str/ak_slice.py
+++ b/src/awkward/operations/str/ak_slice.py
@@ -37,7 +37,7 @@ def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None):
     or performs a literal slice on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, start, stop, step)
 
     # Implementation
     return _impl(array, start, stop, step, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
index b94187d9fe..680eef3134 100644
--- a/src/awkward/operations/str/ak_split_pattern.py
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def split_pattern(
-    array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
+    array, pattern, max_splits=None, reverse=False, *, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -33,7 +33,7 @@ def split_pattern(
     [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
     """
     # Dispatch
-    yield (array,)
+    yield (array, pattern, max_splits, reverse)
 
     # Implementation
     return _impl(array, pattern, max_splits, reverse, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/ak_split_pattern_regex.py
index 56a7876efd..f34ffa817d 100644
--- a/src/awkward/operations/str/ak_split_pattern_regex.py
+++ b/src/awkward/operations/str/ak_split_pattern_regex.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def split_pattern_regex(
-    array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
+    array, pattern, max_splits=None, reverse=False, *, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -33,7 +33,7 @@ def split_pattern_regex(
     [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
     """
     # Dispatch
-    yield (array,)
+    yield (array, pattern, max_splits, reverse)
 
     # Implementation
     return _impl(array, pattern, max_splits, reverse, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
index 07be7a0e5c..95d951f725 100644
--- a/src/awkward/operations/str/ak_split_whitespace.py
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def split_whitespace(
-    array, *, max_splits=None, reverse=False, highlevel=True, behavior=None
+    array, max_splits=None, reverse=False, *, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -41,7 +41,7 @@ def split_whitespace(
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, max_splits, reverse)
 
     # Implementation
     return _impl(array, max_splits, reverse, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index c43df209be..192aad730a 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -36,7 +36,7 @@ def trim(array, characters, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, characters)
 
     # Implementation
     return _impl(array, characters, highlevel, behavior)

From cbf15776f3a17590cb56a8e16d9988d884f689e9 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 22:21:13 +0100
Subject: [PATCH 45/73] fix: drop unused arg

---
 src/awkward/operations/str/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 762a90a0be..4d87449799 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -66,7 +66,6 @@ def _get_ufunc_action(
     ascii_function,
     *args,
     bytestring_to_string=False,
-    drop_unmasked_option=False,
     **kwargs,
 ):
     from awkward.operations.ak_from_arrow import from_arrow

From 068b6af7c279daed23b101d2cf4decbfc312ffa0 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Mon, 7 Aug 2023 16:13:58 -0500
Subject: [PATCH 46/73] join_element_wise

---
 src/awkward/operations/str/__init__.py        |  1 +
 src/awkward/operations/str/ak_join.py         |  2 +
 .../operations/str/ak_join_element_wise.py    | 71 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 22 ++++++
 4 files changed, 96 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_join_element_wise.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 4d87449799..fd3c651f19 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -53,6 +53,7 @@
 # string joining
 
 from awkward.operations.str.ak_join import *
+from awkward.operations.str.ak_join_element_wise import *
 
 # string slicing
 
diff --git a/src/awkward/operations/str/ak_join.py b/src/awkward/operations/str/ak_join.py
index 622c483c22..40289bc4a6 100644
--- a/src/awkward/operations/str/ak_join.py
+++ b/src/awkward/operations/str/ak_join.py
@@ -27,6 +27,8 @@ def join(array, separator, *, highlevel=True, behavior=None):
 
     Requires the pyarrow library and calls
     [pyarrow.compute.binary_join](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_join.html).
+
+    See also: #ak.str.join_element_wise.
     """
     # Dispatch
     yield (array, separator)
diff --git a/src/awkward/operations/str/ak_join_element_wise.py b/src/awkward/operations/str/ak_join_element_wise.py
new file mode 100644
index 0000000000..ad3639adb6
--- /dev/null
+++ b/src/awkward/operations/str/ak_join_element_wise.py
@@ -0,0 +1,71 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("join_element_wise",)
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def join_element_wise(*arrays, highlevel=True, behavior=None):
+    """
+    Args:
+        arrays: Array-like data (anything #ak.to_layout recognizes).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Broadcasts and concatenates all but the last array of strings in `arrays`; the last is used as a separator.
+
+    Note: this function does not raise an error if the `arrays` do not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.binary_join_element_wise](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_join_element_wise.html).
+
+    Unlike Arrow's `binary_join_element_wise`, this function has no `null_handling`
+    and `null_replacement` arguments. This function's behavior is like
+    `null_handling="emit_null"` (Arrow's default). The other cases can be implemented
+    with Awkward slices, #ak.drop_none, and #ak.fill_none.
+
+    See also: #ak.str.join.
+    """
+    # Dispatch
+    yield arrays
+
+    # Implementation
+    return _impl(arrays, highlevel, behavior)
+
+
+def _impl(arrays, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+    from awkward.operations.ak_from_arrow import from_arrow
+    from awkward.operations.ak_to_arrow import to_arrow
+
+    import pyarrow.compute as pc
+
+    layouts = [ak.to_layout(x) for x in arrays]
+    behavior = behavior_of(*arrays, behavior=behavior)
+
+    if len(arrays) < 1:
+        raise TypeError("at least one array is required")
+
+    def action(layouts, **kwargs):
+        if all(
+            x.is_list and x.parameter("__array__") in ("string", "bytestring")
+            for x in layouts
+        ):
+            return (
+                from_arrow(
+                    pc.binary_join_element_wise(
+                        *[to_arrow(x, extensionarray=False) for x in layouts]
+                    ),
+                    highlevel=False,
+                ),
+            )
+
+    (out,) = ak._broadcasting.broadcast_and_apply(layouts, action, behavior)
+
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index cd5437cdb3..4c640e90a2 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -742,3 +742,25 @@ def test_join():
         b"",
         "foo←bar←baz".encode(),
     ]
+
+
+def test_join_element_wise():
+    array1 = ak.Array([["one", "two", "three"], [], ["four", "five"]])
+    array2 = ak.Array([["111", "222", "333"], [], ["444", "555"]])
+    separator = ak.Array(["→", "↑", "←"])
+
+    assert ak.str.join_element_wise(array1, array2, separator).tolist() == [
+        ["one→111", "two→222", "three→333"],
+        [],
+        ["four←444", "five←555"],
+    ]
+
+    array1 = ak.Array([[b"one", b"two", b"three"], [], [b"four", b"five"]])
+    array2 = ak.Array([[b"111", b"222", b"333"], [], [b"444", b"555"]])
+    separator = ak.Array(["→".encode(), "↑".encode(), "←".encode()])
+
+    assert ak.str.join_element_wise(array1, array2, separator).tolist() == [
+        ["one→111".encode(), "two→222".encode(), "three→333".encode()],
+        [],
+        ["four←444".encode(), "five←555".encode()],
+    ]

From ffeef7b9674d0dcf3af08baa4b812c39d97b801e Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 22:23:12 +0100
Subject: [PATCH 47/73] Revert "use dispatch correctly"

This reverts commit 559073b1b047e099b5fce0dcd0930bd2154feedd.
---
 src/awkward/operations/str/ak_center.py                  | 2 +-
 src/awkward/operations/str/ak_extract_regex.py           | 2 +-
 src/awkward/operations/str/ak_lpad.py                    | 2 +-
 src/awkward/operations/str/ak_ltrim.py                   | 2 +-
 src/awkward/operations/str/ak_repeat.py                  | 2 +-
 src/awkward/operations/str/ak_replace_slice.py           | 2 +-
 src/awkward/operations/str/ak_replace_substring.py       | 4 ++--
 src/awkward/operations/str/ak_replace_substring_regex.py | 4 ++--
 src/awkward/operations/str/ak_rpad.py                    | 2 +-
 src/awkward/operations/str/ak_rtrim.py                   | 2 +-
 src/awkward/operations/str/ak_slice.py                   | 2 +-
 src/awkward/operations/str/ak_split_pattern.py           | 4 ++--
 src/awkward/operations/str/ak_split_pattern_regex.py     | 4 ++--
 src/awkward/operations/str/ak_split_whitespace.py        | 4 ++--
 src/awkward/operations/str/ak_trim.py                    | 2 +-
 15 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
index 3d0da1893b..9bd2246673 100644
--- a/src/awkward/operations/str/ak_center.py
+++ b/src/awkward/operations/str/ak_center.py
@@ -37,7 +37,7 @@ def center(array, width, padding=" ", *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, width, padding)
+    yield (array,)
 
     # Implementation
     return _impl(array, width, padding, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_extract_regex.py b/src/awkward/operations/str/ak_extract_regex.py
index c3bcdc1d49..9a4aecd038 100644
--- a/src/awkward/operations/str/ak_extract_regex.py
+++ b/src/awkward/operations/str/ak_extract_regex.py
@@ -54,7 +54,7 @@ def extract_regex(array, pattern, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, pattern)
+    yield (array,)
 
     # Implementation
     return _impl(array, pattern, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/ak_lpad.py
index 5a869f2a92..909f1663d9 100644
--- a/src/awkward/operations/str/ak_lpad.py
+++ b/src/awkward/operations/str/ak_lpad.py
@@ -37,7 +37,7 @@ def lpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, width, padding)
+    yield (array,)
 
     # Implementation
     return _impl(array, width, padding, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
index 2399d7ed9a..0180270067 100644
--- a/src/awkward/operations/str/ak_ltrim.py
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -36,7 +36,7 @@ def ltrim(array, characters, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, characters)
+    yield (array,)
 
     # Implementation
     return _impl(array, characters, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/ak_repeat.py
index 75324de63a..4419eed4c2 100644
--- a/src/awkward/operations/str/ak_repeat.py
+++ b/src/awkward/operations/str/ak_repeat.py
@@ -36,7 +36,7 @@ def repeat(array, num_repeats, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, num_repeats)
+    yield (array,)
 
     # Implementation
     return _impl(array, num_repeats, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index ce6ab2fbab..cd80f111aa 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -38,7 +38,7 @@ def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=N
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, start, stop, replacement)
+    yield (array,)
 
     # Implementation
     return _impl(array, start, stop, replacement, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index bc5dae8b96..691e9fd3e7 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def replace_substring(
-    array, pattern, replacement, max_replacements=None, *, highlevel=True, behavior=None
+    array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -40,7 +40,7 @@ def replace_substring(
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, pattern, replacement, max_replacements)
+    yield (array,)
 
     # Implementation
     return _impl(array, pattern, replacement, max_replacements, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index fc737da163..77dc2c12b2 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def replace_substring_regex(
-    array, pattern, replacement, max_replacements=None, *, highlevel=True, behavior=None
+    array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -40,7 +40,7 @@ def replace_substring_regex(
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, pattern, replacement, max_replacements)
+    yield (array,)
 
     # Implementation
     return _impl(array, pattern, replacement, max_replacements, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/ak_rpad.py
index 02fb2f4fcc..da0cf61fb6 100644
--- a/src/awkward/operations/str/ak_rpad.py
+++ b/src/awkward/operations/str/ak_rpad.py
@@ -37,7 +37,7 @@ def rpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, width, padding)
+    yield (array,)
 
     # Implementation
     return _impl(array, width, padding, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
index 00be21f2ba..3d1d518754 100644
--- a/src/awkward/operations/str/ak_rtrim.py
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -36,7 +36,7 @@ def rtrim(array, characters, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, characters)
+    yield (array,)
 
     # Implementation
     return _impl(array, characters, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_slice.py b/src/awkward/operations/str/ak_slice.py
index cba2775a37..7afaab7d93 100644
--- a/src/awkward/operations/str/ak_slice.py
+++ b/src/awkward/operations/str/ak_slice.py
@@ -37,7 +37,7 @@ def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None):
     or performs a literal slice on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, start, stop, step)
+    yield (array,)
 
     # Implementation
     return _impl(array, start, stop, step, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
index 680eef3134..b94187d9fe 100644
--- a/src/awkward/operations/str/ak_split_pattern.py
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def split_pattern(
-    array, pattern, max_splits=None, reverse=False, *, highlevel=True, behavior=None
+    array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -33,7 +33,7 @@ def split_pattern(
     [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
     """
     # Dispatch
-    yield (array, pattern, max_splits, reverse)
+    yield (array,)
 
     # Implementation
     return _impl(array, pattern, max_splits, reverse, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/ak_split_pattern_regex.py
index f34ffa817d..56a7876efd 100644
--- a/src/awkward/operations/str/ak_split_pattern_regex.py
+++ b/src/awkward/operations/str/ak_split_pattern_regex.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def split_pattern_regex(
-    array, pattern, max_splits=None, reverse=False, *, highlevel=True, behavior=None
+    array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -33,7 +33,7 @@ def split_pattern_regex(
     [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
     """
     # Dispatch
-    yield (array, pattern, max_splits, reverse)
+    yield (array,)
 
     # Implementation
     return _impl(array, pattern, max_splits, reverse, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
index 95d951f725..07be7a0e5c 100644
--- a/src/awkward/operations/str/ak_split_whitespace.py
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -11,7 +11,7 @@
 
 @high_level_function
 def split_whitespace(
-    array, max_splits=None, reverse=False, *, highlevel=True, behavior=None
+    array, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
     """
     Args:
@@ -41,7 +41,7 @@ def split_whitespace(
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, max_splits, reverse)
+    yield (array,)
 
     # Implementation
     return _impl(array, max_splits, reverse, highlevel, behavior)
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index 192aad730a..c43df209be 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -36,7 +36,7 @@ def trim(array, characters, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array, characters)
+    yield (array,)
 
     # Implementation
     return _impl(array, characters, highlevel, behavior)

From 19c719730e9c85408056099c62cc3e110bd78468 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 22:24:21 +0100
Subject: [PATCH 48/73] fix: broadcast `num_repeats`

---
 src/awkward/operations/str/ak_repeat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/ak_repeat.py
index 4419eed4c2..75324de63a 100644
--- a/src/awkward/operations/str/ak_repeat.py
+++ b/src/awkward/operations/str/ak_repeat.py
@@ -36,7 +36,7 @@ def repeat(array, num_repeats, *, highlevel=True, behavior=None):
     on strings and bytestrings, respectively.
     """
     # Dispatch
-    yield (array,)
+    yield (array, num_repeats)
 
     # Implementation
     return _impl(array, num_repeats, highlevel, behavior)

From 21973bdf2885089dca86af83878064b0c9d6c019 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 22:50:15 +0100
Subject: [PATCH 49/73] feat: add `count_substring[_pattern]`

---
 src/awkward/operations/str/__init__.py        |  3 +
 .../operations/str/ak_count_substring.py      | 56 ++++++++++++++++
 .../str/ak_count_substring_regex.py           | 56 ++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 66 +++++++++++++++++++
 4 files changed, 181 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_count_substring.py
 create mode 100644 src/awkward/operations/str/ak_count_substring_regex.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index fd3c651f19..504ff93b3a 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -61,6 +61,9 @@
 
 # containment tests
 
+from awkward.operations.str.ak_count_substring import *
+from awkward.operations.str.ak_count_substring_regex import *
+
 
 def _get_ufunc_action(
     utf8_function,
diff --git a/src/awkward/operations/str/ak_count_substring.py b/src/awkward/operations/str/ak_count_substring.py
new file mode 100644
index 0000000000..52575999c8
--- /dev/null
+++ b/src/awkward/operations/str/ak_count_substring.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("count_substring",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def count_substring(
+    array, pattern, *, ignore_case=False, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the count the number of occurrences of the given literal pattern.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.count_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.count_substring,
+        pc.count_substring,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/src/awkward/operations/str/ak_count_substring_regex.py b/src/awkward/operations/str/ak_count_substring_regex.py
new file mode 100644
index 0000000000..413c46bca0
--- /dev/null
+++ b/src/awkward/operations/str/ak_count_substring_regex.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("count_substring_regex",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def count_substring_regex(
+    array, pattern, *, ignore_case=False, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the count the number of occurrences of the given regular expression pattern.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.count_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.count_substring_regex,
+        pc.count_substring_regex,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 4c640e90a2..cc090a6b1d 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -764,3 +764,69 @@ def test_join_element_wise():
         [],
         ["four←444".encode(), "five←555".encode()],
     ]
+
+
+def test_count_substring():
+    assert ak.str.count_substring(string_repeats, "BA").tolist() == [
+        [0, 0, 0],
+        [0, 0, 0],
+        [],
+    ]
+    assert ak.str.count_substring(string_repeats, "BA", ignore_case=True).tolist() == [
+        [2, 0, 1],
+        [0, 1, 1],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.count_substring(bytestring_repeats, b"BA").tolist() == [
+        [0, 0, 0],
+        [0, 0, 0],
+        [],
+    ]
+    assert ak.str.count_substring(
+        bytestring_repeats, b"BA", ignore_case=True
+    ).tolist() == [
+        [2, 0, 1],
+        [0, 1, 1],
+        [],
+    ]
+
+
+def test_count_substring_regex():
+    assert ak.str.count_substring_regex(string_repeats, r"BA\d*").tolist() == [
+        [0, 0, 0],
+        [0, 0, 0],
+        [],
+    ]
+    assert ak.str.count_substring_regex(
+        string_repeats, r"BA\d*", ignore_case=True
+    ).tolist() == [
+        [2, 0, 1],
+        [0, 1, 1],
+        [],
+    ]
+    assert ak.str.count_substring_regex(string_repeats, r"\d{1,}").tolist() == [
+        [2, 0, 0],
+        [1, 1, 1],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.count_substring_regex(bytestring_repeats, rb"BA\d*").tolist() == [
+        [0, 0, 0],
+        [0, 0, 0],
+        [],
+    ]
+    assert ak.str.count_substring_regex(
+        bytestring_repeats, rb"BA\d*", ignore_case=True
+    ).tolist() == [
+        [2, 0, 1],
+        [0, 1, 1],
+        [],
+    ]
+    assert ak.str.count_substring_regex(bytestring_repeats, rb"\d{1,}").tolist() == [
+        [2, 0, 0],
+        [1, 1, 1],
+        [],
+    ]

From d385e615a750eb0683f56c8adc48e98a6e7f6f90 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 22:51:37 +0100
Subject: [PATCH 50/73] docs: fixup docstring

---
 src/awkward/operations/str/ak_count_substring.py       | 2 +-
 src/awkward/operations/str/ak_count_substring_regex.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/awkward/operations/str/ak_count_substring.py b/src/awkward/operations/str/ak_count_substring.py
index 52575999c8..cbd8cdc550 100644
--- a/src/awkward/operations/str/ak_count_substring.py
+++ b/src/awkward/operations/str/ak_count_substring.py
@@ -23,7 +23,7 @@ def count_substring(
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the count the number of occurrences of the given literal pattern.
+    For each string in the array, count the number of occurrences of the given literal pattern.
 
     Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
 
diff --git a/src/awkward/operations/str/ak_count_substring_regex.py b/src/awkward/operations/str/ak_count_substring_regex.py
index 413c46bca0..f3041c32e0 100644
--- a/src/awkward/operations/str/ak_count_substring_regex.py
+++ b/src/awkward/operations/str/ak_count_substring_regex.py
@@ -23,12 +23,12 @@ def count_substring_regex(
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the count the number of occurrences of the given regular expression pattern.
+    For each string in the array, count the number of occurrences of the given regular expression pattern.
 
     Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.count_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring.html).
+    [pyarrow.compute.count_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring_regex.html).
     """
     # Dispatch
     yield (array,)

From c9164d5da544459ed7eddb7896544914954aaab1 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 22:55:33 +0100
Subject: [PATCH 51/73] feat: add `ends_with`

---
 src/awkward/operations/str/__init__.py     |  1 +
 src/awkward/operations/str/ak_ends_with.py | 53 ++++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 25 ++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_ends_with.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 504ff93b3a..ec8e79e078 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -63,6 +63,7 @@
 
 from awkward.operations.str.ak_count_substring import *
 from awkward.operations.str.ak_count_substring_regex import *
+from awkward.operations.str.ak_ends_with import *
 
 
 def _get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_ends_with.py b/src/awkward/operations/str/ak_ends_with.py
new file mode 100644
index 0000000000..7a7b2f40a0
--- /dev/null
+++ b/src/awkward/operations/str/ak_ends_with.py
@@ -0,0 +1,53 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("ends_with",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine whether it ends with the given literal suffix.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.ends_with](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ends_with.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.ends_with,
+        pc.ends_with,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index cc090a6b1d..db759b7d78 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -830,3 +830,28 @@ def test_count_substring_regex():
         [1, 1, 1],
         [],
     ]
+
+
+def test_ends_with():
+    assert ak.str.ends_with(string_repeats, "BAR").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.ends_with(string_repeats, "BAR", ignore_case=True).tolist() == [
+        [False, False, True],
+        [False, True, True],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.ends_with(bytestring_repeats, b"BAR").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.ends_with(bytestring_repeats, b"BAR", ignore_case=True).tolist() == [
+        [False, False, True],
+        [False, True, True],
+        [],
+    ]

From aac5e8a54491e920dd49e3b332bfaf99983d9abf Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 22:57:18 +0100
Subject: [PATCH 52/73] feat: add `starts_with`

---
 src/awkward/operations/str/__init__.py       |  1 +
 src/awkward/operations/str/ak_starts_with.py | 53 ++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py   | 27 ++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_starts_with.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index ec8e79e078..2bd84756a1 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -64,6 +64,7 @@
 from awkward.operations.str.ak_count_substring import *
 from awkward.operations.str.ak_count_substring_regex import *
 from awkward.operations.str.ak_ends_with import *
+from awkward.operations.str.ak_starts_with import *
 
 
 def _get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_starts_with.py b/src/awkward/operations/str/ak_starts_with.py
new file mode 100644
index 0000000000..6452cb8e3b
--- /dev/null
+++ b/src/awkward/operations/str/ak_starts_with.py
@@ -0,0 +1,53 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("starts_with",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine whether it starts with the given literal prefix.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.count_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.starts_with,
+        pc.starts_with,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index db759b7d78..37fac3b5a9 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -855,3 +855,30 @@ def test_ends_with():
         [False, True, True],
         [],
     ]
+
+
+def test_starts_with():
+    assert ak.str.starts_with(string_repeats, "FOO").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.starts_with(string_repeats, "FOO", ignore_case=True).tolist() == [
+        [True, True, False],
+        [False, False, True],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.starts_with(bytestring_repeats, b"FOO").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.starts_with(
+        bytestring_repeats, b"FOO", ignore_case=True
+    ).tolist() == [
+        [True, True, False],
+        [False, False, True],
+        [],
+    ]

From 17a6a0e470f6fb82ced119e54605c7ab8c4c193a Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 22:59:07 +0100
Subject: [PATCH 53/73] docs: fix link

---
 src/awkward/operations/str/ak_ends_with.py   | 2 +-
 src/awkward/operations/str/ak_starts_with.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/awkward/operations/str/ak_ends_with.py b/src/awkward/operations/str/ak_ends_with.py
index 7a7b2f40a0..89f82c8fa0 100644
--- a/src/awkward/operations/str/ak_ends_with.py
+++ b/src/awkward/operations/str/ak_ends_with.py
@@ -26,7 +26,7 @@ def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=Non
     Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.count_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring.html).
+    [pyarrow.compute.ends_with](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ends_with.html).
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_starts_with.py b/src/awkward/operations/str/ak_starts_with.py
index 6452cb8e3b..a203c5a318 100644
--- a/src/awkward/operations/str/ak_starts_with.py
+++ b/src/awkward/operations/str/ak_starts_with.py
@@ -26,7 +26,7 @@ def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=N
     Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.count_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring.html).
+    [pyarrow.compute.starts_with](https://arrow.apache.org/docs/python/generated/pyarrow.compute.starts_with.html).
     """
     # Dispatch
     yield (array,)

From 83f1597b92b3b7f91f00d35304f78388c7bce104 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:02:05 +0100
Subject: [PATCH 54/73] feat: add `find_substring`

---
 src/awkward/operations/str/__init__.py        |  1 +
 .../operations/str/ak_find_substring.py       | 54 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 27 ++++++++++
 3 files changed, 82 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_find_substring.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 2bd84756a1..15bc71f85d 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -64,6 +64,7 @@
 from awkward.operations.str.ak_count_substring import *
 from awkward.operations.str.ak_count_substring_regex import *
 from awkward.operations.str.ak_ends_with import *
+from awkward.operations.str.ak_find_substring import *
 from awkward.operations.str.ak_starts_with import *
 
 
diff --git a/src/awkward/operations/str/ak_find_substring.py b/src/awkward/operations/str/ak_find_substring.py
new file mode 100644
index 0000000000..ed75d90911
--- /dev/null
+++ b/src/awkward/operations/str/ak_find_substring.py
@@ -0,0 +1,54 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("find_substring",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine the index at which the first occurrence of the given literal pattern is
+    found. If the literay pattern is not found inside the string, the index is taken to be -1.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.find_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.find_substring,
+        pc.find_substring,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 37fac3b5a9..619926b0b9 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -882,3 +882,30 @@ def test_starts_with():
         [False, False, True],
         [],
     ]
+
+
+def test_find_substring():
+    assert ak.str.find_substring(string_repeats, "FOO").tolist() == [
+        [-1, -1, -1],
+        [-1, -1, -1],
+        [],
+    ]
+    assert ak.str.find_substring(string_repeats, "FOO", ignore_case=True).tolist() == [
+        [0, 0, -1],
+        [3, -1, 0],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.find_substring(bytestring_repeats, b"FOO").tolist() == [
+        [-1, -1, -1],
+        [-1, -1, -1],
+        [],
+    ]
+    assert ak.str.find_substring(
+        bytestring_repeats, b"FOO", ignore_case=True
+    ).tolist() == [
+        [0, 0, -1],
+        [3, -1, 0],
+        [],
+    ]

From 6ad578fda3e6f162ef0374b2e8add42b3fbdf07f Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:04:11 +0100
Subject: [PATCH 55/73] docs: fix typo

---
 src/awkward/operations/str/ak_find_substring.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/awkward/operations/str/ak_find_substring.py b/src/awkward/operations/str/ak_find_substring.py
index ed75d90911..e7a9acaaa5 100644
--- a/src/awkward/operations/str/ak_find_substring.py
+++ b/src/awkward/operations/str/ak_find_substring.py
@@ -22,7 +22,7 @@ def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavio
             high-level.
 
     For each string in the array, determine the index at which the first occurrence of the given literal pattern is
-    found. If the literay pattern is not found inside the string, the index is taken to be -1.
+    found. If the literal pattern is not found inside the string, the index is taken to be -1.
 
     Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
 

From 3141ebb753e66a59dd97b6d7d0af3963798f45a9 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:06:58 +0100
Subject: [PATCH 56/73] feat: add `find_substring_regex`

---
 src/awkward/operations/str/__init__.py        |  1 +
 .../operations/str/ak_find_substring_regex.py | 56 +++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 29 ++++++++++
 3 files changed, 86 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_find_substring_regex.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 15bc71f85d..2475e43e03 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -65,6 +65,7 @@
 from awkward.operations.str.ak_count_substring_regex import *
 from awkward.operations.str.ak_ends_with import *
 from awkward.operations.str.ak_find_substring import *
+from awkward.operations.str.ak_find_substring_regex import *
 from awkward.operations.str.ak_starts_with import *
 
 
diff --git a/src/awkward/operations/str/ak_find_substring_regex.py b/src/awkward/operations/str/ak_find_substring_regex.py
new file mode 100644
index 0000000000..6a553163a6
--- /dev/null
+++ b/src/awkward/operations/str/ak_find_substring_regex.py
@@ -0,0 +1,56 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("find_substring_regex",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def find_substring_regex(
+    array, pattern, *, ignore_case=False, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine the index at which the first occurrence of the given regular expression
+    pattern is found. If the regular expression pattern is not found inside the string, the index is taken to be -1.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.find_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.find_substring_regex,
+        pc.find_substring_regex,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 619926b0b9..607e7d39df 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -909,3 +909,32 @@ def test_find_substring():
         [3, -1, 0],
         [],
     ]
+
+
+def test_find_substring_regex():
+    assert ak.str.find_substring_regex(string_repeats, r"FOO\d+").tolist() == [
+        [-1, -1, -1],
+        [-1, -1, -1],
+        [],
+    ]
+    assert ak.str.find_substring_regex(
+        string_repeats, r"FOO\d+", ignore_case=True
+    ).tolist() == [
+        [0, -1, -1],
+        [-1, -1, 0],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.find_substring_regex(bytestring_repeats, rb"FOO\d+").tolist() == [
+        [-1, -1, -1],
+        [-1, -1, -1],
+        [],
+    ]
+    assert ak.str.find_substring_regex(
+        bytestring_repeats, rb"FOO\d+", ignore_case=True
+    ).tolist() == [
+        [0, -1, -1],
+        [-1, -1, 0],
+        [],
+    ]

From 4c69e86e7cbed3a2d97069aa773e642f0082c0d1 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:09:29 +0100
Subject: [PATCH 57/73] docs: fix link

---
 src/awkward/operations/str/ak_find_substring_regex.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/awkward/operations/str/ak_find_substring_regex.py b/src/awkward/operations/str/ak_find_substring_regex.py
index 6a553163a6..a0dc5b2ce0 100644
--- a/src/awkward/operations/str/ak_find_substring_regex.py
+++ b/src/awkward/operations/str/ak_find_substring_regex.py
@@ -29,7 +29,7 @@ def find_substring_regex(
     Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.find_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring.html).
+    [pyarrow.compute.find_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring_regex.html).
     """
     # Dispatch
     yield (array,)

From 8e230f4fc0a455c7d6483b1f3b60839f6896cb42 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:14:22 +0100
Subject: [PATCH 58/73] feat: add `match_like`

---
 src/awkward/operations/str/__init__.py      |  1 +
 src/awkward/operations/str/ak_match_like.py | 55 +++++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py  | 27 ++++++++++
 3 files changed, 83 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_match_like.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 2475e43e03..a2c452ce29 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -66,6 +66,7 @@
 from awkward.operations.str.ak_ends_with import *
 from awkward.operations.str.ak_find_substring import *
 from awkward.operations.str.ak_find_substring_regex import *
+from awkward.operations.str.ak_match_like import *
 from awkward.operations.str.ak_starts_with import *
 
 
diff --git a/src/awkward/operations/str/ak_match_like.py b/src/awkward/operations/str/ak_match_like.py
new file mode 100644
index 0000000000..ef8462d513
--- /dev/null
+++ b/src/awkward/operations/str/ak_match_like.py
@@ -0,0 +1,55 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("match_like",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def match_like(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine whether it matches the given SQL-style LIKE pattern.
+    '%' matches any number of characters, '_' matches exactly one character, and any other character matches itself.
+    To match a literal '%', '_', or "'", the character must be preceded with a backslash.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.match_like](https://arrow.apache.org/docs/python/generated/pyarrow.compute.match_like.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.match_like,
+        pc.match_like,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 607e7d39df..3acf98a84a 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -938,3 +938,30 @@ def test_find_substring_regex():
         [-1, -1, 0],
         [],
     ]
+
+
+def test_match_like():
+    assert ak.str.match_like(string_repeats, "FOO%").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.match_like(string_repeats, "FOO%", ignore_case=True).tolist() == [
+        [True, True, False],
+        [False, False, True],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.match_like(bytestring_repeats, b"FOO%").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.match_like(
+        bytestring_repeats, b"FOO%", ignore_case=True
+    ).tolist() == [
+        [True, True, False],
+        [False, False, True],
+        [],
+    ]

From c676fbdfe64b3b326a0016067dc7d24aa47fc136 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:15:57 +0100
Subject: [PATCH 59/73] test: improve test

---
 tests/test_2616_use_pyarrow_for_strings.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index 3acf98a84a..ae55309763 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -941,27 +941,27 @@ def test_find_substring_regex():
 
 
 def test_match_like():
-    assert ak.str.match_like(string_repeats, "FOO%").tolist() == [
+    assert ak.str.match_like(string_repeats, "FOO%BA%").tolist() == [
         [False, False, False],
         [False, False, False],
         [],
     ]
-    assert ak.str.match_like(string_repeats, "FOO%", ignore_case=True).tolist() == [
-        [True, True, False],
+    assert ak.str.match_like(string_repeats, "FOO%BA%", ignore_case=True).tolist() == [
+        [True, False, False],
         [False, False, True],
         [],
     ]
 
     # Bytestrings
-    assert ak.str.match_like(bytestring_repeats, b"FOO%").tolist() == [
+    assert ak.str.match_like(bytestring_repeats, b"FOO%BA%").tolist() == [
         [False, False, False],
         [False, False, False],
         [],
     ]
     assert ak.str.match_like(
-        bytestring_repeats, b"FOO%", ignore_case=True
+        bytestring_repeats, b"FOO%BA%", ignore_case=True
     ).tolist() == [
-        [True, True, False],
+        [True, False, False],
         [False, False, True],
         [],
     ]

From 99584ba165eac2498052a9f92358cc0398d16383 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:20:51 +0100
Subject: [PATCH 60/73] feat: add `match_substring`, `match_substring_regex`

---
 src/awkward/operations/str/__init__.py        |  2 +
 .../operations/str/ak_match_substring.py      | 55 ++++++++++++++++++
 .../str/ak_match_substring_regex.py           | 55 ++++++++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py    | 56 +++++++++++++++++++
 4 files changed, 168 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_match_substring.py
 create mode 100644 src/awkward/operations/str/ak_match_substring_regex.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index a2c452ce29..89cf32dfc7 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -67,6 +67,8 @@
 from awkward.operations.str.ak_find_substring import *
 from awkward.operations.str.ak_find_substring_regex import *
 from awkward.operations.str.ak_match_like import *
+from awkward.operations.str.ak_match_substring import *
+from awkward.operations.str.ak_match_substring_regex import *
 from awkward.operations.str.ak_starts_with import *
 
 
diff --git a/src/awkward/operations/str/ak_match_substring.py b/src/awkward/operations/str/ak_match_substring.py
new file mode 100644
index 0000000000..9530b96388
--- /dev/null
+++ b/src/awkward/operations/str/ak_match_substring.py
@@ -0,0 +1,55 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("match_substring",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def match_substring(
+    array, pattern, *, ignore_case=False, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine whether it contains the given literal pattern.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.match_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.match_substring.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.match_substring,
+        pc.match_substring,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/src/awkward/operations/str/ak_match_substring_regex.py b/src/awkward/operations/str/ak_match_substring_regex.py
new file mode 100644
index 0000000000..bf0f659765
--- /dev/null
+++ b/src/awkward/operations/str/ak_match_substring_regex.py
@@ -0,0 +1,55 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("match_substring_regex",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def match_substring_regex(
+    array, pattern, *, ignore_case=False, highlevel=True, behavior=None
+):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        pattern (str, or bytes): Substring pattern to look for inside the given array.
+        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine whether it contains the given regular expression pattern.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.match_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.match_substring_regex.html).
+    """
+    # Dispatch
+    yield (array,)
+
+    # Implementation
+    return _impl(array, pattern, ignore_case, highlevel, behavior)
+
+
+def _impl(array, pattern, ignore_case, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    behavior = behavior_of(array, behavior=behavior)
+    apply = ak.operations.str._get_ufunc_action(
+        pc.match_substring_regex,
+        pc.match_substring_regex,
+        bytestring_to_string=False,
+        ignore_case=ignore_case,
+        pattern=pattern,
+    )
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index ae55309763..b53e6ef908 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -965,3 +965,59 @@ def test_match_like():
         [False, False, True],
         [],
     ]
+
+
+def test_match_substring():
+    assert ak.str.match_substring(string_repeats, "FOO").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.match_substring(string_repeats, "FOO", ignore_case=True).tolist() == [
+        [True, True, False],
+        [True, False, True],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.match_substring(bytestring_repeats, b"FOO").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.match_substring(
+        bytestring_repeats, b"FOO", ignore_case=True
+    ).tolist() == [
+        [True, True, False],
+        [True, False, True],
+        [],
+    ]
+
+
+def test_match_substring_regex():
+    assert ak.str.match_substring_regex(string_repeats, r"FOO\d+").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.match_substring_regex(
+        string_repeats, r"FOO\d+", ignore_case=True
+    ).tolist() == [
+        [True, False, False],
+        [False, False, True],
+        [],
+    ]
+
+    # Bytestrings
+    assert ak.str.match_substring_regex(bytestring_repeats, rb"FOO\d+").tolist() == [
+        [False, False, False],
+        [False, False, False],
+        [],
+    ]
+    assert ak.str.match_substring_regex(
+        bytestring_repeats, rb"FOO\d+", ignore_case=True
+    ).tolist() == [
+        [True, False, False],
+        [False, False, True],
+        [],
+    ]

From c456b440e671378c5ea9db7b41b8e9c289565b4a Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:47:32 +0100
Subject: [PATCH 61/73] feat: add `is_in` and `index_in`

---
 src/awkward/operations/str/__init__.py     |   2 +
 src/awkward/operations/str/ak_index_in.py  |  80 +++++++++++++
 src/awkward/operations/str/ak_is_in.py     |  79 ++++++++++++
 tests/test_2616_use_pyarrow_for_strings.py | 132 +++++++++++++++++++++
 4 files changed, 293 insertions(+)
 create mode 100644 src/awkward/operations/str/ak_index_in.py
 create mode 100644 src/awkward/operations/str/ak_is_in.py

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 89cf32dfc7..7d4357d12a 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -66,6 +66,8 @@
 from awkward.operations.str.ak_ends_with import *
 from awkward.operations.str.ak_find_substring import *
 from awkward.operations.str.ak_find_substring_regex import *
+from awkward.operations.str.ak_index_in import *
+from awkward.operations.str.ak_is_in import *
 from awkward.operations.str.ak_match_like import *
 from awkward.operations.str.ak_match_substring import *
 from awkward.operations.str.ak_match_substring_regex import *
diff --git a/src/awkward/operations/str/ak_index_in.py b/src/awkward/operations/str/ak_index_in.py
new file mode 100644
index 0000000000..66b31e5aa8
--- /dev/null
+++ b/src/awkward/operations/str/ak_index_in.py
@@ -0,0 +1,80 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("index_in",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def index_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        value_set: Array-like data (anything #ak.to_layout recognizes), set of values to search for.
+        skip_nones (bool): If True, None values in `array` are not matched against `value_set`; otherwise, they are.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine where it is found within the given set of values. If the string is
+    not found within the value set, the index is set to None.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.index_in](https://arrow.apache.org/docs/python/generated/pyarrow.compute.index_in.html).
+    """
+    # Dispatch
+    yield (array, value_set)
+
+    # Implementation
+    return _impl(array, value_set, skip_nones, highlevel, behavior)
+
+
+def _is_maybe_optional_list_of_string(layout):
+    if layout.is_list and layout.parameter("__array__") in {"string", "bytestring"}:
+        return True
+    elif layout.is_option or layout.index_indexed:
+        return _is_maybe_optional_list_of_string(layout.content)
+    else:
+        return False
+
+
+def _impl(array, value_set, skip_nones, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    value_set_layout = ak.to_layout(value_set, allow_record=False, allow_other=True)
+
+    if not _is_maybe_optional_list_of_string(value_set_layout):
+        raise TypeError("`value_set` must be 1D array of (maybe missing) strings")
+
+    behavior = behavior_of(array, value_set, behavior=behavior)
+
+    def apply(layout, **kwargs):
+        if (
+            layout.is_list
+            and layout.purelist_depth == 2
+            and _is_maybe_optional_list_of_string(layout.content)
+        ):
+            return layout.copy(
+                content=ak.from_arrow(
+                    pc.index_in(
+                        ak.to_arrow(layout.content, extensionarray=False),
+                        ak.to_arrow(value_set_layout, extensionarray=False),
+                        skip_nulls=skip_nones,
+                    ),
+                    highlevel=False,
+                )
+            )
+
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/src/awkward/operations/str/ak_is_in.py b/src/awkward/operations/str/ak_is_in.py
new file mode 100644
index 0000000000..2b5cee9d0c
--- /dev/null
+++ b/src/awkward/operations/str/ak_is_in.py
@@ -0,0 +1,79 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+__all__ = ("is_in",)
+
+
+import awkward as ak
+from awkward._behavior import behavior_of
+from awkward._dispatch import high_level_function
+from awkward._layout import wrap_layout
+
+
+@high_level_function
+def is_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Array-like data (anything #ak.to_layout recognizes).
+        value_set: Array-like data (anything #ak.to_layout recognizes), set of values to search for.
+        skip_nones (bool): If True, None values in `array` are not matched against `value_set`; otherwise, they are.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    For each string in the array, determine whether it is found within the given set of values.
+
+    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+
+    Requires the pyarrow library and calls
+    [pyarrow.compute.is_in](https://arrow.apache.org/docs/python/generated/pyarrow.compute.is_in.html).
+    """
+    # Dispatch
+    yield (array, value_set)
+
+    # Implementation
+    return _impl(array, value_set, skip_nones, highlevel, behavior)
+
+
+def _is_maybe_optional_list_of_string(layout):
+    if layout.is_list and layout.parameter("__array__") in {"string", "bytestring"}:
+        return True
+    elif layout.is_option or layout.is_indexed:
+        return _is_maybe_optional_list_of_string(layout.content)
+    else:
+        return False
+
+
+def _impl(array, value_set, skip_nones, highlevel, behavior):
+    import awkward._connect.pyarrow  # noqa: F401, I001
+
+    import pyarrow.compute as pc
+
+    layout = ak.to_layout(array, allow_record=False, allow_other=True)
+    value_set_layout = ak.to_layout(value_set, allow_record=False, allow_other=True)
+
+    if not _is_maybe_optional_list_of_string(value_set_layout):
+        raise TypeError("`value_set` must be 1D array of (maybe missing) strings")
+
+    behavior = behavior_of(array, value_set, behavior=behavior)
+
+    def apply(layout, **kwargs):
+        if (
+            layout.is_list
+            and layout.purelist_depth == 2
+            and _is_maybe_optional_list_of_string(layout.content)
+        ):
+            return layout.copy(
+                content=ak.from_arrow(
+                    pc.is_in(
+                        ak.to_arrow(layout.content, extensionarray=False),
+                        ak.to_arrow(value_set_layout, extensionarray=False),
+                        skip_nulls=skip_nones,
+                    ),
+                    highlevel=False,
+                )
+            )
+
+    out = ak._do.recursively_apply(layout, apply, behavior=behavior)
+
+    return wrap_layout(out, highlevel=highlevel, behavior=behavior)
diff --git a/tests/test_2616_use_pyarrow_for_strings.py b/tests/test_2616_use_pyarrow_for_strings.py
index b53e6ef908..79ddfb4d82 100644
--- a/tests/test_2616_use_pyarrow_for_strings.py
+++ b/tests/test_2616_use_pyarrow_for_strings.py
@@ -1021,3 +1021,135 @@ def test_match_substring_regex():
         [False, False, True],
         [],
     ]
+
+
+def test_is_in():
+    assert ak.str.is_in(string_repeats, ["123foo", "foo"]).tolist() == [
+        [False, True, False],
+        [True, False, False],
+        [],
+    ]
+    assert ak.str.is_in(
+        [
+            ["foo123bar123baz", "foo", "bar"],
+            ["123foo", "456bar", "foo123456bar"],
+            [None],
+        ],
+        ["123foo", "foo", None],
+    ).tolist() == [
+        [False, True, False],
+        [True, False, False],
+        [True],
+    ]
+    assert ak.str.is_in(
+        [
+            ["foo123bar123baz", "foo", "bar"],
+            ["123foo", "456bar", "foo123456bar"],
+            [None],
+        ],
+        ["123foo", "foo", None],
+        skip_nones=True,
+    ).tolist() == [
+        [False, True, False],
+        [True, False, False],
+        [False],
+    ]
+
+    # Bytestrings
+
+    assert ak.str.is_in(string_repeats, [b"123foo", b"foo"]).tolist() == [
+        [False, True, False],
+        [True, False, False],
+        [],
+    ]
+    assert ak.str.is_in(
+        [
+            [b"foo123bar123baz", b"foo", b"bar"],
+            [b"123foo", b"456bar", b"foo123456bar"],
+            [None],
+        ],
+        [b"123foo", b"foo", None],
+    ).tolist() == [
+        [False, True, False],
+        [True, False, False],
+        [True],
+    ]
+    assert ak.str.is_in(
+        [
+            [b"foo123bar123baz", b"foo", b"bar"],
+            [b"123foo", b"456bar", b"foo123456bar"],
+            [None],
+        ],
+        [b"123foo", b"foo", None],
+        skip_nones=True,
+    ).tolist() == [
+        [False, True, False],
+        [True, False, False],
+        [False],
+    ]
+
+
+def test_index_in():
+    assert ak.str.index_in(string_repeats, ["123foo", "foo"]).tolist() == [
+        [None, 1, None],
+        [0, None, None],
+        [],
+    ]
+    assert ak.str.index_in(
+        [
+            ["foo123bar123baz", "foo", "bar"],
+            ["123foo", "456bar", "foo123456bar"],
+            [None],
+        ],
+        ["123foo", "foo", None],
+    ).tolist() == [
+        [None, 1, None],
+        [0, None, None],
+        [2],
+    ]
+    assert ak.str.index_in(
+        [
+            ["foo123bar123baz", "foo", "bar"],
+            ["123foo", "456bar", "foo123456bar"],
+            [None],
+        ],
+        ["123foo", "foo", None],
+        skip_nones=True,
+    ).tolist() == [
+        [None, 1, None],
+        [0, None, None],
+        [None],
+    ]
+
+    # Bytestrings
+
+    assert ak.str.index_in(string_repeats, [b"123foo", b"foo"]).tolist() == [
+        [None, 1, None],
+        [0, None, None],
+        [],
+    ]
+    assert ak.str.index_in(
+        [
+            [b"foo123bar123baz", b"foo", b"bar"],
+            [b"123foo", b"456bar", b"foo123456bar"],
+            [None],
+        ],
+        [b"123foo", b"foo", None],
+    ).tolist() == [
+        [None, 1, None],
+        [0, None, None],
+        [2],
+    ]
+    assert ak.str.index_in(
+        [
+            [b"foo123bar123baz", b"foo", b"bar"],
+            [b"123foo", b"456bar", b"foo123456bar"],
+            [None],
+        ],
+        [b"123foo", b"foo", None],
+        skip_nones=True,
+    ).tolist() == [
+        [None, 1, None],
+        [0, None, None],
+        [None],
+    ]

From 88f45cc2ae0a08481e288be0ea8d55ddb35fbca9 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Mon, 7 Aug 2023 23:50:59 +0100
Subject: [PATCH 62/73] fix: operate at leaf depth

---
 src/awkward/operations/str/ak_index_in.py | 24 +++++++++--------------
 src/awkward/operations/str/ak_is_in.py    | 22 ++++++++-------------
 2 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/src/awkward/operations/str/ak_index_in.py b/src/awkward/operations/str/ak_index_in.py
index 66b31e5aa8..2e0b681d7b 100644
--- a/src/awkward/operations/str/ak_index_in.py
+++ b/src/awkward/operations/str/ak_index_in.py
@@ -39,7 +39,7 @@ def index_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=Non
 def _is_maybe_optional_list_of_string(layout):
     if layout.is_list and layout.parameter("__array__") in {"string", "bytestring"}:
         return True
-    elif layout.is_option or layout.index_indexed:
+    elif layout.is_option or layout.is_indexed:
         return _is_maybe_optional_list_of_string(layout.content)
     else:
         return False
@@ -59,20 +59,14 @@ def _impl(array, value_set, skip_nones, highlevel, behavior):
     behavior = behavior_of(array, value_set, behavior=behavior)
 
     def apply(layout, **kwargs):
-        if (
-            layout.is_list
-            and layout.purelist_depth == 2
-            and _is_maybe_optional_list_of_string(layout.content)
-        ):
-            return layout.copy(
-                content=ak.from_arrow(
-                    pc.index_in(
-                        ak.to_arrow(layout.content, extensionarray=False),
-                        ak.to_arrow(value_set_layout, extensionarray=False),
-                        skip_nulls=skip_nones,
-                    ),
-                    highlevel=False,
-                )
+        if _is_maybe_optional_list_of_string(layout) and layout.purelist_depth == 1:
+            return ak.from_arrow(
+                pc.index_in(
+                    ak.to_arrow(layout, extensionarray=False),
+                    ak.to_arrow(value_set_layout, extensionarray=False),
+                    skip_nulls=skip_nones,
+                ),
+                highlevel=False,
             )
 
     out = ak._do.recursively_apply(layout, apply, behavior=behavior)
diff --git a/src/awkward/operations/str/ak_is_in.py b/src/awkward/operations/str/ak_is_in.py
index 2b5cee9d0c..3240ed5b39 100644
--- a/src/awkward/operations/str/ak_is_in.py
+++ b/src/awkward/operations/str/ak_is_in.py
@@ -58,20 +58,14 @@ def _impl(array, value_set, skip_nones, highlevel, behavior):
     behavior = behavior_of(array, value_set, behavior=behavior)
 
     def apply(layout, **kwargs):
-        if (
-            layout.is_list
-            and layout.purelist_depth == 2
-            and _is_maybe_optional_list_of_string(layout.content)
-        ):
-            return layout.copy(
-                content=ak.from_arrow(
-                    pc.is_in(
-                        ak.to_arrow(layout.content, extensionarray=False),
-                        ak.to_arrow(value_set_layout, extensionarray=False),
-                        skip_nulls=skip_nones,
-                    ),
-                    highlevel=False,
-                )
+        if _is_maybe_optional_list_of_string(layout) and layout.purelist_depth == 1:
+            return ak.from_arrow(
+                pc.is_in(
+                    ak.to_arrow(layout, extensionarray=False),
+                    ak.to_arrow(value_set_layout, extensionarray=False),
+                    skip_nulls=skip_nones,
+                ),
+                highlevel=False,
             )
 
     out = ak._do.recursively_apply(layout, apply, behavior=behavior)

From 6745ba2ac4fc9bea1c7de76481e4d57de94fb7fb Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 09:36:06 +0100
Subject: [PATCH 63/73] refactor: add internal `pyarrow.compute` helper

---
 src/awkward/_connect/pyarrow.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/awkward/_connect/pyarrow.py b/src/awkward/_connect/pyarrow.py
index 54cae0ca92..b98c17975b 100644
--- a/src/awkward/_connect/pyarrow.py
+++ b/src/awkward/_connect/pyarrow.py
@@ -1,7 +1,9 @@
 # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+from __future__ import annotations
 
 import json
 from collections.abc import Iterable, Sized
+from types import ModuleType
 
 from packaging.version import parse as parse_version
 
@@ -36,13 +38,13 @@
         error_message = "pyarrow 7.0.0 or later required for {0}"
 
 
-def import_pyarrow(name):
+def import_pyarrow(name: str) -> ModuleType:
     if pyarrow is None:
         raise ImportError(error_message.format(name))
     return pyarrow
 
 
-def import_pyarrow_parquet(name):
+def import_pyarrow_parquet(name: str) -> ModuleType:
     if pyarrow is None:
         raise ImportError(error_message.format(name))
 
@@ -51,7 +53,16 @@ def import_pyarrow_parquet(name):
     return out
 
 
-def import_fsspec(name):
+def import_pyarrow_compute(name: str) -> ModuleType:
+    if pyarrow is None:
+        raise ImportError(error_message.format(name))
+
+    import pyarrow.compute as out
+
+    return out
+
+
+def import_fsspec(name: str) -> ModuleType:
     try:
         import fsspec
 

From 4422ad82ae54aa3e3d6b36af69ad239dce7c6a6d Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 09:37:49 +0100
Subject: [PATCH 64/73] refactor: use pyarrow import helper

---
 src/awkward/operations/str/ak_capitalize.py              | 5 ++---
 src/awkward/operations/str/ak_center.py                  | 5 ++---
 src/awkward/operations/str/ak_count_substring.py         | 5 ++---
 src/awkward/operations/str/ak_count_substring_regex.py   | 5 ++---
 src/awkward/operations/str/ak_ends_with.py               | 5 ++---
 src/awkward/operations/str/ak_extract_regex.py           | 5 ++---
 src/awkward/operations/str/ak_find_substring.py          | 5 ++---
 src/awkward/operations/str/ak_find_substring_regex.py    | 5 ++---
 src/awkward/operations/str/ak_is_alnum.py                | 5 ++---
 src/awkward/operations/str/ak_is_alpha.py                | 5 ++---
 src/awkward/operations/str/ak_is_ascii.py                | 5 ++---
 src/awkward/operations/str/ak_is_decimal.py              | 5 ++---
 src/awkward/operations/str/ak_is_digit.py                | 5 ++---
 src/awkward/operations/str/ak_is_lower.py                | 5 ++---
 src/awkward/operations/str/ak_is_numeric.py              | 5 ++---
 src/awkward/operations/str/ak_is_printable.py            | 5 ++---
 src/awkward/operations/str/ak_is_space.py                | 5 ++---
 src/awkward/operations/str/ak_is_title.py                | 5 ++---
 src/awkward/operations/str/ak_is_upper.py                | 5 ++---
 src/awkward/operations/str/ak_length.py                  | 5 ++---
 src/awkward/operations/str/ak_lower.py                   | 5 ++---
 src/awkward/operations/str/ak_lpad.py                    | 5 ++---
 src/awkward/operations/str/ak_ltrim.py                   | 5 ++---
 src/awkward/operations/str/ak_ltrim_whitespace.py        | 5 ++---
 src/awkward/operations/str/ak_match_like.py              | 5 ++---
 src/awkward/operations/str/ak_match_substring.py         | 5 ++---
 src/awkward/operations/str/ak_match_substring_regex.py   | 4 ++--
 src/awkward/operations/str/ak_replace_slice.py           | 5 ++---
 src/awkward/operations/str/ak_replace_substring.py       | 5 ++---
 src/awkward/operations/str/ak_replace_substring_regex.py | 5 ++---
 src/awkward/operations/str/ak_reverse.py                 | 5 ++---
 src/awkward/operations/str/ak_rpad.py                    | 5 ++---
 src/awkward/operations/str/ak_rtrim.py                   | 5 ++---
 src/awkward/operations/str/ak_rtrim_whitespace.py        | 5 ++---
 src/awkward/operations/str/ak_split_pattern.py           | 5 ++---
 src/awkward/operations/str/ak_split_pattern_regex.py     | 5 ++---
 src/awkward/operations/str/ak_split_whitespace.py        | 4 ++--
 src/awkward/operations/str/ak_starts_with.py             | 5 ++---
 src/awkward/operations/str/ak_swapcase.py                | 5 ++---
 src/awkward/operations/str/ak_title.py                   | 5 ++---
 src/awkward/operations/str/ak_trim.py                    | 5 ++---
 src/awkward/operations/str/ak_trim_whitespace.py         | 5 ++---
 src/awkward/operations/str/ak_upper.py                   | 5 ++---
 43 files changed, 86 insertions(+), 127 deletions(-)

diff --git a/src/awkward/operations/str/ak_capitalize.py b/src/awkward/operations/str/ak_capitalize.py
index 9400c21c9e..65fd9164db 100644
--- a/src/awkward/operations/str/ak_capitalize.py
+++ b/src/awkward/operations/str/ak_capitalize.py
@@ -39,10 +39,9 @@ def capitalize(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.capitalize")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
index 9bd2246673..5999372824 100644
--- a/src/awkward/operations/str/ak_center.py
+++ b/src/awkward/operations/str/ak_center.py
@@ -44,10 +44,9 @@ def center(array, width, padding=" ", *, highlevel=True, behavior=None):
 
 
 def _impl(array, width, padding, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.center")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_count_substring.py b/src/awkward/operations/str/ak_count_substring.py
index cbd8cdc550..218e2bc5ce 100644
--- a/src/awkward/operations/str/ak_count_substring.py
+++ b/src/awkward/operations/str/ak_count_substring.py
@@ -38,10 +38,9 @@ def count_substring(
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.count_substring")
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
     apply = ak.operations.str._get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_count_substring_regex.py b/src/awkward/operations/str/ak_count_substring_regex.py
index f3041c32e0..46b36cfb73 100644
--- a/src/awkward/operations/str/ak_count_substring_regex.py
+++ b/src/awkward/operations/str/ak_count_substring_regex.py
@@ -38,10 +38,9 @@ def count_substring_regex(
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.count_substring_regex")
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
     apply = ak.operations.str._get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_ends_with.py b/src/awkward/operations/str/ak_ends_with.py
index 89f82c8fa0..189ef7b25a 100644
--- a/src/awkward/operations/str/ak_ends_with.py
+++ b/src/awkward/operations/str/ak_ends_with.py
@@ -36,10 +36,9 @@ def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=Non
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.ends_with")
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
     apply = ak.operations.str._get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_extract_regex.py b/src/awkward/operations/str/ak_extract_regex.py
index 9a4aecd038..ee0c899471 100644
--- a/src/awkward/operations/str/ak_extract_regex.py
+++ b/src/awkward/operations/str/ak_extract_regex.py
@@ -61,10 +61,9 @@ def extract_regex(array, pattern, *, highlevel=True, behavior=None):
 
 
 def _impl(array, pattern, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.extract_regex")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_find_substring.py b/src/awkward/operations/str/ak_find_substring.py
index e7a9acaaa5..7955ffbc14 100644
--- a/src/awkward/operations/str/ak_find_substring.py
+++ b/src/awkward/operations/str/ak_find_substring.py
@@ -37,10 +37,9 @@ def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavio
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.find_substring")
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
     apply = ak.operations.str._get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_find_substring_regex.py b/src/awkward/operations/str/ak_find_substring_regex.py
index a0dc5b2ce0..58edb06794 100644
--- a/src/awkward/operations/str/ak_find_substring_regex.py
+++ b/src/awkward/operations/str/ak_find_substring_regex.py
@@ -39,10 +39,9 @@ def find_substring_regex(
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.find_substring_regex")
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
     apply = ak.operations.str._get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index 2f93d87982..9866039f3d 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -39,10 +39,9 @@ def is_alnum(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_alnum")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
index c40f612e75..76a6b5721c 100644
--- a/src/awkward/operations/str/ak_is_alpha.py
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -39,10 +39,9 @@ def is_alpha(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_alpha")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_ascii.py b/src/awkward/operations/str/ak_is_ascii.py
index bc588f2888..77747b3639 100644
--- a/src/awkward/operations/str/ak_is_ascii.py
+++ b/src/awkward/operations/str/ak_is_ascii.py
@@ -39,10 +39,9 @@ def is_ascii(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_ascii")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index 26ff606bd0..fdf1a13942 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -39,10 +39,9 @@ def is_decimal(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_decimal")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
index 338b86d30a..3e66c21980 100644
--- a/src/awkward/operations/str/ak_is_digit.py
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -41,10 +41,9 @@ def is_digit(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_digit")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
index 87dd3462a6..c36ab6056a 100644
--- a/src/awkward/operations/str/ak_is_lower.py
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -39,10 +39,9 @@ def is_lower(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_lower")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
index 437ff31b47..6996782f3f 100644
--- a/src/awkward/operations/str/ak_is_numeric.py
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -41,10 +41,9 @@ def is_numeric(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_numeric")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
index 24c5184fde..fcfdde24d6 100644
--- a/src/awkward/operations/str/ak_is_printable.py
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -39,10 +39,9 @@ def is_printable(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_printable")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
index 5b69031d1f..2b264664a0 100644
--- a/src/awkward/operations/str/ak_is_space.py
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -39,10 +39,9 @@ def is_space(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_space")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_title.py b/src/awkward/operations/str/ak_is_title.py
index 5275a1df0e..4e65d60037 100644
--- a/src/awkward/operations/str/ak_is_title.py
+++ b/src/awkward/operations/str/ak_is_title.py
@@ -39,10 +39,9 @@ def is_title(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_title")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/ak_is_upper.py
index fa20f04fe6..cf460e6aa3 100644
--- a/src/awkward/operations/str/ak_is_upper.py
+++ b/src/awkward/operations/str/ak_is_upper.py
@@ -39,10 +39,9 @@ def is_upper(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.is_upper")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_length.py b/src/awkward/operations/str/ak_length.py
index e5ef1c7b84..800afd4287 100644
--- a/src/awkward/operations/str/ak_length.py
+++ b/src/awkward/operations/str/ak_length.py
@@ -39,10 +39,9 @@ def length(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.length")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_lower.py b/src/awkward/operations/str/ak_lower.py
index 971ffe043e..51c391311b 100644
--- a/src/awkward/operations/str/ak_lower.py
+++ b/src/awkward/operations/str/ak_lower.py
@@ -39,10 +39,9 @@ def lower(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.lower")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/ak_lpad.py
index 909f1663d9..ed9b5f98ac 100644
--- a/src/awkward/operations/str/ak_lpad.py
+++ b/src/awkward/operations/str/ak_lpad.py
@@ -44,10 +44,9 @@ def lpad(array, width, padding=" ", *, highlevel=True, behavior=None):
 
 
 def _impl(array, width, padding, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.lpad")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
index 0180270067..062c49ba95 100644
--- a/src/awkward/operations/str/ak_ltrim.py
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -43,10 +43,9 @@ def ltrim(array, characters, *, highlevel=True, behavior=None):
 
 
 def _impl(array, characters, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.ltrim")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_ltrim_whitespace.py b/src/awkward/operations/str/ak_ltrim_whitespace.py
index e415a1400f..350cd52f05 100644
--- a/src/awkward/operations/str/ak_ltrim_whitespace.py
+++ b/src/awkward/operations/str/ak_ltrim_whitespace.py
@@ -38,10 +38,9 @@ def ltrim_whitespace(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.ltrim_whitespace")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_match_like.py b/src/awkward/operations/str/ak_match_like.py
index ef8462d513..5515a829bd 100644
--- a/src/awkward/operations/str/ak_match_like.py
+++ b/src/awkward/operations/str/ak_match_like.py
@@ -38,10 +38,9 @@ def match_like(array, pattern, *, ignore_case=False, highlevel=True, behavior=No
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.match_like")
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
     apply = ak.operations.str._get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_match_substring.py b/src/awkward/operations/str/ak_match_substring.py
index 9530b96388..d1bf7626c6 100644
--- a/src/awkward/operations/str/ak_match_substring.py
+++ b/src/awkward/operations/str/ak_match_substring.py
@@ -38,10 +38,9 @@ def match_substring(
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.match_substring")
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
     apply = ak.operations.str._get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_match_substring_regex.py b/src/awkward/operations/str/ak_match_substring_regex.py
index bf0f659765..5a48f9ba62 100644
--- a/src/awkward/operations/str/ak_match_substring_regex.py
+++ b/src/awkward/operations/str/ak_match_substring_regex.py
@@ -38,9 +38,9 @@ def match_substring_regex(
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
-    import pyarrow.compute as pc
+    pc = import_pyarrow_compute("ak.str.match_substring_regex")
 
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index cd80f111aa..44161cb6c2 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -45,10 +45,9 @@ def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=N
 
 
 def _impl(array, start, stop, replacement, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.replace_slice")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index 691e9fd3e7..4408beb6fd 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -47,10 +47,9 @@ def replace_substring(
 
 
 def _impl(array, pattern, replacement, max_replacements, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.replace_substring")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index 77dc2c12b2..2380ba3e29 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -47,10 +47,9 @@ def replace_substring_regex(
 
 
 def _impl(array, pattern, replacement, max_replacements, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.replace_substring_regex")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_reverse.py b/src/awkward/operations/str/ak_reverse.py
index 6f15db9df8..2a573f0ccc 100644
--- a/src/awkward/operations/str/ak_reverse.py
+++ b/src/awkward/operations/str/ak_reverse.py
@@ -39,10 +39,9 @@ def reverse(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.reverse")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/ak_rpad.py
index da0cf61fb6..34748043b8 100644
--- a/src/awkward/operations/str/ak_rpad.py
+++ b/src/awkward/operations/str/ak_rpad.py
@@ -44,10 +44,9 @@ def rpad(array, width, padding=" ", *, highlevel=True, behavior=None):
 
 
 def _impl(array, width, padding, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.rpad")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
index 3d1d518754..f83c651631 100644
--- a/src/awkward/operations/str/ak_rtrim.py
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -43,10 +43,9 @@ def rtrim(array, characters, *, highlevel=True, behavior=None):
 
 
 def _impl(array, characters, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.rtrim")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_rtrim_whitespace.py b/src/awkward/operations/str/ak_rtrim_whitespace.py
index e2064bc412..cba8760bfe 100644
--- a/src/awkward/operations/str/ak_rtrim_whitespace.py
+++ b/src/awkward/operations/str/ak_rtrim_whitespace.py
@@ -38,10 +38,9 @@ def rtrim_whitespace(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.rtrim_whitespace")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
index b94187d9fe..c7a84b1bf9 100644
--- a/src/awkward/operations/str/ak_split_pattern.py
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -40,10 +40,9 @@ def split_pattern(
 
 
 def _impl(array, pattern, max_splits, reverse, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.split_pattern")
     behavior = behavior_of(array, behavior=behavior)
     action = ak.operations.str._get_split_action(
         pc.split_pattern,
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/ak_split_pattern_regex.py
index 56a7876efd..c870a922b3 100644
--- a/src/awkward/operations/str/ak_split_pattern_regex.py
+++ b/src/awkward/operations/str/ak_split_pattern_regex.py
@@ -40,10 +40,9 @@ def split_pattern_regex(
 
 
 def _impl(array, pattern, max_splits, reverse, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.split_pattern_regex")
     behavior = behavior_of(array, behavior=behavior)
     action = ak.operations.str._get_split_action(
         pc.split_pattern_regex,
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
index 07be7a0e5c..31bab91854 100644
--- a/src/awkward/operations/str/ak_split_whitespace.py
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -48,9 +48,9 @@ def split_whitespace(
 
 
 def _impl(array, max_splits, reverse, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.split_whitespace")
     behavior = behavior_of(array, behavior=behavior)
     action = ak.operations.str._get_split_action(
         pc.utf8_split_whitespace,
diff --git a/src/awkward/operations/str/ak_starts_with.py b/src/awkward/operations/str/ak_starts_with.py
index a203c5a318..3a0799334b 100644
--- a/src/awkward/operations/str/ak_starts_with.py
+++ b/src/awkward/operations/str/ak_starts_with.py
@@ -36,10 +36,9 @@ def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=N
 
 
 def _impl(array, pattern, ignore_case, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.starts_with")
     layout = ak.to_layout(array, allow_record=False, allow_other=True)
     behavior = behavior_of(array, behavior=behavior)
     apply = ak.operations.str._get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_swapcase.py b/src/awkward/operations/str/ak_swapcase.py
index 36d6d53e11..6c99413dc5 100644
--- a/src/awkward/operations/str/ak_swapcase.py
+++ b/src/awkward/operations/str/ak_swapcase.py
@@ -39,10 +39,9 @@ def swapcase(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.swapcase")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_title.py b/src/awkward/operations/str/ak_title.py
index cdd147c012..e8bbd1af92 100644
--- a/src/awkward/operations/str/ak_title.py
+++ b/src/awkward/operations/str/ak_title.py
@@ -39,10 +39,9 @@ def title(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.title")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index c43df209be..626e762702 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -43,10 +43,9 @@ def trim(array, characters, *, highlevel=True, behavior=None):
 
 
 def _impl(array, characters, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.trim")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_trim_whitespace.py b/src/awkward/operations/str/ak_trim_whitespace.py
index 197aa777cd..0453b8a03a 100644
--- a/src/awkward/operations/str/ak_trim_whitespace.py
+++ b/src/awkward/operations/str/ak_trim_whitespace.py
@@ -38,10 +38,9 @@ def trim_whitespace(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.trim_whitespace")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(
diff --git a/src/awkward/operations/str/ak_upper.py b/src/awkward/operations/str/ak_upper.py
index 776b0526c0..dd25dc5de8 100644
--- a/src/awkward/operations/str/ak_upper.py
+++ b/src/awkward/operations/str/ak_upper.py
@@ -39,10 +39,9 @@ def upper(array, *, highlevel=True, behavior=None):
 
 
 def _impl(array, highlevel, behavior):
-    import awkward._connect.pyarrow  # noqa: F401, I001
-
-    import pyarrow.compute as pc
+    from awkward._connect.pyarrow import import_pyarrow_compute
 
+    pc = import_pyarrow_compute("ak.str.upper")
     behavior = behavior_of(array, behavior=behavior)
 
     out = ak._do.recursively_apply(

From ec6cefa0450e275978e40567564f47ca4e1b976b Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 09:40:20 +0100
Subject: [PATCH 65/73] refactor: add `module` and `name` arguments to
 `high_level_function`

---
 src/awkward/operations/str/ak_capitalize.py              | 2 +-
 src/awkward/operations/str/ak_center.py                  | 2 +-
 src/awkward/operations/str/ak_count_substring.py         | 2 +-
 src/awkward/operations/str/ak_count_substring_regex.py   | 2 +-
 src/awkward/operations/str/ak_ends_with.py               | 2 +-
 src/awkward/operations/str/ak_extract_regex.py           | 2 +-
 src/awkward/operations/str/ak_find_substring.py          | 2 +-
 src/awkward/operations/str/ak_find_substring_regex.py    | 2 +-
 src/awkward/operations/str/ak_index_in.py                | 2 +-
 src/awkward/operations/str/ak_is_alnum.py                | 2 +-
 src/awkward/operations/str/ak_is_alpha.py                | 2 +-
 src/awkward/operations/str/ak_is_ascii.py                | 2 +-
 src/awkward/operations/str/ak_is_decimal.py              | 2 +-
 src/awkward/operations/str/ak_is_digit.py                | 2 +-
 src/awkward/operations/str/ak_is_in.py                   | 2 +-
 src/awkward/operations/str/ak_is_lower.py                | 2 +-
 src/awkward/operations/str/ak_is_numeric.py              | 2 +-
 src/awkward/operations/str/ak_is_printable.py            | 2 +-
 src/awkward/operations/str/ak_is_space.py                | 2 +-
 src/awkward/operations/str/ak_is_title.py                | 2 +-
 src/awkward/operations/str/ak_is_upper.py                | 2 +-
 src/awkward/operations/str/ak_join.py                    | 2 +-
 src/awkward/operations/str/ak_join_element_wise.py       | 2 +-
 src/awkward/operations/str/ak_length.py                  | 2 +-
 src/awkward/operations/str/ak_lower.py                   | 2 +-
 src/awkward/operations/str/ak_lpad.py                    | 2 +-
 src/awkward/operations/str/ak_ltrim.py                   | 2 +-
 src/awkward/operations/str/ak_ltrim_whitespace.py        | 2 +-
 src/awkward/operations/str/ak_match_like.py              | 2 +-
 src/awkward/operations/str/ak_match_substring.py         | 2 +-
 src/awkward/operations/str/ak_match_substring_regex.py   | 2 +-
 src/awkward/operations/str/ak_repeat.py                  | 2 +-
 src/awkward/operations/str/ak_replace_slice.py           | 2 +-
 src/awkward/operations/str/ak_replace_substring.py       | 2 +-
 src/awkward/operations/str/ak_replace_substring_regex.py | 2 +-
 src/awkward/operations/str/ak_reverse.py                 | 2 +-
 src/awkward/operations/str/ak_rpad.py                    | 2 +-
 src/awkward/operations/str/ak_rtrim.py                   | 2 +-
 src/awkward/operations/str/ak_rtrim_whitespace.py        | 2 +-
 src/awkward/operations/str/ak_slice.py                   | 2 +-
 src/awkward/operations/str/ak_split_pattern.py           | 2 +-
 src/awkward/operations/str/ak_split_pattern_regex.py     | 2 +-
 src/awkward/operations/str/ak_split_whitespace.py        | 2 +-
 src/awkward/operations/str/ak_starts_with.py             | 2 +-
 src/awkward/operations/str/ak_swapcase.py                | 2 +-
 src/awkward/operations/str/ak_title.py                   | 2 +-
 src/awkward/operations/str/ak_trim.py                    | 2 +-
 src/awkward/operations/str/ak_trim_whitespace.py         | 2 +-
 src/awkward/operations/str/ak_upper.py                   | 2 +-
 49 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/src/awkward/operations/str/ak_capitalize.py b/src/awkward/operations/str/ak_capitalize.py
index 65fd9164db..d555d23138 100644
--- a/src/awkward/operations/str/ak_capitalize.py
+++ b/src/awkward/operations/str/ak_capitalize.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def capitalize(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
index 5999372824..3a21d1520f 100644
--- a/src/awkward/operations/str/ak_center.py
+++ b/src/awkward/operations/str/ak_center.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def center(array, width, padding=" ", *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_count_substring.py b/src/awkward/operations/str/ak_count_substring.py
index 218e2bc5ce..bae8942694 100644
--- a/src/awkward/operations/str/ak_count_substring.py
+++ b/src/awkward/operations/str/ak_count_substring.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def count_substring(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_count_substring_regex.py b/src/awkward/operations/str/ak_count_substring_regex.py
index 46b36cfb73..63349c9d75 100644
--- a/src/awkward/operations/str/ak_count_substring_regex.py
+++ b/src/awkward/operations/str/ak_count_substring_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def count_substring_regex(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_ends_with.py b/src/awkward/operations/str/ak_ends_with.py
index 189ef7b25a..898acf5e9b 100644
--- a/src/awkward/operations/str/ak_ends_with.py
+++ b/src/awkward/operations/str/ak_ends_with.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_extract_regex.py b/src/awkward/operations/str/ak_extract_regex.py
index ee0c899471..b4e5f522e6 100644
--- a/src/awkward/operations/str/ak_extract_regex.py
+++ b/src/awkward/operations/str/ak_extract_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def extract_regex(array, pattern, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_find_substring.py b/src/awkward/operations/str/ak_find_substring.py
index 7955ffbc14..875e3c6f25 100644
--- a/src/awkward/operations/str/ak_find_substring.py
+++ b/src/awkward/operations/str/ak_find_substring.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_find_substring_regex.py b/src/awkward/operations/str/ak_find_substring_regex.py
index 58edb06794..952c51c41b 100644
--- a/src/awkward/operations/str/ak_find_substring_regex.py
+++ b/src/awkward/operations/str/ak_find_substring_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def find_substring_regex(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_index_in.py b/src/awkward/operations/str/ak_index_in.py
index 2e0b681d7b..02cd16f997 100644
--- a/src/awkward/operations/str/ak_index_in.py
+++ b/src/awkward/operations/str/ak_index_in.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def index_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index 9866039f3d..23bbb4e2be 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_alnum(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
index 76a6b5721c..31fcb06275 100644
--- a/src/awkward/operations/str/ak_is_alpha.py
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_alpha(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_ascii.py b/src/awkward/operations/str/ak_is_ascii.py
index 77747b3639..d7ddb3c103 100644
--- a/src/awkward/operations/str/ak_is_ascii.py
+++ b/src/awkward/operations/str/ak_is_ascii.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_ascii(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index fdf1a13942..524ea18c7b 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_decimal(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
index 3e66c21980..1fc5fafe59 100644
--- a/src/awkward/operations/str/ak_is_digit.py
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_digit(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_in.py b/src/awkward/operations/str/ak_is_in.py
index 3240ed5b39..528f8f8558 100644
--- a/src/awkward/operations/str/ak_is_in.py
+++ b/src/awkward/operations/str/ak_is_in.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
index c36ab6056a..39afa0e21e 100644
--- a/src/awkward/operations/str/ak_is_lower.py
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_lower(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
index 6996782f3f..438e6fb01a 100644
--- a/src/awkward/operations/str/ak_is_numeric.py
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_numeric(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
index fcfdde24d6..5fdd3b50d4 100644
--- a/src/awkward/operations/str/ak_is_printable.py
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_printable(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
index 2b264664a0..a3acce9a91 100644
--- a/src/awkward/operations/str/ak_is_space.py
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_space(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_title.py b/src/awkward/operations/str/ak_is_title.py
index 4e65d60037..d8f050725f 100644
--- a/src/awkward/operations/str/ak_is_title.py
+++ b/src/awkward/operations/str/ak_is_title.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_title(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/ak_is_upper.py
index cf460e6aa3..f814d77d35 100644
--- a/src/awkward/operations/str/ak_is_upper.py
+++ b/src/awkward/operations/str/ak_is_upper.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def is_upper(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_join.py b/src/awkward/operations/str/ak_join.py
index 40289bc4a6..b04f6a1dd9 100644
--- a/src/awkward/operations/str/ak_join.py
+++ b/src/awkward/operations/str/ak_join.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def join(array, separator, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_join_element_wise.py b/src/awkward/operations/str/ak_join_element_wise.py
index ad3639adb6..80efa60b18 100644
--- a/src/awkward/operations/str/ak_join_element_wise.py
+++ b/src/awkward/operations/str/ak_join_element_wise.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def join_element_wise(*arrays, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_length.py b/src/awkward/operations/str/ak_length.py
index 800afd4287..fba1e2f4a6 100644
--- a/src/awkward/operations/str/ak_length.py
+++ b/src/awkward/operations/str/ak_length.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def length(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_lower.py b/src/awkward/operations/str/ak_lower.py
index 51c391311b..61453bb0a4 100644
--- a/src/awkward/operations/str/ak_lower.py
+++ b/src/awkward/operations/str/ak_lower.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def lower(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/ak_lpad.py
index ed9b5f98ac..11575a34ea 100644
--- a/src/awkward/operations/str/ak_lpad.py
+++ b/src/awkward/operations/str/ak_lpad.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def lpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
index 062c49ba95..4164700111 100644
--- a/src/awkward/operations/str/ak_ltrim.py
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def ltrim(array, characters, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_ltrim_whitespace.py b/src/awkward/operations/str/ak_ltrim_whitespace.py
index 350cd52f05..d095f93247 100644
--- a/src/awkward/operations/str/ak_ltrim_whitespace.py
+++ b/src/awkward/operations/str/ak_ltrim_whitespace.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def ltrim_whitespace(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_match_like.py b/src/awkward/operations/str/ak_match_like.py
index 5515a829bd..7b688f69a2 100644
--- a/src/awkward/operations/str/ak_match_like.py
+++ b/src/awkward/operations/str/ak_match_like.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def match_like(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_match_substring.py b/src/awkward/operations/str/ak_match_substring.py
index d1bf7626c6..f81f1a7fcd 100644
--- a/src/awkward/operations/str/ak_match_substring.py
+++ b/src/awkward/operations/str/ak_match_substring.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def match_substring(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_match_substring_regex.py b/src/awkward/operations/str/ak_match_substring_regex.py
index 5a48f9ba62..4a8d4b515f 100644
--- a/src/awkward/operations/str/ak_match_substring_regex.py
+++ b/src/awkward/operations/str/ak_match_substring_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def match_substring_regex(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/ak_repeat.py
index 75324de63a..7110721729 100644
--- a/src/awkward/operations/str/ak_repeat.py
+++ b/src/awkward/operations/str/ak_repeat.py
@@ -13,7 +13,7 @@
 np = NumpyMetadata.instance()
 
 
-@high_level_function
+@high_level_function()
 def repeat(array, num_repeats, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index 44161cb6c2..c1c478c435 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index 4408beb6fd..328c8a36ac 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def replace_substring(
     array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index 2380ba3e29..68ef66ad40 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def replace_substring_regex(
     array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_reverse.py b/src/awkward/operations/str/ak_reverse.py
index 2a573f0ccc..a360970404 100644
--- a/src/awkward/operations/str/ak_reverse.py
+++ b/src/awkward/operations/str/ak_reverse.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def reverse(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/ak_rpad.py
index 34748043b8..69499caa75 100644
--- a/src/awkward/operations/str/ak_rpad.py
+++ b/src/awkward/operations/str/ak_rpad.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def rpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
index f83c651631..4aca4c0e8c 100644
--- a/src/awkward/operations/str/ak_rtrim.py
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def rtrim(array, characters, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_rtrim_whitespace.py b/src/awkward/operations/str/ak_rtrim_whitespace.py
index cba8760bfe..e61037574d 100644
--- a/src/awkward/operations/str/ak_rtrim_whitespace.py
+++ b/src/awkward/operations/str/ak_rtrim_whitespace.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def rtrim_whitespace(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_slice.py b/src/awkward/operations/str/ak_slice.py
index 7afaab7d93..dc7e9df98c 100644
--- a/src/awkward/operations/str/ak_slice.py
+++ b/src/awkward/operations/str/ak_slice.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
index c7a84b1bf9..e967106c4f 100644
--- a/src/awkward/operations/str/ak_split_pattern.py
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def split_pattern(
     array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/ak_split_pattern_regex.py
index c870a922b3..e74e8c05b1 100644
--- a/src/awkward/operations/str/ak_split_pattern_regex.py
+++ b/src/awkward/operations/str/ak_split_pattern_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def split_pattern_regex(
     array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
index 31bab91854..aa4ffeaf78 100644
--- a/src/awkward/operations/str/ak_split_whitespace.py
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def split_whitespace(
     array, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_starts_with.py b/src/awkward/operations/str/ak_starts_with.py
index 3a0799334b..69e9192a65 100644
--- a/src/awkward/operations/str/ak_starts_with.py
+++ b/src/awkward/operations/str/ak_starts_with.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_swapcase.py b/src/awkward/operations/str/ak_swapcase.py
index 6c99413dc5..208c384c51 100644
--- a/src/awkward/operations/str/ak_swapcase.py
+++ b/src/awkward/operations/str/ak_swapcase.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def swapcase(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_title.py b/src/awkward/operations/str/ak_title.py
index e8bbd1af92..87e8feaca3 100644
--- a/src/awkward/operations/str/ak_title.py
+++ b/src/awkward/operations/str/ak_title.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def title(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index 626e762702..4d05fa8c98 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def trim(array, characters, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_trim_whitespace.py b/src/awkward/operations/str/ak_trim_whitespace.py
index 0453b8a03a..edb8e22878 100644
--- a/src/awkward/operations/str/ak_trim_whitespace.py
+++ b/src/awkward/operations/str/ak_trim_whitespace.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def trim_whitespace(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_upper.py b/src/awkward/operations/str/ak_upper.py
index dd25dc5de8..8132071295 100644
--- a/src/awkward/operations/str/ak_upper.py
+++ b/src/awkward/operations/str/ak_upper.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function
+@high_level_function()
 def upper(array, *, highlevel=True, behavior=None):
     """
     Args:

From 307a3ea3dc5117af545bb81c4e238c002113cdac Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 09:47:07 +0100
Subject: [PATCH 66/73] fix: pass `module` to str `high_level_function`

---
 src/awkward/operations/str/ak_capitalize.py              | 2 +-
 src/awkward/operations/str/ak_center.py                  | 2 +-
 src/awkward/operations/str/ak_count_substring.py         | 2 +-
 src/awkward/operations/str/ak_count_substring_regex.py   | 2 +-
 src/awkward/operations/str/ak_ends_with.py               | 2 +-
 src/awkward/operations/str/ak_extract_regex.py           | 2 +-
 src/awkward/operations/str/ak_find_substring.py          | 2 +-
 src/awkward/operations/str/ak_find_substring_regex.py    | 2 +-
 src/awkward/operations/str/ak_index_in.py                | 2 +-
 src/awkward/operations/str/ak_is_alnum.py                | 2 +-
 src/awkward/operations/str/ak_is_alpha.py                | 2 +-
 src/awkward/operations/str/ak_is_ascii.py                | 2 +-
 src/awkward/operations/str/ak_is_decimal.py              | 2 +-
 src/awkward/operations/str/ak_is_digit.py                | 2 +-
 src/awkward/operations/str/ak_is_in.py                   | 2 +-
 src/awkward/operations/str/ak_is_lower.py                | 2 +-
 src/awkward/operations/str/ak_is_numeric.py              | 2 +-
 src/awkward/operations/str/ak_is_printable.py            | 2 +-
 src/awkward/operations/str/ak_is_space.py                | 2 +-
 src/awkward/operations/str/ak_is_title.py                | 2 +-
 src/awkward/operations/str/ak_is_upper.py                | 2 +-
 src/awkward/operations/str/ak_join.py                    | 2 +-
 src/awkward/operations/str/ak_join_element_wise.py       | 2 +-
 src/awkward/operations/str/ak_length.py                  | 2 +-
 src/awkward/operations/str/ak_lower.py                   | 2 +-
 src/awkward/operations/str/ak_lpad.py                    | 2 +-
 src/awkward/operations/str/ak_ltrim.py                   | 2 +-
 src/awkward/operations/str/ak_ltrim_whitespace.py        | 2 +-
 src/awkward/operations/str/ak_match_like.py              | 2 +-
 src/awkward/operations/str/ak_match_substring.py         | 2 +-
 src/awkward/operations/str/ak_match_substring_regex.py   | 2 +-
 src/awkward/operations/str/ak_repeat.py                  | 2 +-
 src/awkward/operations/str/ak_replace_slice.py           | 2 +-
 src/awkward/operations/str/ak_replace_substring.py       | 2 +-
 src/awkward/operations/str/ak_replace_substring_regex.py | 2 +-
 src/awkward/operations/str/ak_reverse.py                 | 2 +-
 src/awkward/operations/str/ak_rpad.py                    | 2 +-
 src/awkward/operations/str/ak_rtrim.py                   | 2 +-
 src/awkward/operations/str/ak_rtrim_whitespace.py        | 2 +-
 src/awkward/operations/str/ak_slice.py                   | 2 +-
 src/awkward/operations/str/ak_split_pattern.py           | 2 +-
 src/awkward/operations/str/ak_split_pattern_regex.py     | 2 +-
 src/awkward/operations/str/ak_split_whitespace.py        | 2 +-
 src/awkward/operations/str/ak_starts_with.py             | 2 +-
 src/awkward/operations/str/ak_swapcase.py                | 2 +-
 src/awkward/operations/str/ak_title.py                   | 2 +-
 src/awkward/operations/str/ak_trim.py                    | 2 +-
 src/awkward/operations/str/ak_trim_whitespace.py         | 2 +-
 src/awkward/operations/str/ak_upper.py                   | 2 +-
 49 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/src/awkward/operations/str/ak_capitalize.py b/src/awkward/operations/str/ak_capitalize.py
index d555d23138..6592da247a 100644
--- a/src/awkward/operations/str/ak_capitalize.py
+++ b/src/awkward/operations/str/ak_capitalize.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def capitalize(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
index 3a21d1520f..8ccc51c3fb 100644
--- a/src/awkward/operations/str/ak_center.py
+++ b/src/awkward/operations/str/ak_center.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def center(array, width, padding=" ", *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_count_substring.py b/src/awkward/operations/str/ak_count_substring.py
index bae8942694..6ac2608f0d 100644
--- a/src/awkward/operations/str/ak_count_substring.py
+++ b/src/awkward/operations/str/ak_count_substring.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def count_substring(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_count_substring_regex.py b/src/awkward/operations/str/ak_count_substring_regex.py
index 63349c9d75..f84d898eff 100644
--- a/src/awkward/operations/str/ak_count_substring_regex.py
+++ b/src/awkward/operations/str/ak_count_substring_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def count_substring_regex(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_ends_with.py b/src/awkward/operations/str/ak_ends_with.py
index 898acf5e9b..76cd93949f 100644
--- a/src/awkward/operations/str/ak_ends_with.py
+++ b/src/awkward/operations/str/ak_ends_with.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_extract_regex.py b/src/awkward/operations/str/ak_extract_regex.py
index b4e5f522e6..29f2a05dce 100644
--- a/src/awkward/operations/str/ak_extract_regex.py
+++ b/src/awkward/operations/str/ak_extract_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def extract_regex(array, pattern, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_find_substring.py b/src/awkward/operations/str/ak_find_substring.py
index 875e3c6f25..626e027fb2 100644
--- a/src/awkward/operations/str/ak_find_substring.py
+++ b/src/awkward/operations/str/ak_find_substring.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_find_substring_regex.py b/src/awkward/operations/str/ak_find_substring_regex.py
index 952c51c41b..68b206d5ba 100644
--- a/src/awkward/operations/str/ak_find_substring_regex.py
+++ b/src/awkward/operations/str/ak_find_substring_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def find_substring_regex(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_index_in.py b/src/awkward/operations/str/ak_index_in.py
index 02cd16f997..559856feb8 100644
--- a/src/awkward/operations/str/ak_index_in.py
+++ b/src/awkward/operations/str/ak_index_in.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def index_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index 23bbb4e2be..d3a9b8e0a1 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_alnum(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
index 31fcb06275..987538ca95 100644
--- a/src/awkward/operations/str/ak_is_alpha.py
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_alpha(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_ascii.py b/src/awkward/operations/str/ak_is_ascii.py
index d7ddb3c103..4fcdd9d518 100644
--- a/src/awkward/operations/str/ak_is_ascii.py
+++ b/src/awkward/operations/str/ak_is_ascii.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_ascii(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index 524ea18c7b..8d5b607791 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_decimal(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
index 1fc5fafe59..2c8ba67827 100644
--- a/src/awkward/operations/str/ak_is_digit.py
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_digit(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_in.py b/src/awkward/operations/str/ak_is_in.py
index 528f8f8558..2df2dfe74d 100644
--- a/src/awkward/operations/str/ak_is_in.py
+++ b/src/awkward/operations/str/ak_is_in.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
index 39afa0e21e..f9cbb78fb7 100644
--- a/src/awkward/operations/str/ak_is_lower.py
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_lower(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
index 438e6fb01a..3f1817c169 100644
--- a/src/awkward/operations/str/ak_is_numeric.py
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_numeric(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
index 5fdd3b50d4..574439723d 100644
--- a/src/awkward/operations/str/ak_is_printable.py
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_printable(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
index a3acce9a91..884521eb45 100644
--- a/src/awkward/operations/str/ak_is_space.py
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_space(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_title.py b/src/awkward/operations/str/ak_is_title.py
index d8f050725f..38b105224e 100644
--- a/src/awkward/operations/str/ak_is_title.py
+++ b/src/awkward/operations/str/ak_is_title.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_title(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/ak_is_upper.py
index f814d77d35..a8a301a65f 100644
--- a/src/awkward/operations/str/ak_is_upper.py
+++ b/src/awkward/operations/str/ak_is_upper.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def is_upper(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_join.py b/src/awkward/operations/str/ak_join.py
index b04f6a1dd9..8504e8ccb3 100644
--- a/src/awkward/operations/str/ak_join.py
+++ b/src/awkward/operations/str/ak_join.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def join(array, separator, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_join_element_wise.py b/src/awkward/operations/str/ak_join_element_wise.py
index 80efa60b18..f78a354b4f 100644
--- a/src/awkward/operations/str/ak_join_element_wise.py
+++ b/src/awkward/operations/str/ak_join_element_wise.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def join_element_wise(*arrays, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_length.py b/src/awkward/operations/str/ak_length.py
index fba1e2f4a6..3471d61c12 100644
--- a/src/awkward/operations/str/ak_length.py
+++ b/src/awkward/operations/str/ak_length.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def length(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_lower.py b/src/awkward/operations/str/ak_lower.py
index 61453bb0a4..098aa24423 100644
--- a/src/awkward/operations/str/ak_lower.py
+++ b/src/awkward/operations/str/ak_lower.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def lower(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/ak_lpad.py
index 11575a34ea..2398463eab 100644
--- a/src/awkward/operations/str/ak_lpad.py
+++ b/src/awkward/operations/str/ak_lpad.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def lpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
index 4164700111..f5f4dca355 100644
--- a/src/awkward/operations/str/ak_ltrim.py
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def ltrim(array, characters, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_ltrim_whitespace.py b/src/awkward/operations/str/ak_ltrim_whitespace.py
index d095f93247..73e4624ced 100644
--- a/src/awkward/operations/str/ak_ltrim_whitespace.py
+++ b/src/awkward/operations/str/ak_ltrim_whitespace.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def ltrim_whitespace(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_match_like.py b/src/awkward/operations/str/ak_match_like.py
index 7b688f69a2..95db65ba7e 100644
--- a/src/awkward/operations/str/ak_match_like.py
+++ b/src/awkward/operations/str/ak_match_like.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def match_like(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_match_substring.py b/src/awkward/operations/str/ak_match_substring.py
index f81f1a7fcd..3bf474c050 100644
--- a/src/awkward/operations/str/ak_match_substring.py
+++ b/src/awkward/operations/str/ak_match_substring.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def match_substring(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_match_substring_regex.py b/src/awkward/operations/str/ak_match_substring_regex.py
index 4a8d4b515f..3a0b65d11f 100644
--- a/src/awkward/operations/str/ak_match_substring_regex.py
+++ b/src/awkward/operations/str/ak_match_substring_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def match_substring_regex(
     array, pattern, *, ignore_case=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/ak_repeat.py
index 7110721729..c2e4704dad 100644
--- a/src/awkward/operations/str/ak_repeat.py
+++ b/src/awkward/operations/str/ak_repeat.py
@@ -13,7 +13,7 @@
 np = NumpyMetadata.instance()
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def repeat(array, num_repeats, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index c1c478c435..573359e140 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index 328c8a36ac..595f606787 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def replace_substring(
     array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index 68ef66ad40..e20f1e662c 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def replace_substring_regex(
     array, pattern, replacement, *, max_replacements=None, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_reverse.py b/src/awkward/operations/str/ak_reverse.py
index a360970404..bcc249e7e4 100644
--- a/src/awkward/operations/str/ak_reverse.py
+++ b/src/awkward/operations/str/ak_reverse.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def reverse(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/ak_rpad.py
index 69499caa75..e46e43a5d8 100644
--- a/src/awkward/operations/str/ak_rpad.py
+++ b/src/awkward/operations/str/ak_rpad.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def rpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
index 4aca4c0e8c..88f562a1a5 100644
--- a/src/awkward/operations/str/ak_rtrim.py
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def rtrim(array, characters, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_rtrim_whitespace.py b/src/awkward/operations/str/ak_rtrim_whitespace.py
index e61037574d..e438a98363 100644
--- a/src/awkward/operations/str/ak_rtrim_whitespace.py
+++ b/src/awkward/operations/str/ak_rtrim_whitespace.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def rtrim_whitespace(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_slice.py b/src/awkward/operations/str/ak_slice.py
index dc7e9df98c..06b67c59e7 100644
--- a/src/awkward/operations/str/ak_slice.py
+++ b/src/awkward/operations/str/ak_slice.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
index e967106c4f..d2ef682562 100644
--- a/src/awkward/operations/str/ak_split_pattern.py
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def split_pattern(
     array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/ak_split_pattern_regex.py
index e74e8c05b1..373a1c0db6 100644
--- a/src/awkward/operations/str/ak_split_pattern_regex.py
+++ b/src/awkward/operations/str/ak_split_pattern_regex.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def split_pattern_regex(
     array, pattern, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
index aa4ffeaf78..f534de5bd5 100644
--- a/src/awkward/operations/str/ak_split_whitespace.py
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def split_whitespace(
     array, *, max_splits=None, reverse=False, highlevel=True, behavior=None
 ):
diff --git a/src/awkward/operations/str/ak_starts_with.py b/src/awkward/operations/str/ak_starts_with.py
index 69e9192a65..d055c93355 100644
--- a/src/awkward/operations/str/ak_starts_with.py
+++ b/src/awkward/operations/str/ak_starts_with.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_swapcase.py b/src/awkward/operations/str/ak_swapcase.py
index 208c384c51..e5eb0ee52e 100644
--- a/src/awkward/operations/str/ak_swapcase.py
+++ b/src/awkward/operations/str/ak_swapcase.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def swapcase(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_title.py b/src/awkward/operations/str/ak_title.py
index 87e8feaca3..aac266547b 100644
--- a/src/awkward/operations/str/ak_title.py
+++ b/src/awkward/operations/str/ak_title.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def title(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index 4d05fa8c98..1796fbaeb5 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def trim(array, characters, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_trim_whitespace.py b/src/awkward/operations/str/ak_trim_whitespace.py
index edb8e22878..6568249969 100644
--- a/src/awkward/operations/str/ak_trim_whitespace.py
+++ b/src/awkward/operations/str/ak_trim_whitespace.py
@@ -9,7 +9,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def trim_whitespace(array, *, highlevel=True, behavior=None):
     """
     Args:
diff --git a/src/awkward/operations/str/ak_upper.py b/src/awkward/operations/str/ak_upper.py
index 8132071295..2391b439af 100644
--- a/src/awkward/operations/str/ak_upper.py
+++ b/src/awkward/operations/str/ak_upper.py
@@ -8,7 +8,7 @@
 from awkward._layout import wrap_layout
 
 
-@high_level_function()
+@high_level_function(module="ak.str")
 def upper(array, *, highlevel=True, behavior=None):
     """
     Args:

From 51a5c5c9f18efef11640b6046fd71a477ed4cf1f Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 10:25:20 +0100
Subject: [PATCH 67/73] docs: homogenize docstrings

---
 src/awkward/operations/str/ak_capitalize.py   | 11 +++++----
 src/awkward/operations/str/ak_center.py       | 17 +++++++++-----
 .../operations/str/ak_count_substring.py      | 13 +++++++----
 .../str/ak_count_substring_regex.py           | 13 +++++++----
 src/awkward/operations/str/ak_ends_with.py    | 13 +++++++----
 .../operations/str/ak_extract_regex.py        | 13 +++++------
 .../operations/str/ak_find_substring.py       | 14 +++++++----
 .../operations/str/ak_find_substring_regex.py | 16 ++++++++-----
 src/awkward/operations/str/ak_index_in.py     | 14 +++++++----
 src/awkward/operations/str/ak_is_alnum.py     | 10 ++++----
 src/awkward/operations/str/ak_is_alpha.py     | 10 ++++----
 src/awkward/operations/str/ak_is_ascii.py     | 10 ++++----
 src/awkward/operations/str/ak_is_decimal.py   | 10 ++++----
 src/awkward/operations/str/ak_is_digit.py     | 10 ++++----
 src/awkward/operations/str/ak_is_in.py        | 12 ++++++----
 src/awkward/operations/str/ak_is_lower.py     | 10 ++++----
 src/awkward/operations/str/ak_is_numeric.py   | 10 ++++----
 src/awkward/operations/str/ak_is_printable.py | 10 ++++----
 src/awkward/operations/str/ak_is_space.py     | 10 ++++----
 src/awkward/operations/str/ak_is_title.py     | 18 ++++++++++-----
 src/awkward/operations/str/ak_is_upper.py     | 10 ++++----
 src/awkward/operations/str/ak_join.py         | 13 +++++++----
 .../operations/str/ak_join_element_wise.py    |  6 +++--
 src/awkward/operations/str/ak_length.py       |  7 +++---
 src/awkward/operations/str/ak_lower.py        |  7 +++---
 src/awkward/operations/str/ak_lpad.py         | 17 +++++++++-----
 src/awkward/operations/str/ak_ltrim.py        | 16 ++++++++-----
 .../operations/str/ak_ltrim_whitespace.py     |  4 ++--
 src/awkward/operations/str/ak_match_like.py   | 20 +++++++++++-----
 .../operations/str/ak_match_substring.py      | 11 +++++----
 .../str/ak_match_substring_regex.py           | 11 +++++----
 src/awkward/operations/str/ak_repeat.py       |  8 ++++---
 .../operations/str/ak_replace_slice.py        | 14 +++++++----
 .../operations/str/ak_replace_substring.py    | 10 ++++----
 .../str/ak_replace_substring_regex.py         | 10 ++++----
 src/awkward/operations/str/ak_reverse.py      |  9 +++++---
 src/awkward/operations/str/ak_rpad.py         | 17 +++++++++-----
 src/awkward/operations/str/ak_rtrim.py        | 13 +++++++----
 .../operations/str/ak_rtrim_whitespace.py     |  4 ++--
 src/awkward/operations/str/ak_slice.py        | 14 +++++++----
 .../operations/str/ak_split_pattern.py        | 14 +++++++----
 .../operations/str/ak_split_pattern_regex.py  | 17 +++++++++-----
 .../operations/str/ak_split_whitespace.py     | 23 +++++++++++--------
 src/awkward/operations/str/ak_starts_with.py  | 13 +++++++----
 src/awkward/operations/str/ak_swapcase.py     | 10 ++++----
 src/awkward/operations/str/ak_title.py        | 12 ++++++----
 src/awkward/operations/str/ak_trim.py         | 16 ++++++++-----
 .../operations/str/ak_trim_whitespace.py      |  7 +++---
 src/awkward/operations/str/ak_upper.py        | 10 ++++----
 49 files changed, 369 insertions(+), 218 deletions(-)

diff --git a/src/awkward/operations/str/ak_capitalize.py b/src/awkward/operations/str/ak_capitalize.py
index 6592da247a..1c33e480f8 100644
--- a/src/awkward/operations/str/ak_capitalize.py
+++ b/src/awkward/operations/str/ak_capitalize.py
@@ -18,12 +18,15 @@ def capitalize(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with a capitalized version (correctly transforming Unicode characters), with the first character uppercased and the others lowercased.
+    Replaces any string-valued data with a capitalized version
+    (correctly transforming Unicode characters), with the first character
+    uppercased and the others lowercased.
 
-    Replaces any bytestring-valued data with a capitalized version (transforming ASCII characters only).
+    Replaces any bytestring-valued data with a capitalized version
+    (transforming ASCII characters only).
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_capitalize](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_capitalize.html)
diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/ak_center.py
index 8ccc51c3fb..d7d1801136 100644
--- a/src/awkward/operations/str/ak_center.py
+++ b/src/awkward/operations/str/ak_center.py
@@ -15,20 +15,25 @@ def center(array, width, padding=" ", *, highlevel=True, behavior=None):
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
         width (int): Desired string length.
-        padding (str or bytes): What to pad the string with. Should be one codepoint or byte.
+        padding (str or bytes): What to pad the string with. Should be one
+            codepoint or byte.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string or bytestring-valued data with centered strings/bytestrings of a given `width`, padding both sides with the given `padding` codepoint or byte.
+    Replaces any string or bytestring-valued data with centered
+    strings/bytestrings of a given `width`, padding both sides with the given
+    `padding` codepoint or byte.
 
-    If the data are strings, `width` is measured in codepoints and `padding` must be one codepoint.
+    If the data are strings, `width` is measured in codepoints and `padding`
+    must be one codepoint.
 
-    If the data are bytestrings, `width` is measured in bytes and `padding` must be one byte.
+    If the data are bytestrings, `width` is measured in bytes and `padding`
+    must be one byte.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_center](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_center.html)
diff --git a/src/awkward/operations/str/ak_count_substring.py b/src/awkward/operations/str/ak_count_substring.py
index 6ac2608f0d..36cd8febca 100644
--- a/src/awkward/operations/str/ak_count_substring.py
+++ b/src/awkward/operations/str/ak_count_substring.py
@@ -16,16 +16,21 @@ def count_substring(
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): Substring pattern to count for each string in
+            `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, count the number of occurrences of the given literal pattern.
+    Counts the number of occurrences of the given literal `pattern` in every
+    string in `array`. If `ignore_case` is True, the match is
+    case-insensitive; otherwise, it is case-sensitive.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.count_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring.html).
diff --git a/src/awkward/operations/str/ak_count_substring_regex.py b/src/awkward/operations/str/ak_count_substring_regex.py
index f84d898eff..113d8acb9b 100644
--- a/src/awkward/operations/str/ak_count_substring_regex.py
+++ b/src/awkward/operations/str/ak_count_substring_regex.py
@@ -16,16 +16,21 @@ def count_substring_regex(
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): Regular expression that matches substrings to
+            count for each string in `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, count the number of occurrences of the given regular expression pattern.
+    Counts the number of occurrences of the given regular expression `pattern`
+    in every string in `array`. If `ignore_case` is True, the match is
+    case-insensitive; otherwise, it is case-sensitive.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.count_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring_regex.html).
diff --git a/src/awkward/operations/str/ak_ends_with.py b/src/awkward/operations/str/ak_ends_with.py
index 76cd93949f..ed68476a1f 100644
--- a/src/awkward/operations/str/ak_ends_with.py
+++ b/src/awkward/operations/str/ak_ends_with.py
@@ -14,16 +14,21 @@ def ends_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=Non
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): Substring pattern to test against the ending
+            of each string in `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine whether it ends with the given literal suffix.
+    Returns True for every string in `array` if it ends with the given literal
+    suffix `pattern`. If `ignore_case` is True, the match is case-insensitive;
+    otherwise, it is case-sensitive.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.ends_with](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ends_with.html).
diff --git a/src/awkward/operations/str/ak_extract_regex.py b/src/awkward/operations/str/ak_extract_regex.py
index 29f2a05dce..2592ba1268 100644
--- a/src/awkward/operations/str/ak_extract_regex.py
+++ b/src/awkward/operations/str/ak_extract_regex.py
@@ -20,7 +20,9 @@ def extract_regex(array, pattern, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with None if the `pattern` does not match or records whose fields are named capture groups and the substrings they've captured if `pattern` does match.
+    Returns None for every string in `array` if it does not match `pattern`;
+    otherwise, a record whose fields are named capture groups and whose
+    contents are the substrings they've captured.
 
     Uses [Google RE2](https://github.com/google/re2/wiki/Syntax), and `pattern` must
     contain named groups. The syntax for a named group is `(?P<...>...)` in which
@@ -44,14 +46,11 @@ def extract_regex(array, pattern, *, highlevel=True, behavior=None):
 
     Regular expressions with unnamed groups or features not implemented by RE2 raise an error.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.extract_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.extract_regex.html)
-    or
-    [pyarrow.compute.extract_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.extract_regex.html)
-    on strings and bytestrings, respectively.
+    [pyarrow.compute.extract_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.extract_regex.html).
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_find_substring.py b/src/awkward/operations/str/ak_find_substring.py
index 626e027fb2..28d5d86aec 100644
--- a/src/awkward/operations/str/ak_find_substring.py
+++ b/src/awkward/operations/str/ak_find_substring.py
@@ -14,17 +14,21 @@ def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavio
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): Substring pattern to find inside each string
+            in `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine the index at which the first occurrence of the given literal pattern is
-    found. If the literal pattern is not found inside the string, the index is taken to be -1.
+    Returns the index of the first occurrence of the given literal `pattern`
+    for each string in `array`. If the literal pattern is not found inside the
+    string, the index is taken to be -1.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.find_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring.html).
diff --git a/src/awkward/operations/str/ak_find_substring_regex.py b/src/awkward/operations/str/ak_find_substring_regex.py
index 68b206d5ba..90ab671ce5 100644
--- a/src/awkward/operations/str/ak_find_substring_regex.py
+++ b/src/awkward/operations/str/ak_find_substring_regex.py
@@ -16,20 +16,24 @@ def find_substring_regex(
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): Regular expression that matches substrings to
+            find inside each string in `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine the index at which the first occurrence of the given regular expression
-    pattern. is found. If the regular expression pattern is not found inside the string, the index is taken to be -1.
+    Returns the index of the first occurrence of the given regular expression
+    `pattern` for each string in `array`. If the regular expression pattern is
+    not found inside the string, the index is taken to be -1.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
-    [pyarrow.compute.find_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring_regex.html).
+    [pyarrow.compute.find_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring_regex.html).
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_index_in.py b/src/awkward/operations/str/ak_index_in.py
index 559856feb8..84f1a2af1d 100644
--- a/src/awkward/operations/str/ak_index_in.py
+++ b/src/awkward/operations/str/ak_index_in.py
@@ -14,17 +14,21 @@ def index_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=Non
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        value_set: Array-like data (anything #ak.to_layout recognizes), set of values to search for.
-        skip_nones (bool): If True, None values in `array` are not matched against `value_set`; otherwise, they are.
+        value_set: Array-like data (anything #ak.to_layout recognizes), set of
+            values to search for in `array`.
+        skip_nones (bool): If True, None values in `array` are not matched
+            against `value_set`; otherwise, None is considered a legal value.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine where it is found within the given set of values. If the string is
-    not found within the value set, the index is set to None.
+    Returns the index of the first value in `value_set` that each string in
+    `array` equals. If the string is not found within `value_set`, then the
+    index is set to None.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.index_in](https://arrow.apache.org/docs/python/generated/pyarrow.compute.index_in.html).
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/ak_is_alnum.py
index d3a9b8e0a1..d18d42f2a9 100644
--- a/src/awkward/operations/str/ak_is_alnum.py
+++ b/src/awkward/operations/str/ak_is_alnum.py
@@ -18,12 +18,14 @@ def is_alnum(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of alphanumeric Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of alphanumeric Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of alphanumeric ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of alphanumeric ASCII characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_alnum](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alnum.html)
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/ak_is_alpha.py
index 987538ca95..892336f661 100644
--- a/src/awkward/operations/str/ak_is_alpha.py
+++ b/src/awkward/operations/str/ak_is_alpha.py
@@ -18,12 +18,14 @@ def is_alpha(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of alphabetic Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of alphabetic Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of alphabetic ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of alphabetic ASCII characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_alpha](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_alpha.html)
diff --git a/src/awkward/operations/str/ak_is_ascii.py b/src/awkward/operations/str/ak_is_ascii.py
index 4fcdd9d518..db7092c842 100644
--- a/src/awkward/operations/str/ak_is_ascii.py
+++ b/src/awkward/operations/str/ak_is_ascii.py
@@ -18,12 +18,14 @@ def is_ascii(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True iff the string consists only of ASCII characters, False otherwise.
+    Replaces any string-valued data with True iff the string consists only of
+    ASCII characters, False otherwise.
 
-    Replaces any bytestring-valued data with True iff the string consists only of ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True iff the string consists only
+    of ASCII characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.string_is_ascii](https://arrow.apache.org/docs/python/generated/pyarrow.compute.string_is_ascii.html)
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/ak_is_decimal.py
index 8d5b607791..7599f150d2 100644
--- a/src/awkward/operations/str/ak_is_decimal.py
+++ b/src/awkward/operations/str/ak_is_decimal.py
@@ -18,12 +18,14 @@ def is_decimal(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of decimal Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of decimal Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of decimal ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of decimal ASCII characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_decimal](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_decimal.html)
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/ak_is_digit.py
index 2c8ba67827..2838d5e39a 100644
--- a/src/awkward/operations/str/ak_is_digit.py
+++ b/src/awkward/operations/str/ak_is_digit.py
@@ -18,12 +18,14 @@ def is_digit(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of Unicode digits, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of Unicode digits, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of Unicode digits, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of Unicode digits, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_digit](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_digit.html)
diff --git a/src/awkward/operations/str/ak_is_in.py b/src/awkward/operations/str/ak_is_in.py
index 2df2dfe74d..3ac79b44ec 100644
--- a/src/awkward/operations/str/ak_is_in.py
+++ b/src/awkward/operations/str/ak_is_in.py
@@ -14,16 +14,20 @@ def is_in(array, value_set, *, skip_nones=False, highlevel=True, behavior=None):
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        value_set: Array-like data (anything #ak.to_layout recognizes), set of values to search for.
-        skip_nones (bool): If True, None values in `array` are not matched against `value_set`; otherwise, they are.
+        value_set: Array-like data (anything #ak.to_layout recognizes), set of
+            values to search for in `array`.
+        skip_nones (bool): If True, None values in `array` are not matched
+            against `value_set`; otherwise, None is considered a legal value.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine whether it is found within the given set of values.
+    Returns True for each string in `array` if it equals any value in
+    `value_set`; otherwise, returns False.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.is_in](https://arrow.apache.org/docs/python/generated/pyarrow.compute.is_in.html).
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/ak_is_lower.py
index f9cbb78fb7..5b502ec5e6 100644
--- a/src/awkward/operations/str/ak_is_lower.py
+++ b/src/awkward/operations/str/ak_is_lower.py
@@ -18,12 +18,14 @@ def is_lower(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of lowercase Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of lowercase Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of lowercase ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of lowercase ASCII characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_lower](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_lower.html)
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/ak_is_numeric.py
index 3f1817c169..e7e5a0db49 100644
--- a/src/awkward/operations/str/ak_is_numeric.py
+++ b/src/awkward/operations/str/ak_is_numeric.py
@@ -18,12 +18,14 @@ def is_numeric(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of numeric Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of numeric Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of numeric Unicode characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of numeric Unicode characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_numeric](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_numeric.html)
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/ak_is_printable.py
index 574439723d..3b825acf6a 100644
--- a/src/awkward/operations/str/ak_is_printable.py
+++ b/src/awkward/operations/str/ak_is_printable.py
@@ -18,12 +18,14 @@ def is_printable(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of printable Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of printable Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of printable ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of printable ASCII characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_printable](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_printable.html)
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/ak_is_space.py
index 884521eb45..624691cdf5 100644
--- a/src/awkward/operations/str/ak_is_space.py
+++ b/src/awkward/operations/str/ak_is_space.py
@@ -18,12 +18,14 @@ def is_space(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of whitespace Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of whitespace Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of whitespace ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of whitespace ASCII characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_space](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_space.html)
diff --git a/src/awkward/operations/str/ak_is_title.py b/src/awkward/operations/str/ak_is_title.py
index 38b105224e..05a13377dc 100644
--- a/src/awkward/operations/str/ak_is_title.py
+++ b/src/awkward/operations/str/ak_is_title.py
@@ -18,12 +18,18 @@ def is_title(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is title-cased, i.e. it has at least one cased character, each uppercase character follows an uncased character, and each lowercase character follows an uppercase character, otherwise False.
-
-    Replaces any bytestring-valued data with True if the string is title-cased, i.e. it has at least one cased character, each uppercase character follows an uncased character, and each lowercase character follows an uppercase character, otherwise False.
-
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Replaces any string-valued data with True if the string is title-cased,
+    i.e. it has at least one cased character, each uppercase character follows
+    an uncased character, and each lowercase character follows an uppercase
+    character, otherwise False.
+
+    Replaces any bytestring-valued data with True if the string is
+    title-cased, i.e. it has at least one cased character, each uppercase
+    character follows an uncased character, and each lowercase character
+    follows an uppercase character, otherwise False.
+
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_title](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_title.html)
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/ak_is_upper.py
index a8a301a65f..b37aa1c843 100644
--- a/src/awkward/operations/str/ak_is_upper.py
+++ b/src/awkward/operations/str/ak_is_upper.py
@@ -18,12 +18,14 @@ def is_upper(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with True if the string is non-empty and consists only of uppercase Unicode characters, False otherwise.
+    Replaces any string-valued data with True if the string is non-empty and
+    consists only of uppercase Unicode characters, False otherwise.
 
-    Replaces any bytestring-valued data with True if the string is non-empty and consists only of uppercase ASCII characters, False otherwise.
+    Replaces any bytestring-valued data with True if the string is non-empty
+    and consists only of uppercase ASCII characters, False otherwise.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_is_upper](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_is_upper.html)
diff --git a/src/awkward/operations/str/ak_join.py b/src/awkward/operations/str/ak_join.py
index 8504e8ccb3..062067b3db 100644
--- a/src/awkward/operations/str/ak_join.py
+++ b/src/awkward/operations/str/ak_join.py
@@ -13,17 +13,20 @@ def join(array, separator, *, highlevel=True, behavior=None):
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        separator (str, bytes, or array of them to broadcast): separator to insert
-            between strings. If array-like, `separator` is broadcast against `array`
-            which permits a unique separator for each list of strings.
+        separator (str, bytes, or array of them to broadcast): separator to
+            insert between strings. If array-like, `separator` is broadcast
+            against `array`.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Concatenate the strings in `array`. The separator is inserted between each string.
+    Concatenate the strings in `array`. The `separator` is inserted between
+    each string. If array-like, `separator` is broadcast against `array` which
+    permits a unique separator for each list of strings in `array`.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.binary_join](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_join.html).
diff --git a/src/awkward/operations/str/ak_join_element_wise.py b/src/awkward/operations/str/ak_join_element_wise.py
index f78a354b4f..2026bb8ae0 100644
--- a/src/awkward/operations/str/ak_join_element_wise.py
+++ b/src/awkward/operations/str/ak_join_element_wise.py
@@ -18,9 +18,11 @@ def join_element_wise(*arrays, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Broadcasts and concatenates all but the last array of strings in `arrays`; the last is used as a separator.
+    Broadcasts and concatenates all but the last array of strings in `arrays`;
+    the last is used as a separator.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `arrays` do not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.binary_join_element_wise](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_join_element_wise.html).
diff --git a/src/awkward/operations/str/ak_length.py b/src/awkward/operations/str/ak_length.py
index 3471d61c12..700dbe534c 100644
--- a/src/awkward/operations/str/ak_length.py
+++ b/src/awkward/operations/str/ak_length.py
@@ -18,12 +18,13 @@ def length(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with its length in Unicode characters (not its length in bytes).
+    Replaces any string-valued data with its length in Unicode characters
+    (not its length in bytes).
 
     Replaces any bytestring-valued data with its length of bytes.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_length](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_length.html)
diff --git a/src/awkward/operations/str/ak_lower.py b/src/awkward/operations/str/ak_lower.py
index 098aa24423..ade17e10ac 100644
--- a/src/awkward/operations/str/ak_lower.py
+++ b/src/awkward/operations/str/ak_lower.py
@@ -18,12 +18,13 @@ def lower(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with a lowercase version (correctly transforming Unicode characters).
+    Replaces any string-valued data with a lowercase version (correctly
+    transforming Unicode characters).
 
     Replaces any bytestring-valued data with a lowercase version (transforming ASCII characters only).
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_lower](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_lower.html)
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/ak_lpad.py
index 2398463eab..431557d086 100644
--- a/src/awkward/operations/str/ak_lpad.py
+++ b/src/awkward/operations/str/ak_lpad.py
@@ -15,20 +15,25 @@ def lpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
         width (int): Desired string length.
-        padding (str or bytes): What to pad the string with. Should be one codepoint or byte.
+        padding (str or bytes): What to pad the string with. Should be one
+            codepoint or byte.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string or bytestring-valued data with right-aligned strings/bytestrings of a given `width`, padding the left side with the given `padding` codepoint or byte.
+    Replaces any string or bytestring-valued data with right-aligned
+    strings/bytestrings of a given `width`, padding the left side with the
+    given `padding` codepoint or byte.
 
-    If the data are strings, `width` is measured in codepoints and `padding` must be one codepoint.
+    If the data are strings, `width` is measured in codepoints and `padding`
+    must be one codepoint.
 
-    If the data are bytestrings, `width` is measured in bytes and `padding` must be one byte.
+    If the data are bytestrings, `width` is measured in bytes and `padding`
+    must be one byte.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_lpad](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_lpad.html)
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
index f5f4dca355..5274ed5ec7 100644
--- a/src/awkward/operations/str/ak_ltrim.py
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -14,20 +14,24 @@ def ltrim(array, characters, *, highlevel=True, behavior=None):
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        characters (str or bytes): Individual characters to be trimmed from the string.
+        characters (str or bytes): Individual characters to be trimmed
+            from the string.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Removes any leading characters of `characters` from any string or bytestring-valued data.
+    Removes any leading characters of `characters` from any string or
+    bytestring-valued data.
 
-    If the data are strings, `characters` are interpreted as unordered, individual codepoints.
+    If the data are strings, `characters` are interpreted as unordered,
+    individual codepoints.
 
-    If the data are bytestrings, `characters` are interpreted as unordered, individual bytes.
+    If the data are bytestrings, `characters` are interpreted as unordered,
+    individual bytes.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_ltrim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_ltrim.html)
diff --git a/src/awkward/operations/str/ak_ltrim_whitespace.py b/src/awkward/operations/str/ak_ltrim_whitespace.py
index 73e4624ced..ca7f917e4e 100644
--- a/src/awkward/operations/str/ak_ltrim_whitespace.py
+++ b/src/awkward/operations/str/ak_ltrim_whitespace.py
@@ -21,8 +21,8 @@ def ltrim_whitespace(array, *, highlevel=True, behavior=None):
 
     Removes any leading whitespace from any string or bytestring-valued data.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_ltrim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_ltrim_whitespace.html)
diff --git a/src/awkward/operations/str/ak_match_like.py b/src/awkward/operations/str/ak_match_like.py
index 95db65ba7e..6cc83443bb 100644
--- a/src/awkward/operations/str/ak_match_like.py
+++ b/src/awkward/operations/str/ak_match_like.py
@@ -14,18 +14,26 @@ def match_like(array, pattern, *, ignore_case=False, highlevel=True, behavior=No
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): SQL-style LIKE pattern to match against
+            strings in `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine whether it matches the given SQL-style LIKE pattern.
-    '%' matches any number of characters, '_' matches exactly one character, and any other character matches itself.
-    To match a literal '%', '_', or "'", the character must be preceded with a backslash.
+    For each string in the array, determine whether it matches the given
+    SQL-style LIKE pattern, which obeys the following rules:
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    - '%' matches any number of characters.
+    - '_' matches exactly one character.
+    - Any other character matches itself.
+    - To match a literal '%', '_', or "'", the character must be preceded
+      by a backslash.
+
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.match_like](https://arrow.apache.org/docs/python/generated/pyarrow.compute.match_like.html).
diff --git a/src/awkward/operations/str/ak_match_substring.py b/src/awkward/operations/str/ak_match_substring.py
index 3bf474c050..0b5ac3421e 100644
--- a/src/awkward/operations/str/ak_match_substring.py
+++ b/src/awkward/operations/str/ak_match_substring.py
@@ -16,16 +16,19 @@ def match_substring(
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): Substring pattern to look for inside `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine whether it contains the given literal pattern.
+    For each string in the array, determine whether it contains the given
+    literal `pattern`.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.match_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.match_substring.html).
diff --git a/src/awkward/operations/str/ak_match_substring_regex.py b/src/awkward/operations/str/ak_match_substring_regex.py
index 3a0b65d11f..f909936a42 100644
--- a/src/awkward/operations/str/ak_match_substring_regex.py
+++ b/src/awkward/operations/str/ak_match_substring_regex.py
@@ -16,16 +16,19 @@ def match_substring_regex(
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): Regular expression to search for inside `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine whether it contains the given regular expression pattern.
+    For each string in the array, determine whether any substring matches the
+    given regular expression `pattern`.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.match_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.match_substring_regex.html).
diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/ak_repeat.py
index c2e4704dad..3d0edaa755 100644
--- a/src/awkward/operations/str/ak_repeat.py
+++ b/src/awkward/operations/str/ak_repeat.py
@@ -24,10 +24,12 @@ def repeat(array, num_repeats, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued or bytestring-valued data with the same value repeated `num_repeats` times, which can be a scalar integer or a (broadcasted) array of integers.
+    Replaces any string-valued or bytestring-valued data with the same value
+    repeated `num_repeats` times, which can be a scalar integer or a
+    (broadcasted) array of integers.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.binary_repeat](https://arrow.apache.org/docs/python/generated/pyarrow.compute.binary_repeat.html)
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/ak_replace_slice.py
index 573359e140..fc0668daf9 100644
--- a/src/awkward/operations/str/ak_replace_slice.py
+++ b/src/awkward/operations/str/ak_replace_slice.py
@@ -22,14 +22,18 @@ def replace_slice(array, start, stop, replacement, *, highlevel=True, behavior=N
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces slices of any string or bytestring-valued data with `replacement` between `start` and `stop` indexes; `start` is inclusive and `stop` is exclusive and both are 0-indexed.
+    Replaces slices of any string or bytestring-valued data with `replacement`
+    between `start` and `stop` indexes; `start` is inclusive and `stop` is
+    exclusive and both are 0-indexed.
 
-    For strings, `start` and `stop` are measured in Unicode characters; for bytestrings, `start` and `stop` are measured in bytes.
+    For strings, `start` and `stop` are measured in Unicode characters; for
+    bytestrings, `start` and `stop` are measured in bytes.
 
-    The `start`, `stop`, and `replacement` are scalars; they cannot be different for each string/bytestring in the sample.
+    The `start`, `stop`, and `replacement` are scalars; they cannot be
+    different for each string/bytestring in the sample.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_replace_slice](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_replace_slice.html)
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index 595f606787..9214c94199 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -26,12 +26,14 @@ def replace_substring(
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces non-overlapping subsequences of any string or bytestring-valued data that match a literal `pattern` with `replacement`.
+    Replaces non-overlapping subsequences of any string or bytestring-valued
+    data that match a literal `pattern` with `replacement`.
 
-    The `pattern` and `replacement` are scalars; they cannot be different for each string/bytestring in the sample.
+    The `pattern` and `replacement` are scalars; they cannot be different for
+    each string/bytestring in the sample.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.replace_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.replace_substring.html)
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index e20f1e662c..ae2d0ff043 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -26,12 +26,14 @@ def replace_substring_regex(
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces non-overlapping subsequences of any string or bytestring-valued data that match a regular expression `pattern` with `replacement`.
+    Replaces non-overlapping subsequences of any string or bytestring-valued
+    data that match a regular expression `pattern` with `replacement`.
 
-    The `pattern` and `replacement` are scalars; they cannot be different for each string/bytestring in the sample.
+    The `pattern` and `replacement` are scalars; they cannot be different
+    for each string/bytestring in the sample.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.replace_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.replace_substring_regex.html)
diff --git a/src/awkward/operations/str/ak_reverse.py b/src/awkward/operations/str/ak_reverse.py
index bcc249e7e4..bd5e6c79f2 100644
--- a/src/awkward/operations/str/ak_reverse.py
+++ b/src/awkward/operations/str/ak_reverse.py
@@ -18,12 +18,15 @@ def reverse(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Reverses the order of Unicode characters in any string-valued data. (This function operates on Unicode codepoints, not grapheme clusters. Hence, it will not correctly reverse grapheme clusters composed of multiple codepoints.)
+    Reverses the order of Unicode characters in any string-valued data.
+    (This function operates on Unicode codepoints, not grapheme clusters.
+    Hence, it will not correctly reverse grapheme clusters composed of
+    multiple codepoints.)
 
     Reverses the order of bytes in any bytestring-valued data.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_reverse](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_reverse.html)
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/ak_rpad.py
index e46e43a5d8..99fe323d60 100644
--- a/src/awkward/operations/str/ak_rpad.py
+++ b/src/awkward/operations/str/ak_rpad.py
@@ -15,20 +15,25 @@ def rpad(array, width, padding=" ", *, highlevel=True, behavior=None):
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
         width (int): Desired string length.
-        padding (str or bytes): What to pad the string with. Should be one codepoint or byte.
+        padding (str or bytes): What to pad the string with. Should be one
+            codepoint or byte.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string or bytestring-valued data with left-aligned strings/bytestrings of a given `width`, padding the right side with the given `padding` codepoint or byte.
+    Replaces any string or bytestring-valued data with left-aligned
+    strings/bytestrings of a given `width`, padding the right side with the
+    given `padding` codepoint or byte.
 
-    If the data are strings, `width` is measured in codepoints and `padding` must be one codepoint.
+    If the data are strings, `width` is measured in codepoints and `padding`
+    must be one codepoint.
 
-    If the data are bytestrings, `width` is measured in bytes and `padding` must be one byte.
+    If the data are bytestrings, `width` is measured in bytes and `padding`
+    must be one byte.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_rpad](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_rpad.html)
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
index 88f562a1a5..db96fe2061 100644
--- a/src/awkward/operations/str/ak_rtrim.py
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -20,14 +20,17 @@ def rtrim(array, characters, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Removes any trailing characters of `characters` from any string or bytestring-valued data.
+    Removes any trailing characters of `characters` from any string or
+    bytestring-valued data.
 
-    If the data are strings, `characters` are interpreted as unordered, individual codepoints.
+    If the data are strings, `characters` are interpreted as unordered,
+    individual codepoints.
 
-    If the data are bytestrings, `characters` are interpreted as unordered, individual bytes.
+    If the data are bytestrings, `characters` are interpreted as unordered,
+    individual bytes.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_rtrim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_rtrim.html)
diff --git a/src/awkward/operations/str/ak_rtrim_whitespace.py b/src/awkward/operations/str/ak_rtrim_whitespace.py
index e438a98363..1216d3d5ce 100644
--- a/src/awkward/operations/str/ak_rtrim_whitespace.py
+++ b/src/awkward/operations/str/ak_rtrim_whitespace.py
@@ -21,8 +21,8 @@ def rtrim_whitespace(array, *, highlevel=True, behavior=None):
 
     Removes any trailing whitespace from any string or bytestring-valued data.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_rtrim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_rtrim_whitespace.html)
diff --git a/src/awkward/operations/str/ak_slice.py b/src/awkward/operations/str/ak_slice.py
index 06b67c59e7..89385c1a03 100644
--- a/src/awkward/operations/str/ak_slice.py
+++ b/src/awkward/operations/str/ak_slice.py
@@ -23,14 +23,18 @@ def slice(array, start, stop=None, step=1, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string or bytestring-valued data with a slice between `start` and `stop` indexes; `start` is inclusive and `stop` is exclusive and both are 0-indexed.
+    Replaces any string or bytestring-valued data with a slice between `start`
+    and `stop` indexes; `start` is inclusive and `stop` is exclusive and both
+    are 0-indexed.
 
-    For strings, `start` and `stop` are measured in Unicode characters; for bytestrings, `start` and `stop` are measured in bytes.
+    For strings, `start` and `stop` are measured in Unicode characters; for
+    bytestrings, `start` and `stop` are measured in bytes.
 
-    The `start`, `stop`, and `replacement` are scalars; they cannot be different for each string/bytestring in the sample.
+    The `start`, `stop`, and `step` are scalars; they cannot be
+    different for each string/bytestring in the sample.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_slice_codeunits](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_slice_codeunits.html)
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
index d2ef682562..604532edc0 100644
--- a/src/awkward/operations/str/ak_split_pattern.py
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -17,17 +17,21 @@ def split_pattern(
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
         pattern (str or bytes): Pattern of characters/bytes to split on.
-        max_splits (None or int): Maximum number of splits for each input value. If None, unlimited.
-        reverse (bool): If True, start splitting from the end of each input value; otherwise, start splitting
-            from the beginning of each value. This flag only has an effect if `max_splits` is not None.
+        max_splits (None or int): Maximum number of splits for each input
+            value. If None, unlimited.
+        reverse (bool): If True, start splitting from the end of each input
+            value; otherwise, start splitting from the beginning of each
+            value. This flag only has an effect if `max_splits` is not None.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Splits any string or bytestring-valued data into a list of substrings according to the given separator.
+    Splits any string or bytestring-valued data into a list of substrings
+    according to the given separator.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/ak_split_pattern_regex.py
index 373a1c0db6..69a88e9b1e 100644
--- a/src/awkward/operations/str/ak_split_pattern_regex.py
+++ b/src/awkward/operations/str/ak_split_pattern_regex.py
@@ -16,18 +16,23 @@ def split_pattern_regex(
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str or bytes): Regular expression of characters/bytes to split on.
-        max_splits (None or int): Maximum number of splits for each input value. If None, unlimited.
-        reverse (bool): If True, start splitting from the end of each input value; otherwise, start splitting
-            from the beginning of each value. This flag only has an effect if `max_splits` is not None.
+        pattern (str or bytes): Regular expression of characters/bytes to
+            split on.
+        max_splits (None or int): Maximum number of splits for each input
+            value. If None, unlimited.
+        reverse (bool): If True, start splitting from the end of each input
+            value; otherwise, start splitting from the beginning of each
+            value. This flag only has an effect if `max_splits` is not None.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Splits any string or bytestring-valued data into a list of substrings according to the given regular expression.
+    Splits any string or bytestring-valued data into a list of substrings
+    according to the given regular expression.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
index f534de5bd5..bf35a2c1b9 100644
--- a/src/awkward/operations/str/ak_split_whitespace.py
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -16,24 +16,29 @@ def split_whitespace(
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        max_splits (None or int): Maximum number of splits for each input value. If None, unlimited.
-        reverse (bool): If True, start splitting from the end of each input value; otherwise, start splitting
-            from the beginning of each value. This flag only has an effect if `max_splits` is not None.
+        max_splits (None or int): Maximum number of splits for each input
+            value. If None, unlimited.
+        reverse (bool): If True, start splitting from the end of each input
+            value; otherwise, start splitting from the beginning of each
+            value. This flag only has an effect if `max_splits` is not None.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Splits any string or bytestring-valued data into a list of substrings according to any non-zero length sequence of
+    Splits any string or bytestring-valued data into a list of substrings
+    according to any non-zero length sequence of
     whitespace characters.
 
-    For strings, a split is performed for every sequence of Unicode whitespace characters; for bytestrings, splitting
-    is performed for sequences of ascii whitespace characters.
+    For strings, a split is performed for every sequence of Unicode whitespace
+    characters; for bytestrings, splitting is performed for sequences of ASCII
+    whitespace characters.
 
-    The `max_splits`, and `reverse` arguments are scalars; they cannot be different for each string/bytestring in the
-    sample.
+    The `max_splits` and `reverse` arguments are scalars; they cannot be
+    different for each string/bytestring in the sample.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_split_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_split_whitespace.html)
diff --git a/src/awkward/operations/str/ak_starts_with.py b/src/awkward/operations/str/ak_starts_with.py
index d055c93355..e035c53920 100644
--- a/src/awkward/operations/str/ak_starts_with.py
+++ b/src/awkward/operations/str/ak_starts_with.py
@@ -14,16 +14,21 @@ def starts_with(array, pattern, *, ignore_case=False, highlevel=True, behavior=N
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        pattern (str, or bytes): Substring pattern to look for inside the given array.
-        ignore_case (bool): If True, perform a case-insensitive match; otherwise, the match is case-sensitive.
+        pattern (str or bytes): Substring pattern to test against the start
+            of each string in `array`.
+        ignore_case (bool): If True, perform a case-insensitive match;
+            otherwise, the match is case-sensitive.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    For each string in the array, determine whether it starts with the given literal suffix.
+    Returns True for every string in `array` if it starts with the given literal
+    prefix `pattern`. Depending upon the value of `ignore_case`, the matching
+    function will be case-insensitive.
 
-    Note: this function does not raise an error if the `array` does not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.starts_with](https://arrow.apache.org/docs/python/generated/pyarrow.compute.starts_with.html).
diff --git a/src/awkward/operations/str/ak_swapcase.py b/src/awkward/operations/str/ak_swapcase.py
index e5eb0ee52e..1629c65fdc 100644
--- a/src/awkward/operations/str/ak_swapcase.py
+++ b/src/awkward/operations/str/ak_swapcase.py
@@ -18,12 +18,14 @@ def swapcase(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with uppercase characters transformed to lowercase and vice-versa (correctly transforming Unicode characters).
+    Replaces any string-valued data with uppercase characters transformed to
+    lowercase and vice-versa (correctly transforming Unicode characters).
 
-    Replaces any bytestring-valued data with uppercase characters transformed to lowercase and vice-versa (transforming ASCII characters only).
+    Replaces any bytestring-valued data with uppercase characters transformed
+    to lowercase and vice-versa (transforming ASCII characters only).
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_swapcase](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_swapcase.html)
diff --git a/src/awkward/operations/str/ak_title.py b/src/awkward/operations/str/ak_title.py
index aac266547b..8c7d0361b4 100644
--- a/src/awkward/operations/str/ak_title.py
+++ b/src/awkward/operations/str/ak_title.py
@@ -18,12 +18,16 @@ def title(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with a titlecase version (correctly transforming Unicode characters). Each word in the output will start with an uppercase character and its remaining characters will be lowercase.
+    Replaces any string-valued data with a titlecase version (correctly
+    transforming Unicode characters). Each word in the output will start with
+    an uppercase character and its remaining characters will be lowercase.
 
-    Replaces any bytestring-valued data with a titlecase version (transforming ASCII characters only). Each word in the output will start with an uppercase character and its remaining characters will be lowercase.
+    Replaces any bytestring-valued data with a titlecase version (transforming
+    ASCII characters only). Each word in the output will start with an
+    uppercase character and its remaining characters will be lowercase.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_title](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_title.html)
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index 1796fbaeb5..ecf7d14f90 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -14,20 +14,24 @@ def trim(array, characters, *, highlevel=True, behavior=None):
     """
     Args:
         array: Array-like data (anything #ak.to_layout recognizes).
-        characters (str or bytes): Individual characters to be trimmed from the string.
+        characters (str or bytes): Individual characters to be trimmed from
+            the string.
         highlevel (bool): If True, return an #ak.Array; otherwise, return
             a low-level #ak.contents.Content subclass.
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Removes any leading or trailing characters of `characters` from any string or bytestring-valued data.
+    Removes any leading or trailing characters of `characters` from any string
+    or bytestring-valued data.
 
-    If the data are strings, `characters` are interpreted as unordered, individual codepoints.
+    If the data are strings, `characters` are interpreted as unordered,
+    individual codepoints.
 
-    If the data are bytestrings, `characters` are interpreted as unordered, individual bytes.
+    If the data are bytestrings, `characters` are interpreted as unordered,
+    individual bytes.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_trim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_trim.html)
diff --git a/src/awkward/operations/str/ak_trim_whitespace.py b/src/awkward/operations/str/ak_trim_whitespace.py
index 6568249969..de34c5fa87 100644
--- a/src/awkward/operations/str/ak_trim_whitespace.py
+++ b/src/awkward/operations/str/ak_trim_whitespace.py
@@ -19,10 +19,11 @@ def trim_whitespace(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Removes any leading or trailing whitespace from any string or bytestring-valued data.
+    Removes any leading or trailing whitespace from any string or
+    bytestring-valued data.
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_trim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_trim_whitespace.html)
diff --git a/src/awkward/operations/str/ak_upper.py b/src/awkward/operations/str/ak_upper.py
index 2391b439af..4f0a8bf920 100644
--- a/src/awkward/operations/str/ak_upper.py
+++ b/src/awkward/operations/str/ak_upper.py
@@ -18,12 +18,14 @@ def upper(array, *, highlevel=True, behavior=None):
         behavior (None or dict): Custom #ak.behavior for the output array, if
             high-level.
 
-    Replaces any string-valued data with a uppercase version (correctly transforming Unicode characters).
+    Replaces any string-valued data with an uppercase version (correctly
+    transforming Unicode characters).
 
-    Replaces any bytestring-valued data with a uppercase version (transforming ASCII characters only).
+    Replaces any bytestring-valued data with an uppercase version (transforming
+    ASCII characters only).
 
-    Note: this function does not raise an error if the `array` does
-    not contain any string or bytestring data.
+    Note: this function does not raise an error if the `array` does not
+    contain any string or bytestring data.
 
     Requires the pyarrow library and calls
     [pyarrow.compute.utf8_upper](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_upper.html)

From 447cde7d2ba2e7cb01dfe9e54e5e858fc706b37b Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 11:02:07 +0100
Subject: [PATCH 68/73] docs: add see also

---
 src/awkward/operations/str/ak_count_substring.py         | 2 ++
 src/awkward/operations/str/ak_count_substring_regex.py   | 2 ++
 src/awkward/operations/str/ak_find_substring.py          | 2 ++
 src/awkward/operations/str/ak_find_substring_regex.py    | 2 ++
 src/awkward/operations/str/ak_ltrim.py                   | 2 ++
 src/awkward/operations/str/ak_ltrim_whitespace.py        | 2 ++
 src/awkward/operations/str/ak_match_substring.py         | 2 ++
 src/awkward/operations/str/ak_match_substring_regex.py   | 2 ++
 src/awkward/operations/str/ak_replace_substring.py       | 2 ++
 src/awkward/operations/str/ak_replace_substring_regex.py | 2 ++
 src/awkward/operations/str/ak_rtrim.py                   | 2 ++
 src/awkward/operations/str/ak_rtrim_whitespace.py        | 2 ++
 src/awkward/operations/str/ak_split_pattern.py           | 2 ++
 src/awkward/operations/str/ak_split_pattern_regex.py     | 2 ++
 src/awkward/operations/str/ak_split_whitespace.py        | 2 ++
 src/awkward/operations/str/ak_trim.py                    | 2 ++
 src/awkward/operations/str/ak_trim_whitespace.py         | 2 ++
 17 files changed, 34 insertions(+)

diff --git a/src/awkward/operations/str/ak_count_substring.py b/src/awkward/operations/str/ak_count_substring.py
index 36cd8febca..8bbc44bcd7 100644
--- a/src/awkward/operations/str/ak_count_substring.py
+++ b/src/awkward/operations/str/ak_count_substring.py
@@ -34,6 +34,8 @@ def count_substring(
 
     Requires the pyarrow library and calls
     [pyarrow.compute.count_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring.html).
+
+    See also: #ak.str.count_substring_regex.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_count_substring_regex.py b/src/awkward/operations/str/ak_count_substring_regex.py
index 113d8acb9b..4cd7f3fe8a 100644
--- a/src/awkward/operations/str/ak_count_substring_regex.py
+++ b/src/awkward/operations/str/ak_count_substring_regex.py
@@ -34,6 +34,8 @@ def count_substring_regex(
 
     Requires the pyarrow library and calls
     [pyarrow.compute.count_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.count_substring_regex.html).
+
+    See also: #ak.str.count_substring.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_find_substring.py b/src/awkward/operations/str/ak_find_substring.py
index 28d5d86aec..f936bdac86 100644
--- a/src/awkward/operations/str/ak_find_substring.py
+++ b/src/awkward/operations/str/ak_find_substring.py
@@ -32,6 +32,8 @@ def find_substring(array, pattern, *, ignore_case=False, highlevel=True, behavio
 
     Requires the pyarrow library and calls
     [pyarrow.compute.find_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring.html).
+
+    See also: #ak.str.find_substring_regex.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_find_substring_regex.py b/src/awkward/operations/str/ak_find_substring_regex.py
index 90ab671ce5..e5059f846a 100644
--- a/src/awkward/operations/str/ak_find_substring_regex.py
+++ b/src/awkward/operations/str/ak_find_substring_regex.py
@@ -34,6 +34,8 @@ def find_substring_regex(
 
     Requires the pyarrow library and calls
     [pyarrow.compute.find_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.find_substring.html).
+
+    See also: #ak.str.find_substring.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/ak_ltrim.py
index 5274ed5ec7..a6904c1d11 100644
--- a/src/awkward/operations/str/ak_ltrim.py
+++ b/src/awkward/operations/str/ak_ltrim.py
@@ -38,6 +38,8 @@ def ltrim(array, characters, *, highlevel=True, behavior=None):
     or
     [pyarrow.compute.ascii_ltrim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_ltrim.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.ltrim_whitespace.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_ltrim_whitespace.py b/src/awkward/operations/str/ak_ltrim_whitespace.py
index ca7f917e4e..060af89288 100644
--- a/src/awkward/operations/str/ak_ltrim_whitespace.py
+++ b/src/awkward/operations/str/ak_ltrim_whitespace.py
@@ -29,6 +29,8 @@ def ltrim_whitespace(array, *, highlevel=True, behavior=None):
     or
     [pyarrow.compute.ascii_ltrim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_ltrim_whitespace.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.ltrim.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_match_substring.py b/src/awkward/operations/str/ak_match_substring.py
index 0b5ac3421e..29778364bb 100644
--- a/src/awkward/operations/str/ak_match_substring.py
+++ b/src/awkward/operations/str/ak_match_substring.py
@@ -32,6 +32,8 @@ def match_substring(
 
     Requires the pyarrow library and calls
     [pyarrow.compute.match_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.match_substring.html).
+
+    See also: #ak.str.match_substring_regex.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_match_substring_regex.py b/src/awkward/operations/str/ak_match_substring_regex.py
index f909936a42..85bbe38eb5 100644
--- a/src/awkward/operations/str/ak_match_substring_regex.py
+++ b/src/awkward/operations/str/ak_match_substring_regex.py
@@ -32,6 +32,8 @@ def match_substring_regex(
 
     Requires the pyarrow library and calls
     [pyarrow.compute.match_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.match_substring_regex.html).
+
+    See also: #ak.str.match_substring.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/ak_replace_substring.py
index 9214c94199..38f00cbe42 100644
--- a/src/awkward/operations/str/ak_replace_substring.py
+++ b/src/awkward/operations/str/ak_replace_substring.py
@@ -40,6 +40,8 @@ def replace_substring(
     or
     [pyarrow.compute.replace_substring](https://arrow.apache.org/docs/python/generated/pyarrow.compute.replace_substring.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.replace_substring_regex.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/ak_replace_substring_regex.py
index ae2d0ff043..832bf83552 100644
--- a/src/awkward/operations/str/ak_replace_substring_regex.py
+++ b/src/awkward/operations/str/ak_replace_substring_regex.py
@@ -40,6 +40,8 @@ def replace_substring_regex(
     or
     [pyarrow.compute.replace_substring_regex](https://arrow.apache.org/docs/python/generated/pyarrow.compute.replace_substring_regex.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.replace_substring.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/ak_rtrim.py
index db96fe2061..816605de40 100644
--- a/src/awkward/operations/str/ak_rtrim.py
+++ b/src/awkward/operations/str/ak_rtrim.py
@@ -37,6 +37,8 @@ def rtrim(array, characters, *, highlevel=True, behavior=None):
     or
     [pyarrow.compute.ascii_rtrim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_rtrim.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.rtrim_whitespace.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_rtrim_whitespace.py b/src/awkward/operations/str/ak_rtrim_whitespace.py
index 1216d3d5ce..19c18677b0 100644
--- a/src/awkward/operations/str/ak_rtrim_whitespace.py
+++ b/src/awkward/operations/str/ak_rtrim_whitespace.py
@@ -29,6 +29,8 @@ def rtrim_whitespace(array, *, highlevel=True, behavior=None):
     or
     [pyarrow.compute.ascii_rtrim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_rtrim_whitespace.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.rtrim.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/ak_split_pattern.py
index 604532edc0..e311ade93d 100644
--- a/src/awkward/operations/str/ak_split_pattern.py
+++ b/src/awkward/operations/str/ak_split_pattern.py
@@ -35,6 +35,8 @@ def split_pattern(
 
     Requires the pyarrow library and calls
     [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
+
+    See also: #ak.str.split_whitespace, #ak.str.split_pattern_regex.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/ak_split_pattern_regex.py
index 69a88e9b1e..dd71e8b9b5 100644
--- a/src/awkward/operations/str/ak_split_pattern_regex.py
+++ b/src/awkward/operations/str/ak_split_pattern_regex.py
@@ -36,6 +36,8 @@ def split_pattern_regex(
 
     Requires the pyarrow library and calls
     [pyarrow.compute.split_pattern](https://arrow.apache.org/docs/python/generated/pyarrow.compute.split_pattern.html).
+
+    See also: #ak.str.split_whitespace, #ak.str.split_pattern.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/ak_split_whitespace.py
index bf35a2c1b9..5bfb9e77a8 100644
--- a/src/awkward/operations/str/ak_split_whitespace.py
+++ b/src/awkward/operations/str/ak_split_whitespace.py
@@ -44,6 +44,8 @@ def split_whitespace(
     [pyarrow.compute.utf8_split_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.utf8_split_whitespace.html)
     or [pyarrow.compute.ascii_split_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_split_whitespace.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.split_pattern, #ak.str.split_pattern_regex.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/ak_trim.py
index ecf7d14f90..aa5352cd5e 100644
--- a/src/awkward/operations/str/ak_trim.py
+++ b/src/awkward/operations/str/ak_trim.py
@@ -38,6 +38,8 @@ def trim(array, characters, *, highlevel=True, behavior=None):
     or
     [pyarrow.compute.ascii_trim](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_trim.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.trim_whitespace.
     """
     # Dispatch
     yield (array,)
diff --git a/src/awkward/operations/str/ak_trim_whitespace.py b/src/awkward/operations/str/ak_trim_whitespace.py
index de34c5fa87..200118fdb7 100644
--- a/src/awkward/operations/str/ak_trim_whitespace.py
+++ b/src/awkward/operations/str/ak_trim_whitespace.py
@@ -30,6 +30,8 @@ def trim_whitespace(array, *, highlevel=True, behavior=None):
     or
     [pyarrow.compute.ascii_trim_whitespace](https://arrow.apache.org/docs/python/generated/pyarrow.compute.ascii_trim_whitespace.html)
     on strings and bytestrings, respectively.
+
+    See also: #ak.str.trim.
     """
     # Dispatch
     yield (array,)

From cbba554e2a534c68389dac5de35d65223f0c720c Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 11:02:17 +0100
Subject: [PATCH 69/73] docs: include `ak.str` in toctree

---
 docs/prepare_docstrings.py |  1 +
 docs/reference/toctree.txt | 73 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/docs/prepare_docstrings.py b/docs/prepare_docstrings.py
index 963a3cb408..fa1f6c81b5 100644
--- a/docs/prepare_docstrings.py
+++ b/docs/prepare_docstrings.py
@@ -303,6 +303,7 @@ def dofunction(link, linelink, shortname, name, astfcn):
         .replace(".behaviors.string", "")
     )
     shortname = re.sub(r"\.operations\.ak_\w+", "", shortname)
+    shortname = re.sub(r"\.operations\.str\.ak_\w+", ".str", shortname)
     shortname = re.sub(r"\.(contents|types|forms)\.\w+", r".\1", shortname)
 
     if (
diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt
index f442d9cb2c..2304bba695 100644
--- a/docs/reference/toctree.txt
+++ b/docs/reference/toctree.txt
@@ -145,6 +145,79 @@
     generated/ak.argcartesian
     generated/ak.argcombinations
 
+.. toctree::
+    :caption: String predicates
+
+    generated/ak.str.is_alnum
+    generated/ak.str.is_alpha
+    generated/ak.str.is_ascii
+    generated/ak.str.is_decimal
+    generated/ak.str.is_digit
+    generated/ak.str.is_lower
+    generated/ak.str.is_numeric
+    generated/ak.str.is_printable
+    generated/ak.str.is_space
+    generated/ak.str.is_title
+    generated/ak.str.is_upper
+
+.. toctree::
+    :caption: String transforms
+
+    generated/ak.str.capitalize
+    generated/ak.str.length
+    generated/ak.str.lower
+    generated/ak.str.repeat
+    generated/ak.str.replace_slice
+    generated/ak.str.replace_substring
+    generated/ak.str.replace_substring_regex
+    generated/ak.str.reverse
+    generated/ak.str.swapcase
+    generated/ak.str.title
+    generated/ak.str.upper
+
+.. toctree::
+    :caption: String padding and trimming
+
+    generated/ak.str.center
+    generated/ak.str.lpad
+    generated/ak.str.rpad
+    generated/ak.str.ltrim
+    generated/ak.str.ltrim_whitespace
+    generated/ak.str.rtrim
+    generated/ak.str.rtrim_whitespace
+    generated/ak.str.trim
+    generated/ak.str.trim_whitespace
+
+.. toctree::
+    :caption: String splitting and joining
+
+    generated/ak.str.split_pattern
+    generated/ak.str.split_pattern_regex
+    generated/ak.str.split_whitespace
+    generated/ak.str.join
+    generated/ak.str.join_element_wise
+
+.. toctree::
+    :caption: String slicing and decomposition
+
+    generated/ak.str.slice
+    generated/ak.str.extract_regex
+
+.. toctree::
+    :caption: String containment tests
+
+    generated/ak.str.count_substring
+    generated/ak.str.count_substring_regex
+    generated/ak.str.ends_with
+    generated/ak.str.find_substring
+    generated/ak.str.find_substring_regex
+    generated/ak.str.index_in
+    generated/ak.str.is_in
+    generated/ak.str.match_like
+    generated/ak.str.match_substring
+    generated/ak.str.match_substring_regex
+    generated/ak.str.starts_with
+
 .. toctree::
     :caption: Value and type conversions
 

From 6e39bf168c564b1d4e4cbbd30cad00ae803bfb63 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 8 Aug 2023 09:28:33 +0100
Subject: [PATCH 70/73] chore: update pre-commit hooks (#2619)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

From 9fee3fc809948b07b1b0d16e90e8f317efabbc74 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 12:05:40 +0100
Subject: [PATCH 71/73] refactor: cleanup error handling

---
 src/awkward/operations/str/ak_index_in.py          |  4 ++--
 src/awkward/operations/str/ak_is_in.py             |  4 ++--
 src/awkward/operations/str/ak_join.py              | 14 +++-----------
 src/awkward/operations/str/ak_join_element_wise.py |  6 +++---
 4 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/awkward/operations/str/ak_index_in.py b/src/awkward/operations/str/ak_index_in.py
index 84f1a2af1d..3c71e0a281 100644
--- a/src/awkward/operations/str/ak_index_in.py
+++ b/src/awkward/operations/str/ak_index_in.py
@@ -58,12 +58,12 @@ def _impl(array, value_set, skip_nones, highlevel, behavior):
     value_set_layout = ak.to_layout(value_set, allow_record=False, allow_other=True)
 
     if not _is_maybe_optional_list_of_string(value_set_layout):
-        raise TypeError("`value_set` must be 1D array of (maybe missing) strings")
+        raise TypeError("`value_set` must be 1D array of (possibly missing) strings")
 
     behavior = behavior_of(array, value_set, behavior=behavior)
 
     def apply(layout, **kwargs):
-        if _is_maybe_optional_list_of_string(layout) and layout.purelist_depth == 1:
+        if _is_maybe_optional_list_of_string(layout):
             return ak.from_arrow(
                 pc.index_in(
                     ak.to_arrow(layout, extensionarray=False),
diff --git a/src/awkward/operations/str/ak_is_in.py b/src/awkward/operations/str/ak_is_in.py
index 3ac79b44ec..99223cc5e0 100644
--- a/src/awkward/operations/str/ak_is_in.py
+++ b/src/awkward/operations/str/ak_is_in.py
@@ -57,12 +57,12 @@ def _impl(array, value_set, skip_nones, highlevel, behavior):
     value_set_layout = ak.to_layout(value_set, allow_record=False, allow_other=True)
 
     if not _is_maybe_optional_list_of_string(value_set_layout):
-        raise TypeError("`value_set` must be 1D array of (maybe missing) strings")
+        raise TypeError("`value_set` must be 1D array of (possibly missing) strings")
 
     behavior = behavior_of(array, value_set, behavior=behavior)
 
     def apply(layout, **kwargs):
-        if _is_maybe_optional_list_of_string(layout) and layout.purelist_depth == 1:
+        if _is_maybe_optional_list_of_string(layout):
             return ak.from_arrow(
                 pc.is_in(
                     ak.to_arrow(layout, extensionarray=False),
diff --git a/src/awkward/operations/str/ak_join.py b/src/awkward/operations/str/ak_join.py
index 062067b3db..7f0f93e5fb 100644
--- a/src/awkward/operations/str/ak_join.py
+++ b/src/awkward/operations/str/ak_join.py
@@ -57,14 +57,9 @@ def _impl(array, separator, highlevel, behavior):
     import pyarrow.compute as pc
 
     def apply_unary(layout, **kwargs):
-        if not (layout.is_list and layout.purelist_depth == 2):
+        if not (layout.is_list and _is_maybe_optional_list_of_string(layout.content)):
             return
 
-        if not _is_maybe_optional_list_of_string(layout.content):
-            return
-
-        # We have (maybe option/indexed type wrapping) strings
-
         arrow_array = to_arrow(
             # Arrow needs an option type here
             layout.copy(content=ak.contents.UnmaskedArray.simplified(layout.content)),
@@ -80,15 +75,12 @@ def apply_unary(layout, **kwargs):
 
     def apply_binary(layouts, **kwargs):
         layout, separator_layout = layouts
-        if not (layout.is_list and layout.purelist_depth == 2):
-            return
-
-        if not _is_maybe_optional_list_of_string(layout.content):
+        if not (layout.is_list and _is_maybe_optional_list_of_string(layout.content)):
             return
 
         if not _is_maybe_optional_list_of_string(separator_layout):
             raise TypeError(
-                f"separator must be a list of strings, not {type(separator_layout)}"
+                f"`separator` must be a list of (possibly missing) strings, not {ak.type(separator_layout)}"
             )
 
         # We have (maybe option/indexed type wrapping) strings
diff --git a/src/awkward/operations/str/ak_join_element_wise.py b/src/awkward/operations/str/ak_join_element_wise.py
index 2026bb8ae0..cde4eef163 100644
--- a/src/awkward/operations/str/ak_join_element_wise.py
+++ b/src/awkward/operations/str/ak_join_element_wise.py
@@ -48,12 +48,12 @@ def _impl(arrays, highlevel, behavior):
 
     import pyarrow.compute as pc
 
-    layouts = [ak.to_layout(x) for x in arrays]
-    behavior = behavior_of(*arrays, behavior=behavior)
-
     if len(arrays) < 1:
         raise TypeError("at least one array is required")
 
+    layouts = [ak.to_layout(x) for x in arrays]
+    behavior = behavior_of(*arrays, behavior=behavior)
+
     def action(layouts, **kwargs):
         if all(
             x.is_list and x.parameter("__array__") in ("string", "bytestring")

From c5f5cb73fee92d16aa537953a2e25d2086934132 Mon Sep 17 00:00:00 2001
From: Jim Pivarski <jpivarski@gmail.com>
Date: Tue, 8 Aug 2023 10:04:41 -0500
Subject: [PATCH 72/73] Rename ak_*.py modules -> akstr_*.py.

---
 src/awkward/operations/str/__init__.py        | 98 +++++++++----------
 .../{ak_capitalize.py => akstr_capitalize.py} |  0
 .../str/{ak_center.py => akstr_center.py}     |  0
 ..._substring.py => akstr_count_substring.py} |  0
 ...egex.py => akstr_count_substring_regex.py} |  0
 .../{ak_ends_with.py => akstr_ends_with.py}   |  0
 ...xtract_regex.py => akstr_extract_regex.py} |  0
 ...d_substring.py => akstr_find_substring.py} |  0
 ...regex.py => akstr_find_substring_regex.py} |  0
 .../str/{ak_index_in.py => akstr_index_in.py} |  0
 .../str/{ak_is_alnum.py => akstr_is_alnum.py} |  0
 .../str/{ak_is_alpha.py => akstr_is_alpha.py} |  0
 .../str/{ak_is_ascii.py => akstr_is_ascii.py} |  0
 .../{ak_is_decimal.py => akstr_is_decimal.py} |  0
 .../str/{ak_is_digit.py => akstr_is_digit.py} |  0
 .../str/{ak_is_in.py => akstr_is_in.py}       |  0
 .../str/{ak_is_lower.py => akstr_is_lower.py} |  0
 .../{ak_is_numeric.py => akstr_is_numeric.py} |  0
 ..._is_printable.py => akstr_is_printable.py} |  0
 .../str/{ak_is_space.py => akstr_is_space.py} |  0
 .../str/{ak_is_title.py => akstr_is_title.py} |  0
 .../str/{ak_is_upper.py => akstr_is_upper.py} |  0
 .../str/{ak_join.py => akstr_join.py}         |  0
 ...ent_wise.py => akstr_join_element_wise.py} |  0
 .../str/{ak_length.py => akstr_length.py}     |  0
 .../str/{ak_lower.py => akstr_lower.py}       |  0
 .../str/{ak_lpad.py => akstr_lpad.py}         |  0
 .../str/{ak_ltrim.py => akstr_ltrim.py}       |  0
 ...hitespace.py => akstr_ltrim_whitespace.py} |  0
 .../{ak_match_like.py => akstr_match_like.py} |  0
 ..._substring.py => akstr_match_substring.py} |  0
 ...egex.py => akstr_match_substring_regex.py} |  0
 .../str/{ak_repeat.py => akstr_repeat.py}     |  0
 ...eplace_slice.py => akstr_replace_slice.py} |  0
 ...ubstring.py => akstr_replace_substring.py} |  0
 ...ex.py => akstr_replace_substring_regex.py} |  0
 .../str/{ak_reverse.py => akstr_reverse.py}   |  0
 .../str/{ak_rpad.py => akstr_rpad.py}         |  0
 .../str/{ak_rtrim.py => akstr_rtrim.py}       |  0
 ...hitespace.py => akstr_rtrim_whitespace.py} |  0
 .../str/{ak_slice.py => akstr_slice.py}       |  0
 ...plit_pattern.py => akstr_split_pattern.py} |  0
 ..._regex.py => akstr_split_pattern_regex.py} |  0
 ...hitespace.py => akstr_split_whitespace.py} |  0
 ...ak_starts_with.py => akstr_starts_with.py} |  0
 .../str/{ak_swapcase.py => akstr_swapcase.py} |  0
 .../str/{ak_title.py => akstr_title.py}       |  0
 .../str/{ak_trim.py => akstr_trim.py}         |  0
 ...whitespace.py => akstr_trim_whitespace.py} |  0
 .../str/{ak_upper.py => akstr_upper.py}       |  0
 50 files changed, 49 insertions(+), 49 deletions(-)
 rename src/awkward/operations/str/{ak_capitalize.py => akstr_capitalize.py} (100%)
 rename src/awkward/operations/str/{ak_center.py => akstr_center.py} (100%)
 rename src/awkward/operations/str/{ak_count_substring.py => akstr_count_substring.py} (100%)
 rename src/awkward/operations/str/{ak_count_substring_regex.py => akstr_count_substring_regex.py} (100%)
 rename src/awkward/operations/str/{ak_ends_with.py => akstr_ends_with.py} (100%)
 rename src/awkward/operations/str/{ak_extract_regex.py => akstr_extract_regex.py} (100%)
 rename src/awkward/operations/str/{ak_find_substring.py => akstr_find_substring.py} (100%)
 rename src/awkward/operations/str/{ak_find_substring_regex.py => akstr_find_substring_regex.py} (100%)
 rename src/awkward/operations/str/{ak_index_in.py => akstr_index_in.py} (100%)
 rename src/awkward/operations/str/{ak_is_alnum.py => akstr_is_alnum.py} (100%)
 rename src/awkward/operations/str/{ak_is_alpha.py => akstr_is_alpha.py} (100%)
 rename src/awkward/operations/str/{ak_is_ascii.py => akstr_is_ascii.py} (100%)
 rename src/awkward/operations/str/{ak_is_decimal.py => akstr_is_decimal.py} (100%)
 rename src/awkward/operations/str/{ak_is_digit.py => akstr_is_digit.py} (100%)
 rename src/awkward/operations/str/{ak_is_in.py => akstr_is_in.py} (100%)
 rename src/awkward/operations/str/{ak_is_lower.py => akstr_is_lower.py} (100%)
 rename src/awkward/operations/str/{ak_is_numeric.py => akstr_is_numeric.py} (100%)
 rename src/awkward/operations/str/{ak_is_printable.py => akstr_is_printable.py} (100%)
 rename src/awkward/operations/str/{ak_is_space.py => akstr_is_space.py} (100%)
 rename src/awkward/operations/str/{ak_is_title.py => akstr_is_title.py} (100%)
 rename src/awkward/operations/str/{ak_is_upper.py => akstr_is_upper.py} (100%)
 rename src/awkward/operations/str/{ak_join.py => akstr_join.py} (100%)
 rename src/awkward/operations/str/{ak_join_element_wise.py => akstr_join_element_wise.py} (100%)
 rename src/awkward/operations/str/{ak_length.py => akstr_length.py} (100%)
 rename src/awkward/operations/str/{ak_lower.py => akstr_lower.py} (100%)
 rename src/awkward/operations/str/{ak_lpad.py => akstr_lpad.py} (100%)
 rename src/awkward/operations/str/{ak_ltrim.py => akstr_ltrim.py} (100%)
 rename src/awkward/operations/str/{ak_ltrim_whitespace.py => akstr_ltrim_whitespace.py} (100%)
 rename src/awkward/operations/str/{ak_match_like.py => akstr_match_like.py} (100%)
 rename src/awkward/operations/str/{ak_match_substring.py => akstr_match_substring.py} (100%)
 rename src/awkward/operations/str/{ak_match_substring_regex.py => akstr_match_substring_regex.py} (100%)
 rename src/awkward/operations/str/{ak_repeat.py => akstr_repeat.py} (100%)
 rename src/awkward/operations/str/{ak_replace_slice.py => akstr_replace_slice.py} (100%)
 rename src/awkward/operations/str/{ak_replace_substring.py => akstr_replace_substring.py} (100%)
 rename src/awkward/operations/str/{ak_replace_substring_regex.py => akstr_replace_substring_regex.py} (100%)
 rename src/awkward/operations/str/{ak_reverse.py => akstr_reverse.py} (100%)
 rename src/awkward/operations/str/{ak_rpad.py => akstr_rpad.py} (100%)
 rename src/awkward/operations/str/{ak_rtrim.py => akstr_rtrim.py} (100%)
 rename src/awkward/operations/str/{ak_rtrim_whitespace.py => akstr_rtrim_whitespace.py} (100%)
 rename src/awkward/operations/str/{ak_slice.py => akstr_slice.py} (100%)
 rename src/awkward/operations/str/{ak_split_pattern.py => akstr_split_pattern.py} (100%)
 rename src/awkward/operations/str/{ak_split_pattern_regex.py => akstr_split_pattern_regex.py} (100%)
 rename src/awkward/operations/str/{ak_split_whitespace.py => akstr_split_whitespace.py} (100%)
 rename src/awkward/operations/str/{ak_starts_with.py => akstr_starts_with.py} (100%)
 rename src/awkward/operations/str/{ak_swapcase.py => akstr_swapcase.py} (100%)
 rename src/awkward/operations/str/{ak_title.py => akstr_title.py} (100%)
 rename src/awkward/operations/str/{ak_trim.py => akstr_trim.py} (100%)
 rename src/awkward/operations/str/{ak_trim_whitespace.py => akstr_trim_whitespace.py} (100%)
 rename src/awkward/operations/str/{ak_upper.py => akstr_upper.py} (100%)

diff --git a/src/awkward/operations/str/__init__.py b/src/awkward/operations/str/__init__.py
index 7d4357d12a..610a99de4b 100644
--- a/src/awkward/operations/str/__init__.py
+++ b/src/awkward/operations/str/__init__.py
@@ -3,75 +3,75 @@
 # https://arrow.apache.org/docs/python/api/compute.html#string-predicates
 
 # string predicates
-from awkward.operations.str.ak_is_alnum import *
-from awkward.operations.str.ak_is_alpha import *
-from awkward.operations.str.ak_is_decimal import *
-from awkward.operations.str.ak_is_digit import *
-from awkward.operations.str.ak_is_lower import *
-from awkward.operations.str.ak_is_numeric import *
-from awkward.operations.str.ak_is_printable import *
-from awkward.operations.str.ak_is_space import *
-from awkward.operations.str.ak_is_upper import *
-from awkward.operations.str.ak_is_title import *
-from awkward.operations.str.ak_is_ascii import *
+from awkward.operations.str.akstr_is_alnum import *
+from awkward.operations.str.akstr_is_alpha import *
+from awkward.operations.str.akstr_is_decimal import *
+from awkward.operations.str.akstr_is_digit import *
+from awkward.operations.str.akstr_is_lower import *
+from awkward.operations.str.akstr_is_numeric import *
+from awkward.operations.str.akstr_is_printable import *
+from awkward.operations.str.akstr_is_space import *
+from awkward.operations.str.akstr_is_upper import *
+from awkward.operations.str.akstr_is_title import *
+from awkward.operations.str.akstr_is_ascii import *
 
 # string transforms
-from awkward.operations.str.ak_capitalize import *
-from awkward.operations.str.ak_length import *
-from awkward.operations.str.ak_lower import *
-from awkward.operations.str.ak_swapcase import *
-from awkward.operations.str.ak_title import *
-from awkward.operations.str.ak_upper import *
-from awkward.operations.str.ak_repeat import *
-from awkward.operations.str.ak_replace_slice import *
-from awkward.operations.str.ak_reverse import *
-from awkward.operations.str.ak_replace_substring import *
-from awkward.operations.str.ak_replace_substring_regex import *
+from awkward.operations.str.akstr_capitalize import *
+from awkward.operations.str.akstr_length import *
+from awkward.operations.str.akstr_lower import *
+from awkward.operations.str.akstr_swapcase import *
+from awkward.operations.str.akstr_title import *
+from awkward.operations.str.akstr_upper import *
+from awkward.operations.str.akstr_repeat import *
+from awkward.operations.str.akstr_replace_slice import *
+from awkward.operations.str.akstr_reverse import *
+from awkward.operations.str.akstr_replace_substring import *
+from awkward.operations.str.akstr_replace_substring_regex import *
 
 # string padding
-from awkward.operations.str.ak_center import *
-from awkward.operations.str.ak_lpad import *
-from awkward.operations.str.ak_rpad import *
+from awkward.operations.str.akstr_center import *
+from awkward.operations.str.akstr_lpad import *
+from awkward.operations.str.akstr_rpad import *
 
 # string trimming
-from awkward.operations.str.ak_ltrim import *
-from awkward.operations.str.ak_ltrim_whitespace import *
-from awkward.operations.str.ak_rtrim import *
-from awkward.operations.str.ak_rtrim_whitespace import *
-from awkward.operations.str.ak_trim import *
-from awkward.operations.str.ak_trim_whitespace import *
+from awkward.operations.str.akstr_ltrim import *
+from awkward.operations.str.akstr_ltrim_whitespace import *
+from awkward.operations.str.akstr_rtrim import *
+from awkward.operations.str.akstr_rtrim_whitespace import *
+from awkward.operations.str.akstr_trim import *
+from awkward.operations.str.akstr_trim_whitespace import *
 
 # string splitting
-from awkward.operations.str.ak_split_whitespace import *
-from awkward.operations.str.ak_split_pattern import *
-from awkward.operations.str.ak_split_pattern_regex import *
+from awkward.operations.str.akstr_split_whitespace import *
+from awkward.operations.str.akstr_split_pattern import *
+from awkward.operations.str.akstr_split_pattern_regex import *
 
 # string component extraction
 
-from awkward.operations.str.ak_extract_regex import *
+from awkward.operations.str.akstr_extract_regex import *
 
 # string joining
 
-from awkward.operations.str.ak_join import *
-from awkward.operations.str.ak_join_element_wise import *
+from awkward.operations.str.akstr_join import *
+from awkward.operations.str.akstr_join_element_wise import *
 
 # string slicing
 
-from awkward.operations.str.ak_slice import *
+from awkward.operations.str.akstr_slice import *
 
 # containment tests
 
-from awkward.operations.str.ak_count_substring import *
-from awkward.operations.str.ak_count_substring_regex import *
-from awkward.operations.str.ak_ends_with import *
-from awkward.operations.str.ak_find_substring import *
-from awkward.operations.str.ak_find_substring_regex import *
-from awkward.operations.str.ak_index_in import *
-from awkward.operations.str.ak_is_in import *
-from awkward.operations.str.ak_match_like import *
-from awkward.operations.str.ak_match_substring import *
-from awkward.operations.str.ak_match_substring_regex import *
-from awkward.operations.str.ak_starts_with import *
+from awkward.operations.str.akstr_count_substring import *
+from awkward.operations.str.akstr_count_substring_regex import *
+from awkward.operations.str.akstr_ends_with import *
+from awkward.operations.str.akstr_find_substring import *
+from awkward.operations.str.akstr_find_substring_regex import *
+from awkward.operations.str.akstr_index_in import *
+from awkward.operations.str.akstr_is_in import *
+from awkward.operations.str.akstr_match_like import *
+from awkward.operations.str.akstr_match_substring import *
+from awkward.operations.str.akstr_match_substring_regex import *
+from awkward.operations.str.akstr_starts_with import *
 
 
 def _get_ufunc_action(
diff --git a/src/awkward/operations/str/ak_capitalize.py b/src/awkward/operations/str/akstr_capitalize.py
similarity index 100%
rename from src/awkward/operations/str/ak_capitalize.py
rename to src/awkward/operations/str/akstr_capitalize.py
diff --git a/src/awkward/operations/str/ak_center.py b/src/awkward/operations/str/akstr_center.py
similarity index 100%
rename from src/awkward/operations/str/ak_center.py
rename to src/awkward/operations/str/akstr_center.py
diff --git a/src/awkward/operations/str/ak_count_substring.py b/src/awkward/operations/str/akstr_count_substring.py
similarity index 100%
rename from src/awkward/operations/str/ak_count_substring.py
rename to src/awkward/operations/str/akstr_count_substring.py
diff --git a/src/awkward/operations/str/ak_count_substring_regex.py b/src/awkward/operations/str/akstr_count_substring_regex.py
similarity index 100%
rename from src/awkward/operations/str/ak_count_substring_regex.py
rename to src/awkward/operations/str/akstr_count_substring_regex.py
diff --git a/src/awkward/operations/str/ak_ends_with.py b/src/awkward/operations/str/akstr_ends_with.py
similarity index 100%
rename from src/awkward/operations/str/ak_ends_with.py
rename to src/awkward/operations/str/akstr_ends_with.py
diff --git a/src/awkward/operations/str/ak_extract_regex.py b/src/awkward/operations/str/akstr_extract_regex.py
similarity index 100%
rename from src/awkward/operations/str/ak_extract_regex.py
rename to src/awkward/operations/str/akstr_extract_regex.py
diff --git a/src/awkward/operations/str/ak_find_substring.py b/src/awkward/operations/str/akstr_find_substring.py
similarity index 100%
rename from src/awkward/operations/str/ak_find_substring.py
rename to src/awkward/operations/str/akstr_find_substring.py
diff --git a/src/awkward/operations/str/ak_find_substring_regex.py b/src/awkward/operations/str/akstr_find_substring_regex.py
similarity index 100%
rename from src/awkward/operations/str/ak_find_substring_regex.py
rename to src/awkward/operations/str/akstr_find_substring_regex.py
diff --git a/src/awkward/operations/str/ak_index_in.py b/src/awkward/operations/str/akstr_index_in.py
similarity index 100%
rename from src/awkward/operations/str/ak_index_in.py
rename to src/awkward/operations/str/akstr_index_in.py
diff --git a/src/awkward/operations/str/ak_is_alnum.py b/src/awkward/operations/str/akstr_is_alnum.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_alnum.py
rename to src/awkward/operations/str/akstr_is_alnum.py
diff --git a/src/awkward/operations/str/ak_is_alpha.py b/src/awkward/operations/str/akstr_is_alpha.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_alpha.py
rename to src/awkward/operations/str/akstr_is_alpha.py
diff --git a/src/awkward/operations/str/ak_is_ascii.py b/src/awkward/operations/str/akstr_is_ascii.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_ascii.py
rename to src/awkward/operations/str/akstr_is_ascii.py
diff --git a/src/awkward/operations/str/ak_is_decimal.py b/src/awkward/operations/str/akstr_is_decimal.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_decimal.py
rename to src/awkward/operations/str/akstr_is_decimal.py
diff --git a/src/awkward/operations/str/ak_is_digit.py b/src/awkward/operations/str/akstr_is_digit.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_digit.py
rename to src/awkward/operations/str/akstr_is_digit.py
diff --git a/src/awkward/operations/str/ak_is_in.py b/src/awkward/operations/str/akstr_is_in.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_in.py
rename to src/awkward/operations/str/akstr_is_in.py
diff --git a/src/awkward/operations/str/ak_is_lower.py b/src/awkward/operations/str/akstr_is_lower.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_lower.py
rename to src/awkward/operations/str/akstr_is_lower.py
diff --git a/src/awkward/operations/str/ak_is_numeric.py b/src/awkward/operations/str/akstr_is_numeric.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_numeric.py
rename to src/awkward/operations/str/akstr_is_numeric.py
diff --git a/src/awkward/operations/str/ak_is_printable.py b/src/awkward/operations/str/akstr_is_printable.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_printable.py
rename to src/awkward/operations/str/akstr_is_printable.py
diff --git a/src/awkward/operations/str/ak_is_space.py b/src/awkward/operations/str/akstr_is_space.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_space.py
rename to src/awkward/operations/str/akstr_is_space.py
diff --git a/src/awkward/operations/str/ak_is_title.py b/src/awkward/operations/str/akstr_is_title.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_title.py
rename to src/awkward/operations/str/akstr_is_title.py
diff --git a/src/awkward/operations/str/ak_is_upper.py b/src/awkward/operations/str/akstr_is_upper.py
similarity index 100%
rename from src/awkward/operations/str/ak_is_upper.py
rename to src/awkward/operations/str/akstr_is_upper.py
diff --git a/src/awkward/operations/str/ak_join.py b/src/awkward/operations/str/akstr_join.py
similarity index 100%
rename from src/awkward/operations/str/ak_join.py
rename to src/awkward/operations/str/akstr_join.py
diff --git a/src/awkward/operations/str/ak_join_element_wise.py b/src/awkward/operations/str/akstr_join_element_wise.py
similarity index 100%
rename from src/awkward/operations/str/ak_join_element_wise.py
rename to src/awkward/operations/str/akstr_join_element_wise.py
diff --git a/src/awkward/operations/str/ak_length.py b/src/awkward/operations/str/akstr_length.py
similarity index 100%
rename from src/awkward/operations/str/ak_length.py
rename to src/awkward/operations/str/akstr_length.py
diff --git a/src/awkward/operations/str/ak_lower.py b/src/awkward/operations/str/akstr_lower.py
similarity index 100%
rename from src/awkward/operations/str/ak_lower.py
rename to src/awkward/operations/str/akstr_lower.py
diff --git a/src/awkward/operations/str/ak_lpad.py b/src/awkward/operations/str/akstr_lpad.py
similarity index 100%
rename from src/awkward/operations/str/ak_lpad.py
rename to src/awkward/operations/str/akstr_lpad.py
diff --git a/src/awkward/operations/str/ak_ltrim.py b/src/awkward/operations/str/akstr_ltrim.py
similarity index 100%
rename from src/awkward/operations/str/ak_ltrim.py
rename to src/awkward/operations/str/akstr_ltrim.py
diff --git a/src/awkward/operations/str/ak_ltrim_whitespace.py b/src/awkward/operations/str/akstr_ltrim_whitespace.py
similarity index 100%
rename from src/awkward/operations/str/ak_ltrim_whitespace.py
rename to src/awkward/operations/str/akstr_ltrim_whitespace.py
diff --git a/src/awkward/operations/str/ak_match_like.py b/src/awkward/operations/str/akstr_match_like.py
similarity index 100%
rename from src/awkward/operations/str/ak_match_like.py
rename to src/awkward/operations/str/akstr_match_like.py
diff --git a/src/awkward/operations/str/ak_match_substring.py b/src/awkward/operations/str/akstr_match_substring.py
similarity index 100%
rename from src/awkward/operations/str/ak_match_substring.py
rename to src/awkward/operations/str/akstr_match_substring.py
diff --git a/src/awkward/operations/str/ak_match_substring_regex.py b/src/awkward/operations/str/akstr_match_substring_regex.py
similarity index 100%
rename from src/awkward/operations/str/ak_match_substring_regex.py
rename to src/awkward/operations/str/akstr_match_substring_regex.py
diff --git a/src/awkward/operations/str/ak_repeat.py b/src/awkward/operations/str/akstr_repeat.py
similarity index 100%
rename from src/awkward/operations/str/ak_repeat.py
rename to src/awkward/operations/str/akstr_repeat.py
diff --git a/src/awkward/operations/str/ak_replace_slice.py b/src/awkward/operations/str/akstr_replace_slice.py
similarity index 100%
rename from src/awkward/operations/str/ak_replace_slice.py
rename to src/awkward/operations/str/akstr_replace_slice.py
diff --git a/src/awkward/operations/str/ak_replace_substring.py b/src/awkward/operations/str/akstr_replace_substring.py
similarity index 100%
rename from src/awkward/operations/str/ak_replace_substring.py
rename to src/awkward/operations/str/akstr_replace_substring.py
diff --git a/src/awkward/operations/str/ak_replace_substring_regex.py b/src/awkward/operations/str/akstr_replace_substring_regex.py
similarity index 100%
rename from src/awkward/operations/str/ak_replace_substring_regex.py
rename to src/awkward/operations/str/akstr_replace_substring_regex.py
diff --git a/src/awkward/operations/str/ak_reverse.py b/src/awkward/operations/str/akstr_reverse.py
similarity index 100%
rename from src/awkward/operations/str/ak_reverse.py
rename to src/awkward/operations/str/akstr_reverse.py
diff --git a/src/awkward/operations/str/ak_rpad.py b/src/awkward/operations/str/akstr_rpad.py
similarity index 100%
rename from src/awkward/operations/str/ak_rpad.py
rename to src/awkward/operations/str/akstr_rpad.py
diff --git a/src/awkward/operations/str/ak_rtrim.py b/src/awkward/operations/str/akstr_rtrim.py
similarity index 100%
rename from src/awkward/operations/str/ak_rtrim.py
rename to src/awkward/operations/str/akstr_rtrim.py
diff --git a/src/awkward/operations/str/ak_rtrim_whitespace.py b/src/awkward/operations/str/akstr_rtrim_whitespace.py
similarity index 100%
rename from src/awkward/operations/str/ak_rtrim_whitespace.py
rename to src/awkward/operations/str/akstr_rtrim_whitespace.py
diff --git a/src/awkward/operations/str/ak_slice.py b/src/awkward/operations/str/akstr_slice.py
similarity index 100%
rename from src/awkward/operations/str/ak_slice.py
rename to src/awkward/operations/str/akstr_slice.py
diff --git a/src/awkward/operations/str/ak_split_pattern.py b/src/awkward/operations/str/akstr_split_pattern.py
similarity index 100%
rename from src/awkward/operations/str/ak_split_pattern.py
rename to src/awkward/operations/str/akstr_split_pattern.py
diff --git a/src/awkward/operations/str/ak_split_pattern_regex.py b/src/awkward/operations/str/akstr_split_pattern_regex.py
similarity index 100%
rename from src/awkward/operations/str/ak_split_pattern_regex.py
rename to src/awkward/operations/str/akstr_split_pattern_regex.py
diff --git a/src/awkward/operations/str/ak_split_whitespace.py b/src/awkward/operations/str/akstr_split_whitespace.py
similarity index 100%
rename from src/awkward/operations/str/ak_split_whitespace.py
rename to src/awkward/operations/str/akstr_split_whitespace.py
diff --git a/src/awkward/operations/str/ak_starts_with.py b/src/awkward/operations/str/akstr_starts_with.py
similarity index 100%
rename from src/awkward/operations/str/ak_starts_with.py
rename to src/awkward/operations/str/akstr_starts_with.py
diff --git a/src/awkward/operations/str/ak_swapcase.py b/src/awkward/operations/str/akstr_swapcase.py
similarity index 100%
rename from src/awkward/operations/str/ak_swapcase.py
rename to src/awkward/operations/str/akstr_swapcase.py
diff --git a/src/awkward/operations/str/ak_title.py b/src/awkward/operations/str/akstr_title.py
similarity index 100%
rename from src/awkward/operations/str/ak_title.py
rename to src/awkward/operations/str/akstr_title.py
diff --git a/src/awkward/operations/str/ak_trim.py b/src/awkward/operations/str/akstr_trim.py
similarity index 100%
rename from src/awkward/operations/str/ak_trim.py
rename to src/awkward/operations/str/akstr_trim.py
diff --git a/src/awkward/operations/str/ak_trim_whitespace.py b/src/awkward/operations/str/akstr_trim_whitespace.py
similarity index 100%
rename from src/awkward/operations/str/ak_trim_whitespace.py
rename to src/awkward/operations/str/akstr_trim_whitespace.py
diff --git a/src/awkward/operations/str/ak_upper.py b/src/awkward/operations/str/akstr_upper.py
similarity index 100%
rename from src/awkward/operations/str/ak_upper.py
rename to src/awkward/operations/str/akstr_upper.py

From 7bcb12c5e30959452859871896b955321a377783 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 8 Aug 2023 16:12:30 +0100
Subject: [PATCH 73/73] docs: be explicit about `akstr_`

---
 docs/prepare_docstrings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/prepare_docstrings.py b/docs/prepare_docstrings.py
index fa1f6c81b5..35756f516a 100644
--- a/docs/prepare_docstrings.py
+++ b/docs/prepare_docstrings.py
@@ -303,7 +303,7 @@ def dofunction(link, linelink, shortname, name, astfcn):
         .replace(".behaviors.string", "")
     )
     shortname = re.sub(r"\.operations\.ak_\w+", "", shortname)
-    shortname = re.sub(r"\.operations\.str\.ak_\w+", ".str", shortname)
+    shortname = re.sub(r"\.operations\.str\.akstr_\w+", ".str", shortname)
     shortname = re.sub(r"\.(contents|types|forms)\.\w+", r".\1", shortname)
 
     if (