diff --git a/.all-contributorsrc b/.all-contributorsrc
index 68800f50f0..ccaf3440b8 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -465,6 +465,24 @@
"contributions": [
"code"
]
+ },
+ {
+ "login": "ariostas",
+ "name": "Andres Rios Tascon",
+ "avatar_url": "https://avatars.githubusercontent.com/u/7596837?v=4",
+ "profile": "http://www.ariostas.com",
+ "contributions": [
+ "code"
+ ]
+ },
+ {
+ "login": "maxymnaumchyk",
+ "name": "maxymnaumchyk",
+ "avatar_url": "https://avatars.githubusercontent.com/u/70752300?v=4",
+ "profile": "https://github.com/maxymnaumchyk",
+ "contributions": [
+ "code"
+ ]
}
],
"contributorsPerLine": 7,
diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml
index d6f992588e..361d292285 100644
--- a/.github/workflows/build-wheels.yml
+++ b/.github/workflows/build-wheels.yml
@@ -105,7 +105,7 @@ jobs:
- name: Prepare build files
run: pipx run nox -s prepare
- - uses: pypa/cibuildwheel@v2.19
+ - uses: pypa/cibuildwheel@v2.20
env:
CIBW_BUILD: "${{ matrix.build }}*"
CIBW_ARCHS: ${{ matrix.arch }}
@@ -136,7 +136,7 @@ jobs:
SOURCE_DATE_EPOCH: ${{ needs.determine-source-date-epoch.outputs.source-date-epoch }}
strategy:
matrix:
- python: [38, 39, 310, 311, 312]
+ python: [38, 39, 310, 311, 312, 313]
arch: [aarch64]
steps:
@@ -157,7 +157,7 @@ jobs:
- uses: docker/setup-qemu-action@v3.2.0
- - uses: pypa/cibuildwheel@v2.19
+ - uses: pypa/cibuildwheel@v2.20
env:
CIBW_BUILD: cp${{ matrix.python }}-*
CIBW_ARCHS: ${{ matrix.arch }}
diff --git a/.github/workflows/deploy-cpp.yml b/.github/workflows/deploy-cpp.yml
index cfa540fbc2..67fa3624bf 100644
--- a/.github/workflows/deploy-cpp.yml
+++ b/.github/workflows/deploy-cpp.yml
@@ -35,8 +35,8 @@ jobs:
run: ls -l dist/
- name: Generate artifact attestation for sdist and wheel
- uses: actions/attest-build-provenance@5e9cb68e95676991667494a6a4e59b8a2f13e1d0 # v1.3.3
+ uses: actions/attest-build-provenance@1c608d11d69870c2092266b3f9a6f3abbf17002c # v1.4.3
with:
subject-path: "dist/awkward*cpp-*"
- - uses: pypa/gh-action-pypi-publish@v1.9.0
+ - uses: pypa/gh-action-pypi-publish@v1.10.1
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 4e6c2aa746..e62bca6c2b 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -88,7 +88,7 @@ jobs:
run: pipx run twine check dist/*
- name: Generate artifact attestation for sdist and wheel
- uses: actions/attest-build-provenance@5e9cb68e95676991667494a6a4e59b8a2f13e1d0 # v1.3.3
+ uses: actions/attest-build-provenance@1c608d11d69870c2092266b3f9a6f3abbf17002c # v1.4.3
with:
subject-path: "dist/awkward-*"
@@ -135,7 +135,7 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: gh attestation verify dist/awkward-*.whl --repo ${{ github.repository }}
- - uses: pypa/gh-action-pypi-publish@v1.9.0
+ - uses: pypa/gh-action-pypi-publish@v1.10.1
publish-headers:
name: "Publish header-only libraries alongside release"
diff --git a/.github/workflows/packaging-test.yml b/.github/workflows/packaging-test.yml
index 8317cc58a2..20ae0fe714 100644
--- a/.github/workflows/packaging-test.yml
+++ b/.github/workflows/packaging-test.yml
@@ -68,7 +68,7 @@ jobs:
- name: Prepare build files
run: pipx run nox -s prepare
- - uses: pypa/cibuildwheel@v2.19
+ - uses: pypa/cibuildwheel@v2.20
env:
CIBW_ARCHS_MACOS: universal2
CIBW_BUILD: cp39-win_amd64 cp310-manylinux_x86_64 cp38-macosx_universal2
@@ -76,7 +76,7 @@ jobs:
config-file: cibuildwheel.toml
package-dir: awkward-cpp
- - uses: pypa/cibuildwheel@v2.19
+ - uses: pypa/cibuildwheel@v2.20
if: matrix.os == 'ubuntu-latest'
env:
CIBW_BUILD: cp312-manylinux_x86_64
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ea52274645..2c58f66746 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -39,6 +39,7 @@ jobs:
- ubuntu-latest
- macos-13
python-version:
+ - '3.13'
- '3.12'
- '3.11'
- '3.10'
@@ -65,6 +66,10 @@ jobs:
python-architecture: x64
runs-on: ubuntu-latest
dependencies-kind: pypy
+ - python-version: '3.11'
+ python-architecture: x64
+ runs-on: ubuntu-latest
+ dependencies-kind: ml
runs-on: ${{ matrix.runs-on }}
@@ -106,6 +111,11 @@ jobs:
files: |
awkward-cpp/dist/*.whl
+ - name: Add workaround for 3.13 + cramjam
+ if: matrix.python-version == '3.13'
+ run: echo 'PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1' >> $GITHUB_ENV
+ shell: bash
+
- name: Install awkward, awkward-cpp, and dependencies
run: >-
python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index acd55d8d29..d8dac77444 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
additional_dependencies: [pyyaml]
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.5.5
+ rev: v0.6.4
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
@@ -62,13 +62,13 @@ repos:
files: ^tests/
- repo: https://github.com/python-jsonschema/check-jsonschema
- rev: 0.29.1
+ rev: 0.29.2
hooks:
- id: check-github-workflows
args: ["--verbose"]
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v1.11.0
+ rev: v1.11.2
hooks:
- id: mypy
files: src
@@ -76,6 +76,6 @@ repos:
- numpy>=1.24
- repo: https://github.com/abravalheri/validate-pyproject
- rev: v0.18
+ rev: v0.19
hooks:
- id: validate-pyproject
diff --git a/CITATION.cff b/CITATION.cff
index 110ddc1c16..326f2ed02a 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -51,6 +51,6 @@ authors:
email: "nick.smith@cern.ch"
- family-names: "Goyal"
given-names: "Manasvi"
- affiliation: "Delhi Technological University"
+ affiliation: "Harvard University"
orcid: "https://orcid.org/0000-0001-6321-7491"
email: "mg.manasvi@gmail.com"
diff --git a/README.md b/README.md
index dd5d38399f..65d916050d 100644
--- a/README.md
+++ b/README.md
@@ -152,7 +152,7 @@ year = {2018}
# Acknowledgements
-Support for this work was provided by NSF cooperative agreements [OAC-1836650](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1836650) and [PHY-2323298](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2323298) (IRIS-HEP), grant [OAC-1450377](https://nsf.gov/awardsearch/showAward?AWD_ID=1450377) (DIANA/HEP), [PHY-2121686](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2121686) (US-CMS LHC Ops), and [OAC-2103945](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2103945) (Awkward Array).
+Support for this work was provided by NSF cooperative agreements [OAC-1836650](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1836650) (IRIS-HEP 1) and [PHY-2323298](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2323298) (IRIS-HEP 2), grant [OAC-1450377](https://nsf.gov/awardsearch/showAward?AWD_ID=1450377) (DIANA/HEP), grants [PHY-1520942](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1520942) and [PHY-2121686](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2121686) (US-CMS LHC Ops), and [OAC-2103945](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2103945) (Awkward Array).
We also thank [Erez Shinan](https://github.com/erezsh) and the developers of the [Lark standalone parser](https://github.com/lark-parser/lark), which is used to parse type strings as type objects.
@@ -228,6 +228,8 @@ Thanks especially to the gracious help of Awkward Array contributors (including
Peter Fackeldey 💻 |
+ Andres Rios Tascon 💻 |
+ maxymnaumchyk 💻 |
diff --git a/awkward-cpp/pyproject.toml b/awkward-cpp/pyproject.toml
index 9d64da31ab..43411b9299 100644
--- a/awkward-cpp/pyproject.toml
+++ b/awkward-cpp/pyproject.toml
@@ -1,13 +1,13 @@
[build-system]
requires = [
- "scikit-build-core>=0.9",
+ "scikit-build-core>=0.10",
"pybind11",
]
build-backend = "scikit_build_core.build"
[project]
name = "awkward_cpp"
-version = "37"
+version = "38"
dependencies = [
"numpy>=1.18.0",
"importlib_resources;python_version < \"3.9\""
@@ -59,7 +59,7 @@ Releases = "https://github.com/scikit-hep/awkward-1.0/releases"
[tool.scikit-build]
-minimum-version = "0.9"
+minimum-version = "build-system.requires"
build-dir = "build/{cache_tag}"
sdist.reproducible = true
sdist.include = [
diff --git a/cibuildwheel.toml b/cibuildwheel.toml
index 96f94455fe..91827c69a0 100644
--- a/cibuildwheel.toml
+++ b/cibuildwheel.toml
@@ -23,5 +23,5 @@ build-verbosity = 1
PIP_ONLY_BINARY = "cmake,numpy"
[[tool.cibuildwheel.overrides]]
-select = "cp312-*"
-environment.PIP_PRE = "1"
+select = "cp313*"
+environment.PYO3_USE_ABI3_FORWARD_COMPATIBILITY = "1"
diff --git a/docs/_templates/funding.html b/docs/_templates/funding.html
index 8fb3a94738..09bb9ad107 100644
--- a/docs/_templates/funding.html
+++ b/docs/_templates/funding.html
@@ -1 +1 @@
-Support for this work was provided by NSF cooperative agreement OAC-1836650 (IRIS-HEP), grant OAC-1450377 (DIANA/HEP), PHY-1520942 (US-CMS LHC Ops), and OAC-2103945 (Awkward Array).
+Support for this work was provided by NSF cooperative agreements OAC-1836650 (IRIS-HEP 1) and PHY-2323298 (IRIS-HEP 2), grant OAC-1450377 (DIANA/HEP), grants PHY-1520942 and PHY-2121686 (US-CMS LHC Ops), and OAC-2103945 (Awkward Array).
diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt
index 126e0a2b9c..4b6ae1154d 100644
--- a/docs/reference/toctree.txt
+++ b/docs/reference/toctree.txt
@@ -299,11 +299,12 @@
generated/ak.backend
.. toctree::
- :caption: Approximation
+ :caption: Approximation and comparison
generated/ak.round
generated/ak.isclose
generated/ak.almost_equal
+ generated/ak.array_equal
.. toctree::
:caption: NumPy compatibility
diff --git a/pyproject.toml b/pyproject.toml
index 678c07a279..b2876972bd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "hatchling.build"
[project]
name = "awkward"
-version = "2.6.7"
+version = "2.6.8"
description = "Manipulate JSON-like data with NumPy-like idioms."
license = { text = "BSD-3-Clause" }
requires-python = ">=3.8"
@@ -32,6 +32,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Information Analysis",
"Topic :: Scientific/Engineering :: Mathematics",
@@ -40,7 +41,7 @@ classifiers = [
"Topic :: Utilities",
]
dependencies = [
- "awkward_cpp==37",
+ "awkward_cpp==38",
"importlib_metadata>=4.13.0;python_version < \"3.12\"",
"numpy>=1.18.0",
"packaging",
@@ -136,7 +137,7 @@ filterwarnings = [
]
log_cli_level = "info"
-[tool.pylint.master]
+[tool.pylint]
py-version = "3.8"
jobs = "0"
ignore-paths = [
@@ -251,7 +252,6 @@ ignore_errors = true
ignore_missing_imports = true
[tool.ruff]
-src = ["src"]
extend-exclude = [
"studies",
"pybind11",
@@ -289,10 +289,6 @@ ignore = [
"PLC1901", # x == "" can be simplified to not x (empty string is falsey)
"ISC001", # Conflicts with the formatter in 0.1.2
]
-unfixable = [
- "T20", # Removes print statements
- "F841", # Removes unused variables
-]
typing-modules = ["awkward._typing"]
external = []
mccabe.max-complexity = 100
diff --git a/requirements-test-full.txt b/requirements-test-full.txt
index 51e47e53f2..ded83569b7 100644
--- a/requirements-test-full.txt
+++ b/requirements-test-full.txt
@@ -1,9 +1,9 @@
fsspec>=2022.11.0;sys_platform != "win32"
-jax[cpu]>=0.2.15;sys_platform != "win32" and python_version < "3.12"
-numba>=0.50.0;sys_platform != "win32" and python_version < "3.12"
-numexpr>=2.7; python_version < "3.12"
-pandas>=0.24.0;sys_platform != "win32" and python_version < "3.12"
-pyarrow==16.0.0;sys_platform != "win32" and python_version < "3.12"
+jax[cpu]>=0.2.15;sys_platform != "win32" and python_version < "3.13"
+numba>=0.50.0;sys_platform != "win32" and python_version < "3.13"
+numexpr>=2.7; python_version < "3.13"
+pandas>=0.24.0;sys_platform != "win32" and python_version < "3.13"
+pyarrow==16.0.0;sys_platform != "win32" and python_version < "3.13"
pytest>=6
pytest-cov
pytest-xdist
diff --git a/requirements-test-ml.txt b/requirements-test-ml.txt
new file mode 100644
index 0000000000..d715854439
--- /dev/null
+++ b/requirements-test-ml.txt
@@ -0,0 +1,6 @@
+fsspec>=2022.11.0;sys_platform != "win32"
+pytest>=6
+pytest-cov
+pytest-xdist
+tensorflow >= 2.12
+torch >= 2.4.0
diff --git a/src/awkward/_broadcasting.py b/src/awkward/_broadcasting.py
index 7c69212dc2..7eb2300372 100644
--- a/src/awkward/_broadcasting.py
+++ b/src/awkward/_broadcasting.py
@@ -701,6 +701,36 @@ def broadcast_any_list():
for x, p in zip(outcontent, parameters)
)
+ def broadcast_any_option_all_UnmaskedArray():
+ nextinputs = []
+ nextparameters = []
+ for x in inputs:
+ if isinstance(x, UnmaskedArray):
+ nextinputs.append(x.content)
+ nextparameters.append(x._parameters)
+ elif isinstance(x, Content):
+ nextinputs.append(x)
+ nextparameters.append(x._parameters)
+ else:
+ nextinputs.append(x)
+ nextparameters.append(NO_PARAMETERS)
+
+ outcontent = apply_step(
+ backend,
+ nextinputs,
+ action,
+ depth,
+ copy.copy(depth_context),
+ lateral_context,
+ options,
+ )
+ assert isinstance(outcontent, tuple)
+ parameters = parameters_factory(nextparameters, len(outcontent))
+
+ return tuple(
+ UnmaskedArray(x, parameters=p) for x, p in zip(outcontent, parameters)
+ )
+
def broadcast_any_option():
mask = None
for x in contents:
@@ -712,7 +742,7 @@ def broadcast_any_option():
mask = backend.index_nplike.logical_or(mask, m, maybe_out=mask)
nextmask = Index8(mask.view(np.int8))
- index = backend.index_nplike.full(mask.shape[0], -1, dtype=np.int64)
+ index = backend.index_nplike.full(mask.shape[0], np.int64(-1), dtype=np.int64)
index[~mask] = backend.index_nplike.arange(
backend.index_nplike.shape_item_as_index(mask.shape[0])
- backend.index_nplike.count_nonzero(mask),
@@ -1045,7 +1075,9 @@ def continuation():
# Any option-types?
elif any(x.is_option for x in contents):
- if options["function_name"] == "ak.where":
+ if all(not x.is_option or isinstance(x, UnmaskedArray) for x in contents):
+ return broadcast_any_option_all_UnmaskedArray()
+ elif options["function_name"] == "ak.where":
return broadcast_any_option_akwhere()
else:
return broadcast_any_option()
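# Illustrative sketch (not part of the patch): the new broadcast_any_option_all_UnmaskedArray
# branch covers the case where every option-type input is an UnmaskedArray, so broadcasting
# should wrap the result back in UnmaskedArray instead of materializing a byte mask.
# The example data and the printed type are assumptions, not taken from the diff.
import numpy as np
import awkward as ak

left = ak.Array(ak.contents.UnmaskedArray(ak.contents.NumpyArray(np.array([1, 2, 3]))))
right = ak.Array([10, 20, 30])
print((left + right).type)  # expected to stay option-typed, e.g. 3 * ?int64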
diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu
index 7026072b04..9905358181 100644
--- a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu
+++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu
@@ -4,9 +4,9 @@
// def f(grid, block, args):
// (tmpptr, fromstarts, fromstops, length, toequal, invocation_index, err_code) = args
// if length > 1:
-// scan_in_array = cupy.full((length - 1) * (length - 2), 0, dtype=cupy.int64)
+// scan_in_array = cupy.full((length - 1) * (length - 2), cupy.array(0), dtype=cupy.int64)
// else:
-// scan_in_array = cupy.full(0, 0, dtype=cupy.int64)
+// scan_in_array = cupy.full(0, cupy.array(0), dtype=cupy.int64)
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_NumpyArray_subrange_equal_bool", bool_, fromstarts.dtype, fromstops.dtype, bool_]))(grid, block, (tmpptr, fromstarts, fromstops, length, toequal, scan_in_array, invocation_index, err_code))
// toequal[0] = cupy.any(scan_in_array == True)
// out["awkward_NumpyArray_subrange_equal_bool", {dtype_specializations}] = None
diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu
index 2941aa417e..878d87fc85 100644
--- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu
+++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu
@@ -7,7 +7,7 @@
// grid_size = math.floor((lenparents + block[0] - 1) / block[0])
// else:
// grid_size = 1
-// temp = cupy.full(lenparents, identity, dtype=toptr.dtype)
+// temp = cupy.full(lenparents, cupy.array([identity]), dtype=toptr.dtype)
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_max_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_max_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code))
// out["awkward_reduce_max_a", {dtype_specializations}] = None
@@ -60,7 +60,7 @@ awkward_reduce_max_b(
T val = identity;
if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
- val = temp[idx - stride];
+ val = temp[thread_id - stride];
}
__syncthreads();
temp[thread_id] = val > temp[thread_id] ? val : temp[thread_id];
diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu
index e709d687f8..d27c5e1b80 100644
--- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu
+++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu
@@ -7,7 +7,7 @@
// grid_size = math.floor((lenparents + block[0] - 1) / block[0])
// else:
// grid_size = 1
-// temp = cupy.full(lenparents, identity, dtype=toptr.dtype)
+// temp = cupy.full(lenparents, cupy.array([identity]), dtype=toptr.dtype)
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_min_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_min_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code))
// out["awkward_reduce_min_a", {dtype_specializations}] = None
diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_complex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_complex.cu
index 9a0c66846f..2e84a0cf9c 100644
--- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_complex.cu
+++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_complex.cu
@@ -30,8 +30,8 @@ awkward_reduce_prod_complex_a(
int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x;
if (thread_id < outlength) {
- toptr[thread_id * 2] = (T)1.0f;
- toptr[thread_id * 2 + 1] = (T)0.0f;
+ toptr[thread_id * 2] = (T)1;
+ toptr[thread_id * 2 + 1] = (T)0;
}
}
}
@@ -59,8 +59,8 @@ awkward_reduce_prod_complex_b(
if (thread_id < lenparents) {
for (int64_t stride = 1; stride < blockDim.x; stride *= 2) {
- T real = (T)1.0f;
- T imag = (T)0.0f;
+ T real = (T)1;
+ T imag = (T)0;
if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) {
real = temp[(idx - stride) * 2];
imag = temp[(idx - stride) * 2 + 1];
diff --git a/src/awkward/_connect/pyarrow/table_conv.py b/src/awkward/_connect/pyarrow/table_conv.py
index 53f286c730..4434c94009 100644
--- a/src/awkward/_connect/pyarrow/table_conv.py
+++ b/src/awkward/_connect/pyarrow/table_conv.py
@@ -221,7 +221,7 @@ def replace_schema(table: pyarrow.Table, new_schema: pyarrow.Schema) -> pyarrow.
new_batches.append(
pyarrow.RecordBatch.from_arrays(arrays=columns, schema=new_schema)
)
- return pyarrow.Table.from_batches(new_batches)
+ return pyarrow.Table.from_batches(new_batches, schema=new_schema)
def array_with_replacement_type(
diff --git a/src/awkward/_nplikes/array_module.py b/src/awkward/_nplikes/array_module.py
index 5217ace411..568c7fc29e 100644
--- a/src/awkward/_nplikes/array_module.py
+++ b/src/awkward/_nplikes/array_module.py
@@ -118,7 +118,7 @@ def full(
*,
dtype: DTypeLike | None = None,
) -> ArrayLikeT:
- return self._module.full(shape, fill_value, dtype=dtype)
+ return self._module.full(shape, self._module.array(fill_value), dtype=dtype)
def zeros_like(
self, x: ArrayLikeT | PlaceholderArray, *, dtype: DTypeLike | None = None
@@ -146,7 +146,9 @@ def full_like(
if isinstance(x, PlaceholderArray):
return self.full(x.shape, fill_value, dtype=dtype or x.dtype)
else:
- return self._module.full_like(x, fill_value, dtype=dtype)
+ return self._module.full_like(
+ x, self._module.array(fill_value), dtype=dtype
+ )
def arange(
self,
diff --git a/src/awkward/contents/bitmaskedarray.py b/src/awkward/contents/bitmaskedarray.py
index 0e12133d6e..9c70bfc4b5 100644
--- a/src/awkward/contents/bitmaskedarray.py
+++ b/src/awkward/contents/bitmaskedarray.py
@@ -11,6 +11,7 @@
from awkward._backends.backend import Backend
from awkward._meta.bitmaskedmeta import BitMaskedMeta
from awkward._nplikes.array_like import ArrayLike
+from awkward._nplikes.cupy import Cupy
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpy_like import IndexType, NumpyMetadata
from awkward._nplikes.placeholder import PlaceholderArray
@@ -687,6 +688,24 @@ def _to_arrow(
pyarrow, mask_node, validbytes, length, options
)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ cp = Cupy.instance()._module
+
+ assert mask is None # this class has its own mask
+ if not self.lsb_order:
+ m = cp.flip(
+ cp.packbits(cp.flip(cp.unpackbits(cp.asarray(self._mask.data))))
+ )
+ else:
+ m = self._mask.data
+
+ if m.nbytes % 64:
+ m = cp.resize(m, ((m.nbytes // 64) + 1) * 64)
+ m = cudf.core.buffer.as_buffer(m)
+ inner = self._content._to_cudf(cudf, mask=None, length=length)
+ inner.set_base_mask(m)
+ return inner
+
def _to_backend_array(self, allow_missing, backend):
return self.to_ByteMaskedArray()._to_backend_array(allow_missing, backend)
diff --git a/src/awkward/contents/bytemaskedarray.py b/src/awkward/contents/bytemaskedarray.py
index 65ad948a16..87beb5f59f 100644
--- a/src/awkward/contents/bytemaskedarray.py
+++ b/src/awkward/contents/bytemaskedarray.py
@@ -12,6 +12,7 @@
from awkward._layout import maybe_posaxis
from awkward._meta.bytemaskedmeta import ByteMaskedMeta
from awkward._nplikes.array_like import ArrayLike
+from awkward._nplikes.cupy import Cupy
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpy_like import IndexType, NumpyMetadata
from awkward._nplikes.placeholder import PlaceholderArray
@@ -1051,6 +1052,18 @@ def _to_arrow(
options,
)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ cp = Cupy.instance()._module
+
+ assert mask is None # this class has its own mask
+ m = cp.packbits(cp.asarray(self._mask), bitorder="little")
+ if m.nbytes % 64:
+ m = cp.resize(m, ((m.nbytes // 64) + 1) * 64)
+ m = cudf.core.buffer.as_buffer(m)
+ inner = self._content._to_cudf(cudf, mask=None, length=length)
+ inner.set_base_mask(m)
+ return inner
+
def _to_backend_array(self, allow_missing, backend):
return self.to_IndexedOptionArray64()._to_backend_array(allow_missing, backend)
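# Illustrative sketch (not part of the patch): an option-typed array like this one goes through
# ByteMaskedArray._to_cudf during conversion (compare the new test_null below); assumes cudf and
# a CUDA device are available.
import awkward as ak

arr = ak.Array([12, None, 21, 12])
print(ak.to_cudf(arr).to_arrow().tolist())  # expected: [12, None, 21, 12]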
diff --git a/src/awkward/contents/content.py b/src/awkward/contents/content.py
index 1a0fe080a9..d0169ee2eb 100644
--- a/src/awkward/contents/content.py
+++ b/src/awkward/contents/content.py
@@ -1010,6 +1010,10 @@ def _to_arrow(
):
raise NotImplementedError
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ # prototype abstract signature
+ raise NotImplementedError
+
def to_backend_array(
self, allow_missing: bool = True, *, backend: Backend | str | None = None
):
diff --git a/src/awkward/contents/emptyarray.py b/src/awkward/contents/emptyarray.py
index 112effddf0..06447f2d8b 100644
--- a/src/awkward/contents/emptyarray.py
+++ b/src/awkward/contents/emptyarray.py
@@ -387,6 +387,15 @@ def _to_arrow(
)
return next._to_arrow(pyarrow, mask_node, validbytes, length, options)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ dtype = np.dtype("float64")
+ next = ak.contents.NumpyArray(
+ numpy.empty(length, dtype=dtype),
+ parameters=self._parameters,
+ backend=self._backend,
+ )
+ return next._to_cudf(cudf, None, 0)
+
@classmethod
def _arrow_needs_option_type(cls):
return True # This overrides Content._arrow_needs_option_type
diff --git a/src/awkward/contents/indexedarray.py b/src/awkward/contents/indexedarray.py
index 6fb4ea3c69..6421f51742 100644
--- a/src/awkward/contents/indexedarray.py
+++ b/src/awkward/contents/indexedarray.py
@@ -1049,6 +1049,16 @@ def _to_arrow(
)
return next2._to_arrow(pyarrow, mask_node, validbytes, length, options)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ if self._content.length == 0:
+ # IndexedOptionArray._to_arrow replaces -1 in the index with 0. So behind
+ # every masked value is self._content[0], unless self._content.length == 0.
+ # In that case, don't call self._content[index]; it's empty anyway.
+ next = self._content
+ else:
+ next = self._content._carry(self._index, False)
+ return next._to_cudf(cudf, None, len(next))
+
def _to_backend_array(self, allow_missing, backend):
return self.project()._to_backend_array(allow_missing, backend)
diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py
index 0e68461dc5..2162fb72c4 100644
--- a/src/awkward/contents/indexedoptionarray.py
+++ b/src/awkward/contents/indexedoptionarray.py
@@ -1576,6 +1576,9 @@ def _to_arrow(
options,
)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ return self.to_ByteMaskedArray(True)._to_cudf(cudf, mask, length)
+
def _to_backend_array(self, allow_missing, backend):
nplike = backend.nplike
index_nplike = backend.index_nplike
diff --git a/src/awkward/contents/listarray.py b/src/awkward/contents/listarray.py
index 722b9044dd..a05eeaea55 100644
--- a/src/awkward/contents/listarray.py
+++ b/src/awkward/contents/listarray.py
@@ -1498,6 +1498,9 @@ def _to_arrow(
pyarrow, mask_node, validbytes, length, options
)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ return self.to_ListOffsetArray64(False)._to_cudf(cudf, mask, length)
+
def _to_backend_array(self, allow_missing, backend):
array_param = self.parameter("__array__")
if array_param in {"bytestring", "string"}:
diff --git a/src/awkward/contents/listoffsetarray.py b/src/awkward/contents/listoffsetarray.py
index 4aa149b69d..003467c24b 100644
--- a/src/awkward/contents/listoffsetarray.py
+++ b/src/awkward/contents/listoffsetarray.py
@@ -10,6 +10,7 @@
from awkward._layout import maybe_posaxis
from awkward._meta.listoffsetmeta import ListOffsetMeta
from awkward._nplikes.array_like import ArrayLike
+from awkward._nplikes.cupy import Cupy
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpy_like import IndexType, NumpyMetadata
from awkward._nplikes.placeholder import PlaceholderArray
@@ -1999,6 +2000,39 @@ def _to_arrow(
),
)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ cupy = Cupy.instance()
+ index = self._offsets.raw(cupy).astype("int32")
+ buf = cudf.core.buffer.as_buffer(index)
+ ind_buf = cudf.core.column.numerical.NumericalColumn(
+ buf, index.dtype, None, size=len(index)
+ )
+ cont = self._content._to_cudf(cudf, None, len(self._content))
+ if mask is not None:
+            m = cupy.packbits(cupy.asarray(mask), bitorder="little")
+ if m.nbytes % 64:
+ m = cupy.resize(m, ((m.nbytes // 64) + 1) * 64)
+ m = cudf.core.buffer.as_buffer(cupy.asarray(m))
+ else:
+ m = None
+ if self.parameters.get("__array__") == "string":
+ from cudf.core.column.string import StringColumn
+
+ data = cudf.core.buffer.as_buffer(cupy.asarray(self._content.data))
+            # the cudf docs for StringColumn say there should be two children instead of a data= argument
+ return StringColumn(
+ data=data,
+ children=(ind_buf,),
+ mask=m,
+ )
+
+ return cudf.core.column.lists.ListColumn(
+ length,
+ mask=m,
+ children=(ind_buf, cont),
+ dtype=cudf.core.dtypes.ListDtype(cont.dtype),
+ )
+
def _to_backend_array(self, allow_missing, backend):
array_param = self.parameter("__array__")
if array_param == "string":
diff --git a/src/awkward/contents/numpyarray.py b/src/awkward/contents/numpyarray.py
index 11a73bb124..315d9383b7 100644
--- a/src/awkward/contents/numpyarray.py
+++ b/src/awkward/contents/numpyarray.py
@@ -14,6 +14,7 @@
from awkward._meta.numpymeta import NumpyMeta
from awkward._nplikes import to_nplike
from awkward._nplikes.array_like import ArrayLike
+from awkward._nplikes.cupy import Cupy
from awkward._nplikes.jax import Jax
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpy_like import IndexType, NumpyMetadata
@@ -1220,6 +1221,20 @@ def _to_arrow(
),
)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ cupy = Cupy.instance()
+ from cudf.core.column.column import as_column
+
+ assert self._backend.nplike.known_data
+ data = as_column(self._data)
+ if mask is not None:
+ m = cupy.packbits(cupy.asarray(mask), bitorder="little")
+ if m.nbytes % 64:
+ m = cupy.resize(m, ((m.nbytes // 64) + 1) * 64)
+ m = cudf.core.buffer.as_buffer(m)
+ data.set_base_data(m)
+ return data
+
def _to_backend_array(self, allow_missing, backend):
return to_nplike(self.data, backend.nplike, from_nplike=self._backend.nplike)
diff --git a/src/awkward/contents/recordarray.py b/src/awkward/contents/recordarray.py
index c091d45365..4aafcfd6b2 100644
--- a/src/awkward/contents/recordarray.py
+++ b/src/awkward/contents/recordarray.py
@@ -1101,6 +1101,23 @@ def _to_arrow(
children=values,
)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ children = tuple(
+ c._to_cudf(cudf, mask=None, length=length) for c in self.contents
+ )
+ dt = cudf.core.dtypes.StructDtype(
+ {field: c.dtype for field, c in zip(self.fields, children)}
+ )
+ m = mask._to_cudf(cudf, None, length) if mask else None
+ return cudf.core.column.struct.StructColumn(
+ data=None,
+ children=children,
+ dtype=dt,
+ mask=m,
+ size=length,
+ offset=0,
+ )
+
def _to_backend_array(self, allow_missing, backend):
if self.fields is None:
return backend.nplike.empty(self.length, dtype=[])
diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py
index cbc726b310..0dd500ebc1 100644
--- a/src/awkward/contents/unmaskedarray.py
+++ b/src/awkward/contents/unmaskedarray.py
@@ -498,6 +498,9 @@ def _to_arrow(
):
return self._content._to_arrow(pyarrow, self, None, length, options)
+ def _to_cudf(self, cudf: Any, mask: Content | None, length: int):
+ return self._content._to_cudf(cudf, mask, length)
+
def _to_backend_array(self, allow_missing, backend):
content = self.content._to_backend_array(allow_missing, backend)
if allow_missing:
diff --git a/src/awkward/forms/form.py b/src/awkward/forms/form.py
index 1c72bf740f..3f9bec55eb 100644
--- a/src/awkward/forms/form.py
+++ b/src/awkward/forms/form.py
@@ -333,7 +333,7 @@ def __call__(self, field: str, *, next_match_if_empty: bool = False) -> Self | N
has_matched = True
next_specifiers.extend(self._match_to_next_specifiers[field])
- # Fixed-strings are an O(n) lookup
+ # Patterns are an O(n) lookup
for pattern in self._patterns:
if fnmatchcase(field, pattern):
has_matched = True
@@ -437,29 +437,59 @@ def columns(self, list_indicator=None, column_prefix=()):
def select_columns(
self, specifier, expand_braces=True, *, prune_unions_and_records: bool = True
):
+ """
+        select_columns returns a new Form with only the selected columns and sub-columns.
+        Returns an empty Form if no columns match the specifier(s).
+
+        `specifier` can be a `str | Iterable[str | Iterable[str]]`.
+        Strings may include shell-globbing-style wildcards "*" and "?".
+        If `expand_braces` is `True` (the default), strings may also include alternatives in braces.
+        For example, `["a.{b,c}.d"]` is equivalent to `["a.b.d", "a.c.d"]`.
+        Glob-style matching covers the single-character case as well: `"a.[bc].d"`.
+        If `specifier` is a list that contains a list/tuple, that inner list is interpreted as a
+        sequence of column and sub-column specifiers. These *may* contain wildcards, but "." is
+        matched literally rather than being treated as a nesting separator.
+ """
if isinstance(specifier, str):
specifier = {specifier}
# Only take unique specifiers
for item in specifier:
- if not isinstance(item, str):
+ if isinstance(item, str):
+ if item == "":
+ raise ValueError(
+ "a column-selection specifier cannot be an empty string"
+ )
+ elif isinstance(item, Iterable):
+ for field in item:
+ if not isinstance(field, str):
+ raise ValueError("a sub-column specifier must be a string")
+ else:
raise TypeError(
- "a column-selection specifier must be a list of non-empty strings"
- )
- if not item:
- raise ValueError(
- "a column-selection specifier must be a list of non-empty strings"
+ "a column specifier must be a string or an iterable of strings"
)
if expand_braces:
next_specifier = []
for item in specifier:
- for result in _expand_braces(item):
- next_specifier.append(result)
+ if isinstance(item, str):
+ for result in _expand_braces(item):
+ next_specifier.append(result)
+ else:
+ next_specifier.append(item)
specifier = next_specifier
- specifier = [[] if item == "" else item.split(".") for item in set(specifier)]
- match_specifier = _SpecifierMatcher(specifier, match_if_empty=False)
+ specifier_lists: list[list[str]] = []
+ for item in specifier:
+ if isinstance(item, str):
+ if item == "":
+ specifier_lists.append([])
+ else:
+ specifier_lists.append(item.split("."))
+ else:
+ specifier_lists.append(item)
+ match_specifier = _SpecifierMatcher(specifier_lists, match_if_empty=False)
selection = self._select_columns(match_specifier)
assert selection is not None, "top-level selections always return a Form"
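# Illustrative sketch (not part of the patch): how the extended select_columns specifiers are
# meant to be used. The example form and field names (including the literal-dot field "x.y")
# are assumptions.
import awkward as ak

form = ak.Array([{"a": {"b": 1, "c": 2, "x.y": 3}}]).layout.form

# brace expansion: equivalent to ["a.b", "a.c"]
print(form.select_columns("a.{b,c}").columns())  # expected: ["a.b", "a.c"]

# an inner list keeps "." literal, so a field with a dot in its name can be selected
print(form.select_columns([["a", "x.y"]]).columns())  # expected: ["a.x.y"]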
diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py
index d0cee81508..e9b1a3818b 100644
--- a/src/awkward/operations/__init__.py
+++ b/src/awkward/operations/__init__.py
@@ -44,6 +44,7 @@
from awkward.operations.ak_from_json import *
from awkward.operations.ak_from_numpy import *
from awkward.operations.ak_from_parquet import *
+from awkward.operations.ak_from_raggedtensor import *
from awkward.operations.ak_from_rdataframe import *
from awkward.operations.ak_from_regular import *
from awkward.operations.ak_full_like import *
@@ -85,6 +86,7 @@
from awkward.operations.ak_to_arrow_table import *
from awkward.operations.ak_to_backend import *
from awkward.operations.ak_to_buffers import *
+from awkward.operations.ak_to_cudf import *
from awkward.operations.ak_to_cupy import *
from awkward.operations.ak_to_dataframe import *
from awkward.operations.ak_to_feather import *
@@ -97,6 +99,7 @@
from awkward.operations.ak_to_parquet import *
from awkward.operations.ak_to_parquet_dataset import *
from awkward.operations.ak_to_parquet_row_groups import *
+from awkward.operations.ak_to_raggedtensor import *
from awkward.operations.ak_to_rdataframe import *
from awkward.operations.ak_to_regular import *
from awkward.operations.ak_transform import *
diff --git a/src/awkward/operations/ak_almost_equal.py b/src/awkward/operations/ak_almost_equal.py
index 78461de65c..66f67e4d8a 100644
--- a/src/awkward/operations/ak_almost_equal.py
+++ b/src/awkward/operations/ak_almost_equal.py
@@ -2,10 +2,11 @@
from __future__ import annotations
-from awkward._backends.dispatch import backend_of_obj
+from awkward._backends.dispatch import backend_of
from awkward._backends.numpy import NumpyBackend
from awkward._behavior import behavior_of, get_array_class, get_record_class
from awkward._dispatch import high_level_function
+from awkward._layout import ensure_same_backend
from awkward._nplikes.numpy_like import NumpyMetadata
from awkward._parameters import parameters_are_equal
from awkward.operations.ak_to_layout import to_layout
@@ -82,14 +83,13 @@ def _impl(
left_behavior = behavior_of(left)
right_behavior = behavior_of(right)
- left_backend = backend_of_obj(left, default=cpu)
- right_backend = backend_of_obj(right, default=cpu)
- if left_backend is not right_backend:
- return False
- backend = left_backend
-
- left_layout = to_layout(left, allow_record=False).to_packed()
- right_layout = to_layout(right, allow_record=False).to_packed()
+ layouts = ensure_same_backend(
+ to_layout(left, allow_record=False),
+ to_layout(right, allow_record=False),
+ )
+ left_layout = layouts[0].to_packed()
+ right_layout = layouts[1].to_packed()
+ backend = backend_of(left_layout)
if not backend.nplike.known_data:
raise NotImplementedError(
diff --git a/src/awkward/operations/ak_array_equal.py b/src/awkward/operations/ak_array_equal.py
index 2a7221baab..398db6b2a6 100644
--- a/src/awkward/operations/ak_array_equal.py
+++ b/src/awkward/operations/ak_array_equal.py
@@ -8,6 +8,7 @@
__all__ = ("array_equal",)
+@ak._connect.numpy.implements("array_equal")
@high_level_function()
def array_equal(
a1,
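# Illustrative sketch (not part of the patch): registering ak.array_equal with the NumPy
# dispatch mechanism means np.array_equal should now route to it for Awkward Arrays.
# The example data are assumptions.
import numpy as np
import awkward as ak

a = ak.Array([[1, 2], [], [3]])
b = ak.Array([[1, 2], [], [3]])
print(np.array_equal(a, b))  # expected: True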
diff --git a/src/awkward/operations/ak_from_parquet.py b/src/awkward/operations/ak_from_parquet.py
index 83c8732b56..1cb3fdfbc7 100644
--- a/src/awkward/operations/ak_from_parquet.py
+++ b/src/awkward/operations/ak_from_parquet.py
@@ -32,10 +32,11 @@ def from_parquet(
Args:
path (str): Local filename or remote URL, passed to fsspec for resolution.
May contain glob patterns.
- columns (None, str, or list of str): Glob pattern(s) with bash-like curly
+ columns (None, str, or iterable of (str or iterable of str)): Glob pattern(s) including bash-like curly
brackets for matching column names. Nested records are separated by dots.
If a list of patterns, the logical-or is matched. If None, all columns
- are read.
+ are read. A list of lists can be provided to select columns with literal dots
+            in their names; the inner list provides the column names or patterns.
row_groups (None or set of int): Row groups to read; must be non-negative.
Order is ignored: the output array is presented in the order specified by
Parquet metadata. If None, all row groups/all rows are read.
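# Illustrative sketch (not part of the patch): the two forms of the `columns` argument described
# above. The file name and column names are assumptions.
import awkward as ak

# glob/brace patterns; dots separate nested record fields
events = ak.from_parquet("events.parquet", columns=["muon.{pt,eta}"])

# an inner list selects a column whose name contains a literal dot
meta = ak.from_parquet("events.parquet", columns=[["metadata", "run.number"]])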
diff --git a/src/awkward/operations/ak_from_raggedtensor.py b/src/awkward/operations/ak_from_raggedtensor.py
new file mode 100644
index 0000000000..1c895506c2
--- /dev/null
+++ b/src/awkward/operations/ak_from_raggedtensor.py
@@ -0,0 +1,67 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import awkward as ak
+from awkward._dispatch import high_level_function
+
+__all__ = ("from_raggedtensor",)
+
+
+@high_level_function()
+def from_raggedtensor(array):
+ """
+ Args:
+        array (`tensorflow.RaggedTensor`):
+            The RaggedTensor to convert into an Awkward Array.
+
+    Converts a TensorFlow RaggedTensor into an Awkward Array.
+
+    If `array` is any other type, this function raises an error.
+ """
+
+ # Dispatch
+ yield (array,)
+
+ # Implementation
+ return _impl(array)
+
+
+def _impl(array):
+ try:
+ # get the flat values
+ content = array.flat_values.numpy()
+ except AttributeError as err:
+        raise TypeError(
+            "only a TensorFlow RaggedTensor can be converted to an Awkward Array"
+        ) from err
+ # convert them to ak.contents right away
+ content = ak.contents.NumpyArray(content)
+
+ # get the offsets
+ offsets_arr = []
+ for splits in array.nested_row_splits:
+ split = splits.numpy()
+ # convert to ak.index
+ offset = ak.index.Index64(split)
+ offsets_arr.append(offset)
+
+ # if a tensor has one *ragged dimension*
+ if len(offsets_arr) == 1:
+ result = ak.contents.ListOffsetArray(offsets_arr[0], content)
+ return ak.Array(result)
+
+ # if a tensor has multiple *ragged dimensions*
+ return ak.Array(_recursive_call(content, offsets_arr, 0))
+
+
+def _recursive_call(content, offsets_arr, count):
+ if count == len(offsets_arr) - 2:
+ return ak.contents.ListOffsetArray(
+ offsets_arr[count],
+ ak.contents.ListOffsetArray(offsets_arr[count + 1], content),
+ )
+ else:
+ return ak.contents.ListOffsetArray(
+            offsets_arr[count], _recursive_call(content, offsets_arr, count + 1)
+ )
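# Illustrative sketch (not part of the patch): a minimal use of ak.from_raggedtensor,
# assuming TensorFlow is installed.
import tensorflow as tf
import awkward as ak

ragged = tf.RaggedTensor.from_row_splits(
    values=[1.0, 2.0, 3.0, 4.0, 5.0], row_splits=[0, 2, 2, 5]
)
array = ak.from_raggedtensor(ragged)
print(array.to_list())  # expected: [[1.0, 2.0], [], [3.0, 4.0, 5.0]]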
diff --git a/src/awkward/operations/ak_to_cudf.py b/src/awkward/operations/ak_to_cudf.py
new file mode 100644
index 0000000000..e45fe041a2
--- /dev/null
+++ b/src/awkward/operations/ak_to_cudf.py
@@ -0,0 +1,21 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+from __future__ import annotations
+
+import awkward as ak
+from awkward._dispatch import high_level_function
+
+__all__ = ("to_cudf",)
+
+
+@high_level_function()
+def to_cudf(
+ array: ak.Array,
+):
+    """Create a cudf.Series out of the given Awkward Array.
+
+ Buffers that are not already in GPU memory will be transferred, and some
+ structural reformatting may happen to account for differences in architecture.
+ """
+ import cudf
+
+ return cudf.Series(array.layout._to_cudf(cudf, None, len(array)))
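# Illustrative sketch (not part of the patch): converting an Awkward Array to a cudf.Series,
# assuming cudf and a CUDA device are available; the expected output mirrors the new tests.
import awkward as ak

arr = ak.Array([[1, 2, 3], [], [4, 5]])
series = ak.to_cudf(arr)           # a cudf.Series backed by GPU buffers
print(series.to_arrow().tolist())  # expected: [[1, 2, 3], [], [4, 5]]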
diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py
new file mode 100644
index 0000000000..5fcb2e2d5f
--- /dev/null
+++ b/src/awkward/operations/ak_to_raggedtensor.py
@@ -0,0 +1,84 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import awkward as ak
+from awkward._dispatch import high_level_function
+
+__all__ = ("to_raggedtensor",)
+
+
+@high_level_function()
+def to_raggedtensor(array):
+ """
+ Args:
+        array: Array-like data. May be a high-level #ak.Array,
+            or a low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray,
+            #ak.contents.RegularArray, or #ak.contents.NumpyArray.
+
+ Converts `array` (only ListOffsetArray, ListArray, RegularArray and NumpyArray data types supported)
+ into a ragged tensor, if possible.
+
+    If `array` contains any other data type (RecordArray, for example), the function raises an error.
+ """
+
+ # Dispatch
+ yield (array,)
+
+ # Implementation
+ return _impl(array)
+
+
+def _impl(array):
+ try:
+ import tensorflow as tf
+ except ImportError as err:
+ raise ImportError(
+ """to use ak.to_raggedtensor, you must install the 'tensorflow' package with:
+
+ pip install tensorflow
+or
+ conda install tensorflow"""
+ ) from err
+
+    # unwrap the awkward array if it was made with the ak.Array constructor;
+    # this also converts a Python list into an Awkward layout
+ array = ak.to_layout(array, allow_record=False)
+
+ if isinstance(array, ak.contents.numpyarray.NumpyArray):
+ return tf.RaggedTensor.from_row_splits(
+            values=array.data, row_splits=[0, len(array)]
+ )
+ else:
+ flat_values, nested_row_splits = _recursive_call(array, ())
+
+ return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits)
+
+
+def _recursive_call(layout, offsets_arr):
+ try:
+ # change all the possible layout types to ListOffsetArray
+ if isinstance(layout, ak.contents.listarray.ListArray):
+ layout = layout.to_ListOffsetArray64()
+ elif isinstance(layout, ak.contents.regulararray.RegularArray):
+ layout = layout.to_ListOffsetArray64()
+ elif not isinstance(
+ layout,
+ (
+ ak.contents.listoffsetarray.ListOffsetArray,
+ ak.contents.numpyarray.NumpyArray,
+ ),
+ ):
+ raise TypeError(
+ "Only arrays containing variable-length lists (var *) or"
+ " regular-length lists (# *) of numbers can be converted into a TensorFlow RaggedTensor"
+ )
+
+ # recursively gather all of the offsets of an array
+ offsets_arr += (layout.offsets.data,)
+
+ except AttributeError:
+        # at the innermost level, return the flattened values and the
+        # accumulated offsets; the caller builds the ragged tensor from them
+ return layout.data, offsets_arr
+ return _recursive_call(layout.content, offsets_arr)
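# Illustrative sketch (not part of the patch): the inverse direction with ak.to_raggedtensor,
# assuming TensorFlow is installed.
import awkward as ak

array = ak.Array([[1.1, 2.2], [], [3.3]])
tensor = ak.to_raggedtensor(array)
print(tensor)  # expected: <tf.RaggedTensor [[1.1, 2.2], [], [3.3]]>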
diff --git a/tests-cuda/test_3051_to_cuda.py b/tests-cuda/test_3051_to_cuda.py
new file mode 100644
index 0000000000..af02ed798f
--- /dev/null
+++ b/tests-cuda/test_3051_to_cuda.py
@@ -0,0 +1,57 @@
+from __future__ import annotations
+
+import pytest
+
+import awkward as ak
+
+cudf = pytest.importorskip("cudf")
+cupy = pytest.importorskip("cupy")
+
+
+def test_jagged():
+ arr = ak.Array([[[1, 2, 3], [], [3, 4]], []])
+ out = ak.to_cudf(arr)
+ assert isinstance(out, cudf.Series)
+ assert out.to_arrow().tolist() == [[[1, 2, 3], [], [3, 4]], []]
+
+
+def test_nested():
+ arr = ak.Array(
+ [{"a": 0, "b": 1.0, "c": {"d": 0}}, {"a": 1, "b": 0.0, "c": {"d": 1}}]
+ )
+ out = ak.to_cudf(arr)
+ assert isinstance(out, cudf.Series)
+ assert out.to_arrow().tolist() == [
+ {"a": 0, "b": 1.0, "c": {"d": 0}},
+ {"a": 1, "b": 0.0, "c": {"d": 1}},
+ ]
+
+
+def test_null():
+ arr = ak.Array([12, None, 21, 12])
+ # calls ByteMaskedArray._to_cudf not NumpyArray
+ out = ak.to_cudf(arr)
+ assert isinstance(out, cudf.Series)
+ assert out.to_arrow().tolist() == [12, None, 21, 12]
+
+ # True is valid, LSB order
+ arr2 = ak.Array(arr.layout.to_BitMaskedArray(True, True))
+ out = ak.to_cudf(arr2)
+ assert isinstance(out, cudf.Series)
+ assert out.to_arrow().tolist() == [12, None, 21, 12]
+
+ # reversed LSB (should be rare, involves extra work!)
+ arr3 = ak.Array(arr.layout.to_BitMaskedArray(True, False))
+ out = ak.to_cudf(arr3)
+ assert isinstance(out, cudf.Series)
+ assert out.to_arrow().tolist() == [12, None, 21, 12]
+
+
+def test_strings():
+ arr = ak.Array(["hey", "hi", "hum"])
+ out = ak.to_cudf(arr)
+ assert out.to_arrow().tolist() == ["hey", "hi", "hum"]
+
+ arr = ak.Array(["hey", "hi", None, "hum"])
+ out = ak.to_cudf(arr)
+ assert out.to_arrow().tolist() == ["hey", "hi", None, "hum"]
diff --git a/tests-cuda/test_3149_complex_reducers.py b/tests-cuda/test_3149_complex_reducers.py
index bd53020721..abc921b30d 100644
--- a/tests-cuda/test_3149_complex_reducers.py
+++ b/tests-cuda/test_3149_complex_reducers.py
@@ -302,8 +302,8 @@ def test_0652_minmax():
def test_block_boundary_sum_complex():
- np.random.seed(42)
- array = np.random.randint(6000, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(6000, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -323,7 +323,6 @@ def test_block_boundary_sum_complex():
def test_block_boundary_prod_complex1():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1000, 0), np.full(1000, 1))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -341,7 +340,6 @@ def test_block_boundary_prod_complex1():
def test_block_boundary_prod_complex2():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1001, 0), np.full(1001, 1))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -359,7 +357,6 @@ def test_block_boundary_prod_complex2():
def test_block_boundary_prod_complex3():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1002, 0), np.full(1002, 1))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -377,7 +374,6 @@ def test_block_boundary_prod_complex3():
def test_block_boundary_prod_complex4():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1000, 0), np.full(1000, 1.01))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -397,7 +393,6 @@ def test_block_boundary_prod_complex4():
def test_block_boundary_prod_complex5():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1001, 0), np.full(1001, 1.01))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -417,7 +412,6 @@ def test_block_boundary_prod_complex5():
def test_block_boundary_prod_complex6():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1002, 0), np.full(1002, 1.01))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -437,7 +431,6 @@ def test_block_boundary_prod_complex6():
def test_block_boundary_prod_complex7():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1000, 0), np.full(1000, 0.99))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -457,7 +450,6 @@ def test_block_boundary_prod_complex7():
def test_block_boundary_prod_complex8():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1001, 0), np.full(1001, 0.99))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -477,7 +469,6 @@ def test_block_boundary_prod_complex8():
def test_block_boundary_prod_complex9():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1002, 0), np.full(1002, 0.99))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -497,7 +488,6 @@ def test_block_boundary_prod_complex9():
def test_block_boundary_prod_complex10():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1000, 0), np.full(1000, 1.1))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -517,7 +507,6 @@ def test_block_boundary_prod_complex10():
def test_block_boundary_prod_complex11():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1001, 0), np.full(1001, 1.1))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -537,7 +526,6 @@ def test_block_boundary_prod_complex11():
def test_block_boundary_prod_complex12():
- np.random.seed(42)
complex_array = np.vectorize(complex)(np.full(1002, 0), np.full(1002, 1.1))
content = ak.contents.NumpyArray(complex_array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -557,8 +545,8 @@ def test_block_boundary_prod_complex12():
def test_block_boundary_prod_complex13():
- np.random.seed(42)
- array = np.random.randint(50, size=1000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(50, size=1000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -580,8 +568,8 @@ def test_block_boundary_prod_complex13():
def test_block_boundary_any_complex():
- np.random.seed(42)
- array = np.random.randint(6000, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(6000, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -601,8 +589,8 @@ def test_block_boundary_any_complex():
def test_block_boundary_all_complex():
- np.random.seed(42)
- array = np.random.randint(6000, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(6000, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -622,8 +610,8 @@ def test_block_boundary_all_complex():
def test_block_boundary_min_complex1():
- np.random.seed(42)
- array = np.random.randint(5, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(5, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -643,8 +631,8 @@ def test_block_boundary_min_complex1():
def test_block_boundary_min_complex2():
- np.random.seed(42)
- array = np.random.randint(6000, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(6000, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -664,8 +652,8 @@ def test_block_boundary_min_complex2():
def test_block_boundary_max_complex1():
- np.random.seed(42)
- array = np.random.randint(5, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(5, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -685,8 +673,8 @@ def test_block_boundary_max_complex1():
def test_block_boundary_max_complex2():
- np.random.seed(42)
- array = np.random.randint(6000, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(6000, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -706,8 +694,8 @@ def test_block_boundary_max_complex2():
def test_block_boundary_sum_bool_complex():
- np.random.seed(42)
- array = np.random.randint(2, size=6000, dtype=np.bool_)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(2, size=6000, dtype=np.bool_)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -727,8 +715,8 @@ def test_block_boundary_sum_bool_complex():
def test_block_boundary_countnonzero_complex_1():
- np.random.seed(42)
- array = np.random.randint(6000, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(6000, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -749,8 +737,8 @@ def test_block_boundary_countnonzero_complex_1():
def test_block_boundary_countnonzero_complex_2():
- np.random.seed(42)
- array = np.random.randint(2, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(2, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -772,8 +760,8 @@ def test_block_boundary_countnonzero_complex_2():
@pytest.mark.skip(reason="awkward_reduce_argmax_complex is not implemented")
def test_block_boundary_argmax_complex1():
- np.random.seed(42)
- array = np.random.randint(5, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(5, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -794,8 +782,8 @@ def test_block_boundary_argmax_complex1():
@pytest.mark.skip(reason="awkward_reduce_argmax_complex is not implemented")
def test_block_boundary_argmax_complex2():
- np.random.seed(42)
- array = np.random.randint(6000, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(6000, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -816,8 +804,8 @@ def test_block_boundary_argmax_complex2():
@pytest.mark.skip(reason="awkward_reduce_argmin_complex is not implemented")
def test_block_boundary_argmin_complex1():
- np.random.seed(42)
- array = np.random.randint(5, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(5, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
@@ -838,8 +826,8 @@ def test_block_boundary_argmin_complex1():
@pytest.mark.skip(reason="awkward_reduce_argmin_complex is not implemented")
def test_block_boundary_argmin_complex2():
- np.random.seed(42)
- array = np.random.randint(6000, size=6000)
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(6000, size=6000)
complex_array = np.vectorize(complex)(
array[0 : len(array) : 2], array[1 : len(array) : 2]
)
diff --git a/tests-cuda/test_3150_combinations_n_equal_2.py b/tests-cuda/test_3150_combinations_n_equal_2.py
index d65ef8416a..801d83599a 100644
--- a/tests-cuda/test_3150_combinations_n_equal_2.py
+++ b/tests-cuda/test_3150_combinations_n_equal_2.py
@@ -1188,7 +1188,6 @@ def test_1074_combinations_UnmaskedArray():
def test_block_boundary_combinations():
- np.random.seed(42)
content = ak.contents.NumpyArray(np.arange(300))
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -1219,7 +1218,6 @@ def test_block_boundary_combinations():
def test_block_boundary_argcombinations():
- np.random.seed(42)
content = ak.contents.NumpyArray(np.arange(300))
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
diff --git a/tests-cuda/test_3162_block_boundary_reducers.py b/tests-cuda/test_3162_block_boundary_reducers.py
index cd0b57a0c8..deb52da002 100644
--- a/tests-cuda/test_3162_block_boundary_reducers.py
+++ b/tests-cuda/test_3162_block_boundary_reducers.py
@@ -17,8 +17,9 @@ def cleanup_cuda():
def test_block_boundary_sum():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(3000, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.sum(cuda_content, -1, highlevel=False) == ak.sum(
content, -1, highlevel=False
@@ -34,8 +35,9 @@ def test_block_boundary_sum():
def test_block_boundary_any():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(3000, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.any(cuda_content, -1, highlevel=False) == ak.any(
content, -1, highlevel=False
@@ -51,8 +53,9 @@ def test_block_boundary_any():
def test_block_boundary_all():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(3000, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.all(cuda_content, -1, highlevel=False) == ak.all(
content, -1, highlevel=False
@@ -68,8 +71,9 @@ def test_block_boundary_all():
def test_block_boundary_sum_bool():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(2, size=3000, dtype=np.bool_))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(2, size=3000, dtype=np.bool_)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.sum(cuda_content, -1, highlevel=False) == ak.sum(
content, -1, highlevel=False
@@ -85,9 +89,10 @@ def test_block_boundary_sum_bool():
def test_block_boundary_max():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(3000, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.max(cuda_content, -1, highlevel=False) == ak.max(
content, -1, highlevel=False
)
@@ -102,8 +110,27 @@ def test_block_boundary_max():
def test_block_boundary_min():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(3000, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000)
+ content = ak.contents.NumpyArray(array)
+ cuda_content = ak.to_backend(content, "cuda", highlevel=False)
+ assert ak.min(cuda_content, -1, highlevel=False) == ak.min(
+ content, -1, highlevel=False
+ )
+
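+ # sublists of length 1, 2997 and 2, so the per-list reduction spans more than one kernel block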
+ offsets = ak.index.Index64(np.array([0, 1, 2998, 3000], dtype=np.int64))
+ depth1 = ak.contents.ListOffsetArray(offsets, content)
+ cuda_depth1 = ak.to_backend(depth1, "cuda", highlevel=False)
+ assert to_list(ak.min(cuda_depth1, -1, highlevel=False)) == to_list(
+ ak.min(depth1, -1, highlevel=False)
+ )
+ del cuda_content, cuda_depth1
+
+
+def test_block_boundary_negative_min():
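+ # same checks as test_block_boundary_min, but on non-positive values so sign handling in the min kernels is exercised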
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000) * -1
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.min(cuda_content, -1, highlevel=False) == ak.min(
content, -1, highlevel=False
@@ -120,8 +147,9 @@ def test_block_boundary_min():
@pytest.mark.skip(reason="awkward_reduce_argmin is not implemented")
def test_block_boundary_argmin():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(3000, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.argmin(cuda_content, -1, highlevel=False) == ak.argmin(
content, -1, highlevel=False
@@ -138,8 +166,9 @@ def test_block_boundary_argmin():
@pytest.mark.skip(reason="awkward_reduce_argmax is not implemented")
def test_block_boundary_argmax():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(3000, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.argmax(cuda_content, -1, highlevel=False) == ak.argmax(
content, -1, highlevel=False
@@ -155,8 +184,9 @@ def test_block_boundary_argmax():
def test_block_boundary_count():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(3000, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(3000, size=3000)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.count(cuda_content, -1, highlevel=False) == ak.count(
content, -1, highlevel=False
@@ -172,8 +202,9 @@ def test_block_boundary_count():
def test_block_boundary_count_nonzero():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(2, size=3000))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(2, size=3000)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.count_nonzero(cuda_content, -1, highlevel=False) == ak.count_nonzero(
content, -1, highlevel=False
@@ -189,7 +220,6 @@ def test_block_boundary_count_nonzero():
def test_block_boundary_prod():
- np.random.seed(42)
primes = [x for x in range(2, 30000) if all(x % n != 0 for n in range(2, x))]
content = ak.contents.NumpyArray(primes)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
@@ -207,8 +237,9 @@ def test_block_boundary_prod():
def test_block_boundary_prod_bool():
- np.random.seed(42)
- content = ak.contents.NumpyArray(np.random.randint(2, size=3000, dtype=np.bool_))
+ rng = np.random.default_rng(seed=42)
+ array = rng.integers(2, size=3000, dtype=np.bool_)
+ content = ak.contents.NumpyArray(array)
cuda_content = ak.to_backend(content, "cuda", highlevel=False)
assert ak.prod(cuda_content, -1, highlevel=False) == ak.prod(
content, -1, highlevel=False
diff --git a/tests/test_1105_ak_aray_equal.py b/tests/test_1105_ak_aray_equal.py
index 519d0039c5..7512f50316 100644
--- a/tests/test_1105_ak_aray_equal.py
+++ b/tests/test_1105_ak_aray_equal.py
@@ -88,3 +88,10 @@ def test_array_equal_with_params():
)
assert not ak.array_equal(a1, a2)
assert ak.array_equal(a1, a2, check_parameters=False)
+
+
+def test_array_equal_numpy_override():
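+ # np.array_equal dispatches to ak.array_equal through Awkward's NEP 18 __array_function__ override, so ragged arrays can be compared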
+ assert np.array_equal(
+ ak.Array([[1, 2], [], [3, 4, 5]]),
+ ak.Array([[1, 2], [], [3, 4, 5]]),
+ )
diff --git a/tests/test_2305_nep_18_lazy_conversion.py b/tests/test_2305_nep_18_lazy_conversion.py
index 8ce4c88763..a3c9cd71d0 100644
--- a/tests/test_2305_nep_18_lazy_conversion.py
+++ b/tests/test_2305_nep_18_lazy_conversion.py
@@ -11,7 +11,10 @@
def test_binary():
ak_array = ak.Array(np.arange(10, dtype="
+ # a test for ListArray -> RaggedTensor
+ array1 = ak.contents.ListArray(starts1, stops1, content)
+ assert to_raggedtensor(array1).to_list() == [
+ [1.1, 2.2, 3.3],
+ [],
+ [4.4, 5.5],
+ [6.6],
+ [7.7, 8.8, 9.9],
+ ]
+
+ # a test for awkward.highlevel.Array -> RaggedTensor
+ array2 = ak.Array(array1)
+ assert to_raggedtensor(array2).to_list() == [
+ [1.1, 2.2, 3.3],
+ [],
+ [4.4, 5.5],
+ [6.6],
+ [7.7, 8.8, 9.9],
+ ]
+
+ # a test for NumpyArray -> RaggedTensor
+ array3 = content
+ assert to_raggedtensor(array3).to_list() == [
+ [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]
+ ]
+
+ # a test for RegularArray -> RaggedTensor
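+ # size=2 over the nine values yields four complete pairs; the trailing 9.9 is dropped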
+ array4 = ak.contents.RegularArray(content, size=2)
+ assert to_raggedtensor(array4).to_list() == [
+ [1.1, 2.2],
+ [3.3, 4.4],
+ [5.5, 6.6],
+ [7.7, 8.8],
+ ]
+
+ # try a flat, one-dimensional awkward array
+ array5 = ak.Array([3, 1, 4, 1, 9, 2, 6])
+ assert to_raggedtensor(array5).to_list() == [[3, 1, 4, 1, 9, 2, 6]]
+
+ # try a doubly ragged array
+ array6 = ak.Array([[[1.1, 2.2], [3.3]], [], [[4.4, 5.5]]])
+ assert to_raggedtensor(array6).to_list() == [[[1.1, 2.2], [3.3]], [], [[4.4, 5.5]]]
+
+ # try a listoffset array inside a listoffset array
+ array7 = ak.contents.ListOffsetArray(
+ outeroffsets, ak.contents.ListOffsetArray(inneroffsets, content2)
+ )
+ assert to_raggedtensor(array7).to_list() == [
+ [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]],
+ [[15, 16, 17, 18, 19], [20, 21, 22, 23, 24], [25, 26, 27, 28, 29]],
+ ]
+
+ # try a list array inside a list array
+
+ array8 = ak.contents.ListArray(
+ starts2, stops2, ak.contents.ListArray(starts1, stops1, content)
+ )
+ assert to_raggedtensor(array8).to_list() == [
+ [[1.1, 2.2, 3.3], [], [4.4, 5.5]],
+ [[6.6], [7.7, 8.8, 9.9]],
+ ]
+
+ # try just a python list
+ array9 = [3, 1, 4, 1, 9, 2, 6]
+ assert to_raggedtensor(array9).to_list() == [[3, 1, 4, 1, 9, 2, 6]]
+
+
+np_array1 = np.array([1.1, 2.2, 3.3, 4.4, 5.5], dtype=np.float32)
+
+offsets1 = ak.index.Index64(np.array([0, 2, 3, 3, 5]))
+content1 = ak.contents.NumpyArray(np_array1)
+
+
+def test_convert_from_raggedtensor():
+ tf_array1 = tf.RaggedTensor.from_row_splits(
+ values=[1.1, 2.2, 3.3, 4.4, 5.5], row_splits=[0, 2, 3, 3, 5]
+ )
+
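+ # the RaggedTensor's row_splits [0, 2, 3, 3, 5] become the ListOffsetArray's offsets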
+ ak_array1 = ak.contents.ListOffsetArray(offsets1, content1)
+ result1 = ak.to_layout(from_raggedtensor(tf_array1), allow_record=False)
+ assert (result1.content.data == np_array1).all()
+ assert (result1.offsets.data == [0, 2, 3, 3, 5]).all()
+ assert from_raggedtensor(tf_array1).to_list() == ak_array1.to_list()
+
+ tf_array2 = tf.RaggedTensor.from_nested_row_splits(
+ flat_values=[3, 1, 4, 1, 5, 9, 2, 6],
+ nested_row_splits=([0, 3, 3, 5], [0, 4, 4, 7, 8, 8]),
+ )
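+ # the outer row splits [0, 3, 3, 5] group the five inner rows into lists of 3, 0 and 2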
+ assert from_raggedtensor(tf_array2).to_list() == [
+ [[3, 1, 4, 1], [], [5, 9, 2]],
+ [],
+ [[6], []],
+ ]