scikit-hep · maxymnaumchyk · Oct 3, 2024 · Sep 27, 2024 · Sep 27, 2024 · Sep 29, 2024
diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py
@@ -47,6 +47,7 @@
 from awkward.operations.ak_from_raggedtensor import *
 from awkward.operations.ak_from_rdataframe import *
 from awkward.operations.ak_from_regular import *
+from awkward.operations.ak_from_torch import *
 from awkward.operations.ak_full_like import *
 from awkward.operations.ak_imag import *
 from awkward.operations.ak_is_categorical import *
@@ -102,6 +103,7 @@
 from awkward.operations.ak_to_raggedtensor import *
 from awkward.operations.ak_to_rdataframe import *
 from awkward.operations.ak_to_regular import *
+from awkward.operations.ak_to_torch import *
 from awkward.operations.ak_transform import *
 from awkward.operations.ak_type import *
 from awkward.operations.ak_unflatten import *

diff --git a/src/awkward/operations/ak_from_torch.py b/src/awkward/operations/ak_from_torch.py
@@ -0,0 +1,65 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import awkward as ak
+from awkward._dispatch import high_level_function
+
+__all__ = ("from_torch",)
+
+
+@high_level_function()
+def from_torch(array):
+ """
+ Args:
+     array (PyTorch Tensor): Tensor to convert into an Awkward Array.
+
+ Converts a PyTorch Tensor into an Awkward Array.
+
+ A tensor on a CUDA device is passed to CuPy through DLPack and wrapped with
+ #ak.from_cupy; any other tensor is converted with `Tensor.numpy()` and
+ wrapped with #ak.from_numpy.
+
+ Raises a TypeError if `array` is not a PyTorch Tensor and an ImportError if
+ the `torch` package cannot be imported.
+ """
+
+ # Dispatch
+ yield (array,)
+
+ # Implementation
+ return _impl(array)
+
+
+def _impl(array):
+    """
+    Convert a PyTorch Tensor into an Awkward Array.
+
+    Args:
+        array: The PyTorch Tensor to convert.
+
+    Returns:
+        The result of #ak.from_cupy for a CUDA tensor, or of #ak.from_numpy
+        for any other tensor.
+
+    Raises:
+        ImportError: If the `torch` package cannot be imported.
+        TypeError: If `array` is not a `torch.Tensor`.
+    """
+    try:
+        import torch
+    except ImportError as err:
+        raise ImportError(
+            """to use ak.from_torch, you must install 'torch' package with:
+
+ pip install torch
+
+or
+
+ conda install pytorch"""
+        ) from err
+
+    # check if array is a Tensor
+    if not isinstance(array, torch.Tensor):
+        raise TypeError("only PyTorch Tensor can be converted to Awkward Array")
+
+    # A tensor that tracks gradients refuses both `Tensor.numpy()` and DLPack
+    # export. `detach()` returns a tensor that shares the same storage without
+    # the autograd graph, so the conversions below still do not copy the data.
+    array = array.detach()
+
+    # keep the resulting array on the same device as the input tensor
+    if array.is_cuda:
+        from awkward._nplikes.cupy import Cupy
+
+        # zero-copy data exchange through DLPack
+        cupy_array = Cupy.instance().from_dlpack(array)
+        return ak.from_cupy(cupy_array)
+
+    return ak.from_numpy(array.numpy())
diff --git a/src/awkward/operations/ak_to_torch.py b/src/awkward/operations/ak_to_torch.py
@@ -0,0 +1,74 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import awkward as ak
+from awkward._dispatch import high_level_function
+from awkward._nplikes.numpy_like import NumpyMetadata
+
+__all__ = ("to_torch",)
+
+# `np` is the NumpyMetadata instance, not the `numpy` module; `_impl` uses it
+# for its `np.ndarray` isinstance check.
+np = NumpyMetadata.instance()
+
+
+@high_level_function()
+def to_torch(array):
+ """
+ Args:
+     array: Array-like data. May be a high level #ak.Array,
+         or low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray,
+         #ak.contents.RegularArray, #ak.contents.NumpyArray
+
+ Converts `array` (only ListOffsetArray, ListArray, RegularArray and NumpyArray
+ data types supported) into a PyTorch Tensor, if possible.
+
+ If `array` cannot be turned into a backend array of equal-length lists of
+ numbers (because it contains a RecordArray, for example), the function raises
+ a TypeError. It raises a ValueError if the backend of `array` is neither
+ 'cpu' nor 'cuda', and an ImportError if the `torch` package cannot be imported.
+ """
+
+ # Dispatch
+ yield (array,)
+
+ # Implementation
+ return _impl(array)
+
+
+def _impl(array):
+    """
+    Build a PyTorch Tensor from `array`.
+
+    The input is first normalised with #ak.to_layout (records are rejected),
+    then turned into a backend array and finally handed to torch.
+
+    Raises:
+        ImportError: If the `torch` package cannot be imported.
+        ValueError: If the backend of `array` is neither 'cpu' nor 'cuda'.
+        TypeError: If the layout cannot be converted into a backend array.
+    """
+    try:
+        import torch
+    except ImportError as err:
+        raise ImportError(
+            """to use ak.to_torch, you must install 'torch' package with:
+
+ pip install torch
+
+or
+
+ conda install pytorch"""
+        ) from err
+
+    # normalise every accepted kind of input to a low-level layout
+    layout = ak.to_layout(array, allow_record=False)
+
+    # only data living on the cpu or cuda backend can be handed to torch
+    backend_name = ak.backend(layout)
+    if backend_name != "cuda" and backend_name != "cpu":
+        raise ValueError("Only 'cpu' and 'cuda' backend conversions are allowed")
+
+    # numpy array, or cupy array when the layout is on the gpu
+    try:
+        dense = layout.to_backend_array(allow_missing=False)
+    except ValueError as err:
+        raise TypeError(
+            "Only arrays containing equal-length lists of numbers can be converted into a PyTorch Tensor"
+        ) from err
+
+    if not isinstance(dense, np.ndarray):
+        # cupy -> torch tensor, exchanged through DLPack
+        return torch.utils.dlpack.from_dlpack(dense.toDlpack())
+
+    # numpy -> torch tensor
+    return torch.from_numpy(dense)
diff --git a/tests/test_3259_to_torch_from_torch.py b/tests/test_3259_to_torch_from_torch.py
@@ -0,0 +1,72 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+import awkward as ak
+
+# Short aliases for the two functions under test.
+to_torch = ak.operations.to_torch
+from_torch = ak.operations.from_torch
+
+# Skip every test in this module when PyTorch is not installed.
+torch = pytest.importorskip("torch")
+
+# Two 2x2x2 arrays holding the values 0..7: `a` is float64, `b` uses NumPy's
+# default integer dtype.
+a = np.arange(2 * 2 * 2, dtype=np.float64).reshape(2, 2, 2)
+b = np.arange(2 * 2 * 2).reshape(2, 2, 2)
+
+# The values 0..29 as a flat buffer, with offsets that cut it into
+# 2 lists of 3 lists of 5 items each.
+array = np.arange(2 * 3 * 5).reshape(2, 3, 5)
+content2 = ak.contents.NumpyArray(array.reshape(-1))
+inneroffsets = ak.index.Index64(np.array([0, 5, 10, 15, 20, 25, 30]))
+outeroffsets = ak.index.Index64(np.array([0, 3, 6]))
+
+
+def test_to_torch():
+    """Check to_torch on a 4-dimensional array, nested ListOffsetArrays and a plain list."""
+    # a basic test for a 4 dimensional array
+    array1 = ak.Array([a, b])
+    tensor1 = to_torch(array1)
+    expected = [
+        [[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]],
+        [[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]],
+    ]
+    for i, sub_array in enumerate(expected):
+        assert tensor1[i].tolist() == sub_array
+
+    # test that the data types are remaining the same (float64 in this case)
+    assert array1.layout.to_backend_array().dtype.name in str(tensor1.dtype)
+
+    # try a listoffset array inside a listoffset array
+    array2 = ak.contents.ListOffsetArray(
+        outeroffsets, ak.contents.ListOffsetArray(inneroffsets, content2)
+    )
+    tensor2 = to_torch(array2)
+    assert tensor2[0].tolist() == [
+        [0, 1, 2, 3, 4],
+        [5, 6, 7, 8, 9],
+        [10, 11, 12, 13, 14],
+    ]
+    assert tensor2[1].tolist() == [
+        [15, 16, 17, 18, 19],
+        [20, 21, 22, 23, 24],
+        [25, 26, 27, 28, 29],
+    ]
+
+    # try just a python list
+    array3 = [3, 1, 4, 1, 9, 2, 6]
+    assert to_torch(array3).tolist() == [3, 1, 4, 1, 9, 2, 6]
+
+
+# Input tensors for test_from_torch: an explicit float32 tensor and a tensor
+# built from a NumPy integer array.
+array1 = torch.tensor([[1.0, -1.0], [1.0, -1.0]], dtype=torch.float32)
+array2 = torch.tensor(np.array([[1, 2, 3], [4, 5, 6]]))
+
+
+def test_from_torch():
+    """Check that from_torch preserves both the values and the dtype of a tensor."""
+    converted1 = from_torch(array1)
+    converted2 = from_torch(array2)
+
+    # Awkward.to_list() == Tensor.tolist()
+    assert converted1.to_list() == array1.tolist()
+    assert converted2.to_list() == array2.tolist()
+
+    # test that the data types are remaining the same (float32 in this case)
+    assert converted1.layout.dtype.name in str(array1.dtype)
+
+    # test that the data types are remaining the same (the integer dtype
+    # inherited from the NumPy array, int64 on most platforms)
+    assert converted2.layout.dtype.name in str(array2.dtype)