fix: add cuda backend support for to_raggedtensor and from_raggedtensor functions #3263

Open · wants to merge 11 commits into main
8 changes: 8 additions & 0 deletions docs/reference/toctree.txt
@@ -39,6 +39,14 @@
     generated/ak.to_feather
     generated/ak.from_avro_file

+.. toctree::
+    :caption: Conversions for machine learning
+
+    generated/ak.from_raggedtensor
+    generated/ak.to_raggedtensor
+    generated/ak.from_torch
+    generated/ak.to_torch
+
 .. toctree::
     :caption: Converting to Pandas DataFrames

34 changes: 31 additions & 3 deletions src/awkward/operations/ak_from_raggedtensor.py
@@ -30,18 +30,25 @@ def from_raggedtensor(array):
 def _impl(array):
     try:
         # get the flat values
-        content = array.flat_values.numpy()
+        content = array.flat_values
     except AttributeError as err:
         raise TypeError(
             """only RaggedTensor can be converted to awkward array"""
         ) from err
-    # convert them to ak.contents right away
+
+    # handle gpu and cpu instances separately
+    device = content.backing_device
+
+    content = _tensor_to_np_or_cp(content, device)
+
+    # convert flat_values to ak.contents right away
     content = ak.contents.NumpyArray(content)

     # get the offsets
     offsets_arr = []
     for splits in array.nested_row_splits:
-        split = splits.numpy()
+        # handle gpu and cpu instances separately
+        split = _tensor_to_np_or_cp(splits, device)
         # convert to ak.index
         offset = ak.index.Index64(split)
         offsets_arr.append(offset)
@@ -55,6 +62,27 @@ def _impl(array):
     return ak.Array(_recursive_call(content, offsets_arr, 0))


+def _tensor_to_np_or_cp(array, device):
+    if device.endswith("GPU", 0, -2):
Member commented:

I had to check the documentation on str.endswith, but it seems that this is equivalent to

    if device[:-2].endswith("GPU"):

(though I think the latter is easier to understand because slicing is more well-known than the extra arguments of str.endswith).

However, are you assuming that the GPU number is one digit? That is, will the above code break for a computer with 10 GPUs?

If the format for the 15th GPU is "GPU-14" (zero-indexed), then maybe you want

Suggested change:
-    if device.endswith("GPU", 0, -2):
+    if device.split("-")[0] == "GPU":

(and if lowercase is possible, you can also add a .upper() in the chain).

But before you accept the suggestion above, is it really a hyphen? If there's only one GPU, would there be no hyphen? (Note that device.split("-")[0] is equal to device if there is no hyphen, so the same code may be fine.)
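
For reference, a quick check of the two spellings on device strings in TensorFlow's usual format (an illustrative aside, not part of the thread):

single = "/job:localhost/replica:0/task:0/device:GPU:0"
multi = "/job:localhost/replica:0/task:0/device:GPU:14"

# both forms strip the last two characters and test the remainder
print(single.endswith("GPU", 0, -2))  # True
print(single[:-2].endswith("GPU"))    # True

# both silently fail for a two-digit device index, because removing the last
# two characters of "...GPU:14" leaves "...GPU:", which does not end with "GPU"
print(multi.endswith("GPU", 0, -2))   # False
print(multi[:-2].endswith("GPU"))     # False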

@maxymnaumchyk (Collaborator, Author) commented on Oct 21, 2024:

Thanks for catching that, I hadn't thought about that case! If there's only one GPU, then the device looks like this:
/job:localhost/replica:0/task:0/device:GPU:0
So, I think if device.split(":")[-2].upper() == "GPU": will work for all cases.
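
For reference, checking that expression against the device strings discussed in this thread (an illustrative aside):

for device in [
    "/job:localhost/replica:0/task:0/device:CPU:0",
    "/job:localhost/replica:0/task:0/device:GPU:0",
    "/job:localhost/replica:0/task:0/device:GPU:14",
]:
    # the second-to-last colon-separated field is the device type
    print(device.split(":")[-2].upper() == "GPU")  # False, True, True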

Member commented:

It will, but it relies on TensorFlow never changing the text to end with "GPU" rather than "GPU:0". All of this is about trying to write something defensively, so that neither our incomplete knowledge of the upstream library (TensorFlow) nor possible changes in that upstream library would cause our code to break. By "break," I mean "do the wrong thing without an error message." Failing with an error message if TensorFlow changes would be fine.

Given that what we expect from TensorFlow is a string like

/job:localhost/replica:0/task:0/device:CPU:0

or

/job:localhost/replica:0/task:0/device:GPU:0

or

/job:localhost/replica:0/task:0/device:GPU:14

this would be a safe way to catch it:

import re

m = re.match(".*:(CPU|GPU):[0-9]+", device)
if m is None:
    raise NotImplementedError(f"TensorFlow device has an unexpected format: {device!r}")
if m.groups()[0] == "GPU":
    ...

It also expresses to the future maintainer (or code reviewer) what you know about what TensorFlow gives you. (The import needs to be in the import section.)
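
For reference, applying the defensive check above to the example device strings (an illustrative aside, not part of the patch):

import re

for device in [
    "/job:localhost/replica:0/task:0/device:CPU:0",
    "/job:localhost/replica:0/task:0/device:GPU:14",
    "TPU:0",  # a hypothetical unexpected format
]:
    m = re.match(".*:(CPU|GPU):[0-9]+", device)
    if m is None:
        # the real code would raise NotImplementedError here
        print(f"unexpected format: {device!r}")
    else:
        print(device, "->", m.groups()[0])  # prints "CPU" or "GPU"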

+        try:
+            import tensorflow as tf
+        except ImportError as err:
+            raise ImportError(
+                """to use ak.from_raggedtensor, you must install the 'tensorflow' package with:
+
+pip install tensorflow
+or
+conda install tensorflow"""
+            ) from err
+
+        from awkward._nplikes.cupy import Cupy
+
+        cp = Cupy.instance()
+        return cp.from_dlpack(tf.experimental.dlpack.to_dlpack(array))
+    else:
+        return array.numpy()
+
+
 def _recursive_call(content, offsets_arr, count):
     if count == len(offsets_arr) - 2:
         return ak.contents.ListOffsetArray(
56 changes: 49 additions & 7 deletions src/awkward/operations/ak_to_raggedtensor.py
@@ -4,9 +4,12 @@

 import awkward as ak
 from awkward._dispatch import high_level_function
+from awkward._nplikes.numpy_like import NumpyMetadata

 __all__ = ("to_raggedtensor",)

+np = NumpyMetadata.instance()
+

 @high_level_function()
 def to_raggedtensor(array):
@@ -45,14 +48,49 @@ def _impl(array):
     # also transforms a python list to awkward array
     array = ak.to_layout(array, allow_record=False)

-    if isinstance(array, ak.contents.numpyarray.NumpyArray):
-        return tf.RaggedTensor.from_row_splits(
-            values=array.data, row_splits=[0, array.__len__()]
-        )
-    else:
-        flat_values, nested_row_splits = _recursive_call(array, ())
-
-        return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits)
+    # keep the same device
+    ak_device = ak.backend(array)
+    if ak_device not in ["cuda", "cpu"]:
+        raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""")
+
+    if ak_device == "cpu":
+        device = "CPU:0"
+    else:
+        id = _find_innermost_content(array).data.device.id
+        device = "GPU:" + str(id)
+
+    with tf.device(device):
+        if isinstance(array, ak.contents.numpyarray.NumpyArray):
+            values = array.data
+            # handle cupy separately
+            values = _convert_to_tensor_if_cupy(values)
+            return tf.RaggedTensor.from_row_splits(
+                values=values, row_splits=[0, array.__len__()]
+            )
+
+        else:
+            flat_values, nested_row_splits = _recursive_call(array, ())
+            return tf.RaggedTensor.from_nested_row_splits(
+                flat_values, nested_row_splits
+            )
+
+
+def _find_innermost_content(array):
+    if isinstance(array, ak.contents.numpyarray.NumpyArray):
+        return array
+    else:
+        return _find_innermost_content(array.content)
+
+
+def _convert_to_tensor_if_cupy(array):
+    if isinstance(array, np.ndarray):
+        return array
+    else:
+        # converts cupy directly to tensor,
+        # since `tf.RaggedTensor.from_nested_row_splits` can not work with Cupy arrays
+        import tensorflow as tf
+
+        return tf.experimental.dlpack.from_dlpack(array.toDlpack())


 def _recursive_call(layout, offsets_arr):
@@ -75,10 +113,14 @@ def _recursive_call(layout, offsets_arr):
             )

         # recursively gather all of the offsets of an array
-        offsets_arr += (layout.offsets.data,)
+        offset = layout.offsets.data
+        offset = _convert_to_tensor_if_cupy(offset)
+        offsets_arr += (offset,)

     except AttributeError:
         # at the last iteration form a ragged tensor from the
         # accumulated offsets and flattened values of the array
-        return layout.data, offsets_arr
+        data = layout.data
+        data = _convert_to_tensor_if_cupy(data)
+        return data, offsets_arr
     return _recursive_call(layout.content, offsets_arr)
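
For context, a minimal round trip through the two functions this PR touches: the CPU path works on any TensorFlow install, while the cuda path is what this PR adds and needs a CUDA GPU, cupy, and a GPU-enabled TensorFlow build. This is an illustrative sketch, not part of the patch:

import awkward as ak
import tensorflow as tf

# CPU round trip
rt = tf.ragged.constant([[1.1, 2.2], [3.3], []])
arr = ak.from_raggedtensor(rt)                   # awkward Array: [[1.1, 2.2], [3.3], []]
print(ak.to_raggedtensor(arr))                   # back to a tf.RaggedTensor

# cuda round trip (the case this PR adds); requires a CUDA GPU and cupy
arr_gpu = ak.to_backend(arr, "cuda")
rt_gpu = ak.to_raggedtensor(arr_gpu)             # RaggedTensor placed with tf.device("GPU:<id>")
print(rt_gpu.flat_values.device)                 # e.g. /job:localhost/.../device:GPU:0
print(ak.backend(ak.from_raggedtensor(rt_gpu)))  # "cuda"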