diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt index 4b6ae1154d..0175459441 100644 --- a/docs/reference/toctree.txt +++ b/docs/reference/toctree.txt @@ -39,6 +39,14 @@ generated/ak.to_feather generated/ak.from_avro_file +.. toctree:: + :caption: Conversions for machine learning + + generated/ak.from_raggedtensor + generated/ak.to_raggedtensor + generated/ak.from_torch + generated/ak.to_torch + .. toctree:: :caption: Converting to Pandas DataFrames diff --git a/src/awkward/operations/ak_from_raggedtensor.py b/src/awkward/operations/ak_from_raggedtensor.py index 1c895506c2..4cd4c09e23 100644 --- a/src/awkward/operations/ak_from_raggedtensor.py +++ b/src/awkward/operations/ak_from_raggedtensor.py @@ -30,18 +30,25 @@ def from_raggedtensor(array): def _impl(array): try: # get the flat values - content = array.flat_values.numpy() + content = array.flat_values except AttributeError as err: raise TypeError( """only RaggedTensor can be converted to awkward array""" ) from err - # convert them to ak.contents right away + + # handle gpu and cpu instances separately + device = content.backing_device + + content = _tensor_to_np_or_cp(content, device) + + # convert flat_values to ak.contents right away content = ak.contents.NumpyArray(content) # get the offsets offsets_arr = [] for splits in array.nested_row_splits: - split = splits.numpy() + # handle gpu and cpu instances separately + split = _tensor_to_np_or_cp(splits, device) # convert to ak.index offset = ak.index.Index64(split) offsets_arr.append(offset) @@ -55,6 +62,27 @@ def _impl(array): return ak.Array(_recursive_call(content, offsets_arr, 0)) +def _tensor_to_np_or_cp(array, device): + if device.endswith("GPU", 0, -2): + try: + import tensorflow as tf + except ImportError as err: + raise ImportError( + """to use ak.from_raggedtensor, you must install the 'tensorflow' package with: + + pip install tensorflow + or + conda install tensorflow""" + ) from err + + from awkward._nplikes.cupy import Cupy + + cp = Cupy.instance() + return cp.from_dlpack(tf.experimental.dlpack.to_dlpack(array)) + else: + return array.numpy() + + def _recursive_call(content, offsets_arr, count): if count == len(offsets_arr) - 2: return ak.contents.ListOffsetArray( diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py index 5fcb2e2d5f..0a8c797c63 100644 --- a/src/awkward/operations/ak_to_raggedtensor.py +++ b/src/awkward/operations/ak_to_raggedtensor.py @@ -4,9 +4,12 @@ import awkward as ak from awkward._dispatch import high_level_function +from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("to_raggedtensor",) +np = NumpyMetadata.instance() + @high_level_function() def to_raggedtensor(array): @@ -45,14 +48,49 @@ def _impl(array): # also transforms a python list to awkward array array = ak.to_layout(array, allow_record=False) + # keep the same device + ak_device = ak.backend(array) + if ak_device not in ["cuda", "cpu"]: + raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""") + + if ak_device == "cpu": + device = "CPU:0" + else: + id = _find_innermost_content(array).data.device.id + device = "GPU:" + str(id) + + with tf.device(device): + if isinstance(array, ak.contents.numpyarray.NumpyArray): + values = array.data + # handle cupy separately + values = _convert_to_tensor_if_cupy(values) + return tf.RaggedTensor.from_row_splits( + values=values, row_splits=[0, array.__len__()] + ) + + else: + flat_values, nested_row_splits = _recursive_call(array, ()) + return tf.RaggedTensor.from_nested_row_splits( + flat_values, nested_row_splits + ) + + +def _find_innermost_content(array): if isinstance(array, ak.contents.numpyarray.NumpyArray): - return tf.RaggedTensor.from_row_splits( - values=array.data, row_splits=[0, array.__len__()] - ) + return array + else: + return _find_innermost_content(array.content) + + +def _convert_to_tensor_if_cupy(array): + if isinstance(array, np.ndarray): + return array else: - flat_values, nested_row_splits = _recursive_call(array, ()) + # converts cupy directly to tensor, + # since `tf.RaggedTensor.from_nested_row_splits` can not work with Cupy arrays + import tensorflow as tf - return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits) + return tf.experimental.dlpack.from_dlpack(array.toDlpack()) def _recursive_call(layout, offsets_arr): @@ -75,10 +113,14 @@ def _recursive_call(layout, offsets_arr): ) # recursively gather all of the offsets of an array - offsets_arr += (layout.offsets.data,) + offset = layout.offsets.data + offset = _convert_to_tensor_if_cupy(offset) + offsets_arr += (offset,) except AttributeError: # at the last iteration form a ragged tensor from the # accumulated offsets and flattened values of the array - return layout.data, offsets_arr + data = layout.data + data = _convert_to_tensor_if_cupy(data) + return data, offsets_arr return _recursive_call(layout.content, offsets_arr)