From e6c68df8a033c3a4ed0f7886d6cf389200b32dee Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Wed, 6 Nov 2024 18:04:25 +0200 Subject: [PATCH 1/2] add new functions --- docs/reference/toctree.txt | 2 + src/awkward/operations/__init__.py | 2 + src/awkward/operations/ak_from_tensorflow.py | 68 +++++++++++++++++ src/awkward/operations/ak_to_tensorflow.py | 80 ++++++++++++++++++++ 4 files changed, 152 insertions(+) create mode 100644 src/awkward/operations/ak_from_tensorflow.py create mode 100644 src/awkward/operations/ak_to_tensorflow.py diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt index 0175459441..a54559280b 100644 --- a/docs/reference/toctree.txt +++ b/docs/reference/toctree.txt @@ -44,6 +44,8 @@ generated/ak.from_raggedtensor generated/ak.to_raggedtensor + generated/ak.from_tensorflow + generated/ak.to_tensorflow generated/ak.from_torch generated/ak.to_torch diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py index 94dbd9ffac..91ebc9c184 100644 --- a/src/awkward/operations/__init__.py +++ b/src/awkward/operations/__init__.py @@ -47,6 +47,7 @@ from awkward.operations.ak_from_raggedtensor import * from awkward.operations.ak_from_rdataframe import * from awkward.operations.ak_from_regular import * +from awkward.operations.ak_from_tensorflow import * from awkward.operations.ak_from_torch import * from awkward.operations.ak_full_like import * from awkward.operations.ak_imag import * @@ -103,6 +104,7 @@ from awkward.operations.ak_to_raggedtensor import * from awkward.operations.ak_to_rdataframe import * from awkward.operations.ak_to_regular import * +from awkward.operations.ak_to_tensorflow import * from awkward.operations.ak_to_torch import * from awkward.operations.ak_transform import * from awkward.operations.ak_type import * diff --git a/src/awkward/operations/ak_from_tensorflow.py b/src/awkward/operations/ak_from_tensorflow.py new file mode 100644 index 0000000000..c166b40b6d --- /dev/null +++ 
b/src/awkward/operations/ak_from_tensorflow.py @@ -0,0 +1,68 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +import re + +import awkward as ak +from awkward._dispatch import high_level_function + +__all__ = ("from_tensorflow",) + + +@high_level_function() +def from_tensorflow(array): + """ + Args: + array: (TensorFlow Tensor): + Tensor to convert into an Awkward Array. + Converts a TensorFlow Tensor into an Awkward Array. + If `array` contains any other data types the function raises an error. + """ + + # Dispatch + yield (array,) + + # Implementation + return _impl(array) + + +def _impl(array): + try: + import tensorflow as tf + except ImportError as err: + raise ImportError( + """to use ak.from_tensorflow, you must install the 'tensorflow' package with: + + pip install tensorflow +or + conda install tensorflow""" + ) from err + + # check if array is a Tensor + if not isinstance(array, tf.Tensor): + raise TypeError( + """only a TensorFlow Tensor can be converted to Awkward Array""" + ) + + # keep the resulting array on the same device as input tensor + device = array.backing_device + matched_device = re.match(".*:(CPU|GPU):[0-9]+", device) + + if matched_device is None: + raise NotImplementedError( + f"TensorFlow device has an unexpected format: {device!r}" + ) + elif matched_device.groups()[0] == "GPU": + from awkward._nplikes.cupy import Cupy + + cp = Cupy.instance() + # zero-copy data exchange through DLPack + cp_array = cp.from_dlpack(tf.experimental.dlpack.to_dlpack(array)) + ak_array = ak.from_cupy(cp_array) + + elif matched_device.groups()[0] == "CPU": + np_array = array.numpy() + ak_array = ak.from_numpy(np_array) + + return ak_array diff --git a/src/awkward/operations/ak_to_tensorflow.py b/src/awkward/operations/ak_to_tensorflow.py new file mode 100644 index 0000000000..5726f185a5 --- /dev/null +++ b/src/awkward/operations/ak_to_tensorflow.py @@ -0,0 +1,80 @@ +# BSD 3-Clause 
License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +import awkward as ak +from awkward._dispatch import high_level_function +from awkward._nplikes.numpy_like import NumpyMetadata + +__all__ = ("to_tensorflow",) + +np = NumpyMetadata.instance() + + +@high_level_function() +def to_tensorflow(array): + """ + Args: + array: Array-like data. May be a high level #ak.Array, + or low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray, + #ak.contents.RegularArray, #ak.contents.NumpyArray + + Converts `array` (only ListOffsetArray, ListArray, RegularArray and NumpyArray data types supported) + into a TensorFlow Tensor, if possible. + + If `array` contains any other data types (RecordArray for example) the function raises a TypeError. + """ + + # Dispatch + yield (array,) + + # Implementation + return _impl(array) + + +def _impl(array): + try: + import tensorflow as tf + except ImportError as err: + raise ImportError( + """to use ak.to_tensorflow, you must install the 'tensorflow' package with: + + pip install tensorflow +or + conda install tensorflow""" + ) from err + + # useful function that handles all possible input arrays + array = ak.to_layout(array, allow_record=False) + + # get the device array is on + ak_device = ak.backend(array) + + if ak_device not in ["cuda", "cpu"]: + raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""") + + # convert to numpy or cupy if `array` on gpu + try: + backend_array = array.to_backend_array(allow_missing=False) + except ValueError as err: + raise TypeError( + "Only arrays containing equal-length lists of numbers can be converted into a TensorFlow Tensor" + ) from err + + if ak_device == "cpu": + device = "CPU:0" + else: + id = backend_array.data.device.id + device = "GPU:" + str(id) + + with tf.device(device): + # check if cupy or numpy + if isinstance(backend_array, np.ndarray): + # convert numpy to a tensorflow tensor + # this makes a copy 
unfortunately, since numpy is mutable and TensorFlow tensor is not + tensor = tf.convert_to_tensor(backend_array, dtype=tf.float64) + else: + # cupy -> tensorflow tensor + tensor = tf.experimental.dlpack.from_dlpack(backend_array.toDlpack()) + + return tensor From fbcf5719502e03af45f8fd25a647489455f8afa1 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Wed, 6 Nov 2024 18:34:19 +0200 Subject: [PATCH 2/2] add tests --- src/awkward/operations/ak_from_tensorflow.py | 1 + src/awkward/operations/ak_to_tensorflow.py | 1 - ...test_3292_to_tensorflow_from_tensorflow.py | 74 +++++++++++++++++++ 3 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 tests/test_3292_to_tensorflow_from_tensorflow.py diff --git a/src/awkward/operations/ak_from_tensorflow.py b/src/awkward/operations/ak_from_tensorflow.py index c166b40b6d..1272a822e7 100644 --- a/src/awkward/operations/ak_from_tensorflow.py +++ b/src/awkward/operations/ak_from_tensorflow.py @@ -62,6 +62,7 @@ def _impl(array): ak_array = ak.from_cupy(cp_array) elif matched_device.groups()[0] == "CPU": + # this makes a copy unfortunately, since numpy is mutable and TensorFlow tensor is not np_array = array.numpy() ak_array = ak.from_numpy(np_array) diff --git a/src/awkward/operations/ak_to_tensorflow.py b/src/awkward/operations/ak_to_tensorflow.py index 5726f185a5..9f26e0f9f1 100644 --- a/src/awkward/operations/ak_to_tensorflow.py +++ b/src/awkward/operations/ak_to_tensorflow.py @@ -71,7 +71,6 @@ def _impl(array): # check if cupy or numpy if isinstance(backend_array, np.ndarray): # convert numpy to a tensorflow tensor - # this makes a copy unfortunately, since numpy is mutable and TensorFlow tensor is not tensor = tf.convert_to_tensor(backend_array, dtype=tf.float64) else: # cupy -> tensorflow tensor diff --git a/tests/test_3292_to_tensorflow_from_tensorflow.py b/tests/test_3292_to_tensorflow_from_tensorflow.py new file mode 100644 index 0000000000..e14417ba53 --- /dev/null +++ 
b/tests/test_3292_to_tensorflow_from_tensorflow.py @@ -0,0 +1,74 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +import numpy as np +import pytest + +import awkward as ak + +to_tensorflow = ak.operations.to_tensorflow +from_tensorflow = ak.operations.from_tensorflow + +tf = pytest.importorskip("tensorflow") + +a = np.arange(2 * 2 * 2, dtype=np.float64).reshape(2, 2, 2) +b = np.arange(2 * 2 * 2).reshape(2, 2, 2) + +array = np.arange(2 * 3 * 5).reshape(2, 3, 5) +content2 = ak.contents.NumpyArray(array.reshape(-1)) +inneroffsets = ak.index.Index64(np.array([0, 5, 10, 15, 20, 25, 30])) +outeroffsets = ak.index.Index64(np.array([0, 3, 6])) + + +def test_to_tensorflow(): + # a basic test for a 4 dimensional array + array1 = ak.Array([a, b]) + i = 0 + for sub_array in [ + [[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]], + [[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]], + ]: + assert to_tensorflow(array1)[i].numpy().tolist() == sub_array + i += 1 + + # test that the data types are remaining the same (float64 in this case) + assert array1.layout.to_backend_array().dtype.name in str( + to_tensorflow(array1).dtype + ) + + # try a listoffset array inside a listoffset array + array2 = ak.contents.ListOffsetArray( + outeroffsets, ak.contents.ListOffsetArray(inneroffsets, content2) + ) + assert to_tensorflow(array2)[0].numpy().tolist() == [ + [0, 1, 2, 3, 4], + [5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + ] + assert to_tensorflow(array2)[1].numpy().tolist() == [ + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24], + [25, 26, 27, 28, 29], + ] + + # try just a python list + array3 = [3, 1, 4, 1, 9, 2, 6] + assert to_tensorflow(array3).numpy().tolist() == [3, 1, 4, 1, 9, 2, 6] + + +array1 = tf.constant([[1.0, -1.0], [1.0, -1.0]], dtype=tf.float32) +array2 = tf.constant(np.array([[1, 2, 3], [4, 5, 6]])) + + +def test_from_tensorflow(): + # Awkward.to_list() == Tensor.numpy().tolist() + assert 
from_tensorflow(array1).to_list() == array1.numpy().tolist() + + assert from_tensorflow(array2).to_list() == [[1, 2, 3], [4, 5, 6]] + + # test that the data type remains the same (float32 in this case) + assert from_tensorflow(array1).layout.dtype.name in str(array1.dtype) + + # test that the data type remains the same (int64 in this case) + assert from_tensorflow(array2).layout.dtype.name in str(array2.dtype)