Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: to/from TensorFlow Tensor #3292

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/reference/toctree.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@

generated/ak.from_raggedtensor
generated/ak.to_raggedtensor
generated/ak.from_tensorflow
generated/ak.to_tensorflow
generated/ak.from_torch
generated/ak.to_torch

Expand Down
2 changes: 2 additions & 0 deletions src/awkward/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from awkward.operations.ak_from_raggedtensor import *
from awkward.operations.ak_from_rdataframe import *
from awkward.operations.ak_from_regular import *
from awkward.operations.ak_from_tensorflow import *
from awkward.operations.ak_from_torch import *
from awkward.operations.ak_full_like import *
from awkward.operations.ak_imag import *
Expand Down Expand Up @@ -103,6 +104,7 @@
from awkward.operations.ak_to_raggedtensor import *
from awkward.operations.ak_to_rdataframe import *
from awkward.operations.ak_to_regular import *
from awkward.operations.ak_to_tensorflow import *
from awkward.operations.ak_to_torch import *
from awkward.operations.ak_transform import *
from awkward.operations.ak_type import *
Expand Down
69 changes: 69 additions & 0 deletions src/awkward/operations/ak_from_tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import re

import awkward as ak
from awkward._dispatch import high_level_function

__all__ = ("from_tensorflow",)


@high_level_function()
def from_tensorflow(array):
    """
    Args:
        array (TensorFlow Tensor): Tensor to convert into an Awkward Array.

    Converts a TensorFlow Tensor into an Awkward Array.

    The result is built for the same kind of device that backs the tensor:
    a tensor on a CPU is converted through NumPy (which copies the data),
    and a tensor on a GPU is handed to CuPy through DLPack.

    Raises a TypeError if `array` is not a TensorFlow Tensor, an ImportError
    if the `tensorflow` package cannot be imported, and a NotImplementedError
    if the tensor's device name is not recognized as a CPU or GPU device.
    """

    # Dispatch
    yield (array,)

    # Implementation
    return _impl(array)


def _impl(array):
    """
    Builds an Awkward Array from a TensorFlow Tensor, using NumPy for tensors
    backed by a CPU device and CuPy (via DLPack) for tensors backed by a GPU.

    Raises ImportError when TensorFlow is unavailable, TypeError when `array`
    is not a `tf.Tensor`, and NotImplementedError when the tensor's backing
    device name does not look like a CPU or GPU device.
    """
    try:
        import tensorflow as tf
    except ImportError as err:
        raise ImportError(
            "to use ak.from_tensorflow, you must install the 'tensorflow' package with:\n"
            "\n"
            "pip install tensorflow\n"
            "or\n"
            "conda install tensorflow"
        ) from err

    # reject anything that is not a Tensor before touching its attributes
    if not isinstance(array, tf.Tensor):
        raise TypeError("only a TensorFlow Tensor can be converted to Awkward Array")

    # the output is produced for the same device kind as the input tensor
    device = array.backing_device
    parsed = re.match(".*:(CPU|GPU):[0-9]+", device)
    if parsed is None:
        raise NotImplementedError(
            f"TensorFlow device has an unexpected format: {device!r}"
        )

    device_kind = parsed.group(1)

    if device_kind == "CPU":
        # per the original author's note this path copies: NumPy arrays are
        # mutable while TensorFlow tensors are not
        return ak.from_numpy(array.numpy())

    # the pattern only admits "CPU" or "GPU", so this is the GPU case
    from awkward._nplikes.cupy import Cupy

    cupy_like = Cupy.instance()
    # zero-copy data exchange through DLPack
    capsule = tf.experimental.dlpack.to_dlpack(array)
    return ak.from_cupy(cupy_like.from_dlpack(capsule))
79 changes: 79 additions & 0 deletions src/awkward/operations/ak_to_tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import awkward as ak
from awkward._dispatch import high_level_function
from awkward._nplikes.numpy_like import NumpyMetadata

__all__ = ("to_tensorflow",)

# `np` is the NumpyMetadata singleton (not the numpy module itself); `_impl`
# below uses it for the `np.ndarray` isinstance check.
np = NumpyMetadata.instance()


@high_level_function()
def to_tensorflow(array):
    """
    Args:
        array: Array-like data. May be a high level #ak.Array,
            or low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray,
            #ak.contents.RegularArray, #ak.contents.NumpyArray

    Converts `array` (only ListOffsetArray, ListArray, RegularArray and NumpyArray data types supported)
    into a TensorFlow Tensor, if possible.

    The tensor is created on `"CPU:0"` for arrays on the `"cpu"` backend and
    on the GPU that holds the data for arrays on the `"cuda"` backend; any
    other backend raises a ValueError.

    If `array` contains any other data types (RecordArray for example) the function raises a TypeError.
    A TypeError is also raised if the data cannot be turned into a single
    rectilinear backend array (for instance lists of unequal length or
    missing values). An ImportError is raised if the `tensorflow` package
    cannot be imported.
    """

    # Dispatch
    yield (array,)

    # Implementation
    return _impl(array)


def _impl(array):
    """
    Converts `array` into a TensorFlow Tensor placed on the device that
    matches the array's backend ("CPU:0" for the "cpu" backend, "GPU:<id>"
    for the "cuda" backend).

    The element type of the data is preserved on both paths: NumPy-backed
    data lets TensorFlow infer the dtype from the array, and CuPy-backed data
    is exchanged through DLPack.

    Raises ImportError when TensorFlow is unavailable, ValueError when the
    array's backend is neither "cpu" nor "cuda", and TypeError when the data
    cannot be expressed as a single backend array without missing values.
    """
    try:
        import tensorflow as tf
    except ImportError as err:
        raise ImportError(
            "to use ak.to_tensorflow, you must install the 'tensorflow' package with:\n"
            "\n"
            "pip install tensorflow\n"
            "or\n"
            "conda install tensorflow"
        ) from err

    # to_layout normalizes the supported kinds of input into a low-level layout
    layout = ak.to_layout(array, allow_record=False)

    # get the backend the array lives on
    ak_device = ak.backend(layout)

    if ak_device not in ("cuda", "cpu"):
        raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""")

    # convert to numpy, or to cupy if `array` is on a gpu
    try:
        backend_array = layout.to_backend_array(allow_missing=False)
    except ValueError as err:
        raise TypeError(
            "Only arrays containing equal-length lists of numbers can be converted into a TensorFlow Tensor"
        ) from err

    if ak_device == "cpu":
        device = "CPU:0"
    else:
        # target the same GPU that already holds the CuPy buffer
        device = f"GPU:{backend_array.data.device.id}"

    with tf.device(device):
        if isinstance(backend_array, np.ndarray):
            # numpy -> tensorflow tensor; no explicit dtype is passed so that
            # TensorFlow infers it from the array instead of casting integer
            # or boolean data to float64
            tensor = tf.convert_to_tensor(backend_array)
        else:
            # cupy -> tensorflow tensor through DLPack
            tensor = tf.experimental.dlpack.from_dlpack(backend_array.toDlpack())

    return tensor
74 changes: 74 additions & 0 deletions tests/test_3292_to_tensorflow_from_tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import numpy as np
import pytest

import awkward as ak

# Short aliases for the two functions under test.
to_tensorflow = ak.operations.to_tensorflow
from_tensorflow = ak.operations.from_tensorflow

# Skip every test in this module when TensorFlow cannot be imported.
tf = pytest.importorskip("tensorflow")

# Two 2x2x2 blocks holding the values 0..7: `a` is explicitly float64, `b`
# uses NumPy's default integer dtype.
a = np.arange(2 * 2 * 2, dtype=np.float64).reshape(2, 2, 2)
b = np.arange(2 * 2 * 2).reshape(2, 2, 2)

# Flat content 0..29 plus offsets that regroup it into 6 lists of 5 items
# (inner) and then 2 lists of 3 lists (outer), i.e. a 2x3x5 layout.
array = np.arange(2 * 3 * 5).reshape(2, 3, 5)
content2 = ak.contents.NumpyArray(array.reshape(-1))
inneroffsets = ak.index.Index64(np.array([0, 5, 10, 15, 20, 25, 30]))
outeroffsets = ak.index.Index64(np.array([0, 3, 6]))


def test_to_tensorflow():
    """Checks values and dtype of tensors produced from several input kinds."""
    # a basic test for a 4 dimensional array: both 2x2x2 blocks hold 0..7
    array1 = ak.Array([a, b])
    expected_block = [[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]]
    for index, expected in enumerate([expected_block, expected_block]):
        assert to_tensorflow(array1)[index].numpy().tolist() == expected

    # the tensor's dtype must mention the backend array's dtype name
    # (float64 in this case)
    backend_dtype_name = array1.layout.to_backend_array().dtype.name
    assert backend_dtype_name in str(to_tensorflow(array1).dtype)

    # try a listoffset array inside a listoffset array
    inner_lists = ak.contents.ListOffsetArray(inneroffsets, content2)
    array2 = ak.contents.ListOffsetArray(outeroffsets, inner_lists)
    expected_halves = [
        [
            [0, 1, 2, 3, 4],
            [5, 6, 7, 8, 9],
            [10, 11, 12, 13, 14],
        ],
        [
            [15, 16, 17, 18, 19],
            [20, 21, 22, 23, 24],
            [25, 26, 27, 28, 29],
        ],
    ]
    for index, expected in enumerate(expected_halves):
        assert to_tensorflow(array2)[index].numpy().tolist() == expected

    # try just a python list
    array3 = [3, 1, 4, 1, 9, 2, 6]
    assert to_tensorflow(array3).numpy().tolist() == [3, 1, 4, 1, 9, 2, 6]


# Input tensors for test_from_tensorflow: `array1` is explicitly float32,
# `array2` takes its dtype from the NumPy integer array it is built from.
# (test_to_tensorflow defines its own local `array1` / `array2`.)
array1 = tf.constant([[1.0, -1.0], [1.0, -1.0]], dtype=tf.float32)
array2 = tf.constant(np.array([[1, 2, 3], [4, 5, 6]]))


def test_from_tensorflow():
    """Checks values and dtype of Awkward Arrays produced from tensors."""
    from_float_tensor = from_tensorflow(array1)
    from_int_tensor = from_tensorflow(array2)

    # Awkward.to_list() == Tensor.numpy().tolist()
    expected_floats = array1.numpy().tolist()
    assert from_float_tensor.to_list() == expected_floats

    assert from_int_tensor.to_list() == [[1, 2, 3], [4, 5, 6]]

    # test that the data types are remaining the same (float32 in this case)
    float_dtype_name = from_float_tensor.layout.dtype.name
    assert float_dtype_name in str(array1.dtype)

    # test that the data types are remaining the same (integer in this case)
    int_dtype_name = from_int_tensor.layout.dtype.name
    assert int_dtype_name in str(array2.dtype)
Loading