Skip to content

Commit

Permalink
#4858: Add support for typecast
Browse files Browse the repository at this point in the history
  : fix and update test for typecast to compare after round
  : add test for clone and typecast for multiple input and output data types
  : Add output dtype for typecast
  : Add input and output dtype for torch and device
  : update tests
  • Loading branch information
bharane-ab authored and muthutt committed Jan 31, 2024
1 parent 343e002 commit 2d0acb6
Show file tree
Hide file tree
Showing 11 changed files with 217 additions and 18 deletions.
2 changes: 2 additions & 0 deletions docs/source/ttnn/dependencies/tt_lib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ but in general retaining the data.

.. autofunction:: tt_lib.tensor.clone

.. autofunction:: tt_lib.tensor.typecast

.. autofunction:: tt_lib.tensor.copy

Tensor creation operations
Expand Down
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/op_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
"tt_lib_op": tt_lib_ops.clone,
"pytorch_op": pytorch_ops.clone,
},
"typecast": {
"tt_lib_op": tt_lib_ops.typecast,
"pytorch_op": pytorch_ops.typecast,
},
"copy": {
"tt_lib_op": tt_lib_ops.copy,
"pytorch_op": pytorch_ops.copy,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
# SPDX-FileCopyrightText: © 2023-24 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -117,21 +117,59 @@ def test_run_assign_binary_op(
test_args,
)


@pytest.mark.parametrize(
"input_shapes",
[
[[1, 1, 1, 30]], # Single core
[[1, 1, 300, 380]], # multi core
[[1, 3, 320, 380]], # multi core
[[1, 1, 32, 32]], # Single core
[[1, 1, 320, 384]], # Multi core
[[1, 3, 320, 384]], # Multi core
],
)
@pytest.mark.parametrize(
"input_mem_config",
mem_configs,
)
@pytest.mark.parametrize(
"dst_mem_config",
mem_configs,
)
@pytest.mark.parametrize(
"output_type",
[
ttl.tensor.DataType.BFLOAT16,
],
)
@pytest.mark.parametrize(
"input_type",
[
torch.float32,
torch.float16,
torch.bfloat16,
],
)
class TestClone:
def test_run_clone_op(
self,
input_type,
output_type,
input_shapes,
input_mem_config,
dst_mem_config,
device,
function_level_defaults,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), input_type)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args["input_mem_config"] = [input_mem_config]
test_args["dtype"] = [output_type]
test_args.update({"output_mem_config": dst_mem_config})
comparison_func = comparison_funcs.comp_equal
comparison_func = partial(comparison_funcs.comp_allclose, rtol=1e-1, atol=1e-1)
run_single_pytorch_test(
"clone",
input_shapes,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# SPDX-FileCopyrightText: © 2023-24 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import pytest
import torch
from functools import partial
import tt_lib as ttl


from tests.tt_eager.python_api_testing.sweep_tests import (
comparison_funcs,
generation_funcs,
)
from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import (
run_single_pytorch_test,
)

# Memory configurations swept for both the input and output tensor placement:
# DRAM-interleaved and L1-interleaved.
mem_configs = [
    ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
    ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
]


@pytest.mark.parametrize(
    "pt_input_dtype, tt_input_dtype",
    (
        (torch.float16, ttl.tensor.DataType.FLOAT32),
        (torch.float32, ttl.tensor.DataType.BFLOAT8_B),
        (torch.bfloat16, ttl.tensor.DataType.BFLOAT16),
        (torch.int, ttl.tensor.DataType.UINT32),
    ),
)
@pytest.mark.parametrize(
    "pt_output_dtype, tt_output_dtype",
    (
        (torch.bfloat16, ttl.tensor.DataType.BFLOAT16),
        (torch.float32, ttl.tensor.DataType.BFLOAT8_B),
    ),
)
@pytest.mark.parametrize(
    "input_shapes",
    [
        # Fix: the original list contained [[1, 1, 32, 32]] twice, running the
        # identical sweep case a second time for no extra coverage.
        [[1, 1, 32, 32]],  # Single core
        [[1, 1, 320, 320]],  # Multi core
        [[1, 3, 320, 320]],  # Multi core
        [[1, 1, 320, 384]],  # Multi core
        [[1, 3, 320, 384]],  # Multi core
    ],
)
@pytest.mark.parametrize(
    "input_mem_config",
    mem_configs,
)
@pytest.mark.parametrize(
    "dst_mem_config",
    mem_configs,
)
class TestTypecast:
    """Sweep test for tt_lib.tensor.typecast across input/output dtype pairs,
    tensor shapes, and input/output memory configurations."""

    def test_run_typecast_op(
        self,
        pt_output_dtype,
        tt_output_dtype,
        pt_input_dtype,
        tt_input_dtype,
        input_shapes,
        input_mem_config,
        dst_mem_config,
        device,
        function_level_defaults,
    ):
        # FLOAT32/UINT32 device inputs are not yet supported by the typecast op.
        if tt_input_dtype in [ttl.tensor.DataType.FLOAT32, ttl.tensor.DataType.UINT32]:
            pytest.skip(f"{tt_input_dtype} cannot be converted yet. Skip")
        # Casting a dtype to itself is a no-op; nothing to verify.
        if tt_input_dtype == tt_output_dtype:
            pytest.skip("Same I/O data types. Skip.")
        datagen_func = [
            generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=0, high=10), pt_input_dtype)
        ]
        test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
        test_args["pt_input_dtype"] = [pt_input_dtype]
        test_args["tt_input_dtype"] = [tt_input_dtype]
        test_args["pt_output_dtype"] = [pt_output_dtype]
        test_args["tt_output_dtype"] = [tt_output_dtype]
        test_args["input_mem_config"] = [input_mem_config]
        test_args.update({"output_mem_config": dst_mem_config})
        # PCC comparison: lossy targets (e.g. BFLOAT8_B) cannot be compared bit-exact.
        comparison_func = comparison_funcs.comp_pcc

        run_single_pytorch_test(
            "typecast",
            input_shapes,
            datagen_func,
            comparison_func,
            device,
            test_args,
        )
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def clone(x, *args, **kwargs):
return torch.clone(x)


def typecast(x, pt_input_dtype, pt_output_dtype, *args, **kwargs):
    """Golden reference for typecast: cast ``x`` to the input dtype first,
    then to the output dtype (both passed as single-element lists)."""
    as_input_dtype = x.to(pt_input_dtype[0])
    return as_input_dtype.to(pt_output_dtype[0])


def move(x, *args, **kwargs):
return x

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def run_single_pytorch_test(
env="",
plot_func=None,
):
assert test_name in op_map.keys()
assert test_name in op_map

default_env_dict = {}
# Get env variables from CLI
Expand Down
19 changes: 19 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,25 @@ def clone(
return tt2torch_tensor(t1)


@setup_host_and_device
def typecast(
    x,
    *args,
    device,
    tt_input_dtype,
    tt_output_dtype,
    layout,
    input_mem_config,
    output_mem_config,
    **kwargs,
):
    """Place ``x`` on device with the requested input dtype, run the device
    typecast to the output dtype, and return the result as a torch tensor."""
    device_input = setup_tt_tensor(x, device, layout[0], input_mem_config[0], tt_input_dtype[0])
    device_output = ttl.tensor.typecast(device_input, tt_output_dtype[0], output_mem_config=output_mem_config)
    return tt2torch_tensor(device_output)


@setup_host_and_device
def move(
x,
Expand Down
24 changes: 24 additions & 0 deletions tt_eager/tt_dnn/op_library/copy/copy_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,30 @@ tt::stl::reflection::Attributes Copy::attributes() const {
};
}

// Copy src_tensor's data into the pre-allocated dst_tensor; the destination's
// own memory config and dtype are preserved. Returns the destination tensor.
Tensor copy(const Tensor& src_tensor, const Tensor& dst_tensor) {
    operation::run(Copy{dst_tensor.memory_config(), dst_tensor.dtype()}, {src_tensor, dst_tensor});
    return dst_tensor;
}

// Allocate and return a new tensor holding input's data. The output dtype
// defaults to the input's dtype when output_dtype is not provided.
Tensor clone(const Tensor& input, const MemoryConfig& output_mem_config, std::optional<const DataType> output_dtype) {
    return operation::run(Copy{output_mem_config, output_dtype.value_or(input.dtype())}, {input}).at(0);
}

// Return a new tensor with input_tensor's data converted to dtype, implemented
// as a Copy op with a different output dtype.
// NOTE(review): no validation here that dtype is a supported typecast target —
// presumably enforced inside the Copy op; confirm.
Tensor typecast(const Tensor& input_tensor, const DataType& dtype, const MemoryConfig& output_mem_config ) {
    return operation::run(Copy{output_mem_config, dtype}, {input_tensor}).at(0);
}

// Unary assign: materialize input into a newly allocated tensor (same mechanics
// as clone; dtype defaults to the input's dtype).
Tensor assign(const Tensor& input, const MemoryConfig& output_mem_config , std::optional<const DataType> output_dtype ) {
    return operation::run(Copy{output_mem_config, output_dtype.value_or(input.dtype())}, {input}).at(0);
}

// Binary assign: write input_a's data into the pre-allocated input_b and return
// input_b (mirrors copy above).
Tensor assign(const Tensor& input_a, const Tensor& input_b) {
    operation::run(Copy{input_b.memory_config(), input_b.dtype()}, {input_a, input_b});
    return input_b;
}

} // namespace tt_metal

} // namespace tt
20 changes: 6 additions & 14 deletions tt_eager/tt_dnn/op_library/copy/copy_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,17 @@ struct Copy {
operation::ProgramWithCallbacks copy_multi_core(const Tensor &input, const Tensor &output, bool backwards = false);
operation::ProgramWithCallbacks copy_single_core(const Tensor &input, const Tensor &output, bool backwards = false);

inline Tensor copy(const Tensor& src_tensor, const Tensor& dst_tensor) {
operation::run(Copy{dst_tensor.memory_config(), dst_tensor.dtype()}, {src_tensor, dst_tensor});
return dst_tensor;
}
Tensor copy(const Tensor& src_tensor, const Tensor& dst_tensor);

inline Tensor clone(const Tensor& input, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, std::optional<const DataType> output_dtype = std::nullopt) {
return operation::run(Copy{output_mem_config, output_dtype.value_or(input.dtype())}, {input}).at(0);
}
Tensor clone(const Tensor& input, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, std::optional<const DataType> output_dtype = std::nullopt);

Tensor typecast(const Tensor& input_tensor, const DataType& dtype, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

//unary assign
inline Tensor assign(const Tensor& input, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, std::optional<const DataType> output_dtype = std::nullopt) {
return operation::run(Copy{output_mem_config, output_dtype.value_or(input.dtype())}, {input}).at(0);
}
Tensor assign(const Tensor& input, const MemoryConfig& output_mem_config, std::optional<const DataType> output_dtype = std::nullopt);

// binary assign
inline Tensor assign(const Tensor& input_a, const Tensor& input_b) {
operation::run(Copy{input_b.memory_config(), input_b.dtype()}, {input_a, input_b});
return input_b;
}
Tensor assign(const Tensor& input_a, const Tensor& input_b);

} // namespace tt_metal

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "tt_metal/host_api.hpp"
#include "tt_metal/common/constants.hpp"
#include "tt_metal/detail/util.hpp"
#include "tt_metal/common/tt_backend_api_types.hpp"
#include "common/bfloat8.hpp"

using namespace tt::constants;

Expand Down
18 changes: 18 additions & 0 deletions tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_dm_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,24 @@ namespace tt::tt_metal::detail{
detail::bind_unary_op<true, true>(m_tensor, "assign", py::overload_cast<const Tensor&, const MemoryConfig&, std::optional<const DataType>>(&assign), R"doc( Returns a new tensor which is a new copy of input tensor ``{0}``.)doc");

// *** tensor manipulation ***
// Python binding for typecast. Doc fixes: "one ofthe" typo, missing space before
// ``{0}``, stale "matching data type" sentence copied from a multi-tensor op,
// and the argument is a single Tensor, not a list (see Tensor typecast(...)).
// py::arg name "input_tensors" kept as-is to preserve the keyword interface.
m_tensor.def("typecast", &typecast,
    py::arg("input_tensors").noconvert(), py::arg("dtype"), py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, R"doc(
    Returns a new tensor which is a typecast of the input tensor ``{0}`` to the new datatype.
    Input tensor must be on device, in ROW MAJOR or TILE layout.
    Datatype must be one of the following types: BFLOAT16, BFLOAT8_B, UINT32 and UINT16.
    Output tensor will be on device, in the same layout, and have the given data type.

    .. csv-table::
        :header: "Argument", "Description", "Data type", "Required"

        "input_tensors", "Input tensor to typecast", "Tensor", "Yes"
        "dtype", "Datatype of the typecast output", "DataType", "Yes"
        "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "No"
    )doc"
);
m_tensor.def("concat", &concat,
py::arg("input_tensors").noconvert(), py::arg("dim") = 0, py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, R"doc(
Concatenates shape of tensors ``arg0`` and ``arg1`` to new shape ``[W, Z, Y, X]`` along the specified dimension ``arg1``.
Expand Down

0 comments on commit 2d0acb6

Please sign in to comment.