Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#4858: Add support for typecast #4840

Merged
merged 1 commit into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/ttnn/dependencies/tt_lib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ but in general retaining the data.

.. autofunction:: tt_lib.tensor.clone

.. autofunction:: tt_lib.tensor.typecast

.. autofunction:: tt_lib.tensor.copy

Tensor creation operations
Expand Down
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/op_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
"tt_lib_op": tt_lib_ops.clone,
"pytorch_op": pytorch_ops.clone,
},
"typecast": {
"tt_lib_op": tt_lib_ops.typecast,
"pytorch_op": pytorch_ops.typecast,
},
"copy": {
"tt_lib_op": tt_lib_ops.copy,
"pytorch_op": pytorch_ops.copy,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
# SPDX-FileCopyrightText: © 2023-24 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -117,21 +117,59 @@ def test_run_assign_binary_op(
test_args,
)


@pytest.mark.parametrize(
"input_shapes",
[
[[1, 1, 1, 30]], # Single core
[[1, 1, 300, 380]], # multi core
[[1, 3, 320, 380]], # multi core
[[1, 1, 32, 32]], # Single core
[[1, 1, 320, 384]], # Multi core
[[1, 3, 320, 384]], # Multi core
],
)
@pytest.mark.parametrize(
"input_mem_config",
mem_configs,
)
@pytest.mark.parametrize(
"dst_mem_config",
mem_configs,
)
@pytest.mark.parametrize(
"output_type",
[
ttl.tensor.DataType.BFLOAT16,
],
)
@pytest.mark.parametrize(
"input_type",
[
torch.float32,
torch.float16,
torch.bfloat16,
],
)
class TestClone:
def test_run_clone_op(
self,
input_type,
output_type,
input_shapes,
input_mem_config,
dst_mem_config,
device,
function_level_defaults,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.bfloat16)
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), input_type)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args["input_mem_config"] = [input_mem_config]
test_args["dtype"] = [output_type]
test_args.update({"output_mem_config": dst_mem_config})
comparison_func = comparison_funcs.comp_equal
comparison_func = partial(comparison_funcs.comp_allclose, rtol=1e-1, atol=1e-1)
run_single_pytorch_test(
"clone",
input_shapes,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# SPDX-FileCopyrightText: © 2023-24 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import pytest
import torch
from functools import partial
import tt_lib as ttl


from tests.tt_eager.python_api_testing.sweep_tests import (
comparison_funcs,
generation_funcs,
)
from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import (
run_single_pytorch_test,
)

# Memory configurations swept by the typecast tests: interleaved layout in
# both DRAM and L1 buffers (used for input and destination independently).
mem_configs = [
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1),
]


@pytest.mark.parametrize(
    "pt_input_dtype, tt_input_dtype",
    (
        (torch.float16, ttl.tensor.DataType.FLOAT32),
        (torch.float32, ttl.tensor.DataType.BFLOAT8_B),
        (torch.bfloat16, ttl.tensor.DataType.BFLOAT16),
        (torch.int, ttl.tensor.DataType.UINT32),
    ),
)
@pytest.mark.parametrize(
    "pt_output_dtype, tt_output_dtype",
    (
        (torch.bfloat16, ttl.tensor.DataType.BFLOAT16),
        (torch.float32, ttl.tensor.DataType.BFLOAT8_B),
    ),
)
@pytest.mark.parametrize(
    "input_shapes",
    [
        # NOTE(review): removed an exact duplicate of [[1, 1, 32, 32]] that
        # doubled the single-core runs without adding coverage.
        [[1, 1, 32, 32]],  # Single core
        [[1, 1, 320, 320]],  # Multi core
        [[1, 3, 320, 320]],  # Multi core
        [[1, 1, 320, 384]],  # Multi core
        [[1, 3, 320, 384]],  # Multi core
    ],
)
@pytest.mark.parametrize(
    "input_mem_config",
    mem_configs,
)
@pytest.mark.parametrize(
    "dst_mem_config",
    mem_configs,
)
class TestTypecast:
    """Sweep tests for tt_lib.tensor.typecast against a torch reference cast."""

    def test_run_typecast_op(
        self,
        pt_output_dtype,
        tt_output_dtype,
        pt_input_dtype,
        tt_input_dtype,
        input_shapes,
        input_mem_config,
        dst_mem_config,
        device,
        function_level_defaults,
    ):
        # FLOAT32/UINT32 inputs are not supported by the typecast op yet.
        if tt_input_dtype in [ttl.tensor.DataType.FLOAT32, ttl.tensor.DataType.UINT32]:
            pytest.skip(f"{tt_input_dtype} cannot be converted yet. Skip")
        # A same-dtype cast is a no-op; nothing meaningful to verify.
        if tt_input_dtype == tt_output_dtype:
            pytest.skip("Same I/O data types. Skip.")
        # Generate random data already cast to the torch-side input dtype.
        datagen_func = [
            generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=0, high=10), pt_input_dtype)
        ]
        test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
        test_args["pt_input_dtype"] = [pt_input_dtype]
        test_args["tt_input_dtype"] = [tt_input_dtype]
        test_args["pt_output_dtype"] = [pt_output_dtype]
        test_args["tt_output_dtype"] = [tt_output_dtype]
        test_args["input_mem_config"] = [input_mem_config]
        test_args.update({"output_mem_config": dst_mem_config})
        # PCC comparison: lossy casts (e.g. to BFLOAT8_B) are not bit-exact.
        comparison_func = comparison_funcs.comp_pcc

        run_single_pytorch_test(
            "typecast",
            input_shapes,
            datagen_func,
            comparison_func,
            device,
            test_args,
        )
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def clone(x, *args, **kwargs):
return torch.clone(x)


def typecast(x, pt_input_dtype, pt_output_dtype, *args, **kwargs):
    """Reference typecast: cast *x* to the input dtype, then to the output dtype.

    Dtypes arrive wrapped in single-element lists (sweep-test convention).
    """
    as_input = x.to(pt_input_dtype[0])
    return as_input.to(pt_output_dtype[0])


def move(x, *args, **kwargs):
    """Reference op for move: on the host side this is the identity."""
    return x

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def run_single_pytorch_test(
env="",
plot_func=None,
):
assert test_name in op_map.keys()
assert test_name in op_map

default_env_dict = {}
# Get env variables from CLI
Expand Down
19 changes: 19 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,25 @@ def clone(
return tt2torch_tensor(t1)


@setup_host_and_device
def typecast(
    x,
    *args,
    device,
    tt_input_dtype,
    tt_output_dtype,
    layout,
    input_mem_config,
    output_mem_config,
    **kwargs,
):
    """Run tt_lib.tensor.typecast on *x* and return the result as a torch tensor.

    Per sweep-test convention, layout/mem-config/dtype arguments arrive as
    single-element lists.
    """
    device_tensor = setup_tt_tensor(x, device, layout[0], input_mem_config[0], tt_input_dtype[0])
    cast_tensor = ttl.tensor.typecast(device_tensor, tt_output_dtype[0], output_mem_config=output_mem_config)
    return tt2torch_tensor(cast_tensor)


@setup_host_and_device
def move(
x,
Expand Down
24 changes: 24 additions & 0 deletions tt_eager/tt_dnn/op_library/copy/copy_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,30 @@ tt::stl::reflection::Attributes Copy::attributes() const {
};
}

// Copies src_tensor's data into the pre-allocated dst_tensor (the output
// keeps dst_tensor's memory config and dtype) and returns dst_tensor.
Tensor copy(const Tensor& src_tensor, const Tensor& dst_tensor) {
operation::run(Copy{dst_tensor.memory_config(), dst_tensor.dtype()}, {src_tensor, dst_tensor});
return dst_tensor;
}

// Returns a new tensor with a copy of input's data; the output dtype
// defaults to the input's dtype when output_dtype is not supplied.
Tensor clone(const Tensor& input, const MemoryConfig& output_mem_config, std::optional<const DataType> output_dtype) {
return operation::run(Copy{output_mem_config, output_dtype.value_or(input.dtype())}, {input}).at(0);
}

// Returns a copy of input_tensor converted to `dtype`, implemented as a
// Copy op whose output dtype differs from the input's.
Tensor typecast(const Tensor& input_tensor, const DataType& dtype, const MemoryConfig& output_mem_config ) {
return operation::run(Copy{output_mem_config, dtype}, {input_tensor}).at(0);
}

// Unary assign: returns a new tensor holding input's data, optionally
// re-typed to output_dtype (defaults to the input's dtype).
Tensor assign(const Tensor& input, const MemoryConfig& output_mem_config , std::optional<const DataType> output_dtype ) {
return operation::run(Copy{output_mem_config, output_dtype.value_or(input.dtype())}, {input}).at(0);
}

// Binary assign: copies input_a's data into the pre-allocated input_b
// (keeping input_b's memory config and dtype) and returns input_b.
Tensor assign(const Tensor& input_a, const Tensor& input_b) {
operation::run(Copy{input_b.memory_config(), input_b.dtype()}, {input_a, input_b});
return input_b;
}

} // namespace tt_metal

} // namespace tt
20 changes: 6 additions & 14 deletions tt_eager/tt_dnn/op_library/copy/copy_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,17 @@ struct Copy {
operation::ProgramWithCallbacks copy_multi_core(const Tensor &input, const Tensor &output, bool backwards = false);
operation::ProgramWithCallbacks copy_single_core(const Tensor &input, const Tensor &output, bool backwards = false);

inline Tensor copy(const Tensor& src_tensor, const Tensor& dst_tensor) {
operation::run(Copy{dst_tensor.memory_config(), dst_tensor.dtype()}, {src_tensor, dst_tensor});
return dst_tensor;
}
Tensor copy(const Tensor& src_tensor, const Tensor& dst_tensor);

inline Tensor clone(const Tensor& input, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, std::optional<const DataType> output_dtype = std::nullopt) {
return operation::run(Copy{output_mem_config, output_dtype.value_or(input.dtype())}, {input}).at(0);
}
Tensor clone(const Tensor& input, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, std::optional<const DataType> output_dtype = std::nullopt);

Tensor typecast(const Tensor& input_tensor, const DataType& dtype, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

//unary assign
inline Tensor assign(const Tensor& input, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, std::optional<const DataType> output_dtype = std::nullopt) {
return operation::run(Copy{output_mem_config, output_dtype.value_or(input.dtype())}, {input}).at(0);
}
Tensor assign(const Tensor& input, const MemoryConfig& output_mem_config, std::optional<const DataType> output_dtype = std::nullopt);

// binary assign
inline Tensor assign(const Tensor& input_a, const Tensor& input_b) {
operation::run(Copy{input_b.memory_config(), input_b.dtype()}, {input_a, input_b});
return input_b;
}
Tensor assign(const Tensor& input_a, const Tensor& input_b);

} // namespace tt_metal

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "tt_metal/host_api.hpp"
#include "tt_metal/common/constants.hpp"
#include "tt_metal/detail/util.hpp"
#include "tt_metal/common/tt_backend_api_types.hpp"
#include "common/bfloat8.hpp"

using namespace tt::constants;

Expand Down
18 changes: 18 additions & 0 deletions tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_dm_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,24 @@ namespace tt::tt_metal::detail{
detail::bind_unary_op<true, true>(m_tensor, "assign", py::overload_cast<const Tensor&, const MemoryConfig&, std::optional<const DataType>>(&assign), R"doc( Returns a new tensor which is a new copy of input tensor ``{0}``.)doc");

// *** tensor manipulation ***
m_tensor.def("typecast", &typecast,
py::arg("input_tensors").noconvert(), py::arg("dtype"), py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, R"doc(
Returns a new tensor which is a typecast of input tensor with new datatype ``{0}``.

Input tensors must be on device, in ROW MAJOR or TILE layout, and have matching data type.

Datatype must be one of the following types: BFLOAT16, BFLOAT8_B, UINT32 and UINT16.

Output tensor will be on device, in same layout, and have the given data type.

.. csv-table::
:header: "Argument", "Description", "Data type", "Required"

"input_tensors", "Input tensors to typecast", "List of Tensors", "Yes"
"dtype", "datatype of typecast", "Datatype", "Yes"
"output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "No"
)doc"
);
m_tensor.def("concat", &concat,
py::arg("input_tensors").noconvert(), py::arg("dim") = 0, py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, R"doc(
Concatenates shape of tensors ``arg0`` and ``arg1`` to new shape ``[W, Z, Y, X]`` along the specified dimension ``arg1``.
Expand Down
Loading