Skip to content

Commit

Permalink
#0: Add support for typecast
Browse files Browse the repository at this point in the history
  • Loading branch information
bharane-ab committed Dec 8, 2023
1 parent 8d2ca5c commit 8f2e54a
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 19 deletions.
2 changes: 2 additions & 0 deletions docs/source/ttnn/dependencies/tt_lib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,8 @@ but in general retaining the data.

.. autofunction:: tt_lib.tensor.clone

.. autofunction:: tt_lib.tensor.typecast

.. autofunction:: tt_lib.tensor.copy

Tensor creation operations
Expand Down
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/op_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
"tt_lib_op": tt_lib_ops.clone,
"pytorch_op": pytorch_ops.clone,
},
"typecast": {
"tt_lib_op": tt_lib_ops.typecast,
"pytorch_op": pytorch_ops.typecast,
},
"copy": {
"tt_lib_op": tt_lib_ops.copy,
"pytorch_op": pytorch_ops.copy,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
@pytest.mark.parametrize(
"input_shapes",
[
[[1, 1, 1, 30]], # Single core
[[1, 1, 300, 380]], # multi core
[[1, 3, 320, 380]], # multi core
[[1, 1, 32, 32]], # Single core
[[1, 1, 320, 384]], # Multi core
[[1, 3, 320, 384]], # Multi core
# [[1, 1, 300, 380]], # multi core
# [[1, 3, 320, 380]], # multi core
# [[1, 1, 32, 32]], # Single core
# [[1, 1, 320, 384]], # Multi core
# [[1, 3, 320, 384]], # Multi core
],
)
@pytest.mark.parametrize(
Expand Down Expand Up @@ -140,3 +140,29 @@ def test_run_clone_op(
device,
test_args,
)

def test_run_typecast_op(
    self,
    input_shapes,
    input_mem_config,
    dst_mem_config,
    device,
    function_level_defaults,
):
    # Sweep test for tt_lib.tensor.typecast: runs the TT op against the
    # torch reference (pytorch_ops.typecast) via run_single_pytorch_test.
    # The input is a constant tensor filled with pi, generated host-side as
    # float32, so the cast result is deterministic and easy to compare.
    datagen_func = [
        generation_funcs.gen_func_with_cast(
            partial(generation_funcs.gen_constant, constant=torch.pi), torch.float32
        )
    ]
    # Start from the default dtype/layout/device args for these shapes, then
    # override the memory configs supplied by the parametrize fixtures.
    test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
    test_args["input_mem_config"] = [input_mem_config]
    test_args.update({"output_mem_config": dst_mem_config})
    # PCC comparison tolerates the precision change a typecast introduces.
    # NOTE(review): if the cast is expected to be lossless, comp_equal would
    # be the stricter check — confirm intent.
    comparison_func = comparison_funcs.comp_pcc
    run_single_pytorch_test(
        "typecast",
        input_shapes,
        datagen_func,
        comparison_func,
        device,
        test_args,
    )
5 changes: 5 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def clone(x, *args, **kwargs):
return torch.clone(x)


def typecast(x, *args, **kwargs):
    """Golden reference for the typecast op: cast ``x`` to float32.

    Extra positional/keyword arguments are accepted (the sweep harness
    forwards op configuration) but ignored by the torch reference.
    """
    target_dtype = torch.float32
    return x.to(target_dtype)


def move(x, *args, **kwargs):
    # Golden reference for the device "move" op: moving a tensor between
    # buffers does not change its data, so the reference is the identity.
    return x

Expand Down Expand Up @@ -1035,6 +1039,7 @@ def eltwise_rsub(x, *args, **kwargs):
dim = kwargs["factor"]
return torch.sub(dim, x)


def eltwise_identity(x, *args, **kwargs):
    # Golden reference for the eltwise identity op: output equals input.
    return x

Expand Down
18 changes: 18 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,23 @@ def clone(
return tt2torch_tensor(t1)


@setup_host_and_device
def typecast(
    x,
    *args,
    device,
    dtype,
    layout,
    input_mem_config,
    output_mem_config,
    **kwargs,
):
    # TT-side implementation of the "typecast" sweep op: place the torch
    # tensor x on the device with the requested layout / memory config /
    # dtype, run tt_lib's typecast, and return the result as a torch tensor
    # for comparison against pytorch_ops.typecast.
    t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
    # NOTE(review): ttl.tensor.typecast takes no target-dtype argument here;
    # the output dtype is decided inside the underlying op — confirm it
    # matches the float32 reference used by the pytorch golden function.
    t1 = ttl.tensor.typecast(t0, output_mem_config=output_mem_config)

    return tt2torch_tensor(t1)


@setup_host_and_device
def move(
x,
Expand Down Expand Up @@ -1702,6 +1719,7 @@ def eltwise_rsub(

return tt2torch_tensor(t1)


@setup_host_and_device
def eltwise_identity(
x,
Expand Down
2 changes: 1 addition & 1 deletion tt_eager/tt_dnn/op_library/copy/copy_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ std::vector<Tensor> Copy::create_output_tensors(const std::vector<Tensor> &input
return {};
} else {
const auto& input_tensor = input_tensors.at(0);
return operation::generic_create_output_tensors(*this, input_tensors, input_tensor.dtype(), input_tensor.layout(), this->output_mem_config);
return operation::generic_create_output_tensors(*this, input_tensors, tt::tt_metal::DataType::BFLOAT16, input_tensor.layout(), this->output_mem_config);
}
}

Expand Down
4 changes: 4 additions & 0 deletions tt_eager/tt_dnn/op_library/copy/copy_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ inline Tensor clone(const Tensor& input_tensor, const MemoryConfig& output_mem_c
return operation::run(Copy{output_mem_config}, {input_tensor}).at(0);
}

// Returns a new tensor produced by running the Copy op on input_tensor.
// NOTE(review): body is identical to clone() above — the output dtype is
// whatever Copy::create_output_tensors selects, not a caller-specified
// target type; confirm this is the intended typecast contract (and that
// clone() is unaffected by any dtype hard-coding inside Copy).
inline Tensor typecast(const Tensor& input_tensor, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG) {
    return operation::run(Copy{output_mem_config}, {input_tensor}).at(0);
}

//unary assign
inline Tensor assign(const Tensor& input_a, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG) {
return operation::run(Copy{output_mem_config}, {input_a}).at(0);
Expand Down
29 changes: 16 additions & 13 deletions tt_eager/tt_dnn/op_library/copy/single_core/copy_op_single_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "tt_metal/host_api.hpp"
#include "tt_metal/common/constants.hpp"
#include "tt_metal/detail/util.hpp"
#include "tt_metal/common/tt_backend_api_types.hpp"
#include "common/bfloat8.hpp"

using namespace tt::constants;

Expand Down Expand Up @@ -35,18 +37,19 @@ operation::ProgramWithCallbacks copy_single_core(const Tensor &input, const Tens
uint32_t src0_cb_index = CB::c_in0;
uint32_t num_input_units = 2;
uint32_t aligned_input_unit_size = round_up_to_mul32(input_unit_size);
uint32_t aligned_output_unit_size = round_up_to_mul32(output_unit_size);
tt_metal::CircularBufferConfig cb_src0_config = tt_metal::CircularBufferConfig(num_input_units * aligned_input_unit_size, {{src0_cb_index, input_cb_data_format}})
.set_page_size(src0_cb_index, aligned_input_unit_size);
auto cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config);

uint32_t output_cb_index = src0_cb_index; // same as input cb
/* If we need dataformat conversion, use output buffer + compute kernel
// uint32_t output_cb_index = src0_cb_index; // same as input cb
// If we need dataformat conversion, use output buffer + compute kernel
uint32_t output_cb_index = 16; // output operands start at index 16
uint32_t num_output_tiles = 2;
tt_metal::CircularBufferConfig output_cb_config = tt_metal::CircularBufferConfig(num_output_tiles * output_single_tile_size, {{output_cb_index, output_cb_data_format}})
.set_page_size(output_cb_index, output_single_tile_size);
tt_metal::CircularBufferConfig output_cb_config = tt_metal::CircularBufferConfig(num_output_tiles * aligned_output_unit_size, {{output_cb_index, output_cb_data_format}})
.set_page_size(output_cb_index, aligned_output_unit_size);
auto cb_output = tt_metal::CreateCircularBuffer(program, core, output_cb_config);
*/
// */

auto src_buffer = input.buffer();
auto dst_buffer = output.buffer();
Expand Down Expand Up @@ -96,22 +99,22 @@ operation::ProgramWithCallbacks copy_single_core(const Tensor &input, const Tens
core,
tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default, .compile_args = writer_compile_time_args, .defines = kernel_defines});

/* If we need dataformat conversion, use compute kernel
bool fp32_dest_acc_en = false;
bool math_approx_mode = false;
vector<uint32_t> compute_kernel_args = {
num_tiles
uint(num_units) // per_core_tile_cnt
};

auto eltwise_unary_kernel = tt_metal::CreateKernel(
program,
"tt_eager/tt_dnn/kernels/compute/eltwise_copy.cpp",
core,
compute_kernel_args,
MathFidelity::HiFi4,
fp32_dest_acc_en,
math_approx_mode
);
*/
tt_metal::ComputeConfig{
.math_fidelity = MathFidelity::HiFi4,
.fp32_dest_acc_en = fp32_dest_acc_en,
.math_approx_mode = math_approx_mode,
.compile_args = compute_kernel_args
});

if (tilized) {
SetRuntimeArgs(
Expand Down
1 change: 1 addition & 0 deletions tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_dm_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ namespace tt::tt_metal::detail{
detail::export_enum<BcastOpDim>(m_tensor);

detail::bind_unary_op(m_tensor, "clone", &clone, R"doc( Returns a new tensor which is a new copy of input tensor ``{0}``.)doc");
detail::bind_unary_op(m_tensor, "typecast", &typecast, R"doc( Returns a new tensor which is a new copy of input tensor with new datatype``{0}``.)doc");
detail::bind_binary_op<false, false, false>(m_tensor, "copy", &copy, R"doc( Copies the elements from ``{0}`` into ``{1}``. ``{1}`` is modified in place.)doc");

// *** tensor manipulation ***
Expand Down

0 comments on commit 8f2e54a

Please sign in to comment.