Skip to content

Commit

Permalink
#8682: Add forward support for unary fmod (#9579)
Browse files Browse the repository at this point in the history
Co-authored-by: mouliraj-mcw <[email protected]>
  • Loading branch information
mcw-anasuya and mouliraj-mcw authored Jun 25, 2024
1 parent 24b210a commit 95f8ddc
Show file tree
Hide file tree
Showing 15 changed files with 224 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/source/ttnn/ttnn/dependencies/tt_lib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,8 @@ Tensor elementwise operations
.. autofunction:: tt_lib.tensor.unary_remainder

.. autofunction:: tt_lib.tensor.remainder

.. autofunction:: tt_lib.tensor.unary_fmod

.. autofunction:: tt_lib.tensor.fmod

Expand Down
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/op_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,10 @@
"tt_op": tt_lib_ops.eltwise_fmod,
"pytorch_op": pytorch_ops.fmod,
},
"eltwise-unary_fmod": {
"tt_op": tt_lib_ops.eltwise_unary_fmod,
"pytorch_op": pytorch_ops.unary_fmod,
},
"eltwise-unary_ne": {
"tt_op": tt_lib_ops.eltwise_unary_ne,
"pytorch_op": pytorch_ops.unary_ne,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,36 @@ def test_run_eltwise_unary_remainder(
test_args,
)

@skip_for_grayskull("#ToDo: GS implementation needs to be done for fmod op")
def test_run_eltwise_unary_fmod(
self,
input_shapes,
device,
function_level_defaults,
input_mem_config,
output_mem_config,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-1e5, high=1e5), torch.bfloat16)
]
test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
test_args.update({"value": np.random.randint(-100, 100) + 0.5})
test_args.update(
{
"input_mem_config": [input_mem_config],
"output_mem_config": output_mem_config,
}
)
comparison_func = comparison_funcs.comp_pcc
run_single_pytorch_test(
"eltwise-unary_fmod",
input_shapes,
datagen_func,
comparison_func,
device,
test_args,
)

@pytest.mark.parametrize("unary_comp", ["unary_ne"])
@pytest.mark.parametrize("scalar", [0.5, 1.0, -1.0, 0.0])
def test_run_eltwise_unary_comp(
Expand Down
6 changes: 6 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/pytorch_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,12 @@ def fmod(x, y, *args, **kwargs):
return result


def unary_fmod(x, *args, **kwargs):
value = kwargs.pop("value")
result = torch.fmod(x, value)
return result


def unary_ne(x, *args, **kwargs):
value = kwargs.pop("scalar")
result = torch.ne(x, value)
Expand Down
18 changes: 18 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1296,6 +1296,24 @@ def eltwise_unary_remainder(
return tt2torch_tensor(t1)


@setup_host_and_device
def eltwise_unary_fmod(
x,
*args,
value,
device,
dtype,
layout,
input_mem_config,
output_mem_config,
**kwargs,
):
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = ttl.tensor.unary_fmod(t0, value, output_mem_config=output_mem_config)

return tt2torch_tensor(t1)


@setup_host_and_device
def eltwise_heaviside(
x,
Expand Down
11 changes: 11 additions & 0 deletions tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ void update_macro_defines(UnaryOpType op_type, std::map<std::string, std::string
case UnaryOpType::FLOOR: defines["SFPU_OP_FLOOR_INCLUDE"] = "1"; break;
case UnaryOpType::LEFT_SHIFT: defines["SFPU_OP_LEFT_SHIFT_INCLUDE"] = "1"; break;
case UnaryOpType::REMAINDER: defines["SFPU_OP_REMAINDER_INCLUDE"] = "1"; break;
case UnaryOpType::FMOD: defines["SFPU_OP_FMOD_INCLUDE"] = "1"; break;
default: defines["SFPU_OP_COMPUTE_KERNEL_API_INCLUDE"] = "1"; break;
};
}
Expand Down Expand Up @@ -137,6 +138,11 @@ std::pair<string, string> get_op_init_and_func_parameterized(
"remainder_tile_init();",
fmt::format("remainder_tile({}, {}u, {}u);", idst, Converter::to_hex(param0), Converter::to_hex(1.0f/param0))};
break;
case UnaryOpType::FMOD:
op_init_and_name = {
"fmod_tile_init();",
fmt::format("fmod_tile({}, {}u, {}u);", idst, Converter::to_hex(param0), Converter::to_hex(1.0f/param0))};
break;
case UnaryOpType::EXP:
op_init_and_name = {
fmt::format("exp_tile_init<{}u>();", std::to_string((uint32_t)param0)),
Expand Down Expand Up @@ -365,6 +371,11 @@ inline void validate_supported_arch_dtype(tt::ARCH arch, DataType input_datatype
TT_FATAL(input_datatype == DataType::INT32, "Data type is not supported for Bitwise operations");
TT_FATAL(output_datatype == DataType::INT32, "Data type is not supported for Bitwise operations");
break;
case UnaryOpType::FMOD:
TT_FATAL(arch == tt::ARCH::WORMHOLE_B0, "Op is only supported on Wormhole");
TT_FATAL(input_datatype == DataType::BFLOAT16, "Data type is not supported for Fmod operations");
TT_FATAL(output_datatype == DataType::BFLOAT16, "Data type is not supported for Fmod operations");
break;
default:
return;
}
Expand Down
7 changes: 5 additions & 2 deletions tt_eager/tt_dnn/op_library/eltwise_unary/eltwise_unary_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ enum class UnaryOpType {
RIGHT_SHIFT,
FLOOR,
LEFT_SHIFT,
REMAINDER
REMAINDER,
FMOD
};

template <typename T>
Expand Down Expand Up @@ -116,7 +117,8 @@ bool is_parametrized_type(T val) {
case UnaryOpType::BITWISE_NOT:
case UnaryOpType::RIGHT_SHIFT:
case UnaryOpType::LEFT_SHIFT:
case UnaryOpType::REMAINDER: return true;
case UnaryOpType::REMAINDER:
case UnaryOpType::FMOD: return true;
default: return false;
}
return false;
Expand Down Expand Up @@ -424,6 +426,7 @@ constexpr auto bitwise_not = make_eltwise_unary_with_param<UnaryOpType::BITWISE_
constexpr auto right_shift = make_eltwise_unary_with_param<UnaryOpType::RIGHT_SHIFT>{};
constexpr auto left_shift = make_eltwise_unary_with_param<UnaryOpType::LEFT_SHIFT>{};
constexpr auto unary_remainder = make_eltwise_unary_with_param<UnaryOpType::REMAINDER>{};
constexpr auto unary_fmod = make_eltwise_unary_with_param<UnaryOpType::FMOD>{};
constexpr auto unary_ne = make_eltwise_unary_with_param<UnaryOpType::UNARY_NE>{};
constexpr auto rsub = make_eltwise_unary_with_param<UnaryOpType::RSUB>{};
constexpr auto silu = make_eltwise_unary<UnaryOpType::SILU>{};
Expand Down
7 changes: 7 additions & 0 deletions tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,13 @@ namespace tt::tt_metal::detail {
R"doc(Perform an eltwise-modulus operation on ``{0}`` and ``{1}``. Formula : ``a - a.div(b, rounding_mode="floor") * b`` . Support provided only for WH_B0.)doc",
R"doc("value", "float", "")doc"

);
detail::bind_unary_op_with_param(
m_tensor, "unary_fmod", unary_fmod,
py::arg("value"),
R"doc(Perform an eltwise-fmod operation on ``{0}`` and ``{1}``. Formula : ``a - a.div(b, rounding_mode="trunc") * b`` . Support provided only for WH_B0.)doc",
R"doc("value", "float", "")doc"

);
detail::bind_unary_op_with_param(
m_tensor, "unary_ne", unary_ne,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@
#include "llk_math_eltwise_unary_sfpu_remainder.h"
#include "llk_math_eltwise_unary_sfpu_bitwise_xor.h"
#include "llk_math_eltwise_unary_sfpu_bitwise_not.h"
#include "llk_math_eltwise_unary_sfpu_fmod.h"
#include "llk_math_eltwise_unary_sfpu_right_shift.h"
#include "llk_math_eltwise_unary_sfpu_left_shift.h"
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "ckernel.h"
#include "ckernel_defs.h"
#include "noc_nonblocking_api.h"
#include "ckernel_sfpu_converter.h"
#include "ckernel_sfpu_recip.h"

using namespace sfpi;
namespace ckernel {
namespace sfpu {

template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_fmod(const uint value, const uint recip) {

// SFPU microcode
Converter c_value;
c_value.u = value;
vFloat s = c_value.f;
s = sfpi::abs(s);

c_value.u = recip;
vFloat recip_val = c_value.f;
recip_val = sfpi::abs(recip_val);

#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
vFloat val = dst_reg[0];
vFloat v = sfpi::abs(val);

vFloat quotient = v*recip_val;

vInt tmp = float_to_int16(quotient); //TODO: Replace float_to_int16 to float_to_int32 once it is available
vFloat newquotient= int32_to_float(tmp);
v_if (newquotient > quotient){
newquotient = newquotient - 1;
}
v_endif;

v = v - newquotient * s;
v = setsgn(v, val);

v_if(s==0){
v = std::numeric_limits<float>::quiet_NaN();
}
v_endif;

dst_reg[0] = v;
dst_reg++;
}
}

} // namespace sfpu
} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "ckernel_sfpu_recip.h"

using namespace sfpi;

namespace ckernel {
namespace sfpu {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "ckernel_sfpu_fmod.h"
#include "llk_math_eltwise_unary_sfpu_params.h"
#include "llk_math_eltwise_unary_sfpu_init.h"

namespace ckernel {

// New LLK SFPU APIs

template <bool APPROXIMATE>
inline void llk_math_eltwise_unary_sfpu_fmod_init() {
llk_math_eltwise_unary_sfpu_init<SfpuType::fmod, APPROXIMATE>();
}

template <bool APPROXIMATE>
inline void llk_math_eltwise_unary_sfpu_fmod(uint dst_index, uint param0, uint param1, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_fmod<APPROXIMATE>,
dst_index,
vector_mode,
param0,
param1);
}

} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,6 @@ enum SfpuType {
floor,
left_shift,
remainder,
fmod,
unused,
};
47 changes: 47 additions & 0 deletions tt_metal/include/compute_kernel_api/eltwise_unary/fmod.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once


#include "compute_kernel_api/common_globals.h"
#ifdef TRISC_MATH
#include "llk_math_eltwise_unary_sfpu_fmod.h"
#define MAIN math_main()
#define MATH(x) x
#else
#define MATH(x)
#endif



namespace ckernel {

/**
* Performs element-wise fmod computation on input x by y , where x is each element of a tile
* in DST register at index tile_index. The input can be of float data type. The value is provided as const param0 The DST register buffer must be in
* acquired state via *acquire_dst* call. This call is blocking and is only
* available on the compute engine.
*
* Return value: None
*
* | Argument | Description | Type | Valid Range | Required |
* |----------------|-----------------------------------------------------------------------------|----------|-------------------------------------------------------|----------|
* | idst | The index of the tile in DST register buffer to perform fmod operation | uint32_t | Must be less than the size of the DST register buffer | True |
* | param0 | Denominator value to perform fmod operation | uint32_t | | True |
* | param1 | Reciprocal of param0, calculated on-host | uint32_t | | False |
*/


ALWI void fmod_tile(uint32_t idst, uint32_t param0, uint32_t param1) {
MATH((llk_math_eltwise_unary_sfpu_fmod<APPROX>(idst, param0, param1)));
}

/**
* Please refer to documentation for any_init.
*/
ALWI void fmod_tile_init() { MATH((llk_math_eltwise_unary_sfpu_fmod_init<APPROX>())); }


} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@
#include "compute_kernel_api/eltwise_unary/remainder.h"
#endif

#if SFPU_OP_FMOD_INCLUDE
#include "compute_kernel_api/eltwise_unary/fmod.h"
#endif

#if SFPU_OP_BINOP_WITH_SCALAR_INCLUDE
#include "compute_kernel_api/eltwise_unary/binop_with_scalar.h"
#endif
Expand Down

0 comments on commit 95f8ddc

Please sign in to comment.