Skip to content

Commit

Permalink
#14466: cleanup unary composite (#14682)
Browse files Browse the repository at this point in the history
### Ticket
Link to Github Issue #14466

### Problem description
Use binary tensor-scalar support in composite ops instead of creation
ops

### What's changed
Wherever binary tensor-scalar support is possible, the usage of creation
ops has been removed.

on main
op,count,python min dispatch time (ms),python mean dispatch
time (ms),python mean dispatch + sync time (ms),C++ mean dispatch time
(ms)
ttnn.cosh,800,0.178,0.184,0.71,0.061
ttnn.hardsigmoid,800,0.185,0.212,0.561,0.056
ttnn.lgamma,120,1.279,1.403,5.029,0.493
ttnn.sinh,800,0.179,0.191,0.71,0.062

vs 
on branch
ttnn.cosh,800,0.137,0.144,0.576,0.051
ttnn.hardsigmoid,800,0.091,0.097,0.29,0.031
ttnn.lgamma,120,1.208,1.225,4.953,0.464
ttnn.sinh,800,0.136,0.141,0.576,0.05

### Checklist
- [x] Post commit CI passes
https://github.com/tenstorrent/tt-metal/actions/runs/11741081069
https://github.com/tenstorrent/tt-metal/actions/runs/11766921873
- [x] Nightly FD
https://github.com/tenstorrent/tt-metal/actions/runs/11741696706
https://github.com/tenstorrent/tt-metal/actions/runs/11766922914
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [x] New/Existing tests provide coverage for changes
  • Loading branch information
KalaivaniMCW authored Nov 10, 2024
1 parent b75f637 commit 486862f
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 28 deletions.
14 changes: 6 additions & 8 deletions tests/ttnn/unit_tests/operations/eltwise/test_ternary.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@
from tests.ttnn.utils_for_testing import assert_with_pcc


def torch_mac(input, tensor1, tensor2):
    """Reference multiply-accumulate: input * tensor1 + tensor2."""
    product = torch.mul(input, tensor1)
    return torch.add(product, tensor2)


@pytest.mark.parametrize("h", [64])
@pytest.mark.parametrize("w", [128])
def test_mac_all_tensors(device, h, w):
Expand All @@ -23,7 +19,9 @@ def test_mac_all_tensors(device, h, w):
torch_input_tensor = torch.rand((h, w), dtype=torch.bfloat16)
torch_input_tensor1 = torch.rand((h, w), dtype=torch.bfloat16)
torch_input_tensor2 = torch.rand((h, w), dtype=torch.bfloat16)
torch_output_tensor = torch_mac(torch_input_tensor, torch_input_tensor1, torch_input_tensor2)

golden_fn = ttnn.get_golden_function(ttnn.mac)
torch_output_tensor = golden_fn(torch_input_tensor, torch_input_tensor1, torch_input_tensor2)

input_tensor = ttnn.from_torch(torch_input_tensor, layout=ttnn.TILE_LAYOUT, device=device)
input_tensor = ttnn.to_device(input_tensor, device)
Expand All @@ -49,9 +47,9 @@ def test_mac_tensor_with_2_scalaras(device, h, w, scalar1, scalar2):
torch_input_tensor = torch.rand((h, w), dtype=torch.bfloat16)
torch_input_tensor1 = scalar1
torch_input_tensor2 = scalar2
torch_output_tensor = torch.unsqueeze(
torch.unsqueeze(torch_mac(torch_input_tensor, torch_input_tensor1, torch_input_tensor2), 0), 0
)

golden_fn = ttnn.get_golden_function(ttnn.mac)
torch_output_tensor = golden_fn(torch_input_tensor, torch_input_tensor1, torch_input_tensor2)

input_tensor = ttnn.from_torch(torch_input_tensor, layout=ttnn.TILE_LAYOUT, device=device)
input_tensor = ttnn.to_device(input_tensor, device)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,9 @@ Tensor _mac(const Tensor& a, const Tensor& b, const Tensor& c, const std::option
return ttnn::add(ttnn::multiply(a, b), c);
}

// y = a * b + c
// Scalar overload of mac: multiply tensor `a` by scalar `b`, then add scalar `c`.
// Uses the binary tensor-scalar overloads of multiply/add directly, avoiding the
// intermediate scalar tensors (create_scalar + deallocate) the old version built.
Tensor _mac_overload(const Tensor& a, float b, float c, const std::optional<MemoryConfig>& output_mem_config) {
    return ttnn::add(ttnn::multiply(a, b, std::nullopt, output_mem_config), c, std::nullopt, output_mem_config);
}

} // namespace ttnn::operations::ternary
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,7 @@ Tensor _cosh(const Tensor& input_a, const std::optional<MemoryConfig>& output_me
Tensor nr_term = ttnn::add(e_pos_x, e_neg_x, std::nullopt, output_mem_config);
e_pos_x.deallocate();
e_neg_x.deallocate();
Tensor scalar = ttnn::full_like(input_a, 0.5f);
return ttnn::multiply(nr_term, scalar, std::nullopt, output_mem_config);
return ttnn::multiply(nr_term, 0.5f, std::nullopt, output_mem_config);
}

// TODO: In future will uplift the op once the floor and tan has supported.
Expand Down Expand Up @@ -294,12 +293,10 @@ Tensor _lgamma(const Tensor& x, const std::optional<MemoryConfig>& output_mem_c
result = ttnn::subtract(result, t, std::nullopt, output_mem_config);
{
{
Tensor t_one = ttnn::full_like(x, 1.0f);
result = ttnn::where(ttnn::eq(x, t_one, std::nullopt, output_mem_config), 0.0f, result);
result = ttnn::where(ttnn::eq(x, 1.0f, std::nullopt, output_mem_config), 0.0f, result);
}
{
Tensor t_two = ttnn::full_like(x, 2.0f);
result = ttnn::where(ttnn::eq(x, t_two, std::nullopt, output_mem_config), 0.0f, result);
result = ttnn::where(ttnn::eq(x, 2.0f, std::nullopt, output_mem_config), 0.0f, result);
}
}
}
Expand All @@ -309,8 +306,7 @@ Tensor _lgamma(const Tensor& x, const std::optional<MemoryConfig>& output_mem_c
// log1p 1
// use transformation y = log(1.0 + x) by broadcast
// log1p(x) = log(1 + x)
// Uses the tensor-scalar add overload, so no ones-like tensor is materialized.
Tensor _log1p(const Tensor& x, const std::optional<MemoryConfig>& output_mem_config) {
    Tensor x_1 = ttnn::add(x, 1.0f, std::nullopt, output_mem_config);
    Tensor result_log1p = ttnn::log(x_1, output_mem_config);
    return result_log1p;
}
Expand Down Expand Up @@ -350,8 +346,7 @@ Tensor _sinh(const Tensor& input_a, const std::optional<MemoryConfig>& output_me
Tensor nr_term = ttnn::subtract(e_pos_x, e_neg_x, std::nullopt, output_mem_config);
e_pos_x.deallocate();
e_neg_x.deallocate();
Tensor scalar = ttnn::full_like(input_a, 0.5f);
return ttnn::multiply(nr_term, scalar, std::nullopt, output_mem_config);
return ttnn::multiply(nr_term, 0.5f, std::nullopt, output_mem_config);
}

// Function: softsign
Expand All @@ -372,8 +367,8 @@ Tensor _swish(const Tensor& a, const std::optional<MemoryConfig>& output_mem_con

Tensor ExecuteTrunc::invoke(uint8_t queue_id, const Tensor& input, const std::optional<MemoryConfig>& output_mem_config, std::optional<Tensor> output_tensor) {
auto arch = input.device()->arch();
output_tensor = output_tensor.value_or(ttnn::empty_like(input));
TT_FATAL(arch != tt::ARCH::GRAYSKULL, "Op is not supported on Grayskull");
output_tensor = output_tensor.value_or(ttnn::empty_like(input));
Tensor floor_res = ttnn::floor(queue_id, input, output_mem_config);
ttnn::where(queue_id, ttnn::ne(queue_id, input, floor_res), ttnn::add(queue_id, floor_res, 1.0f, std::nullopt, output_mem_config), floor_res, output_mem_config, output_tensor);
ttnn::where(queue_id, ttnn::gtz(queue_id, input, output_mem_config), floor_res, output_tensor.value(), output_mem_config, output_tensor);
Expand Down Expand Up @@ -449,9 +444,7 @@ Tensor _normalize(const Tensor& y, const std::optional<MemoryConfig>& output_mem
// PyTorch version:
// hard sigmoid(x) = { x <= -3: 0, x >= +3: +3, x/6 + 0.5 otherwise}
// hardsigmoid(x) = clip(x * value_1 + value_2), PyTorch-style piecewise-linear sigmoid.
// Uses the scalar overload of mac, so no full_like scalar tensors are created.
Tensor _hardsigmoid(const Tensor& a, float value_1, float value_2, const std::optional<MemoryConfig>& output_mem_config) {
    Tensor a_mac = ttnn::mac(a, value_1, value_2);  // multiply and add.
    // NOTE(review): relu_max presumably clamps the result into [0, 1] — confirm its semantics.
    Tensor a_clip = relu_max(a_mac, 1.0f);
    return a_clip;
}
Expand Down

0 comments on commit 486862f

Please sign in to comment.