#8415: fix ttnn async multi-device for binary_op with scalar
cfjchu committed May 13, 2024 · commit 33c0d9c · 1 parent 90955b9
Showing 2 changed files with 27 additions and 3 deletions.
tests/ttnn/unit_tests/test_multi_device_async.py (26 additions, 2 deletions)

@@ -12,8 +12,7 @@


 #######
-# Multi-Device Tensor tests running in async mode
-#######
+# Multi-Device Tensor tests running in async mode #######


 @pytest.mark.parametrize("layout", [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT])
@@ -283,3 +282,28 @@ def test_multi_device_explicit_dealloc(pcie_device_mesh):

     for device in pcie_device_mesh.get_device_ids():
         pcie_device_mesh.get_device(device).enable_async(False)
+
+
+@pytest.mark.parametrize("scalar", [3])
+@pytest.mark.parametrize("size", [64])
+@pytest.mark.parametrize("pcie_device_mesh", [2], indirect=True)
+def test_add_1D_tensor_and_scalar(pcie_device_mesh, scalar, size):
+    torch.manual_seed(0)
+
+    for device in pcie_device_mesh.get_device_ids():
+        pcie_device_mesh.get_device(device).enable_async(True)
+
+    torch_input_tensor = torch.rand((size,), dtype=torch.bfloat16)
+    torch_output_tensor = torch_input_tensor + scalar
+
+    input_tensor = ttnn.from_torch(
+        torch_input_tensor,
+        layout=ttnn.TILE_LAYOUT,
+        device=pcie_device_mesh,
+        mesh_mapper=ttnn.ReplicateTensorToMesh(pcie_device_mesh),
+    )
+    output_tensor = input_tensor + scalar
+    output_tensors = ttnn.to_torch(output_tensor, mesh_composer=ttnn.ListMeshToTensor(pcie_device_mesh))
+    for output_tensor in output_tensors:
+        assert ttnn.pearson_correlation_coefficient(torch_output_tensor, output_tensor) >= 0.99988
+        assert output_tensor.shape == (1, size)
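The new test exercises exactly the path named in the commit title: it replicates one 64-element bfloat16 tensor onto every device of a 2-device PCIe mesh via ttnn.ReplicateTensorToMesh, turns async mode on for each device, adds a Python scalar, and then checks every per-device output against the single-device torch reference, both numerically (PCC >= 0.99988) and for shape, which comes back as a 2-D (1, size) tensor rather than the original 1-D (size,). On a multi-device machine it can be run in isolation with pytest tests/ttnn/unit_tests/test_multi_device_async.py -k test_add_1D_tensor_and_scalar.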
ttnn/cpp/ttnn/op_library/binary/binary_op.hpp (1 addition, 1 deletion)

@@ -165,7 +165,7 @@ struct Binary {
             ttnn::Shape(std::array<std::uint32_t, 2>{1, 1}, std::array<std::uint32_t, 2>{TILE_HEIGHT, TILE_WIDTH}),
             DataType::BFLOAT16,
             Layout::TILE);
-        Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.get_workers());
+        Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.device());
         // TODO(arakhmati): #7637 pass in memory_config instead of operation::DEFAULT_OUTPUT_MEMORY_CONFIG
         return Binary::execute(
             input_tensor_a, scalar_tensor_device, operation::DEFAULT_OUTPUT_MEMORY_CONFIG, dtype, activations);
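The commit carries no rationale beyond its title, so the following annotation is an editor's hedged reading, not the author's documented reasoning. In tt-metal, Tensor::get_workers() returns the vector of worker devices associated with a tensor, while Tensor::device() returns the single device that owns the tensor's buffer; the sketch below restates the changed line with that distinction spelled out in comments.

// Editor's sketch, not part of the commit: the scalar-broadcast step the
// hunk above changes, with the before/after variants annotated.
// scalar_tensor_host is the 1x1-tile BFLOAT16 host tensor built in the
// context lines above; input_tensor_a is the non-scalar operand.

// Removed: hand the host-to-device copy to the tensor's worker devices.
// Per the commit title, this path broke binary_op with a scalar under
// ttnn async multi-device (a plausible mechanism, unconfirmed: the op
// body already runs per worker on per-device shards, so dispatching
// through get_workers() again from that context misbehaves).
//   Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.get_workers());

// Added: copy the scalar tile straight to the device that owns
// input_tensor_a's buffer, which is well-defined for a per-device shard.
Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.device());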