#8415: fix ttnn async multi-device for binary_op with scalar
cfjchu committed May 13, 2024 · commit 33c0d9c · 1 parent 90955b9
Showing 2 changed files with 27 additions and 3 deletions.
tests/ttnn/unit_tests/test_multi_device_async.py (26 additions, 2 deletions)

@@ -12,8 +12,7 @@


 #######
-# Multi-Device Tensor tests running in async mode
-#######
+# Multi-Device Tensor tests running in async mode #######


 @pytest.mark.parametrize("layout", [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT])
@@ -283,3 +282,28 @@ def test_multi_device_explicit_dealloc(pcie_device_mesh):

     for device in pcie_device_mesh.get_device_ids():
         pcie_device_mesh.get_device(device).enable_async(False)
+
+
+@pytest.mark.parametrize("scalar", [3])
+@pytest.mark.parametrize("size", [64])
+@pytest.mark.parametrize("pcie_device_mesh", [2], indirect=True)
+def test_add_1D_tensor_and_scalar(pcie_device_mesh, scalar, size):
+    torch.manual_seed(0)
+
+    for device in pcie_device_mesh.get_device_ids():
+        pcie_device_mesh.get_device(device).enable_async(True)
+
+    torch_input_tensor = torch.rand((size,), dtype=torch.bfloat16)
+    torch_output_tensor = torch_input_tensor + scalar
+
+    input_tensor = ttnn.from_torch(
+        torch_input_tensor,
+        layout=ttnn.TILE_LAYOUT,
+        device=pcie_device_mesh,
+        mesh_mapper=ttnn.ReplicateTensorToMesh(pcie_device_mesh),
+    )
+    output_tensor = input_tensor + scalar
+    output_tensors = ttnn.to_torch(output_tensor, mesh_composer=ttnn.ListMeshToTensor(pcie_device_mesh))
+    for output_tensor in output_tensors:
+        assert ttnn.pearson_correlation_coefficient(torch_output_tensor, output_tensor) >= 0.99988
+        assert output_tensor.shape == (1, size)
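The new test exercises exactly the path named in the commit title: it replicates one 64-element bfloat16 tensor onto every device of a 2-device PCIe mesh via ttnn.ReplicateTensorToMesh, turns async mode on for each device, adds a Python scalar, and then checks every per-device output against the single-device torch reference, both numerically (PCC >= 0.99988) and for shape, which comes back as a 2-D (1, size) tensor rather than the original 1-D (size,). On a multi-device machine it can be run in isolation with pytest tests/ttnn/unit_tests/test_multi_device_async.py -k test_add_1D_tensor_and_scalar.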
ttnn/cpp/ttnn/op_library/binary/binary_op.hpp (1 addition, 1 deletion)

@@ -165,7 +165,7 @@ struct Binary {
             ttnn::Shape(std::array<std::uint32_t, 2>{1, 1}, std::array<std::uint32_t, 2>{TILE_HEIGHT, TILE_WIDTH}),
             DataType::BFLOAT16,
             Layout::TILE);
-        Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.get_workers());
+        Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.device());
         // TODO(arakhmati): #7637 pass in memory_config instead of operation::DEFAULT_OUTPUT_MEMORY_CONFIG
         return Binary::execute(
             input_tensor_a, scalar_tensor_device, operation::DEFAULT_OUTPUT_MEMORY_CONFIG, dtype, activations);
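The commit carries no rationale beyond its title, so the following annotation is an editor's hedged reading, not the author's documented reasoning. In tt-metal, Tensor::get_workers() returns the vector of worker devices associated with a tensor, while Tensor::device() returns the single device that owns the tensor's buffer; the sketch below restates the changed line with that distinction spelled out in comments.

// Editor's sketch, not part of the commit: the scalar-broadcast step the
// hunk above changes, with the before/after variants annotated.
// scalar_tensor_host is the 1x1-tile BFLOAT16 host tensor built in the
// context lines above; input_tensor_a is the non-scalar operand.

// Removed: hand the host-to-device copy to the tensor's worker devices.
// Per the commit title, this path broke binary_op with a scalar under
// ttnn async multi-device (a plausible mechanism, unconfirmed: the op
// body already runs per worker on per-device shards, so dispatching
// through get_workers() again from that context misbehaves).
//   Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.get_workers());

// Added: copy the scalar tile straight to the device that owns
// input_tensor_a's buffer, which is well-defined for a per-device shard.
Tensor scalar_tensor_device = scalar_tensor_host.to(input_tensor_a.device());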