#7511: Use 1d matmul if any dim is 1 tile, adjust fidelity and tests slightly
bbradelTT committed May 11, 2024
1 parent 1a35e85 commit 780b0f3
Showing 4 changed files with 6 additions and 5 deletions.
@@ -10,7 +10,7 @@
 @pytest.mark.parametrize(
     "batch_size, test, expected_perf",
     [
-        [1, "BFLOAT16-L1-falcon_7b-layers_32-prefill_seq256", 3.49],
+        [1, "BFLOAT16-L1-falcon_7b-layers_32-prefill_seq256", 3.44],
         [32, "BFLOAT16-L1-falcon_7b-layers_32-decode_batch32", 139],
     ],
 )
2 changes: 1 addition & 1 deletion tests/ttnn/unit_tests/operations/test_linear.py
@@ -243,4 +243,4 @@ def test_bloom_ff2_linear(device):
         dtype=ttnn.bfloat16,
     )
 
-    assert ttnn.pearson_correlation_coefficient(torch_output, output) >= 0.9989
+    assert ttnn.pearson_correlation_coefficient(torch_output, output) >= 0.9988
3 changes: 2 additions & 1 deletion tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp
@@ -1229,6 +1229,7 @@ MatmulProgramConfig create_matmul_program_config(const Tensor& input_tensor_a, c
     uint32_t batch_size_a = get_batch_size(a_padded_shape);
     uint32_t batch_size_b = get_batch_size(b_padded_shape);
     bool input_b_is_batched = batch_size_b > 1;
+    bool any_size_within_tile = k_size <= ttnn::TILE_SIZE || m_size <= ttnn::TILE_SIZE || n_size <= ttnn::TILE_SIZE;
     auto input_tensor_a_memory_config = input_tensor_a.memory_config();
     auto input_tensor_b_memory_config = input_tensor_b.memory_config();
     bool fp32_dest_acc_en = bmm_op_utils::get_fp32_dest_acc_en(compute_kernel_config);
@@ -1285,7 +1286,7 @@ MatmulProgramConfig create_matmul_program_config(const Tensor& input_tensor_a, c
     auto height = batch_size_a * m_size;
     auto width = n_size;
     auto height_width_ratio = (height > width) ? height / width : width / height;
-    if (height_width_ratio > 8) {
+    if (height_width_ratio > 8 || any_size_within_tile) {
         return create_matmul_1d_systolic_array_program_config(a_shape, b_shape, core_coord, fused_activation, fp32_dest_acc_en);
     }
     if (!a_is_sharded) {
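
For reference, a minimal standalone C++ sketch of the dispatch rule introduced by this hunk, under the assumption that ttnn tiles are 32x32; the name should_use_1d_matmul and the free-function form are illustrative placeholders, not the actual create_matmul_program_config implementation. It shows that the 1D systolic-array config is now chosen when the output is strongly rectangular or when any of M/K/N fits within a single tile.

#include <cstdint>

// Assumption: ttnn tiles are 32x32, so a dimension "within a tile" is <= 32.
constexpr uint32_t TILE_SIZE = 32;

// Sketch of the condition after this commit: fall back to the 1D
// systolic-array matmul program config when the output is strongly
// rectangular (height/width ratio above 8) or any dimension is at most
// one tile.
bool should_use_1d_matmul(uint32_t m_size, uint32_t k_size, uint32_t n_size, uint32_t batch_size_a) {
    bool any_size_within_tile =
        k_size <= TILE_SIZE || m_size <= TILE_SIZE || n_size <= TILE_SIZE;
    uint32_t height = batch_size_a * m_size;
    uint32_t width = n_size;
    uint32_t height_width_ratio = (height > width) ? height / width : width / height;
    return height_width_ratio > 8 || any_size_within_tile;
}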
4 changes: 2 additions & 2 deletions tt_eager/tt_dnn/op_library/bmm/bmm_op.hpp
@@ -401,8 +401,8 @@ inline Tensor matmul(
     const auto& input_tensor_a = input_tensors.at(0);
     const auto& input_tensor_b = input_tensors.at(1);
     auto arch = input_tensor_a.device()->arch();
-    const auto program_config_default = is_program_config_default(program_config);
-    auto math_fidelity = program_config_default ? MathFidelity::HiFi2 : MathFidelity::LoFi;
+    const auto increase_fidelity = is_program_config_default(program_config) || user_core_coord.has_value();
+    auto math_fidelity = increase_fidelity ? MathFidelity::HiFi2 : MathFidelity::LoFi;
     auto kernel_config_val = init_device_compute_kernel_config(arch, compute_kernel_config, math_fidelity);
     bool broadcast_batch = get_broadcast_batch(input_tensor_a, input_tensor_b, program_config);
     auto matmul_program_config = program_config;
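
A hedged sketch of the fidelity selection after this change; the helper name, its boolean parameters, and the trimmed-down enum are placeholders rather than the exact ttnn types. Fidelity is raised to HiFi2 when the program config was left at its default or the caller supplied an explicit core grid, and otherwise stays at LoFi.

enum class MathFidelity { LoFi, HiFi2 };

// Placeholder helper illustrating the new rule: HiFi2 when the program
// config is the default OR a user core coordinate was provided; LoFi
// otherwise.
MathFidelity choose_math_fidelity(bool program_config_is_default, bool user_core_coord_has_value) {
    const bool increase_fidelity = program_config_is_default || user_core_coord_has_value;
    return increase_fidelity ? MathFidelity::HiFi2 : MathFidelity::LoFi;
}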
