From 58e7370e80da5b185c2449fc64119e89a9bb5166 Mon Sep 17 00:00:00 2001
From: Akhmed Rakhmati
Date: Thu, 30 May 2024 17:10:07 +0000
Subject: [PATCH 1/2] #5389: removed early return from validate when
 enable_fast_runtime_mode was set to true

---
 tests/ttnn/integration_tests/mistral/test_mistral_attention.py | 3 +++
 tt_eager/tt_dnn/op_library/operation.hpp                       | 3 ---
 ttnn/ttnn/__init__.py                                          | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/ttnn/integration_tests/mistral/test_mistral_attention.py b/tests/ttnn/integration_tests/mistral/test_mistral_attention.py
index efc2dc36a8b..c3a516d12df 100644
--- a/tests/ttnn/integration_tests/mistral/test_mistral_attention.py
+++ b/tests/ttnn/integration_tests/mistral/test_mistral_attention.py
@@ -2,6 +2,8 @@
 
 # SPDX-License-Identifier: Apache-2.0
 
+import pytest
+
 import torch
 import ttnn
 import tt_lib
@@ -19,6 +21,7 @@
 
 from tests.ttnn.utils_for_testing import assert_with_pcc
 
+@pytest.mark.skip(reason="https://github.com/tenstorrent/tt-metal/issues/9076")
 @skip_for_wormhole_b0()
 def test_mistral_attention_inference(model_location_generator, device, reset_seeds):
     model_path = model_location_generator("mistral-7B-v0.1", model_subdir="Mistral")
diff --git a/tt_eager/tt_dnn/op_library/operation.hpp b/tt_eager/tt_dnn/op_library/operation.hpp
index 6ef4b8fc33d..26285d0b5e8 100644
--- a/tt_eager/tt_dnn/op_library/operation.hpp
+++ b/tt_eager/tt_dnn/op_library/operation.hpp
@@ -528,9 +528,6 @@ struct DeviceOperation final {
            const Tensors& input_tensors,
            const OptionalConstTensors& optional_input_tensors,
            const OptionalTensors& optional_output_tensors) -> void {
-            if (ttnn::CONFIG.enable_fast_runtime_mode) {
-                return;
-            }
             const auto& operation = *reinterpret_cast<const std::decay_t<T>*>(&storage);
             if constexpr (
                 (detail::implements_validate<T>() or
diff --git a/ttnn/ttnn/__init__.py b/ttnn/ttnn/__init__.py
index 889a517af46..ea52b8fb386 100644
--- a/ttnn/ttnn/__init__.py
+++ b/ttnn/ttnn/__init__.py
@@ -57,7 +57,7 @@ def validate(self, name):
         if self.enable_fast_runtime_mode:
             if self.enable_logging:
                 logger.warning(
-                    "Running in fast runtime mode without logging. Please disable fast runtime mode if you want to enable logging."
+                    "Logging cannot be enabled in fast runtime mode. Please disable fast runtime mode if you want to enable logging."
                 )
 
         if name in {

From 798628cfb5fe67d056fbde54da70797d130c86fe Mon Sep 17 00:00:00 2001
From: yugaoT
Date: Mon, 3 Jun 2024 22:18:56 +0000
Subject: [PATCH 2/2] #0: fix matmul dram sharded validation

---
 tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp b/tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp
index 3100d466520..29cbae91947 100644
--- a/tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp
+++ b/tt_eager/tt_dnn/op_library/bmm/bmm_op.cpp
@@ -1041,7 +1041,7 @@ void Matmul::validate(
             // subbblock constraint
             TT_FATAL(program_config.out_subblock_w == per_core_N || program_config.out_subblock_h == 1);
             // tensor in1
-            TT_FATAL(input_tensor_b.memory_config().memory_layout == TensorMemoryLayout::INTERLEAVED);
+            TT_FATAL(input_tensor_b.memory_config().memory_layout == TensorMemoryLayout::WIDTH_SHARDED);
         } else if constexpr (std::is_same_v<ProgramConfigType, MatmulMultiCoreReuseMultiCastProgramConfig>) {
             if (input_tensor_a.memory_config().is_sharded()) {
                 auto tensor_a_memory_layout = input_tensor_a.memory_config().memory_layout;