Commit 47d887a

Apply Alexandra comments

chenhu-wang authored and liubo-intel committed Dec 3, 2024
1 parent 7d87b9d commit 47d887a

Showing 5 changed files with 42 additions and 26 deletions.
src/plugins/intel_cpu/src/nodes/subgraph.cpp (4 additions, 11 deletions)

@@ -450,16 +450,10 @@ void Subgraph::initSupportedPrimitiveDescriptors() {
     config.inConfs.resize(inputShapes.size());
     for (size_t i = 0; i < inputShapes.size(); i++) {
         const auto originalInputPrecision = getOriginalInputPrecisionAtPort(i);
-        auto precision = ((originalInputPrecision == ov::element::f32) &&
-                          context->getConfig().inferencePrecision == ov::element::bf16 &&
+        const auto precision = ((originalInputPrecision == ov::element::f32) &&
+                                one_of(context->getConfig().inferencePrecision, ov::element::bf16, ov::element::f16) &&
                           subgraph_attrs->snippet->has_domain_sensitive_ops()) ?
-                             static_cast<ov::element::Type>(ov::element::bf16) :
-                             originalInputPrecision;
-        precision = ((originalInputPrecision == ov::element::f32) &&
-                     context->getConfig().inferencePrecision == ov::element::f16 &&
-                     subgraph_attrs->snippet->has_domain_sensitive_ops()) ?
-                        static_cast<ov::element::Type>(ov::element::f16) :
-                        precision;
+                                    context->getConfig().inferencePrecision : originalInputPrecision;
         if (supportedPrecisions.count(precision) == 0)
             OPENVINO_THROW("Subgraph node with name `", getName(), "` doesn't support ", precision, " precision.");

@@ -644,8 +638,7 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() {
     SNIPPETS_REGISTER_PASS_ABSOLUTE_COMMON(Place::PipelineStart, ConvertToSwishCPU);
     SNIPPETS_REGISTER_PASS_RELATIVE_COMMON(Place::After, ov::snippets::pass::Canonicalization,
                                            ov::snippets::pass::AnalyzeBroadcastableInputs, broadcastable_inputs);
-    if ((context->getConfig().inferencePrecision == ov::element::bf16 || context->getConfig().inferencePrecision == ov::element::f16)
-        && subgraph_attrs->snippet->has_domain_sensitive_ops()) {
+    if (one_of(context->getConfig().inferencePrecision, ov::element::bf16, ov::element::f16) && subgraph_attrs->snippet->has_domain_sensitive_ops()) {
         // enforce BF16 precision on supported operations
         // MatMul has to be decomposed into Brgemm operations before the enforcement
         // Note: MatMul decomposition will run again later in case the BF16 enforcement does not happen
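Both hunks above replace duplicated equality chains with the plugin's one_of helper. For reference, a minimal standalone sketch of what such a variadic helper looks like (an illustration only, presumably matching the one defined in the plugin's shared utilities, not its exact code):

    // Minimal sketch (C++17 fold expression), not the plugin's exact code:
    template <typename T, typename... Args>
    constexpr bool one_of(const T& value, const Args&... candidates) {
        // true if value equals any of the listed candidates
        return ((value == candidates) || ...);
    }

With this shape, one_of(inferencePrecision, ov::element::bf16, ov::element::f16) reads as a single membership test instead of a chain of == comparisons.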
@@ -122,8 +122,12 @@ bool EnforcePrecision::run_on_model(const std::shared_ptr<ov::Model>& f) {

 std::set<std::vector<ov::element::Type>> EnforcePrecision::get_supported_precisions_default(
     const std::shared_ptr<ov::Node>& op) noexcept {
-    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16) && ov::is_type<snippets::op::Brgemm>(op)) {
-        return {{element::bf16, element::bf16}};
+    std::set<std::vector<ov::element::Type>> types;
+    if (ov::is_type<snippets::op::Brgemm>(op)) {
+        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx_fp16))
+            types.insert({element::f16, element::f16});
+        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16))
+            types.insert({element::bf16, element::bf16});
     }
-    return {};
+    return types;
 }
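The refactor turns the function from first-match-returns into accumulate-all-matches, so a host with AMX-FP16 reports both the f16 and bf16 Brgemm pairs, which lets the pass choose between the two target precisions. A compilable sketch of that pattern with stubbed ISA queries (the enum and the mayiuse stub are illustrative stand-ins, not oneDNN's API):

    #include <set>
    #include <vector>

    enum class Isa { avx512_core_bf16, avx512_core_amx_fp16 };
    enum class Precision { bf16, f16 };

    static bool mayiuse(Isa) {
        return true;  // stub: pretend the host supports every ISA
    }

    // Collect every Brgemm input-precision pair the host can execute,
    // instead of returning after the first supported ISA.
    static std::set<std::vector<Precision>> supported_brgemm_precisions() {
        std::set<std::vector<Precision>> types;
        if (mayiuse(Isa::avx512_core_amx_fp16))
            types.insert({Precision::f16, Precision::f16});
        if (mayiuse(Isa::avx512_core_bf16))
            types.insert({Precision::bf16, Precision::bf16});
        return types;  // empty set: nothing for EnforcePrecision to enforce
    }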
@@ -821,7 +821,8 @@ void Transformations::PostLpt() {
     CPU_DISABLE_PASS_COMMON(postLPTPassManager, ov::pass::MoveEltwiseUpThroughDataMovPerChannel);
     CPU_SET_CALLBACK_COMMON(postLPTPassManager,
                             [](const std::shared_ptr<const ov::Node>& node) -> bool {
-                                if (!ov::is_type<const ov::op::v0::FakeQuantize>(node) && node->get_output_element_type(0) != node->get_input_element_type(0))
+                                if (!ov::is_type<const ov::op::v0::FakeQuantize>(node) &&
+                                    node->get_output_element_type(0).size() > node->get_input_element_type(0).size())
                                     return true;
                                 if (node->get_input_size() >= 2) {
                                     return node->get_input_element_type(1) == ov::element::i8 ||
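The callback now compares byte widths instead of testing for any type mismatch, so it returns true only for genuine up-conversions. A small illustration of the quantities involved, assuming the standard OpenVINO core header (ov::element::Type::size() is the element's byte width):

    #include <openvino/core/type/element_type.hpp>
    #include <iostream>

    int main() {
        std::cout << ov::element::f32.size() << '\n';  // 4 bytes
        std::cout << ov::element::f16.size() << '\n';  // 2 bytes

        // Node converting f32 input to f16 output: the old check fired
        // (types differ), the new one does not (output is narrower).
        std::cout << std::boolalpha
                  << (ov::element::f16.size() > ov::element::f32.size()) << '\n';  // false
        // Node converting f16 input to f32 output: still matches.
        std::cout << (ov::element::f32.size() > ov::element::f16.size()) << '\n';  // true
        return 0;
    }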
@@ -563,6 +563,11 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(R"(.*smoke_Snippets_MHA.*EnforceBF16.*)");
         retVector.emplace_back(R"(.*ConcatSDPTest.*bf16.*)");
     }
+    // MHA FP16 precision is only supported on amx_fp16 platform
+    if (!ov::with_cpu_x86_avx512_core_amx_fp16()) {
+        retVector.emplace_back(R"(.*smoke_Snippets_MHA.*FP16.*)");
+    }
+
 #ifdef SNIPPETS_LIBXSMM_TPP
     // GN in TPP requires exposing tmp Buffer results outside the loop (ticket: 151234)
     retVector.emplace_back(R"(.*smoke_Snippets_GroupNormalization.*)");
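The FP16 MHA cases are gated by a runtime CPU capability probe rather than a build flag. A minimal sketch of the same gate (assuming with_cpu_x86_avx512_core_amx_fp16 is exposed through OpenVINO's system_conf.hpp header, like the other with_cpu_x86_* queries):

    #include <openvino/runtime/system_conf.hpp>
    #include <iostream>

    int main() {
        // The skip list adds the FP16 MHA pattern only when this probe fails,
        // so those tests run exactly on AMX-FP16 capable hosts.
        if (ov::with_cpu_x86_avx512_core_amx_fp16())
            std::cout << "AMX-FP16 available: FP16 MHA tests will run\n";
        else
            std::cout << "AMX-FP16 not available: FP16 MHA tests are skipped\n";
        return 0;
    }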
@@ -124,30 +124,43 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D,
                                            ::testing::Values(CPUTestUtils::empty_plugin_config)),
                          MHA::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAFP16_4D,
+INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16,
                          MHA,
+                         ::testing::Combine(::testing::ValuesIn(transposedShape_4D()),
+                                            ::testing::ValuesIn(precision_f32(4)),
+                                            ::testing::Values(ov::element::bf16),
+                                            ::testing::ValuesIn({false}),
+                                            ::testing::Values(MHA::default_thread_count),
+                                            ::testing::Values(7),
+                                            ::testing::Values(6),
+                                            ::testing::Values(ov::test::utils::DEVICE_CPU),
+                                            ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)),
+                         MHA::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_FP16_4D,
+                         MHA,
                          ::testing::Combine(::testing::ValuesIn(transposedShape_4D(false)),
                                             ::testing::ValuesIn(precision_fp16_if_supported(4)),
-                                            ::testing::Values(ov::element::f32),
+                                            ::testing::Values(ov::element::f16),
                                             ::testing::ValuesIn({false, true}),
                                             ::testing::Values(MHA::default_thread_count),
-                                            ::testing::Values(1), // MHA + 5 Converts + 1 Transpose on output
-                                            ::testing::Values(1), // MHA + 5 Converts on inputs and output
+                                            ::testing::Values(2),
+                                            ::testing::Values(1),
                                             ::testing::Values(ov::test::utils::DEVICE_CPU),
                                             ::testing::Values(CPUTestUtils::empty_plugin_config)),
                          MHA::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16,
+INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceFP16,
                          MHA,
-                         ::testing::Combine(::testing::ValuesIn(transposedShape_4D()),
+                         ::testing::Combine(::testing::ValuesIn(transposedShape_4D(false)),
                                             ::testing::ValuesIn(precision_f32(4)),
-                                            ::testing::Values(ov::element::bf16),
-                                            ::testing::ValuesIn({false}),
+                                            ::testing::Values(ov::element::f16),
+                                            ::testing::ValuesIn({false, true}),
                                             ::testing::Values(MHA::default_thread_count),
-                                            ::testing::Values(7),
-                                            ::testing::Values(6),
+                                            ::testing::Values(2),
+                                            ::testing::Values(1),
                                             ::testing::Values(ov::test::utils::DEVICE_CPU),
-                                            ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)),
+                                            ::testing::Values(CPUTestUtils::cpu_f16_plugin_config)),
                          MHA::getTestCaseName);

 } // namespace
