Do not use dynamic quantization for u8 weights with a weight zero-point (wzp)

openvinotoolkit · Sep 19, 2024 · f73e04c · f73e04c
1 parent 16ae143
commit f73e04c
Showing 1 changed file with 3 additions and 4 deletions.
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -875,10 +875,9 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
                     return true;
                 }
 
-                // OneDNN accuracy issue
-                if ((root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8)
-                    && dynamic_quantization_group_size != UINT64_MAX) {
-                    GPU_DEBUG_TRACE << root->get_friendly_name() << " : dynamic quantization is not supported because of library accuracy issue" << std::endl;
+                bool has_wzp = root->get_input_size() > 4;
+                if ((root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8) && has_wzp) {
+                    GPU_DEBUG_TRACE << root->get_friendly_name() << " : dynamic quantization is turned off because weight may not representable in 8 bit" << std::endl;
                     return true;
                 }
                 return false;