From a82b7a0d82d4aa736615d2b8067c6810bf139da7 Mon Sep 17 00:00:00 2001 From: Jing Fang Date: Wed, 17 Jul 2024 16:52:08 -0700 Subject: [PATCH] try fixing Mac CI --- .../qdq_transformer/selectors_actions/qdq_actions.cc | 12 ++++++------ .../qdq_transformer/selectors_actions/qdq_actions.h | 6 +++--- .../qdq_selector_action_transformer.cc | 6 +++--- .../optimizer/qdq_matmulnbits_transformer_test.cc | 3 +++ 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.cc index 5bba433c5eab5..0d51e6cea32a7 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.cc @@ -275,8 +275,8 @@ Status MatMulReplaceWithQLinear::Run(Graph& graph, const NodesToOptimize& select } } -DQMatMulReplaceWithMatMulNBits::DQMatMulReplaceWithMatMulNBits(int64_t accuracy_level, - concurrency::ThreadPool* intra_op_thread_pool) +DQMatMulToMatMulNBitsAction::DQMatMulToMatMulNBitsAction(int64_t accuracy_level, + concurrency::ThreadPool* intra_op_thread_pool) : accuracy_level_{accuracy_level}, domain_{kMSDomain}, op_type_{"MatMulNBits"}, @@ -291,7 +291,7 @@ DQMatMulReplaceWithMatMulNBits::DQMatMulReplaceWithMatMulNBits(int64_t accuracy_ } NodeAttributes -DQMatMulReplaceWithMatMulNBits::ExtraAttributes(const RuntimeState& runtime_state) const { +DQMatMulToMatMulNBitsAction::ExtraAttributes(const RuntimeState& runtime_state) const { NodeAttributes extra_attributes; const auto* dq_node = runtime_state.selected_nodes.Input(0); @@ -308,9 +308,9 @@ DQMatMulReplaceWithMatMulNBits::ExtraAttributes(const RuntimeState& runtime_stat return extra_attributes; } -Status DQMatMulReplaceWithMatMulNBits::ProcessNewNode(Graph& graph, - const NodesToOptimize& selected_nodes, - Node& replacement_node) const { +Status DQMatMulToMatMulNBitsAction::ProcessNewNode(Graph& graph, + const NodesToOptimize& selected_nodes, + Node& replacement_node) const { ORT_RETURN_IF_NOT(intra_op_thread_pool_, "Passed in thread pool should not be null"); const auto* dq_node = selected_nodes.Input(0); const auto* weight_arg = dq_node->InputDefs()[0]; diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.h index d80c3f9d183bf..47821619db65a 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.h +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_actions.h @@ -82,9 +82,9 @@ struct MatMulReplaceWithQLinear : public Action { }; // used together with DQMatMulNodeGroupSelector, which does the sanity check -struct DQMatMulReplaceWithMatMulNBits : public ReplaceWithNew { - DQMatMulReplaceWithMatMulNBits(int64_t accuracy_level, - concurrency::ThreadPool* intra_op_thread_pool); +struct DQMatMulToMatMulNBitsAction : public ReplaceWithNew { + DQMatMulToMatMulNBitsAction(int64_t accuracy_level, + concurrency::ThreadPool* intra_op_thread_pool); private: std::string OpType(const RuntimeState&) const override { return op_type_; } diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index 0b10f092cc565..17e66a3953b97 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -234,11 +234,11 @@ void DQMatMulToMatMulNBitsRules(SelectorActionRegistry& qdq_selector_action_regi // 2 nodes. DQ -> MatMul. DQ is the second input to MatMul. // DQ's weight is int4/uint4. DQ's scale is float/float16. // DQ is block-quantized along axis 0, with block_size >= 16 and as 2's power. - const std::string action_name{"DQMatMul"}; + const std::string action_name{"DQMatMulToMatMulNBits"}; std::unique_ptr action = - std::make_unique(qdq_matmulnbits_accuracy_level, - intra_op_thread_pool); + std::make_unique(qdq_matmulnbits_accuracy_level, + intra_op_thread_pool); #if !defined(ORT_MINIMAL_BUILD) std::unique_ptr selector = std::make_unique(); diff --git a/onnxruntime/test/optimizer/qdq_matmulnbits_transformer_test.cc b/onnxruntime/test/optimizer/qdq_matmulnbits_transformer_test.cc index 3d117794104fa..f80425616a497 100644 --- a/onnxruntime/test/optimizer/qdq_matmulnbits_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_matmulnbits_transformer_test.cc @@ -408,6 +408,9 @@ RunDQMatMulConverted(const std::vector& input1_shape, } TEST(QDQTransformerTests, DQMatMulConvertedToMatMulNBits) { + if constexpr (!SessionOptions::DEFAULT_USE_PER_SESSION_THREADS) { + GTEST_SKIP() << "Skipping the test"; + } // DQ contrib op schema is not updated to support blocked quantization RunDQMatMulConverted({12, 12}, {12, 37}, {37, 12}, 0, 16, 0); RunDQMatMulConverted({12, 12}, {12, 37}, {37, 12}, 0, 16, 0);