From 08f73a0b08960a218caa87cfe6453d50b5ed39a6 Mon Sep 17 00:00:00 2001
From: adrianlizarraga
Date: Wed, 11 Oct 2023 18:27:26 -0700
Subject: [PATCH 1/4] Support InstanceNorm with rank 3 inputs with N != 1 on
 HTP backend

---
 .../opbuilder/instance_norm_op_builder.cc     | 130 +++++++++++++++++-
 .../providers/qnn/instance_norm_htp_test.cc   |  93 +++++++++++--
 2 files changed, 208 insertions(+), 15 deletions(-)

diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
index e1ea22b886268..28603b5f1100b 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
@@ -24,6 +24,12 @@ class InstanceNormOpBuilder : public BaseOpBuilder {
                      const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;
 
  protected:
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
   Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                      const NodeUnit& node_unit,
                                      std::vector<std::string>&& input_names,
@@ -81,6 +87,73 @@ Status InstanceNormOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
+Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                            const NodeUnit& node_unit,
+                                            const logging::Logger& logger,
+                                            std::vector<std::string>& input_names,
+                                            bool do_op_validation) const {
+  const auto& inputs = node_unit.Inputs();
+  assert(inputs.size() == 3);
+
+  OnnxInputInfo input0_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input0_info));
+
+  // HTP backend can only handle rank 3 inputs if the batch size is 1. If the batch size is not 1,
+  // QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
+  if (input0_info.shape.size() != 3 || input0_info.shape[0] == 1) {
+    return BaseOpBuilder::ProcessInputs(qnn_model_wrapper, node_unit, logger, input_names, do_op_validation);
+  }
+
+  //
+  // Input 0 is rank 3 with batch size != 1. Must reshape the input to rank 4.
+  //
+
+  {
+    const std::string& input0_name = inputs[0].node_arg.Name();
+    const std::string op_input0_name = input0_info.is_initializer ? input0_name
+                                                                  : input0_name + "_ort_qnn_ep_reshape";
+    input_names.push_back(op_input0_name);
+
+    std::vector<uint8_t> initializer_data;
+    if (input0_info.is_initializer) {
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input0_info.initializer_tensor, initializer_data));
+    }
+
+    assert(node_unit.Domain() == kMSInternalNHWCDomain);
+    std::vector<uint32_t> op_shape = {
+        input0_info.shape[0],  // N
+        1,                     // Height == 1
+        input0_info.shape[1],  // Width
+        input0_info.shape[2]   // Channels
+    };
+
+    if (!input0_info.is_initializer) {
+      // Add Reshape node to transform the rank 3 input to rank 4 (i.e., set height to 1).
+      // We don't need to do this for initializers, because the number of elements does not change. We can just
+      // modify the shape dimensions.
+      bool is_graph_input = qnn_model_wrapper.IsGraphInput(input0_name);
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(input0_name,
+                                                           op_input0_name,
+                                                           input0_info.shape,
+                                                           op_shape,
+                                                           input0_info.qnn_data_type,
+                                                           input0_info.quant_param,
+                                                           do_op_validation,
+                                                           is_graph_input));
+    }
+
+    Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, op_input0_name);
+    QnnTensorWrapper input_tensorwrapper(op_input0_name, tensor_type, input0_info.qnn_data_type, input0_info.quant_param,
+                                         std::move(op_shape), std::move(initializer_data));
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+  }
+
+  ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[1], logger, input_names));  // Scale
+  ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[2], logger, input_names));  // Bias
+
+  return Status::OK();
+}
+
 Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                                           const NodeUnit& node_unit,
                                                           std::vector<std::string>&& input_names,
@@ -100,11 +173,60 @@ Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_m
   param_tensor_names.push_back(epsilon_param_wrapper.GetParamTensorName());
   qnn_model_wrapper.AddParamWrapper(std::move(epsilon_param_wrapper));
 
-  ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
-                                     std::move(input_names),
-                                     std::move(param_tensor_names),
-                                     logger, do_op_validation, GetQnnOpType(node_unit.OpType())));
+  const auto& outputs = node_unit.Outputs();
+  assert(outputs.size() == 1);
+
+  OnnxInputInfo output_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
+
+  // HTP backend can only handle rank 3 inputs/outputs if the batch size is 1. If the batch size is not 1,
+  // QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
+  if (output_info.shape.size() != 3 || output_info.shape[0] == 1) {
+    return ProcessOutputs(qnn_model_wrapper, node_unit,
+                          std::move(input_names),
+                          std::move(param_tensor_names),
+                          logger, do_op_validation, GetQnnOpType(node_unit.OpType()));
+  }
+  //
+  // The output is meant to be rank 3 with batch size != 1. Must create a QNN InstanceNorm op with a rank 4 output
+  // that is then reshaped to rank 3 again.
+  //
+
+  const std::string& orig_output_name = outputs[0].node_arg.Name();
+  std::string op_output_name = orig_output_name + "_ort_qnn_ep_reshape";
+
+  assert(node_unit.Domain() == kMSInternalNHWCDomain);
+  std::vector<uint32_t> op_output_shape = {
+      output_info.shape[0],  // N
+      1,                     // H == 1
+      output_info.shape[1],  // W
+      output_info.shape[2],  // C
+  };
+
+  QnnTensorWrapper output_tensorwrapper(op_output_name, QNN_TENSOR_TYPE_NATIVE, output_info.qnn_data_type,
+                                        output_info.quant_param, std::vector<uint32_t>(op_output_shape));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
+                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                    GetQnnOpType(node_unit.OpType()),
+                                                    std::move(input_names),
+                                                    {op_output_name},
+                                                    std::move(param_tensor_names)),
+                    "Failed to add node.");
+
+  const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(orig_output_name);
+
+  // Add Reshape to convert QNN InstanceNorm output back to rank 3 (as expected by the rest of the ONNX graph).
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(op_output_name,
+                                                       orig_output_name,
+                                                       op_output_shape,
+                                                       output_info.shape,
+                                                       output_info.qnn_data_type,
+                                                       output_info.quant_param,
+                                                       do_op_validation,
+                                                       false,
+                                                       is_graph_output));
 
   return Status::OK();
 }
diff --git a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
index a2aa74c876936..3598ba1ac8851 100644
--- a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
@@ -21,21 +21,26 @@ template <typename QuantType>
 static GetTestQDQModelFn<QuantType> BuildQDQInstanceNormTestCase(const TestInputDef<float>& input_def,
                                                                  const TestInputDef<float>& scale_def,
                                                                  const TestInputDef<float>& bias_def,
-                                                                 const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
-  return [input_def, scale_def, bias_def, attrs](ModelTestBuilder& builder,
-                                                 std::vector<QuantParams<QuantType>>& output_qparams) {
+                                                                 const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                                                 bool use_contrib_qdq = false) {
+  return [input_def, scale_def, bias_def, attrs,
+          use_contrib_qdq](ModelTestBuilder& builder,
+                           std::vector<QuantParams<QuantType>>& output_qparams) {
     // input => Q => DQ =>
     NodeArg* input = MakeTestInput(builder, input_def);
     QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
-    NodeArg* input_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale, input_qparams.zero_point);
+    NodeArg* input_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale, input_qparams.zero_point,
+                                                   use_contrib_qdq);
 
     // scale => Q => DQ =>
     NodeArg* scale = MakeTestInput(builder, scale_def);
     QuantParams<QuantType> scale_qparams = GetTestInputQuantParams<QuantType>(scale_def);
-    NodeArg* scale_qdq = AddQDQNodePair<QuantType>(builder, scale, scale_qparams.scale, scale_qparams.zero_point);
+    NodeArg* scale_qdq = AddQDQNodePair<QuantType>(builder, scale, scale_qparams.scale, scale_qparams.zero_point,
+                                                   use_contrib_qdq);
 
     // bias (as int32) => DQ =>
-    NodeArg* bias_qdq = MakeTestQDQBiasInput(builder, bias_def, input_qparams.scale * scale_qparams.scale);
+    NodeArg* bias_qdq = MakeTestQDQBiasInput(builder, bias_def, input_qparams.scale * scale_qparams.scale,
+                                             use_contrib_qdq);
 
     // InstanceNormalization operator.
     auto* instance_norm_output = builder.MakeIntermediate();
@@ -46,7 +51,8 @@ static GetTestQDQModelFn<QuantType> BuildQDQInstanceNormTestCase(const TestInput
     }
 
     // Add instance_norm_output -> Q -> output_u8
-    AddQDQNodePairWithOutputAsGraphOutput<QuantType>(builder, instance_norm_output, output_qparams[0].scale, output_qparams[0].zero_point);
+    AddQDQNodePairWithOutputAsGraphOutput<QuantType>(builder, instance_norm_output, output_qparams[0].scale,
+                                                     output_qparams[0].zero_point, use_contrib_qdq);
   };
 }
 
@@ -65,7 +71,8 @@ static void RunInstanceNormQDQTest(const TestInputDef<float>& input_def,
                                    const TestInputDef<float>& scale_def,
                                    const TestInputDef<float>& bias_def,
                                    const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                   ExpectedEPNodeAssignment expected_ep_assignment) {
+                                   ExpectedEPNodeAssignment expected_ep_assignment,
+                                   bool use_contrib_qdq = false) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -75,11 +82,10 @@ static void RunInstanceNormQDQTest(const TestInputDef<float>& input_def,
 
   // Runs model with DQ -> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
   TestQDQModelAccuracy(BuildOpTestCase<float>("InstanceNormalization", {input_def, scale_def, bias_def}, {}, attrs),
-                       BuildQDQInstanceNormTestCase<QuantType>(input_def, scale_def, bias_def, attrs),
+                       BuildQDQInstanceNormTestCase<QuantType>(input_def, scale_def, bias_def, attrs, use_contrib_qdq),
                        provider_options,
                        18,
-                       expected_ep_assignment,
-                       1e-5f);
+                       expected_ep_assignment);
 }
 
 // Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit.
@@ -97,6 +103,19 @@ TEST_F(QnnHTPBackendTests, InstanceNormU8) {
                                  ExpectedEPNodeAssignment::All);
 }
 
+TEST_F(QnnHTPBackendTests, InstanceNormU16) {
+  std::vector<float> input_data = {3.21289f, -5.9981f, -1.72799f, 6.27263f, 3.36205f, -1.93515f, -5.40113f, 3.75648f, 6.15357f,
+                                   -5.25769f, 2.73637f, -0.901382f, -6.55612f, 1.99497f, -4.79228f, 2.69813f, 8.3064f, 0.0362501f};
+  std::vector<float> scale_data = {-0.148738f, -1.45158f};
+  std::vector<float> bias_data = {-2.2785083772f, 2.3338717017f};
+  RunInstanceNormQDQTest<uint16_t>(TestInputDef<float>({1, 2, 3, 3}, false, input_data).OverrideValueRange(-10.0f, 10.0f),
+                                   TestInputDef<float>({2}, true, scale_data).OverrideValueRange(-2.0f, 2.0f),
+                                   TestInputDef<float>({2}, true, bias_data).OverrideValueRange(-3.0f, 3.0f),
+                                   {},
+                                   ExpectedEPNodeAssignment::All,
+                                   true);  // Use contrib Q/DQ ops for 16-bit support.
+}
+
 // Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3) {
@@ -107,6 +126,58 @@ TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3) {
                                  ExpectedEPNodeAssignment::All);
 }
 
+// Test 8-bit QDQ InstanceNormalization with an input of rank 3 with N != 1,
+// which requires wrapping the QNN InstanceNorm op with reshapes.
+TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3_BatchSizeNot1) {
+  std::vector<float> input_data = {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f,
+                                   -8.0f, -6.0f, 0.0f, 1.0f, 3.0f, 6.0f};
+  RunInstanceNormQDQTest<uint8_t>(TestInputDef<float>({2, 2, 3}, false, input_data),
+                                  TestInputDef<float>({2}, true, {1.0f, 2.0f}),
+                                  TestInputDef<float>({2}, true, {1.0f, 3.0f}),
+                                  {},
+                                  ExpectedEPNodeAssignment::All);
+}
+
+// Test 16-bit QDQ InstanceNormalization with an input of rank 3 with N != 1,
+// which requires wrapping the QNN InstanceNorm op with reshapes.
+TEST_F(QnnHTPBackendTests, InstanceNormU16Rank3_BatchSizeNot1) {
+  std::vector<float> input_data = {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f,
+                                   -8.0f, -6.0f, 0.0f, 1.0f, 3.0f, 6.0f};
+  RunInstanceNormQDQTest<uint16_t>(TestInputDef<float>({2, 2, 3}, false, input_data),
+                                   TestInputDef<float>({2}, true, {1.0f, 2.0f}),
+                                   TestInputDef<float>({2}, true, {1.0f, 3.0f}),
+                                   {},
+                                   ExpectedEPNodeAssignment::All,
+                                   true);  // Use contrib Q/DQ ops for 16-bit support.
+}
+
+// Test 8-bit QDQ InstanceNormalization with an input of rank 3 with N != 1,
+// which requires wrapping the QNN InstanceNorm op with reshapes.
+// Input 0 is an initializer.
+TEST_F(QnnHTPBackendTests, InstanceNormU8Rank3_BatchSizeNot1_Initializer) {
+  std::vector<float> input_data = {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f,
+                                   -8.0f, -6.0f, 0.0f, 1.0f, 3.0f, 6.0f};
+  RunInstanceNormQDQTest<uint8_t>(TestInputDef<float>({2, 2, 3}, true, input_data),
+                                  TestInputDef<float>({2}, true, {1.0f, 2.0f}),
+                                  TestInputDef<float>({2}, false, {1.0f, 3.0f}),
+                                  {},
+                                  ExpectedEPNodeAssignment::All);
+}
+
+// Test 16-bit QDQ InstanceNormalization with an input of rank 3 with N != 1,
+// which requires wrapping the QNN InstanceNorm op with reshapes.
+// Input 0 is an initializer.
+TEST_F(QnnHTPBackendTests, InstanceNormU16Rank3_BatchSizeNot1_Initializer) {
+  std::vector<float> input_data = {6.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f,
+                                   -8.0f, -6.0f, 0.0f, 1.0f, 3.0f, 6.0f};
+  RunInstanceNormQDQTest<uint16_t>(TestInputDef<float>({2, 2, 3}, true, input_data),
+                                   TestInputDef<float>({2}, true, {1.0f, 2.0f}),
+                                   TestInputDef<float>({2}, false, {1.0f, 3.0f}),
+                                   {},
+                                   ExpectedEPNodeAssignment::All,
+                                   true);  // Use contrib Q/DQ ops for 16-bit support.
+}
+
 // Check that QNN InstanceNorm operator does not handle inputs with rank > 4.
 TEST_F(QnnHTPBackendTests, InstanceNormU8Rank5) {
   RunInstanceNormQDQTest<uint8_t>(TestInputDef<float>({1, 2, 3, 3, 3}, false, -10.0f, 10.0f),
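The reshape trick in patch 1 rests on a simple shape mapping: an NHWC rank 3
activation (N, W, C) is fed to QNN's rank 4 InstanceNorm as (N, 1, W, C).
Inserting a height dimension of size 1 adds no elements and leaves the
per-(batch, channel) statistics untouched. A minimal sketch of the mapping
(the helper name and the standalone harness are illustrative, not part of the
patch):

#include <cassert>
#include <cstdint>
#include <vector>

// Maps an NHWC rank 3 shape (N, W, C) to the rank 4 shape (N, 1, W, C)
// that the QNN InstanceNorm op can consume on the HTP backend.
std::vector<uint32_t> ToRank4Shape(const std::vector<uint32_t>& shape_nwc) {
  assert(shape_nwc.size() == 3);
  return {shape_nwc[0], 1, shape_nwc[1], shape_nwc[2]};
}

int main() {
  assert((ToRank4Shape({2, 3, 8}) == std::vector<uint32_t>{2, 1, 3, 8}));
  return 0;
}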
From da042db8184e3a9ed339da41c81cd419c57fb314 Mon Sep 17 00:00:00 2001
From: Adrian Lizarraga
Date: Wed, 11 Oct 2023 18:37:34 -0700
Subject: [PATCH 2/4] Update
 onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc

---
 .../providers/qnn/builder/opbuilder/instance_norm_op_builder.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
index 28603b5f1100b..0a304d13e484d 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
@@ -129,7 +129,7 @@ Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
 
     if (!input0_info.is_initializer) {
       // Add Reshape node to transform the rank 3 input to rank 4 (i.e., set height to 1).
-      // We don't need to do this for initializers, because the number of elements does not change. We can just
+      // We don't need to do this for initializers, because the element layout does not change. We can just
       // modify the shape dimensions.
       bool is_graph_input = qnn_model_wrapper.IsGraphInput(input0_name);
       ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(input0_name,
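The comment corrected in patch 2 is worth unpacking: for row-major (C-order)
storage, inserting a size-1 dimension changes neither the element count nor
the element order, so an initializer can be "reshaped" by rewriting its shape
metadata alone, with no Reshape node and no data movement. A self-contained
check of that claim (illustrative; assumes row-major layout, which is how ONNX
initializer data is stored):

#include <cassert>
#include <cstddef>
#include <vector>

// Row-major linear index for an arbitrary-rank shape.
static size_t LinearIndex(const std::vector<size_t>& shape, const std::vector<size_t>& idx) {
  size_t linear = 0;
  for (size_t d = 0; d < shape.size(); ++d) {
    linear = linear * shape[d] + idx[d];
  }
  return linear;
}

int main() {
  const std::vector<size_t> rank3 = {2, 3, 4};     // (N, W, C)
  const std::vector<size_t> rank4 = {2, 1, 3, 4};  // (N, 1, W, C)
  // Every element keeps its exact buffer offset after the "reshape".
  for (size_t n = 0; n < 2; ++n)
    for (size_t w = 0; w < 3; ++w)
      for (size_t c = 0; c < 4; ++c)
        assert(LinearIndex(rank3, {n, w, c}) == LinearIndex(rank4, {n, 0, w, c}));
  return 0;
}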
From 6268100302c352e53775aec7d42625e3984b359d Mon Sep 17 00:00:00 2001
From: adrianlizarraga
Date: Wed, 11 Oct 2023 18:46:10 -0700
Subject: [PATCH 3/4] Simplify input handling logic

---
 .../opbuilder/instance_norm_op_builder.cc     | 22 +++++++-------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
index 0a304d13e484d..a02d093eff28f 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
@@ -100,18 +100,10 @@ Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
 
   // HTP backend can only handle rank 3 inputs if the batch size is 1. If the batch size is not 1,
   // QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
-  if (input0_info.shape.size() != 3 || input0_info.shape[0] == 1) {
-    return BaseOpBuilder::ProcessInputs(qnn_model_wrapper, node_unit, logger, input_names, do_op_validation);
-  }
-
-  //
-  // Input 0 is rank 3 with batch size != 1. Must reshape the input to rank 4.
-  //
-
-  {
-    const std::string& input0_name = inputs[0].node_arg.Name();
-    const std::string op_input0_name = input0_info.is_initializer ? input0_name
-                                                                  : input0_name + "_ort_qnn_ep_reshape";
+  if (input0_info.shape.size() == 3 && input0_info.shape[0] != 1) {
+    const std::string& orig_input0_name = inputs[0].node_arg.Name();
+    const std::string op_input0_name = input0_info.is_initializer ? orig_input0_name
+                                                                  : orig_input0_name + "_ort_qnn_ep_reshape";
     input_names.push_back(op_input0_name);
 
     std::vector<uint8_t> initializer_data;
@@ -131,8 +123,8 @@ Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
       // Add Reshape node to transform the rank 3 input to rank 4 (i.e., set height to 1).
       // We don't need to do this for initializers, because the element layout does not change. We can just
       // modify the shape dimensions.
-      bool is_graph_input = qnn_model_wrapper.IsGraphInput(input0_name);
-      ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(input0_name,
+      bool is_graph_input = qnn_model_wrapper.IsGraphInput(orig_input0_name);
+      ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddReshapeNode(orig_input0_name,
                                                            op_input0_name,
                                                            input0_info.shape,
                                                            op_shape,
@@ -146,6 +138,8 @@ Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
     QnnTensorWrapper input_tensorwrapper(op_input0_name, tensor_type, input0_info.qnn_data_type, input0_info.quant_param,
                                          std::move(op_shape), std::move(initializer_data));
     ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+  } else {
+    ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names));  // Input 0
   }
 
   ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[1], logger, input_names));  // Scale
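For reference when reading the reshape logic and the tests: InstanceNormalization
computes y = scale[c] * (x - mean) / sqrt(variance + epsilon) + bias[c], where mean
and variance are taken per (batch, channel) over the spatial dimensions only. The
batch dimension just indexes independent normalization groups, so the (N, 1, W, C)
wrapping is forced purely by the HTP rank constraint, not by the math. A compact
channel-last reference implementation (a sketch; not taken from the ORT sources):

#include <cmath>
#include <cstddef>
#include <vector>

// Reference InstanceNorm for a channel-last rank 3 input (N, W, C):
// normalizes over W independently for every (n, c) pair.
std::vector<float> InstanceNormNWC(const std::vector<float>& x,
                                   const std::vector<float>& scale,
                                   const std::vector<float>& bias,
                                   size_t N, size_t W, size_t C,
                                   float epsilon = 1e-5f) {
  std::vector<float> y(x.size());
  for (size_t n = 0; n < N; ++n) {
    for (size_t c = 0; c < C; ++c) {
      float mean = 0.0f;
      for (size_t w = 0; w < W; ++w) mean += x[(n * W + w) * C + c];
      mean /= static_cast<float>(W);
      float var = 0.0f;
      for (size_t w = 0; w < W; ++w) {
        const float d = x[(n * W + w) * C + c] - mean;
        var += d * d;
      }
      var /= static_cast<float>(W);
      const float inv_std = 1.0f / std::sqrt(var + epsilon);
      for (size_t w = 0; w < W; ++w) {
        const size_t i = (n * W + w) * C + c;
        y[i] = scale[c] * (x[i] - mean) * inv_std + bias[c];
      }
    }
  }
  return y;
}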
From 50d34d95faf2b0cee5a9e84ce6afc628c18137f7 Mon Sep 17 00:00:00 2001
From: adrianlizarraga
Date: Wed, 11 Oct 2023 22:07:00 -0700
Subject: [PATCH 4/4] Remove asserts. Handle CPU backend normally

---
 .../qnn/builder/opbuilder/instance_norm_op_builder.cc  | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
index a02d093eff28f..6d39cd8102094 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/instance_norm_op_builder.cc
@@ -93,14 +93,14 @@ Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                                             std::vector<std::string>& input_names,
                                             bool do_op_validation) const {
   const auto& inputs = node_unit.Inputs();
-  assert(inputs.size() == 3);
 
   OnnxInputInfo input0_info = {};
   ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input0_info));
 
   // HTP backend can only handle rank 3 inputs if the batch size is 1. If the batch size is not 1,
   // QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
-  if (input0_info.shape.size() == 3 && input0_info.shape[0] != 1) {
+  if (IsNpuBackend(qnn_model_wrapper.GetQnnBackendType()) &&
+      input0_info.shape.size() == 3 && input0_info.shape[0] != 1) {
     const std::string& orig_input0_name = inputs[0].node_arg.Name();
     const std::string op_input0_name = input0_info.is_initializer ? orig_input0_name
                                                                   : orig_input0_name + "_ort_qnn_ep_reshape";
@@ -111,7 +111,6 @@ Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
       ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input0_info.initializer_tensor, initializer_data));
     }
 
-    assert(node_unit.Domain() == kMSInternalNHWCDomain);
     std::vector<uint32_t> op_shape = {
         input0_info.shape[0],  // N
         1,                     // Height == 1
@@ -168,14 +167,14 @@ Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_m
   qnn_model_wrapper.AddParamWrapper(std::move(epsilon_param_wrapper));
 
   const auto& outputs = node_unit.Outputs();
-  assert(outputs.size() == 1);
 
   OnnxInputInfo output_info = {};
   ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
 
   // HTP backend can only handle rank 3 inputs/outputs if the batch size is 1. If the batch size is not 1,
   // QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
-  if (output_info.shape.size() != 3 || output_info.shape[0] == 1) {
+  if (!IsNpuBackend(qnn_model_wrapper.GetQnnBackendType()) ||
+      output_info.shape.size() != 3 || output_info.shape[0] == 1) {
     return ProcessOutputs(qnn_model_wrapper, node_unit,
                           std::move(input_names),
                           std::move(param_tensor_names),
@@ -190,7 +189,6 @@ Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_m
   const std::string& orig_output_name = outputs[0].node_arg.Name();
   std::string op_output_name = orig_output_name + "_ort_qnn_ep_reshape";
 
-  assert(node_unit.Domain() == kMSInternalNHWCDomain);
   std::vector<uint32_t> op_output_shape = {
       output_info.shape[0],  // N
       1,                     // H == 1
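After patch 4, the decision to wrap is a single predicate: apply the rank 3 to
rank 4 workaround only on the NPU (HTP) backend, only for rank 3 shapes, and
only when the batch dimension is not 1; every other case (including the CPU
backend) falls through to the default input/output handling. A condensed
restatement (a sketch; the real code evaluates
IsNpuBackend(qnn_model_wrapper.GetQnnBackendType()) for the first operand):

#include <cstdint>
#include <vector>

// Sketch only: mirrors the condition added in patch 4.
// For a rank 3, N != 1 input "X" and output "Y", the EP then emits
// (names per the "_ort_qnn_ep_reshape" suffix used in patch 1):
//   X (N, W, C) --Reshape--> X_ort_qnn_ep_reshape (N, 1, W, C)
//     --InstanceNorm--> Y_ort_qnn_ep_reshape (N, 1, W, C)
//     --Reshape--> Y (N, W, C)
bool NeedsRank4Workaround(bool is_npu_backend, const std::vector<uint32_t>& shape) {
  return is_npu_backend && shape.size() == 3 && shape[0] != 1;
}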