Commit a4dac51

update1

Yi Zhang committed Dec 23, 2024
1 parent d556acb commit a4dac51
Showing 3 changed files with 66 additions and 6 deletions.
27 changes: 23 additions & 4 deletions onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc
@@ -26,6 +26,7 @@ struct ConvOpAndTestAttributes {
std::unordered_set<std::string> excluded_providers;
string activation = "";
vector<float> activation_parameters = {};
string domain = onnxruntime::kMSDomain;
};

/*
@@ -48,7 +49,19 @@ void TestConvFp16Op(const ConvOpAndTestAttributes& attributes,
int opset = 11) {
std::unique_ptr<OpTester> tester;
if (!attributes.activation.empty()) {
-  tester = std::make_unique<OpTester>("NhwcFusedConv", 1, onnxruntime::kMSDomain);
+  std::string_view op;
+  if (attributes.domain == onnxruntime::kMSDomain) {
+    op = "NhwcFusedConv";
+    tester = std::make_unique<OpTester>(op, 1, attributes.domain);
+  } else if (attributes.domain == onnxruntime::kMSInternalNHWCDomain) {
+    op = "Conv";
+    tester = std::make_unique<OpTester>(op, opset, attributes.domain);
+  } else if (attributes.domain == onnxruntime::kOnnxDomain) {
+    op = "FusedConv";
+    tester = std::make_unique<OpTester>(op, 1, attributes.domain);
+  } else {
+    ORT_THROW("Unsupported domain: ", attributes.domain);
+  }

tester->AddAttribute("activation", attributes.activation);

if (!attributes.activation_parameters.empty()) {
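For context, TestConvFp16Op wraps onnxruntime's OpTester test harness. A minimal sketch of that underlying pattern (op choice, shapes, and values here are illustrative, not taken from this commit):

  // Declare op + opset + domain, attach attributes, inputs, and the
  // expected output, then Run(). A 1x1 conv with weight 2 doubles X.
  OpTester tester("Conv", /*opset=*/11, onnxruntime::kOnnxDomain);
  tester.AddAttribute("kernel_shape", std::vector<int64_t>{1, 1});
  tester.AddInput<MLFloat16>("X", {1, 1, 2, 2},
                             {MLFloat16(1.f), MLFloat16(2.f),
                              MLFloat16(3.f), MLFloat16(4.f)});
  tester.AddInput<MLFloat16>("W", {1, 1, 1, 1}, {MLFloat16(2.f)});
  tester.AddOutput<MLFloat16>("Y", {1, 1, 2, 2},
                              {MLFloat16(2.f), MLFloat16(4.f),
                               MLFloat16(6.f), MLFloat16(8.f)});
  tester.Run();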
@@ -1127,7 +1140,7 @@ TEST(ConvFp16Test, Pointwise_Relu) {
vector<int64_t>{1, 1}, // kernel_shape
vector<int64_t>{0, 0, 0, 0}, // pads
vector<int64_t>{1, 1}, // strides
-  {},                           // excluded EPs
+  {kXnnpackExecutionProvider},  // excluded EPs
"Relu" // activation
};

@@ -1157,8 +1170,14 @@ TEST(ConvFp16Test, Pointwise_Relu) {
MLFloat16(0.f), MLFloat16(0.f),
MLFloat16(17.5f), MLFloat16(9.5f)};

-  TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape);
-  TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true);
+  auto run_test = [&](const ConvOpAndTestAttributes& test_attrs) {
+    TestConvFp16Op(test_attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape);
+    TestConvFp16Op(test_attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true);
+  };
+  run_test(attrs);
+  attrs.domain = kMSInternalNHWCDomain;
+  attrs.excluded_providers = {kCpuExecutionProvider};
+  run_test(attrs);
}

TEST(ConvFp16Test, Conv2D_HardSigmoid) {
45 changes: 43 additions & 2 deletions onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
@@ -92,6 +92,47 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion) {
}

#ifdef XNNPACK_FP16_SUPPORTED
// This test can be removed once MLAS implements FP16 Clip fusion;
// for now, TestNhwcConvReluClipFusion_FP16 skips output verification.
TEST(XnnpackEP, TestNhwcConvReluFusion_FP16) {
const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "conv_relu_model_fp16.onnx";

RandomValueGenerator generator;
TensorShape input_shape_x{1, 16, 16, 192};
std::vector<MLFloat16> input_x = generator.Uniform<MLFloat16>(input_shape_x.GetDims(), -128, 128);

OrtValue ml_value_x;
CreateMLValue<MLFloat16>(input_shape_x.GetDims(), input_x.data(), OrtMemoryInfo(), &ml_value_x);

NameMLValMap feeds;
feeds.insert(std::make_pair("model_input", ml_value_x));

std::function<void(const Graph&)> verify = [](const Graph& graph) -> void {
ASSERT_EQ(graph.NumberOfNodes(), 2) << "Transpose nodes should have been removed, and "
"Conv+Relu should have been fused, leaving 2 nodes.";
auto node_iter = graph.Nodes().begin();
auto check_node = [](const Node& node, const std::string& fusion_type) {
const auto& attr = node.GetAttributes();
auto activation = attr.find("activation");
ASSERT_NE(activation, attr.cend()) << "Fused node should have activation attribute";
ASSERT_EQ(activation->second.s(), fusion_type);
};

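// Skip the first of the two remaining nodes; the second should be the
// fused Conv carrying the activation attribute checked below.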
++node_iter;
check_node(*node_iter, "Relu");
};

EPVerificationParams params;
params.ep_node_assignment = ExpectedEPNodeAssignment::Some;
params.fp32_abs_err = 0.5f;
params.graph_verifier = &verify;

auto ep = DefaultXnnpackExecutionProvider();
// So far, the CPU EP doesn't support FP16 Conv fusion, so verify_outputs is skipped.
RunAndVerifyOutputsWithEP(ort_model_path, "TestNhwcConvReluFusion_FP16", std::move(ep), feeds, params);
}
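Outside the test harness, the same model can be exercised through the public C++ API with the XNNPACK EP registered by name. A minimal sketch (the model file name comes from the test above; everything else is an assumption, and XNNPACK must be compiled into the build):

  #include <onnxruntime_cxx_api.h>

  int main() {
    Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "xnnpack_fp16_demo"};
    Ort::SessionOptions so;
    // Register the XNNPACK EP with an empty provider-option map.
    so.AppendExecutionProvider("XNNPACK", {});
    Ort::Session session{env, ORT_TSTR("conv_relu_model_fp16.onnx"), so};
    // Feed "model_input" (1x16x16x192 fp16) through session.Run(...) as needed.
    return 0;
  }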

// For now, this test mainly checks whether the XNNPACK Clip fusion works.
TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) {
const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_conv_clip_relu_fp16.onnx";

@@ -126,8 +167,8 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) {
};

EPVerificationParams params;
-  params.ep_node_assignment = ExpectedEPNodeAssignment::All;
-  params.fp32_abs_err = 0.0002f;
+  params.ep_node_assignment = ExpectedEPNodeAssignment::Some;
+  params.fp32_abs_err = 0.5f;
params.graph_verifier = &verify;

auto ep = DefaultXnnpackExecutionProvider();
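On the relaxed tolerance: a back-of-envelope check (an illustration under assumptions, not taken from the commit) of fp16 resolution near the magnitudes that inputs on the order of ±128, as in the test above, can produce. The old 0.0002f bound sat well below one fp16 ULP at such magnitudes, so any rounding in the fp16 path would trip it; 0.5f leaves headroom for a few ULPs of accumulation drift.

  #include <cmath>
  #include <cstdio>

  int main() {
    // IEEE fp16 has a 10-bit significand, so adjacent representable values
    // near magnitude |y| are spaced roughly |y| * 2^-10 apart.
    const double output_magnitude = 256.0;  // assumed typical |y| for this model
    const double ulp = output_magnitude * std::pow(2.0, -10.0);
    std::printf("one fp16 ULP near %.0f is %.2f\n", output_magnitude, ulp);  // prints 0.25
    return 0;
  }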
Binary file not shown.
