From 53be802f39687b29b32fbd5c842e18c7567a0a9e Mon Sep 17 00:00:00 2001
From: Yi Zhang
Date: Tue, 10 Oct 2023 13:03:58 +0800
Subject: [PATCH] Onnx_test_runner and onnxruntime_test_all use the same broken test list. (#17840)

---
 onnxruntime/test/onnx/TestCase.cc             | 553 ++++++++++++++++++
 onnxruntime/test/onnx/TestCase.h              |  15 +
 onnxruntime/test/onnx/main.cc                 | 504 +---------------
 onnxruntime/test/providers/cpu/model_tests.cc | 507 +---------------
 4 files changed, 583 insertions(+), 996 deletions(-)

diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc
index fcef036163d4c..087b9d604128e 100644
--- a/onnxruntime/test/onnx/TestCase.cc
+++ b/onnxruntime/test/onnx/TestCase.cc
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "callback.h"
@@ -816,3 +817,555 @@ double TestTolerances::relative(const std::string& name) const {
   }
   return iter->second;
 }
+
+std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider_name) {
+  auto broken_tests = std::make_unique<std::set<BrokenTest>>(std::initializer_list<BrokenTest>{
+      {"slice_neg_steps",
+       "Type parameter (Tind) bound to different types (tensor(int64) and tensor(int32) in node ()."},
+      {"cast_BFLOAT16_to_FLOAT", "Unexpected input data type"},
+      {"loop13_seq", "Creation of empty sequences is currently not supported in the test runner"},
+      {"sequence_insert_at_front", "shape mismatch, expect {4} got {3}"},
+      {"cast_FLOAT_to_BFLOAT16", "expect uint16 got bfloat16"},
+      {"mnist", "Input data isn't in valid range"},
+      {"BERT_Squad", "test data bug"},
+      {"constantofshape_float_ones", "test data bug", {"opset9", "opset10"}},
+      {"constantofshape_int_zeros", "test data bug", {"opset9", "opset10"}},
+      {"cast_STRING_to_FLOAT", "Linux CI has old ONNX python package with bad test data", {"opset9", "opset10"}},
+      // Numpy float to string has unexpected rounding for some results given numpy default precision is meant to be 8.
+      // "e.g. 0.296140194 -> '0.2961402' not '0.29614019'. ORT produces the latter with precision set to 8,
+      // which doesn't match the expected output that was generated with numpy.
+      {"cast_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results."},
+      {"tf_nasnet_large", "disable temporarily"},
+      {"tf_nasnet_mobile", "disable temporarily"},
+      {"tf_pnasnet_large", "disable temporarily"},
+      {"shrink", "test case is wrong", {"opset9"}},
+      {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"},
+      {"tf_inception_v2", "result mismatch"},
+      {"tf_resnet_v1_50", "result mismatch when Conv BN Fusion is applied"},
+      {"tf_resnet_v1_101", "result mismatch when Conv BN Fusion is applied"},
+      {"tf_resnet_v1_152", "result mismatch when Conv BN Fusion is applied"},
+      {"mxnet_arcface", "Model is an invalid ONNX model"},
+      {"unique_not_sorted_without_axis", "Expected data for 'Y' is incorrect and in sorted order."},
+      {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."},
+      {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"},
+      {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"},
+      {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", "Bad onnx test output.
Needs test fix."}, + {"bitshift_right_uint16", "BitShift(11) uint16 support not enabled currently"}, + {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"}, + {"maxunpool_export_with_output_shape", + "Invalid output in ONNX test. See https://github.com/onnx/onnx/issues/2398"}, + {"cntk_simple_seg", "Bad onnx test output caused by wrong SAME_UPPER/SAME_LOWER for ConvTranspose"}, + {"training_dropout", "result differs", {}}, // Temporary, subsequent PR will remove this. + {"training_dropout_default", "result differs", {}}, // Temporary, subsequent PR will remove this. + {"training_dropout_default_mask", "result differs", {}}, // Temporary, subsequent PR will remove this. + {"training_dropout_mask", "result differs", {}}, // Temporary, subsequent PR will remove this. + {"batchnorm_epsilon_training_mode", "training only", {}}, + {"batchnorm_example_training_mode", "training only", {}}, + {"bernoulli", "type error", {}}, + {"bernoulli_double", "type error", {}}, + {"bernoulli_double_expanded", "type error", {}}, + {"bernoulli_expanded", "type error", {}}, + {"bernoulli_seed", "type error", {}}, + {"bernoulli_seed_expanded", "type error", {}}, + {"castlike_BFLOAT16_to_FLOAT", "type error", {}}, + {"castlike_BFLOAT16_to_FLOAT_expanded", "type error", {}}, + {"castlike_FLOAT_to_BFLOAT16", "type error", {}}, + {"castlike_FLOAT_to_BFLOAT16_expanded", "type error", {}}, + {"castlike_FLOAT_to_STRING", "type error", {}}, + {"castlike_FLOAT_to_STRING_expanded", "type error", {}}, + {"convtranspose_autopad_same", "Test data has been corrected in ONNX 1.10.", {"opset13", "opset14"}}, + {"gru_batchwise", "type error", {}}, + {"lstm_batchwise", "type error", {}}, + {"optional_get_element", "type error", {}}, + {"optional_get_element_sequence", "type error", {}}, + {"optional_has_element", "type error", {}}, + {"optional_has_element_empty", "type error", {}}, + {"shape_end_1", "type error", {}}, + {"shape_end_negative_1", "type error", {}}, + {"shape_start_1", "type error", {}}, + {"shape_start_1_end_2", "type error", {}}, + {"shape_start_1_end_negative_1", "type error", {}}, + {"shape_start_negative_1", "type error", {}}, + {"simple_rnn_batchwise", "type error", {}}, + {"mod_float_mixed_sign_example", "fmod attribute must be true for floating point types", {}}, + {"col2im_pads", "result mismatch", {"opset18"}}, +#ifdef ENABLE_TRAINING_CORE + {"adagrad", "not a registered function/op", {}}, // Op not registered. + {"adagrad_multiple", "not a registered function/op", {}}, // Op not registered. + {"adam", "not a registered function/op", {}}, // Op not registered. + {"adam_multiple", "not a registered function/op", {}}, // Op not registered. + {"gradient_of_add", "not a registered function/op", {}}, // Op not registered. + {"gradient_of_add_and_mul", "not a registered function/op", {}}, // Op not registered. + {"momentum", "not a registered function/op", {}}, // Op not registered. + {"momentum_multiple", "not a registered function/op", {}}, // Op not registered. + {"nesterov_momentum", "not a registered function/op", {}}, // Op not registered. 
+ {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", + "type error", + {"opset12"}}, + {"softmax_cross_entropy_mean_weight_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_weight_ignore_index_3d", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_weight_ignore_index_4d", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_no_weight_ignore_index", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", + "type error", + {"opset12"}}, + {"softmax_cross_entropy_mean_3d_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_none_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_3d", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_none_weights_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_sum_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_weight_ignore_index", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "type error", {"opset12"}}, + {"softmax_cross_entropy_sum", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", + "type error", + {"opset12"}}, + {"softmax_cross_entropy_none_weights", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "type error", {"opset12"}}, + {"softmax_cross_entropy_none", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "type error", {"opset12"}}, + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_weight", "type error", {"opset12"}}, + {"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "type error", {"opset12"}}, +#endif + {"mask_rcnn_keras", "this model currently has an invalid contrib op version set to 10", {}}}); + + // Some EPs may fail to pass some specific testcases. + // For example TenosrRT EP may fail on FLOAT16 related testcases if GPU doesn't support float16. + // Instead of list all these testcases, we can use following keyword set to filter out testcases wchich contain + // specific keyword. 
+ // std::set broken_tests_keyword_set = {}; + + if (provider_name == "cuda") { +#ifdef _WIN32 + broken_tests->insert({"LSTM_Seq_lens_unpacked", "this test fails with new image since Aug 25."}); + broken_tests->insert({"bidaf", "this test fails with new image since Aug 25."}); + broken_tests->insert({"Candy", "Flaky test, need to investigate", {"opset9"}}); +#else + broken_tests->insert({"bidaf", "this test should be recovered when multi-gpu pipeline deprecates NV12", {"opset9"}}); +#endif + } + + if (provider_name == "nnapi") { + broken_tests->insert({"scan9_sum", "Error with the extra graph"}); + broken_tests->insert({"scan_sum", "Error with the extra graph"}); + broken_tests->insert({"mvn_expanded", "Failed to find kernel for MemcpyFromHost(1) (node Memcpy_1)"}); + broken_tests->insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"}); + broken_tests->insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"}); + broken_tests->insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"}); + broken_tests->insert({"gemm_transposeB", "Temporarily disabled pending investigation"}); + broken_tests->insert({"range_float_type_positive_delta_expanded", "Temporarily disabled pending investigation"}); + broken_tests->insert({"range_int32_type_negative_delta_expanded", "Temporarily disabled pending investigation"}); + broken_tests->insert({"convtranspose_1d", "1d convtranspose not supported yet"}); + broken_tests->insert({"convtranspose_3d", "3d convtranspose not supported yet"}); + broken_tests->insert({"maxpool_2d_uint8", "result mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NC_expanded", "shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_expanded", "shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean_expanded", "shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum_expanded", "shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_expanded", "shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean_expanded", "shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_expanded", "shape mismatch"}); + // Disable based on George Wu's recommendation. 
+ broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index_expanded", + "shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index", "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index_expanded", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NC", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_expanded", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index_expanded", "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", + "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight_expanded", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2", "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index", "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index_expanded", + "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight", "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean", "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index", + "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", + "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", + "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", + "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"}); + broken_tests->insert( + 
{"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob_expanded", + "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", + "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", + "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", + "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob_expanded", + "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "Shape mismatch"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_3d", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_3d_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_3d_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_3d_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index", "Shape mismatch"}); + 
broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_3d", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_4d", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_mean_weight_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_none", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_none_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_none_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_none_log_prob_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_none_weights", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_none_weights_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_none_weights_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_none_weights_log_prob_expanded", "Shape mismatch"}); + 
broken_tests->insert({"softmax_cross_entropy_sum", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_sum_expanded", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_sum_log_prob", "Shape mismatch"}); + broken_tests->insert({"softmax_cross_entropy_sum_log_prob_expanded", "Shape mismatch"}); + } + + if (provider_name == "tensorrt") { + broken_tests->insert({"convtranspose_with_kernel", "It causes segmentation fault"}); + broken_tests->insert({"convtranspose_pad", "It causes segmentation fault"}); + broken_tests->insert({"convtranspose_kernel_shape", "It causes segmentation fault"}); + broken_tests->insert({"dynamicquantizelinear_expanded", "It causes segmentation fault"}); + broken_tests->insert({"dynamicquantizelinear_min_adjusted_expanded", "It causes segmentation fault"}); + broken_tests->insert({"dynamicquantizelinear_max_adjusted_expanded", "It causes segmentation fault"}); + + broken_tests->insert({"basic_conv_with_padding", + "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build " + "engine for fused node"}); + broken_tests->insert({"basic_conv_without_padding", + "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build " + "engine for fused node"}); + broken_tests->insert({"conv_with_strides_no_padding", + "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build " + "engine for fused node"}); + + broken_tests->insert({"conv_with_autopad_same", + "Internal Error (node_of_y: Cannot set more than one input unless network has Q/DQ layers.)"}); + + // unsupported tests since opset16 + broken_tests->insert({"sequence_map_add_2_sequences", "not supported by TensorRT EP"}); + broken_tests->insert({"sequence_map_extract_shapes", "not supported by TensorRT EP."}); + broken_tests->insert({"sequence_map_add_1_sequence_1_tensor", "not supported by TensorRT EP."}); + broken_tests->insert({"sequence_map_identity_1_sequence", "not supported by TensorRT EP."}); + broken_tests->insert({"sequence_map_identity_2_sequences", "not supported by TensorRT EP."}); + broken_tests->insert({"sequence_map_identity_1_sequence_1_tensor", "not supported by TensorRT EP."}); + broken_tests->insert({"leakyrelu_expanded", "not supported by TensorRT EP."}); + broken_tests->insert({"leakyrelu_default_expanded", "not supported by TensorRT EP."}); + broken_tests->insert({"leakyrelu_example_expanded", "not supported by TensorRT EP."}); + broken_tests->insert({"prelu_broadcast_expanded", "not supported by TensorRT EP."}); + broken_tests->insert({"prelu_example_expanded", "not supported by TensorRT EP."}); + } + + if (provider_name == "dml") { + broken_tests->insert({"tinyyolov3", "The parameter is incorrect"}); + broken_tests->insert({"PixelShuffle", "Test requires 6D Reshape, which isn't supported by DirectML"}); + broken_tests->insert({"operator_permute2", "Test requires 6D Transpose, which isn't supported by DirectML"}); + broken_tests->insert({"resize_downsample_linear", + "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); + broken_tests->insert( + {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); + broken_tests->insert( + {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); + + // These tests are temporarily disabled pending investigation + broken_tests->insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending 
investigation"}); + broken_tests->insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"}); + broken_tests->insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"}); + broken_tests->insert({"mxnet_arcface", "Temporarily disabled pending investigation"}); + broken_tests->insert({"yolov3", "Temporarily disabled pending investigation"}); + broken_tests->insert({"tf_inception_v2", "Temporarily disabled pending investigation"}); + broken_tests->insert({"fp16_inception_v1", "Temporarily disabled pending investigation"}); + broken_tests->insert({"candy", "Temporarily disabled pending investigation"}); + broken_tests->insert({"BERT_Squad", "Temporarily disabled pending investigation"}); + broken_tests->insert({"LSTM_Seq_lens_unpacked", "The parameter is incorrect"}); + + broken_tests->insert({"resize_downsample_scales_linear", + "DML uses half_pixel and this test assumed \"asymmetric\" but does not include \"mode\""}); + broken_tests->insert({"resize_downsample_sizes_linear_pytorch_half_pixel", + "DML does not support downsampling by such a large factor - skips input pixels"}); + broken_tests->insert({"resize_downsample_sizes_nearest", + "DML uses pixel centers for nearest, rounding 1 value off for the middle column"}); + broken_tests->insert({"resize_upsample_sizes_nearest", + "DML uses pixel centers for nearest, which makes more sense (the 3rd row mismatches)"}); + broken_tests->insert({"unsqueeze_three_axes", "DML does not support 6D tensors"}); + broken_tests->insert({"unsqueeze_unsorted_axes", "DMLdoes not support 6D tensors"}); + + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", + "DML does not support 5D+ tensors"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", + "DML does not support 5D+ tensors"}); + broken_tests->insert( + {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", + "DML does not support 5D+ tensors"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", + "DML does not support 5D+ tensors"}); + broken_tests->insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", + "DML does not support 5D+ tensors"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", + "DML does not support 5D+ tensors"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", + "DML does not support 5D+ tensors"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", + "DML does not support 5D+ tensors"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", + "DML does not support 5D+ tensors"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "DML 
does not support 5D+ tensors"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", + "DML does not support 5D+ tensors"}); + broken_tests->insert( + {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", + "DML does not support 5D+ tensors"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", + "DML does not support 5D+ tensors"}); + broken_tests->insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", + "DML does not support 5D+ tensors"}); + } + + if (provider_name == "qnn") { + broken_tests->insert({"gemm_default_no_bias", "result differs"}); + broken_tests->insert({"resize_downsample_scales_linear", "result differs"}); + broken_tests->insert({"resize_downsample_scales_linear_antialias", "result differs"}); + broken_tests->insert({"resize_downsample_sizes_linear_antialias", "result differs"}); + broken_tests->insert({"sce_NCd1_mean_weight_negative_ii", "result differs"}); + broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_expanded", "result differs"}); + broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_log_prob", "result differs"}); + broken_tests->insert({"sce_NCd1_mean_weight_negative_ii_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean", "result differs"}); + broken_tests->insert({"sce_mean_3d", "result differs"}); + broken_tests->insert({"sce_mean_3d_expanded", "result differs"}); + broken_tests->insert({"sce_mean_3d_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_3d_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean_expanded", "result differs"}); + broken_tests->insert({"sce_mean_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_3d", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_3d_expanded", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_3d_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_3d_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_4d", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_4d_expanded", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_4d_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_4d_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_expanded", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_no_weight_ii_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean_weight", "result differs"}); + broken_tests->insert({"sce_mean_weight_expanded", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_3d", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_3d_expanded", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_3d_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_3d_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_4d", "result differs"}); + 
broken_tests->insert({"sce_mean_weight_ii_4d_expanded", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_4d_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_4d_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_expanded", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_weight_ii_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_mean_weight_log_prob", "result differs"}); + broken_tests->insert({"sce_mean_weight_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_none", "result differs"}); + broken_tests->insert({"sce_none_expanded", "result differs"}); + broken_tests->insert({"sce_none_log_prob", "result differs"}); + broken_tests->insert({"sce_none_log_prob_expanded", "result differs"}); + broken_tests->insert({"sce_sum", "result differs"}); + broken_tests->insert({"sce_sum_expanded", "result differs"}); + broken_tests->insert({"sce_sum_log_prob", "result differs"}); + broken_tests->insert({"sce_sum_log_prob_expanded", "result differs"}); + broken_tests->insert({"gridsample_reflection_padding", "result differs"}); + broken_tests->insert({"spacetodepth", "result differs"}); + } +#ifdef DISABLE_CONTRIB_OPS + broken_tests->insert({"coreml_SqueezeNet_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Permute_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_ReLU_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Padding-Upsampling-Normalizer_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"tiny_yolov2", "This model uses contrib ops."}); + broken_tests->insert({"fp16_tiny_yolov2", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Pooling_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Padding_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Normalizer_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_linear_sklearn_load_breast_cancer", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_linear_ImageNet_small", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_linear_ImageNet_large", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_linear_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_leakyrelu_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_hard_sigmoid_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_elu_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Dense_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Conv2D_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"coreml_VGG16_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"coreml_Resnet50_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"coreml_Inceptionv3_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"coreml_FNS-Candy_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"coreml_AgeNet_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_thresholdedrelu_ImageNet_large", "This model uses contrib ops."}); + 
broken_tests->insert({"keras2coreml_thresholdedrelu_ImageNet_small", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_thresholdedrelu_sklearn_load_breast_cancer", "This model uses contrib ops."}); + broken_tests->insert({"thresholdedrelu", "This model uses contrib ops."}); + broken_tests->insert({"thresholdedrelu_default", "This model uses contrib ops."}); + broken_tests->insert({"dynamic_slice_default_axes", "This model uses contrib ops."}); + broken_tests->insert({"thresholdedrelu_example", "This model uses contrib ops."}); + broken_tests->insert({"dynamic_slice_neg failed", "This model uses contrib ops."}); + broken_tests->insert({"dynamic_slice_start_out_of_bounds", "This model uses contrib ops."}); + broken_tests->insert({"dynamic_slice", "This model uses contrib ops."}); + broken_tests->insert({"dynamic_slice_end_out_of_bounds", "This model uses contrib ops."}); + broken_tests->insert({"dynamic_slice_neg", "This model uses contrib ops."}); + broken_tests->insert({"mvn", "This model uses contrib ops.", {"onnx130"}}); + broken_tests->insert({"cdist_float32_euclidean_1000_2000_1", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float32_euclidean_1000_2000_500", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float32_euclidean_1_1_1", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float32_sqeuclidean_1000_2000_1", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float32_sqeuclidean_1000_2000_500", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float32_sqeuclidean_1_1_1", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float64_euclidean_1000_2000_1", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float64_euclidean_1000_2000_500", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float64_euclidean_1_1_1", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float64_sqeuclidean_1000_2000_1", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float64_sqeuclidean_1000_2000_500", "This model uses contrib ops."}); + broken_tests->insert({"cdist_float64_sqeuclidean_1_1_1", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"bidaf", "This model uses contrib ops."}); + broken_tests->insert({"fp16_test_tiny_yolov2", "This model uses contrib ops."}); + broken_tests->insert({"fp16_coreml_FNS-Candy", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Repeat_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_BiDirectional_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"fp16_coreml_LinearRegression_NYCTaxi", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_GRU_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_SimpleRNN_ImageNet", "This model uses contrib ops."}); + broken_tests->insert({"keras2coreml_Dot_imageNet", "This model uses contrib ops."}); +#endif + return broken_tests; +} + +// Some EPs may fail to pass some specific testcases. +// For example TenosrRT EP may fail on FLOAT16 related testcases if GPU doesn't support float16. +// Instead of list all these testcases, we can use following keyword set to filter out testcases wchich contain +// specific keyword. 
+std::unique_ptr<std::set<std::string>> GetBrokenTestsKeyWordSet(const std::string& provider_name) {
+  auto broken_tests_keyword_set = std::make_unique<std::set<std::string>>();
+  if (provider_name == "tensorrt") {
+    broken_tests_keyword_set->insert({"scatternd_add"});
+    broken_tests_keyword_set->insert({"scatternd_multiply"});
+    broken_tests_keyword_set->insert({"scatter_elements_with_duplicate_indices"});
+
+    // sce op is not supported
+    broken_tests_keyword_set->insert({"sce"});
+
+    // TensorRT EP CI uses Nvidia Tesla M60 which doesn't support fp16.
+    broken_tests_keyword_set->insert({"FLOAT16"});
+  }
+  return broken_tests_keyword_set;
+}
diff --git a/onnxruntime/test/onnx/TestCase.h b/onnxruntime/test/onnx/TestCase.h
index 0e3e7852f5180..4d4b2177019c9 100644
--- a/onnxruntime/test/onnx/TestCase.h
+++ b/onnxruntime/test/onnx/TestCase.h
@@ -106,3 +106,18 @@ void LoadTests(const std::vector>& input_paths
                const TestTolerances& tolerances,
                const std::unordered_set>& disabled_tests,
                const std::function)>& process_function);
+
+struct BrokenTest {
+  std::string test_name_;
+  std::string reason_;
+  std::set<std::string> broken_opset_versions_ = {};  // apply to all versions if empty
+  BrokenTest(std::string name, std::string reason) : test_name_(std::move(name)), reason_(std::move(reason)) {}
+  BrokenTest(std::string name, std::string reason, const std::initializer_list<std::string>& versions) : test_name_(std::move(name)), reason_(std::move(reason)), broken_opset_versions_(versions) {}
+  bool operator<(const struct BrokenTest& test) const {
+    return strcmp(test_name_.c_str(), test.test_name_.c_str()) < 0;
+  }
+};
+
+std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider_name);
+
+std::unique_ptr<std::set<std::string>> GetBrokenTestsKeyWordSet(const std::string& provider_name);
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index da67987f523d8..f165b3a4a647a 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -171,6 +171,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
   bool user_graph_optimization_level_set = false;
   bool set_denormal_as_zero = false;
   std::basic_string ep_runtime_config_string;
+  std::string provider_name = "cpu";
   OrtLoggingLevel logging_level = ORT_LOGGING_LEVEL_ERROR;
   bool verbose_logging_required = false;
@@ -216,6 +217,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
         whitelisted_test_cases.emplace_back(optarg);
         break;
       case 'e':
+        provider_name = ToUTF8String(optarg);
         if (!CompareCString(optarg, ORT_TSTR("cpu"))) {
           // do nothing
         } else if (!CompareCString(optarg, ORT_TSTR("cuda"))) {
@@ -487,7 +489,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
       } else if (key == "qnn_saver_path") {
         // no validation
       } else {
-        ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
+        ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
 'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])");
       }
@@ -801,505 +803,17 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'.
\n)"); fwrite(res.c_str(), 1, res.size(), stdout); } - struct BrokenTest { - std::string test_name_; - std::string reason_; - std::set broken_versions_ = {}; // apply to all versions if empty - BrokenTest(std::string name, std::string reason) : test_name_(std::move(name)), reason_(std::move(reason)) {} - BrokenTest(std::string name, std::string reason, const std::initializer_list& versions) : test_name_(std::move(name)), reason_(std::move(reason)), broken_versions_(versions) {} - bool operator<(const struct BrokenTest& test) const { - return strcmp(test_name_.c_str(), test.test_name_.c_str()) < 0; - } - }; - - std::set broken_tests = { - {"BERT_Squad", "test data bug"}, - {"constantofshape_float_ones", "test data bug", {"onnx141", "onnx150"}}, - {"constantofshape_int_zeros", "test data bug", {"onnx141", "onnx150"}}, - {"convtranspose_autopad_same", "Test data has been corrected in ONNX 1.10.", {"onnx180", "onnx181", "onnx190"}}, - {"cast_STRING_to_FLOAT", "Linux CI has old ONNX python package with bad test data", {"onnx141"}}, - // Numpy float to string has unexpected rounding for some results given numpy default precision is meant to be 8. - // "e.g. 0.296140194 -> '0.2961402' not '0.29614019'. ORT produces the latter with precision set to 8, - // which doesn't match the expected output that was generated with numpy. - {"cast_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results."}, - {"cntk_simple_seg", "Bad onnx test output caused by wrong SAME_UPPER/SAME_LOWER for ConvTranspose", {}}, - {"tf_nasnet_large", "disable temporarily"}, - {"tf_nasnet_mobile", "disable temporarily"}, - {"tf_pnasnet_large", "disable temporarily"}, - {"shrink", "test case is wrong", {"onnx141"}}, - {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"}, - {"tf_inception_v2", "result mismatch"}, - {"tf_resnet_v1_50", "result mismatch when Conv BN Fusion is applied"}, - {"tf_resnet_v1_101", "result mismatch when Conv BN Fusion is applied"}, - {"tf_resnet_v1_152", "result mismatch when Conv BN Fusion is applied"}, - {"mxnet_arcface", "Model is an invalid ONNX model"}, - {"unique_not_sorted_without_axis", "Expected data for 'Y' is incorrect and in sorted order."}, - {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."}, - {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"}, - {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"}, - {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."}, - {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."}, - {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."}, - {"resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", "Bad onnx test output. Needs test fix."}, - {"bitshift_right_uint16", "BitShift(11) uint16 support not enabled currently"}, - {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"}, - {"maxunpool_export_with_output_shape", "Invalid output in ONNX test. See https://github.com/onnx/onnx/issues/2398"}, - {"training_dropout", "result differs", {}}, // Temporary, subsequent PR will remove this. - {"training_dropout_default", "result differs", {}}, // Temporary, subsequent PR will remove this. - {"training_dropout_default_mask", "result differs", {}}, // Temporary, subsequent PR will remove this. - {"training_dropout_mask", "result differs", {}}, // Temporary, subsequent PR will remove this. 
- {"adagrad", "not a registered function/op", {}}, // Op not registered. - {"adagrad_multiple", "not a registered function/op", {}}, // Op not registered. - {"adam", "not a registered function/op", {}}, // Op not registered. - {"adam_multiple", "not a registered function/op", {}}, // Op not registered. - {"gradient_of_add", "not a registered function/op", {}}, // Op not registered. - {"gradient_of_add_and_mul", "not a registered function/op", {}}, // Op not registered. - {"momentum", "not a registered function/op", {}}, // Op not registered. - {"momentum_multiple", "not a registered function/op", {}}, // Op not registered. - {"nesterov_momentum", "not a registered function/op", {}}, // Op not registered. - {"sequence_insert_at_back", "onnx currently not supporting loading segment", {}}, - {"sequence_insert_at_front", "onnx currently not supporting loading segment", {}}, - {"loop13_seq", "ORT api does not currently support creating empty sequences (needed for this test)", {}}, - {"cast_FLOAT_to_BFLOAT16", "onnx generate bfloat tensor as uint16 type", {}}, - {"cast_BFLOAT16_to_FLOAT", "onnx generate bfloat tensor as uint16 type", {}}, - {"castlike_FLOAT_to_BFLOAT16", "Depends on cast.", {}}, - {"castlike_BFLOAT16_to_FLOAT", "Depends on cast", {}}, - {"castlike_FLOAT_to_BFLOAT16_expanded", "Depends on cast.", {}}, - {"castlike_BFLOAT16_to_FLOAT_expanded", "Depends on cast", {}}, - {"castlike_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results.", {}}, - {"castlike_FLOAT_to_STRING_expanded", "Numpy float to string has unexpected rounding for some results.", {}}, - {"bernoulli", "By design. Test data is for informational purpose because the generator is non deterministic."}, - {"bernoulli_double", "By design. Test data is for informational purpose because the generator is non deterministic."}, - {"bernoulli_double_expanded", "By design. Test data is for informational purpose because the generator is non deterministic."}, - {"bernoulli_seed", "By design. Test data is for informational purpose because the generator is non deterministic."}, - {"bernoulli_seed_expanded", "By design. Test data is for informational purpose because the generator is non deterministic."}, - {"bernoulli_expanded", "By design. 
Test data is for informational purpose because the generator is non deterministic."}, - {"test_roialign_aligned_true", "Opset 16 not supported yet."}, - {"test_roialign_aligned_false", "Opset 16 not supported yet."}, - {"test_roialign_mode_max", "Onnx roialign mode expected output is incorrect."}, - {"test_scatternd_add", "Opset 16 not supported yet."}, - {"test_scatternd_multiply", "Opset 16 not supported yet."}, - {"test_scatter_elements_with_duplicate_indices", "Opset 16 not supported yet."}, - {"col2im_pads", "onnx 18 test data error."}, - -#if defined(DISABLE_OPTIONAL_TYPE) - {"test_optional_get_element", "Optional type not supported in this build flavor."}, - {"test_optional_get_element_sequence", "Optional type not supported in this build flavor."}, - {"test_optional_has_element", "Optional type not supported in this build flavor."}, - {"test_optional_has_element_empty", "Optional type not supported in this build flavor."}, - {"test_if_opt", "Optional type not supported in this build flavor."}, - {"test_loop16_seq_none", "Optional type not supported in this build flavor."}, - {"test_identity_opt", "Optional type not supported in this build flavor."}, -#endif - - }; - -#ifdef DISABLE_ML_OPS - auto starts_with = [](const std::string& find_in, const std::string& find_what) { - return find_in.compare(0, find_what.size(), find_what) == 0; - }; - for (const auto& test_ptr : owned_tests) { - const std::string& test_name = test_ptr->GetTestCaseName(); - if (starts_with(test_name, "XGBoost_") || - starts_with(test_name, "coreml_") || - starts_with(test_name, "scikit_") || - starts_with(test_name, "libsvm_")) { - broken_tests.insert({test_name, "Traditional ML ops are disabled in this build."}); - } - } -#endif - - if (enable_openvino) { - broken_tests.insert({"operator_permute2", "Disabled temporariliy"}); - broken_tests.insert({"operator_repeat", "Disabled temporariliy"}); - broken_tests.insert({"operator_repeat_dim_overflow", "Disabled temporariliy"}); - broken_tests.insert({"mlperf_ssd_resnet34_1200", "Disabled temporariliy"}); - broken_tests.insert({"candy", "Results mismatch: 1 of 150528"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "OpenVino does not support 5D+ tensors"}); - 
broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "OpenVino does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "OpenVino does not support 5D+ tensors"}); - } - - if (enable_dnnl) { - broken_tests.insert({"tf_mobilenet_v2_1.0_224", "result mismatch"}); - broken_tests.insert({"tf_mobilenet_v2_1.4_224", "result mismatch"}); - broken_tests.insert({"tf_mobilenet_v1_1.0_224", "result mismatch"}); - broken_tests.insert({"mobilenetv2-1.0", "result mismatch"}); - broken_tests.insert({"candy", "result mismatch"}); - broken_tests.insert({"range_float_type_positive_delta_expanded", "get unknown exception from DNNL EP"}); - broken_tests.insert({"range_int32_type_negative_delta_expanded", "get unknown exception from DNNL EP"}); - broken_tests.insert({"averagepool_2d_ceil", "maxpool ceiling not supported"}); - broken_tests.insert({"maxpool_2d_ceil", "maxpool ceiling not supported"}); - broken_tests.insert({"maxpool_2d_dilations", "maxpool dilations not supported"}); - broken_tests.insert({"mlperf_ssd_resnet34_1200", "test pass on dev box but fails on CI build"}); - broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"}); - broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"}); - broken_tests.insert({"maxpool_2d_uint8", "Does not work on DNNL, NNAPI"}); - } - - if (enable_nnapi) { - broken_tests.insert({"scan9_sum", "Error with the extra graph"}); - broken_tests.insert({"scan_sum", "Error with the extra graph"}); - broken_tests.insert({"mvn_expanded", "Failed to find kernel for MemcpyFromHost(1) (node Memcpy_1)"}); - broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"gemm_transposeB", "Temporarily disabled pending investigation"}); - broken_tests.insert({"range_float_type_positive_delta_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"range_int32_type_negative_delta_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"}); - broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"}); - broken_tests.insert({"maxpool_2d_uint8", "result mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_expanded", "shape mismatch"}); - // Disable based on George Wu's recommendation. 
- broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"}); - 
broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_3d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_3d_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_3d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_3d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_expanded", "Shape mismatch"}); - 
broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_weights", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_weights_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_weights_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_weights_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_sum", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_sum_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_sum_log_prob", "Shape mismatch"}); - 
broken_tests.insert({"softmax_cross_entropy_sum_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"nllloss_NCd1_ignore_index", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_ignore_index_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ignore_index", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ignore_index_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_weight", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_weight_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_weight_ignore_index", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_weight_ignore_index_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ignore_index", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ignore_index_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1d2_with_weight_reduction_mean", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1d2_with_weight_reduction_mean_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1d2d3d4d5_mean_weight", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1d2d3d4d5_mean_weight_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_ii", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_ii_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ii", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ii_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_weight_ii", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1_weight_ii_expanded", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ii", "wait for investigation"}); - broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ii_expanded", "wait for investigation"}); - } - - if (enable_tensorrt) { - broken_tests.insert({"fp16_shufflenet", "TRT EP bug"}); - broken_tests.insert({"fp16_inception_v1", "TRT EP bug"}); - broken_tests.insert({"fp16_tiny_yolov2", "TRT EP bug"}); - broken_tests.insert({"tf_inception_v3", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_mobilenet_v1_1.0_224", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_mobilenet_v2_1.0_224", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_mobilenet_v2_1.4_224", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_resnet_v1_101", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_resnet_v1_152", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_resnet_v1_50", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_resnet_v2_101", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_resnet_v2_152", "TRT Engine couldn't be created"}); - broken_tests.insert({"tf_resnet_v2_50", "TRT Engine couldn't be created"}); - broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"}); - broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"}); - } - - if (enable_cuda) { - broken_tests.insert({"candy", "result mismatch"}); - broken_tests.insert({"tinyyolov3", "The parameter is incorrect"}); - broken_tests.insert({"mlperf_ssd_mobilenet_300", "unknown error"}); - 
broken_tests.insert({"mlperf_ssd_resnet34_1200", "unknown error"}); - broken_tests.insert({"tf_inception_v1", "flaky test"}); // TODO: Investigate cause for flakiness - broken_tests.insert({"faster_rcnn", "Linux: faster_rcnn:output=6383:shape mismatch, expect {77} got {57}"}); - broken_tests.insert({"split_zero_size_splits", "alloc failed"}); - } - - if (enable_dml) { - broken_tests.insert({"tinyyolov3", "The parameter is incorrect"}); - broken_tests.insert({"PixelShuffle", "Test requires 6D Reshape, which isn't supported by DirectML"}); - broken_tests.insert({"operator_permute2", "Test requires 6D Transpose, which isn't supported by DirectML"}); - broken_tests.insert({"resize_downsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); - broken_tests.insert({"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); - broken_tests.insert({"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); - - // These tests are temporarily disabled pending investigation - broken_tests.insert({"dynamicquantizelinear", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"mxnet_arcface", "Temporarily disabled pending investigation"}); - broken_tests.insert({"yolov3", "Temporarily disabled pending investigation"}); - broken_tests.insert({"tf_inception_v2", "Temporarily disabled pending investigation"}); - broken_tests.insert({"fp16_inception_v1", "Temporarily disabled pending investigation"}); - broken_tests.insert({"candy", "Temporarily disabled pending investigation"}); - broken_tests.insert({"BERT_Squad", "Temporarily disabled pending investigation"}); - broken_tests.insert({"LSTM_Seq_lens_unpacked", "The parameter is incorrect"}); - - broken_tests.insert({"resize_downsample_scales_linear", "DML uses half_pixel and this test assumed \"asymmetric\" but does not include \"mode\""}); - broken_tests.insert({"resize_downsample_sizes_linear_pytorch_half_pixel", "DML does not support downsampling by such a large factor - skips input pixels"}); - broken_tests.insert({"resize_downsample_sizes_nearest", "DML uses pixel centers for nearest, rounding 1 value off for the middle column"}); - broken_tests.insert({"resize_upsample_sizes_nearest", "DML uses pixel centers for nearest, which makes more sense (the 3rd row mismatches)"}); - broken_tests.insert({"unsqueeze_three_axes", "DML does not support 6D tensors"}); - broken_tests.insert({"unsqueeze_unsorted_axes", "DMLdoes not support 6D tensors"}); - - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "DML does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "DML does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"}); - 
broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "DML does not support 5D+ tensors"}); - - // TODO: Remove identity tests when fixed #42638109 - broken_tests.insert({"identity_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_add_1_sequence_1_tensor_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_add_1_sequence_1_tensor_expanded_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_add_2_sequences_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_add_2_sequences_expanded_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_extract_shapes_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_extract_shapes_expanded_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor_expanded_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_identity_1_sequence_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_identity_1_sequence_expanded_cpu", "Optional type not yet supported for identity-16."}); - broken_tests.insert({"sequence_map_identity_2_sequences_cpu", "Optional type not yet supported for identity-16."}); - 
broken_tests.insert({"sequence_map_identity_2_sequences_expanded_cpu", "Optional type not yet supported for identity-16."}); - } - if (enable_qnn) { - broken_tests.insert({"gemm_default_no_bias", "result differs"}); - broken_tests.insert({"resize_downsample_scales_linear", "result differs"}); - broken_tests.insert({"resize_downsample_scales_linear_antialias", "result differs"}); - broken_tests.insert({"resize_downsample_sizes_linear_antialias", "result differs"}); - broken_tests.insert({"sce_NCd1_mean_weight_negative_ii", "result differs"}); - broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_expanded", "result differs"}); - broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_log_prob", "result differs"}); - broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean", "result differs"}); - broken_tests.insert({"sce_mean_3d", "result differs"}); - broken_tests.insert({"sce_mean_3d_expanded", "result differs"}); - broken_tests.insert({"sce_mean_3d_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_3d_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean_expanded", "result differs"}); - broken_tests.insert({"sce_mean_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_3d", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_3d_expanded", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_3d_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_3d_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_4d", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_4d_expanded", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_4d_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_4d_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_expanded", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_no_weight_ii_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean_weight", "result differs"}); - broken_tests.insert({"sce_mean_weight_expanded", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_3d", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_3d_expanded", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_3d_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_3d_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_4d", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_4d_expanded", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_4d_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_4d_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_expanded", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_weight_ii_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_mean_weight_log_prob", "result differs"}); - broken_tests.insert({"sce_mean_weight_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_none", "result differs"}); - 
broken_tests.insert({"sce_none_expanded", "result differs"}); - broken_tests.insert({"sce_none_log_prob", "result differs"}); - broken_tests.insert({"sce_none_log_prob_expanded", "result differs"}); - broken_tests.insert({"sce_sum", "result differs"}); - broken_tests.insert({"sce_sum_expanded", "result differs"}); - broken_tests.insert({"sce_sum_log_prob", "result differs"}); - broken_tests.insert({"sce_sum_log_prob_expanded", "result differs"}); - broken_tests.insert({"gridsample_reflection_padding", "result differs"}); - broken_tests.insert({"spacetodepth", "result differs"}); - } -#if defined(_WIN32) && !defined(_WIN64) - broken_tests.insert({"vgg19", "failed: bad allocation"}); -#endif - - // Disable mask_rcnn_keras as this model currently has an invalid contrib op version set to 10 - broken_tests.insert({"mask_rcnn_keras", "This model uses contrib ops."}); - -#ifdef DISABLE_CONTRIB_OPS - broken_tests.insert({"coreml_SqueezeNet_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Permute_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_ReLU_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Padding-Upsampling-Normalizer_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"tiny_yolov2", "This model uses contrib ops."}); - broken_tests.insert({"fp16_tiny_yolov2", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Pooling_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Padding_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Normalizer_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_linear_sklearn_load_breast_cancer", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_linear_ImageNet_small", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_linear_ImageNet_large", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_linear_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_leakyrelu_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_hard_sigmoid_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_elu_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Dense_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Conv2D_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_VGG16_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_Resnet50_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_Inceptionv3_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_FNS-Candy_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_AgeNet_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_thresholdedrelu_ImageNet_large", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_thresholdedrelu_ImageNet_small", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_thresholdedrelu_sklearn_load_breast_cancer", "This model uses contrib ops."}); - broken_tests.insert({"thresholdedrelu", "This model uses contrib ops."}); - broken_tests.insert({"thresholdedrelu_default", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_default_axes", "This model uses contrib ops."}); - 
broken_tests.insert({"thresholdedrelu_example", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_neg failed", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_start_out_of_bounds", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_end_out_of_bounds", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_neg", "This model uses contrib ops."}); - broken_tests.insert({"mvn", "This model uses contrib ops.", {"onnx130"}}); - broken_tests.insert({"cdist_float32_euclidean_1000_2000_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_euclidean_1000_2000_500", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_euclidean_1_1_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_sqeuclidean_1000_2000_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_sqeuclidean_1000_2000_500", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_sqeuclidean_1_1_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_euclidean_1000_2000_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_euclidean_1000_2000_500", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_euclidean_1_1_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_500", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_sqeuclidean_1_1_1", "This model uses contrib ops."}); -#endif - + auto broken_tests = GetBrokenTests(provider_name); int result = 0; for (const auto& p : stat.GetFailedTest()) { BrokenTest t = {p.first, ""}; - auto iter = broken_tests.find(t); - if (iter == broken_tests.end() || (p.second != TestModelInfo::unknown_version && !iter->broken_versions_.empty() && - iter->broken_versions_.find(p.second) == iter->broken_versions_.end())) { + auto iter = broken_tests->find(t); + if (iter == broken_tests->end() || (p.second != TestModelInfo::unknown_version && !iter->broken_opset_versions_.empty() && + iter->broken_opset_versions_.find(p.second) == iter->broken_opset_versions_.end())) { fprintf(stderr, "test %s failed, please fix it\n", p.first.c_str()); result = -1; + } else { + fprintf(stderr, "test %s failed, but it is a known broken test, so we ignore it\n", p.first.c_str()); } } return result; diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 999f04398d8dd..13dcded6f3b86 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -13,6 +13,7 @@ #include "asserts.h" #include #include "default_providers.h" +#include "test/onnx/TestCase.h" #include #include #include @@ -66,23 +67,6 @@ namespace test { // parameter is provider_name + "_" + model_path class ModelTest : public testing::TestWithParam> {}; -namespace { -struct BrokenTest { - std::string test_name_; - std::string reason_; - std::set broken_opset_versions_ = {}; // apply to all versions if empty - BrokenTest(std::string name, std::string reason) : test_name_(std::move(name)), reason_(std::move(reason)) { - } - - BrokenTest(std::string name, std::string reason, const std::initializer_list& opversions) - : test_name_(std::move(name)), reason_(std::move(reason)), 
broken_opset_versions_(opversions) { - } - - bool operator<(const struct BrokenTest& test) const { - return strcmp(test_name_.c_str(), test.test_name_.c_str()) < 0; - } -}; -} // namespace #ifdef GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ModelTest); #endif @@ -114,488 +98,9 @@ TEST_P(ModelTest, Run) { SkipTest("it has the training domain. No pipeline should need to run these tests."); return; } - std::set broken_tests = { - {"slice_neg_steps", - "Type parameter (Tind) bound to different types (tensor(int64) and tensor(int32) in node ()."}, - {"cast_BFLOAT16_to_FLOAT", "Unexpected input data type"}, - {"loop13_seq", "Creation of empty sequences is currently not supported in the test runner"}, - {"sequence_insert_at_front", "shape mismatch, expect {4} got {3}"}, - {"cast_FLOAT_to_BFLOAT16", "expect uint16 got bfloat16"}, - {"mnist", "Input data isn't in valid range"}, - {"BERT_Squad", "test data bug"}, - {"constantofshape_float_ones", "test data bug", {"opset9", "opset10"}}, - {"constantofshape_int_zeros", "test data bug", {"opset9", "opset10"}}, - {"cast_STRING_to_FLOAT", "Linux CI has old ONNX python package with bad test data", {"opset9", "opset10"}}, - // Numpy float to string has unexpected rounding for some results given numpy default precision is meant to be 8. - // "e.g. 0.296140194 -> '0.2961402' not '0.29614019'. ORT produces the latter with precision set to 8, - // which doesn't match the expected output that was generated with numpy. - {"cast_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results."}, - {"tf_nasnet_large", "disable temporarily"}, - {"tf_nasnet_mobile", "disable temporarily"}, - {"tf_pnasnet_large", "disable temporarily"}, - {"shrink", "test case is wrong", {"opset9"}}, - {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"}, - {"tf_inception_v2", "result mismatch"}, - {"tf_resnet_v1_50", "result mismatch when Conv BN Fusion is applied"}, - {"tf_resnet_v1_101", "result mismatch when Conv BN Fusion is applied"}, - {"tf_resnet_v1_152", "result mismatch when Conv BN Fusion is applied"}, - {"mxnet_arcface", "Model is an invalid ONNX model"}, - {"unique_not_sorted_without_axis", "Expected data for 'Y' is incorrect and in sorted order."}, - {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."}, - {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"}, - {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"}, - {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."}, - {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."}, - {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."}, - {"resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", "Bad onnx test output. Needs test fix."}, - {"bitshift_right_uint16", "BitShift(11) uint16 support not enabled currently"}, - {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"}, - {"maxunpool_export_with_output_shape", - "Invalid output in ONNX test. See https://github.com/onnx/onnx/issues/2398"}, - {"cntk_simple_seg", "Bad onnx test output caused by wrong SAME_UPPER/SAME_LOWER for ConvTranspose"}, - {"training_dropout", "result differs", {}}, // Temporary, subsequent PR will remove this. - {"training_dropout_default", "result differs", {}}, // Temporary, subsequent PR will remove this. 
- {"training_dropout_default_mask", "result differs", {}}, // Temporary, subsequent PR will remove this. - {"training_dropout_mask", "result differs", {}}, // Temporary, subsequent PR will remove this. - {"batchnorm_epsilon_training_mode", "training only", {}}, - {"batchnorm_example_training_mode", "training only", {}}, - {"bernoulli", "type error", {}}, - {"bernoulli_double", "type error", {}}, - {"bernoulli_double_expanded", "type error", {}}, - {"bernoulli_expanded", "type error", {}}, - {"bernoulli_seed", "type error", {}}, - {"bernoulli_seed_expanded", "type error", {}}, - {"castlike_BFLOAT16_to_FLOAT", "type error", {}}, - {"castlike_BFLOAT16_to_FLOAT_expanded", "type error", {}}, - {"castlike_FLOAT_to_BFLOAT16", "type error", {}}, - {"castlike_FLOAT_to_BFLOAT16_expanded", "type error", {}}, - {"castlike_FLOAT_to_STRING", "type error", {}}, - {"castlike_FLOAT_to_STRING_expanded", "type error", {}}, - {"convtranspose_autopad_same", "Test data has been corrected in ONNX 1.10.", {"opset13", "opset14"}}, - {"gru_batchwise", "type error", {}}, - {"lstm_batchwise", "type error", {}}, - {"optional_get_element", "type error", {}}, - {"optional_get_element_sequence", "type error", {}}, - {"optional_has_element", "type error", {}}, - {"optional_has_element_empty", "type error", {}}, - {"shape_end_1", "type error", {}}, - {"shape_end_negative_1", "type error", {}}, - {"shape_start_1", "type error", {}}, - {"shape_start_1_end_2", "type error", {}}, - {"shape_start_1_end_negative_1", "type error", {}}, - {"shape_start_negative_1", "type error", {}}, - {"simple_rnn_batchwise", "type error", {}}, - {"mod_float_mixed_sign_example", "fmod attribute must be true for floating point types", {}}, - {"col2im_pads", "result mismatch", {"opset18"}}, -#ifdef ENABLE_TRAINING_CORE - {"adagrad", "not a registered function/op", {}}, // Op not registered. - {"adagrad_multiple", "not a registered function/op", {}}, // Op not registered. - {"adam", "not a registered function/op", {}}, // Op not registered. - {"adam_multiple", "not a registered function/op", {}}, // Op not registered. - {"gradient_of_add", "not a registered function/op", {}}, // Op not registered. - {"gradient_of_add_and_mul", "not a registered function/op", {}}, // Op not registered. - {"momentum", "not a registered function/op", {}}, // Op not registered. - {"momentum_multiple", "not a registered function/op", {}}, // Op not registered. - {"nesterov_momentum", "not a registered function/op", {}}, // Op not registered. 
- {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", - "type error", - {"opset12"}}, - {"softmax_cross_entropy_mean_weight_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_weight_ignore_index_3d", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_weight_ignore_index_4d", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_no_weight_ignore_index", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", - "type error", - {"opset12"}}, - {"softmax_cross_entropy_mean_3d_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_none_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_3d", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_none_weights_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_sum_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_weight_ignore_index", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "type error", {"opset12"}}, - {"softmax_cross_entropy_sum", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", - "type error", - {"opset12"}}, - {"softmax_cross_entropy_none_weights", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "type error", {"opset12"}}, - {"softmax_cross_entropy_none", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "type error", {"opset12"}}, - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_weight", "type error", {"opset12"}}, - {"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "type error", {"opset12"}}, -#endif - {"mask_rcnn_keras", "this model currently has an invalid contrib op version set to 10", {}}}; - - // Some EPs may fail to pass some specific testcases. - // For example TenosrRT EP may fail on FLOAT16 related testcases if GPU doesn't support float16. - // Instead of list all these testcases, we can use following keyword set to filter out testcases wchich contain - // specific keyword. 
- std::set broken_tests_keyword_set = {}; - - if (provider_name == "cuda") { -#ifdef _WIN32 - broken_tests.insert({"LSTM_Seq_lens_unpacked", "this test fails with new image since Aug 25."}); - broken_tests.insert({"bidaf", "this test fails with new image since Aug 25."}); - broken_tests.insert({"Candy", "Flaky test, need to investigate", {"opset9"}}); -#else - broken_tests.insert({"bidaf", "this test should be recovered when multi-gpu pipeline deprecates NV12", {"opset9"}}); -#endif - } - - if (provider_name == "nnapi") { - broken_tests.insert({"scan9_sum", "Error with the extra graph"}); - broken_tests.insert({"scan_sum", "Error with the extra graph"}); - broken_tests.insert({"mvn_expanded", "Failed to find kernel for MemcpyFromHost(1) (node Memcpy_1)"}); - broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"gemm_transposeB", "Temporarily disabled pending investigation"}); - broken_tests.insert({"range_float_type_positive_delta_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"range_int32_type_negative_delta_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"}); - broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"}); - broken_tests.insert({"maxpool_2d_uint8", "result mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_expanded", "shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean_expanded", "shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum_expanded", "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_expanded", "shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean_expanded", "shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_expanded", "shape mismatch"}); - // Disable based on George Wu's recommendation. 
- broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index_expanded", - "shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index", "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", - "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2", "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index", "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index_expanded", - "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight", "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean", "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index", - "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", - "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", - "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", - "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"}); - 
broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob_expanded", - "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", - "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", - "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", - "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob_expanded", - "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "Shape mismatch"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_3d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_3d_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_3d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_3d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_expanded", "Shape 
mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_ignore_index_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_mean_weight_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_weights", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_weights_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_weights_log_prob", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_none_weights_log_prob_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_sum", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_sum_expanded", "Shape mismatch"}); - broken_tests.insert({"softmax_cross_entropy_sum_log_prob", "Shape mismatch"}); - 
broken_tests.insert({"softmax_cross_entropy_sum_log_prob_expanded", "Shape mismatch"}); - } - - if (provider_name == "tensorrt") { - broken_tests.insert({"convtranspose_with_kernel", "It causes segmentation fault"}); - broken_tests.insert({"convtranspose_pad", "It causes segmentation fault"}); - broken_tests.insert({"convtranspose_kernel_shape", "It causes segmentation fault"}); - broken_tests.insert({"dynamicquantizelinear_expanded", "It causes segmentation fault"}); - broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "It causes segmentation fault"}); - broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "It causes segmentation fault"}); - - broken_tests.insert({"basic_conv_with_padding", - "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build " - "engine for fused node"}); - broken_tests.insert({"basic_conv_without_padding", - "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build " - "engine for fused node"}); - broken_tests.insert({"conv_with_strides_no_padding", - "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build " - "engine for fused node"}); - - broken_tests.insert({"conv_with_autopad_same", - "Internal Error (node_of_y: Cannot set more than one input unless network has Q/DQ layers.)"}); - - // unsupported tests since opset16 - broken_tests.insert({"sequence_map_add_2_sequences", "not supported by TensorRT EP"}); - broken_tests.insert({"sequence_map_extract_shapes", "not supported by TensorRT EP."}); - broken_tests.insert({"sequence_map_add_1_sequence_1_tensor", "not supported by TensorRT EP."}); - broken_tests.insert({"sequence_map_identity_1_sequence", "not supported by TensorRT EP."}); - broken_tests.insert({"sequence_map_identity_2_sequences", "not supported by TensorRT EP."}); - broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor", "not supported by TensorRT EP."}); - broken_tests.insert({"leakyrelu_expanded", "not supported by TensorRT EP."}); - broken_tests.insert({"leakyrelu_default_expanded", "not supported by TensorRT EP."}); - broken_tests.insert({"leakyrelu_example_expanded", "not supported by TensorRT EP."}); - broken_tests.insert({"prelu_broadcast_expanded", "not supported by TensorRT EP."}); - broken_tests.insert({"prelu_example_expanded", "not supported by TensorRT EP."}); - broken_tests_keyword_set.insert({"scatternd_add"}); - broken_tests_keyword_set.insert({"scatternd_multiply"}); - broken_tests_keyword_set.insert({"scatter_elements_with_duplicate_indices"}); - - // sce op is not supported - broken_tests_keyword_set.insert({"sce"}); - - // TensorRT EP CI uses Nvidia Tesla M60 which doesn't support fp16. 
- broken_tests_keyword_set.insert({"FLOAT16"}); - } - - if (provider_name == "dml") { - broken_tests.insert({"tinyyolov3", "The parameter is incorrect"}); - broken_tests.insert({"PixelShuffle", "Test requires 6D Reshape, which isn't supported by DirectML"}); - broken_tests.insert({"operator_permute2", "Test requires 6D Transpose, which isn't supported by DirectML"}); - broken_tests.insert({"resize_downsample_linear", - "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); - broken_tests.insert( - {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); - broken_tests.insert( - {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."}); - - // These tests are temporarily disabled pending investigation - broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"}); - broken_tests.insert({"mxnet_arcface", "Temporarily disabled pending investigation"}); - broken_tests.insert({"yolov3", "Temporarily disabled pending investigation"}); - broken_tests.insert({"tf_inception_v2", "Temporarily disabled pending investigation"}); - broken_tests.insert({"fp16_inception_v1", "Temporarily disabled pending investigation"}); - broken_tests.insert({"candy", "Temporarily disabled pending investigation"}); - broken_tests.insert({"BERT_Squad", "Temporarily disabled pending investigation"}); - broken_tests.insert({"LSTM_Seq_lens_unpacked", "The parameter is incorrect"}); - - broken_tests.insert({"resize_downsample_scales_linear", - "DML uses half_pixel and this test assumed \"asymmetric\" but does not include \"mode\""}); - broken_tests.insert({"resize_downsample_sizes_linear_pytorch_half_pixel", - "DML does not support downsampling by such a large factor - skips input pixels"}); - broken_tests.insert({"resize_downsample_sizes_nearest", - "DML uses pixel centers for nearest, rounding 1 value off for the middle column"}); - broken_tests.insert({"resize_upsample_sizes_nearest", - "DML uses pixel centers for nearest, which makes more sense (the 3rd row mismatches)"}); - broken_tests.insert({"unsqueeze_three_axes", "DML does not support 6D tensors"}); - broken_tests.insert({"unsqueeze_unsorted_axes", "DMLdoes not support 6D tensors"}); - - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", - "DML does not support 5D+ tensors"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", - "DML does not support 5D+ tensors"}); - broken_tests.insert( - {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", - "DML does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", - "DML does not support 5D+ tensors"}); - broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", - "DML does not support 5D+ tensors"}); - 
broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", - "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", - "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", - "DML does not support 5D+ tensors"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", - "DML does not support 5D+ tensors"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", - "DML does not support 5D+ tensors"}); - broken_tests.insert( - {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", - "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", - "DML does not support 5D+ tensors"}); - broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", - "DML does not support 5D+ tensors"}); - } - -#ifdef DISABLE_CONTRIB_OPS - broken_tests.insert({"coreml_SqueezeNet_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Permute_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_ReLU_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Padding-Upsampling-Normalizer_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"tiny_yolov2", "This model uses contrib ops."}); - broken_tests.insert({"fp16_tiny_yolov2", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Pooling_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Padding_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Normalizer_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_linear_sklearn_load_breast_cancer", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_linear_ImageNet_small", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_linear_ImageNet_large", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_linear_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_leakyrelu_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_hard_sigmoid_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_elu_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Dense_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Conv2D_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_VGG16_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_Resnet50_ImageNet", "This 
model uses contrib ops."}); - broken_tests.insert({"coreml_Inceptionv3_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_FNS-Candy_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"coreml_AgeNet_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_thresholdedrelu_ImageNet_large", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_thresholdedrelu_ImageNet_small", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_thresholdedrelu_sklearn_load_breast_cancer", "This model uses contrib ops."}); - broken_tests.insert({"thresholdedrelu", "This model uses contrib ops."}); - broken_tests.insert({"thresholdedrelu_default", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_default_axes", "This model uses contrib ops."}); - broken_tests.insert({"thresholdedrelu_example", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_neg failed", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_start_out_of_bounds", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_end_out_of_bounds", "This model uses contrib ops."}); - broken_tests.insert({"dynamic_slice_neg", "This model uses contrib ops."}); - broken_tests.insert({"mvn", "This model uses contrib ops.", {"onnx130"}}); - broken_tests.insert({"cdist_float32_euclidean_1000_2000_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_euclidean_1000_2000_500", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_euclidean_1_1_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_sqeuclidean_1000_2000_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_sqeuclidean_1000_2000_500", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float32_sqeuclidean_1_1_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_euclidean_1000_2000_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_euclidean_1000_2000_500", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_euclidean_1_1_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_1", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_500", "This model uses contrib ops."}); - broken_tests.insert({"cdist_float64_sqeuclidean_1_1_1", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"bidaf", "This model uses contrib ops."}); - broken_tests.insert({"fp16_test_tiny_yolov2", "This model uses contrib ops."}); - broken_tests.insert({"fp16_coreml_FNS-Candy", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Repeat_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_BiDirectional_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"fp16_coreml_LinearRegression_NYCTaxi", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_GRU_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_SimpleRNN_ImageNet", "This model uses contrib ops."}); - broken_tests.insert({"keras2coreml_Dot_imageNet", "This model uses contrib ops."}); -#endif + 
auto broken_tests = GetBrokenTests(provider_name); + auto broken_tests_keyword_set = GetBrokenTestsKeyWordSet(provider_name); std::basic_string model_dir; (void)GetDirNameFromFilePath(model_path, model_dir); std::basic_string test_case_name = GetLastComponent(model_dir); @@ -603,16 +108,16 @@ TEST_P(ModelTest, Run) { test_case_name = test_case_name.substr(5); { BrokenTest t = {ToUTF8String(test_case_name), ""}; - auto iter = broken_tests.find(t); + auto iter = broken_tests->find(t); auto opset_version = model_info->GetNominalOpsetVersion(); - if (iter != broken_tests.end() && + if (iter != broken_tests->end() && (opset_version == TestModelInfo::unknown_version || iter->broken_opset_versions_.empty() || iter->broken_opset_versions_.find(opset_version) != iter->broken_opset_versions_.end())) { SkipTest("It's in broken_tests"); return; } - for (auto iter2 = broken_tests_keyword_set.begin(); iter2 != broken_tests_keyword_set.end(); ++iter2) { + for (auto iter2 = broken_tests_keyword_set->begin(); iter2 != broken_tests_keyword_set->end(); ++iter2) { std::string keyword = *iter2; if (ToUTF8String(test_case_name).find(keyword) != std::string::npos) { SkipTest("It's in broken_tests_keyword");