From 86bfa8ce030d11e1985eeda93f181697ed3a9a2c Mon Sep 17 00:00:00 2001
From: Yi Zhang <zhanyi@microsoft.com>
Date: Sat, 7 Oct 2023 16:37:12 +0800
Subject: [PATCH] update broken tests

---
 onnxruntime/test/onnx/main.cc | 634 +++++++++++++++++-----------------
 1 file changed, 319 insertions(+), 315 deletions(-)
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index 062ca4ece86bf..b2b2a35a8c3c1 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -168,6 +168,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
   bool user_graph_optimization_level_set = false;
   bool set_denormal_as_zero = false;
   std::basic_string<ORTCHAR_T> ep_runtime_config_string;
+  std::string provider_name;
 
   OrtLoggingLevel logging_level = ORT_LOGGING_LEVEL_ERROR;
   bool verbose_logging_required = false;
@@ -213,6 +214,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
           whitelisted_test_cases.emplace_back(optarg);
           break;
         case 'e':
+          provider_name = ToUTF8String(optarg);
           if (!CompareCString(optarg, ORT_TSTR("cpu"))) {
             // do nothing
           } else if (!CompareCString(optarg, ORT_TSTR("cuda"))) {
@@ -478,7 +480,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
             ORT_THROW("Wrong value for htp_performance_mode. select from: " + str);
           }
         } else {
-          ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable', 
+          ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
 'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])");
         }
 
@@ -804,140 +806,148 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
   };
 
   std::set<BrokenTest> broken_tests = {
-    {"BERT_Squad", "test data bug"},
-    {"constantofshape_float_ones", "test data bug", {"onnx141", "onnx150"}},
-    {"constantofshape_int_zeros", "test data bug", {"onnx141", "onnx150"}},
-    {"convtranspose_autopad_same", "Test data has been corrected in ONNX 1.10.", {"onnx180", "onnx181", "onnx190"}},
-    {"cast_STRING_to_FLOAT", "Linux CI has old ONNX python package with bad test data", {"onnx141"}},
-    // Numpy float to string has unexpected rounding for some results given numpy default precision is meant to be 8.
-    // "e.g. 0.296140194 -> '0.2961402' not '0.29614019'. ORT produces the latter with precision set to 8,
-    // which doesn't match the expected output that was generated with numpy.
-    {"cast_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results."},
-    {"cntk_simple_seg", "Bad onnx test output caused by wrong SAME_UPPER/SAME_LOWER for ConvTranspose", {}},
-    {"tf_nasnet_large", "disable temporarily"},
-    {"tf_nasnet_mobile", "disable temporarily"},
-    {"tf_pnasnet_large", "disable temporarily"},
-    {"shrink", "test case is wrong", {"onnx141"}},
-    {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"},
-    {"tf_inception_v2", "result mismatch"},
-    {"tf_resnet_v1_50", "result mismatch when Conv BN Fusion is applied"},
-    {"tf_resnet_v1_101", "result mismatch when Conv BN Fusion is applied"},
-    {"tf_resnet_v1_152", "result mismatch when Conv BN Fusion is applied"},
-    {"mxnet_arcface", "Model is an invalid ONNX model"},
-    {"unique_not_sorted_without_axis", "Expected data for 'Y' is incorrect and in sorted order."},
-    {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."},
-    {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"},
-    {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"},
-    {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."},
-    {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."},
-    {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."},
-    {"resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", "Bad onnx test output. Needs test fix."},
-    {"bitshift_right_uint16", "BitShift(11) uint16 support not enabled currently"},
-    {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"},
-    {"maxunpool_export_with_output_shape", "Invalid output in ONNX test. See https://github.com/onnx/onnx/issues/2398"},
-    {"training_dropout", "result differs", {}},                       // Temporary, subsequent PR will remove this.
-    {"training_dropout_default", "result differs", {}},               // Temporary, subsequent PR will remove this.
-    {"training_dropout_default_mask", "result differs", {}},          // Temporary, subsequent PR will remove this.
-    {"training_dropout_mask", "result differs", {}},                  // Temporary, subsequent PR will remove this.
-    {"adagrad", "not a registered function/op", {}},                  // Op not registered.
-    {"adagrad_multiple", "not a registered function/op", {}},         // Op not registered.
-    {"adam", "not a registered function/op", {}},                     // Op not registered.
-    {"adam_multiple", "not a registered function/op", {}},            // Op not registered.
-    {"gradient_of_add", "not a registered function/op", {}},          // Op not registered.
-    {"gradient_of_add_and_mul", "not a registered function/op", {}},  // Op not registered.
-    {"momentum", "not a registered function/op", {}},                 // Op not registered.
-    {"momentum_multiple", "not a registered function/op", {}},        // Op not registered.
-    {"nesterov_momentum", "not a registered function/op", {}},        // Op not registered.
-    {"sequence_insert_at_back", "onnx currently not supporting loading segment", {}},
-    {"sequence_insert_at_front", "onnx currently not supporting loading segment", {}},
-    {"loop13_seq", "ORT api does not currently support creating empty sequences (needed for this test)", {}},
-    {"cast_FLOAT_to_BFLOAT16", "onnx generate bfloat tensor as uint16 type", {}},
-    {"cast_BFLOAT16_to_FLOAT", "onnx generate bfloat tensor as uint16 type", {}},
-    {"castlike_FLOAT_to_BFLOAT16", "Depends on cast.", {}},
-    {"castlike_BFLOAT16_to_FLOAT", "Depends on cast", {}},
-    {"castlike_FLOAT_to_BFLOAT16_expanded", "Depends on cast.", {}},
-    {"castlike_BFLOAT16_to_FLOAT_expanded", "Depends on cast", {}},
-    {"castlike_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results.", {}},
-    {"castlike_FLOAT_to_STRING_expanded", "Numpy float to string has unexpected rounding for some results.", {}},
-    {"bernoulli", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_double", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_double_expanded", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_seed", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_seed_expanded", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"bernoulli_expanded", "By design. Test data is for informational purpose because the generator is non deterministic."},
-    {"test_roialign_aligned_true", "Opset 16 not supported yet."},
-    {"test_roialign_aligned_false", "Opset 16 not supported yet."},
-    {"test_roialign_mode_max", "Onnx roialign mode expected output is incorrect."},
-    {"test_scatternd_add", "Opset 16 not supported yet."},
-    {"test_scatternd_multiply", "Opset 16 not supported yet."},
-    {"test_scatter_elements_with_duplicate_indices", "Opset 16 not supported yet."},
-    {"col2im_pads", "onnx 18 test data error."},
-
-#if defined(DISABLE_OPTIONAL_TYPE)
-    {"test_optional_get_element", "Optional type not supported in this build flavor."},
-    {"test_optional_get_element_sequence", "Optional type not supported in this build flavor."},
-    {"test_optional_has_element", "Optional type not supported in this build flavor."},
-    {"test_optional_has_element_empty", "Optional type not supported in this build flavor."},
-    {"test_if_opt", "Optional type not supported in this build flavor."},
-    {"test_loop16_seq_none", "Optional type not supported in this build flavor."},
-    {"test_identity_opt", "Optional type not supported in this build flavor."},
+      {"slice_neg_steps",
+       "Type parameter (Tind) bound to different types (tensor(int64) and tensor(int32) in node ()."},
+      {"cast_BFLOAT16_to_FLOAT", "Unexpected input data type"},
+      {"loop13_seq", "Creation of empty sequences is currently not supported in the test runner"},
+      {"sequence_insert_at_front", "shape mismatch, expect {4} got {3}"},
+      {"cast_FLOAT_to_BFLOAT16", "expect uint16 got bfloat16"},
+      {"mnist", "Input data isn't in valid range"},
+      {"BERT_Squad", "test data bug"},
+      {"constantofshape_float_ones", "test data bug", {"opset9", "opset10"}},
+      {"constantofshape_int_zeros", "test data bug", {"opset9", "opset10"}},
+      {"cast_STRING_to_FLOAT", "Linux CI has old ONNX python package with bad test data", {"opset9", "opset10"}},
+      // Numpy float to string has unexpected rounding for some results given numpy default precision is meant to be 8.
+      // e.g. 0.296140194 -> '0.2961402' not '0.29614019'. ORT produces the latter with precision set to 8,
+      // which doesn't match the expected output that was generated with numpy.
+      {"cast_FLOAT_to_STRING", "Numpy float to string has unexpected rounding for some results."},
+      {"tf_nasnet_large", "disable temporarily"},
+      {"tf_nasnet_mobile", "disable temporarily"},
+      {"tf_pnasnet_large", "disable temporarily"},
+      {"shrink", "test case is wrong", {"opset9"}},
+      {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"},
+      {"tf_inception_v2", "result mismatch"},
+      {"tf_resnet_v1_50", "result mismatch when Conv BN Fusion is applied"},
+      {"tf_resnet_v1_101", "result mismatch when Conv BN Fusion is applied"},
+      {"tf_resnet_v1_152", "result mismatch when Conv BN Fusion is applied"},
+      {"mxnet_arcface", "Model is an invalid ONNX model"},
+      {"unique_not_sorted_without_axis", "Expected data for 'Y' is incorrect and in sorted order."},
+      {"cumsum_1d_reverse_exclusive", "only failing linux GPU CI. Likely build error."},
+      {"resize_downsample_scales_cubic_align_corners", "results mismatch with onnx tests"},
+      {"resize_downsample_scales_linear_align_corners", "results mismatch with onnx tests"},
+      {"resize_tf_crop_and_resize", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_ceil_half_pixel", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_floor_align_corners", "Bad onnx test output. Needs test fix."},
+      {"resize_upsample_sizes_nearest_round_prefer_ceil_asymmetric", "Bad onnx test output. Needs test fix."},
+      {"bitshift_right_uint16", "BitShift(11) uint16 support not enabled currently"},
+      {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"},
+      {"maxunpool_export_with_output_shape",
+       "Invalid output in ONNX test. See https://github.com/onnx/onnx/issues/2398"},
+      {"cntk_simple_seg", "Bad onnx test output caused by wrong SAME_UPPER/SAME_LOWER for ConvTranspose"},
+      {"training_dropout", "result differs", {}},               // Temporary, subsequent PR will remove this.
+      {"training_dropout_default", "result differs", {}},       // Temporary, subsequent PR will remove this.
+      {"training_dropout_default_mask", "result differs", {}},  // Temporary, subsequent PR will remove this.
+      {"training_dropout_mask", "result differs", {}},          // Temporary, subsequent PR will remove this.
+      {"batchnorm_epsilon_training_mode", "training only", {}},
+      {"batchnorm_example_training_mode", "training only", {}},
+      {"bernoulli", "type error", {}},
+      {"bernoulli_double", "type error", {}},
+      {"bernoulli_double_expanded", "type error", {}},
+      {"bernoulli_expanded", "type error", {}},
+      {"bernoulli_seed", "type error", {}},
+      {"bernoulli_seed_expanded", "type error", {}},
+      {"castlike_BFLOAT16_to_FLOAT", "type error", {}},
+      {"castlike_BFLOAT16_to_FLOAT_expanded", "type error", {}},
+      {"castlike_FLOAT_to_BFLOAT16", "type error", {}},
+      {"castlike_FLOAT_to_BFLOAT16_expanded", "type error", {}},
+      {"castlike_FLOAT_to_STRING", "type error", {}},
+      {"castlike_FLOAT_to_STRING_expanded", "type error", {}},
+      {"convtranspose_autopad_same", "Test data has been corrected in ONNX 1.10.", {"opset13", "opset14"}},
+      {"gru_batchwise", "type error", {}},
+      {"lstm_batchwise", "type error", {}},
+      {"optional_get_element", "type error", {}},
+      {"optional_get_element_sequence", "type error", {}},
+      {"optional_has_element", "type error", {}},
+      {"optional_has_element_empty", "type error", {}},
+      {"shape_end_1", "type error", {}},
+      {"shape_end_negative_1", "type error", {}},
+      {"shape_start_1", "type error", {}},
+      {"shape_start_1_end_2", "type error", {}},
+      {"shape_start_1_end_negative_1", "type error", {}},
+      {"shape_start_negative_1", "type error", {}},
+      {"simple_rnn_batchwise", "type error", {}},
+      {"mod_float_mixed_sign_example", "fmod attribute must be true for floating point types", {}},
+      {"col2im_pads", "result mismatch", {"opset18"}},
+#ifdef ENABLE_TRAINING_CORE
+      {"adagrad", "not a registered function/op", {}},                  // Op not registered.
+      {"adagrad_multiple", "not a registered function/op", {}},         // Op not registered.
+      {"adam", "not a registered function/op", {}},                     // Op not registered.
+      {"adam_multiple", "not a registered function/op", {}},            // Op not registered.
+      {"gradient_of_add", "not a registered function/op", {}},          // Op not registered.
+      {"gradient_of_add_and_mul", "not a registered function/op", {}},  // Op not registered.
+      {"momentum", "not a registered function/op", {}},                 // Op not registered.
+      {"momentum_multiple", "not a registered function/op", {}},        // Op not registered.
+      {"nesterov_momentum", "not a registered function/op", {}},        // Op not registered.
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob",
+       "type error",
+       {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_3d", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_4d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_4d", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob",
+       "type error",
+       {"opset12"}},
+      {"softmax_cross_entropy_mean_3d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_none_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_3d", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index_3d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_3d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_none_weights_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_sum_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_3d", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_sum", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
+       "type error",
+       {"opset12"}},
+      {"softmax_cross_entropy_none_weights", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_4d_log_prob", "type error", {"opset12"}},
+      {"softmax_cross_entropy_none", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "type error", {"opset12"}},
+      {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_weight", "type error", {"opset12"}},
+      {"softmax_cross_entropy_mean_no_weight_ignore_index_4d", "type error", {"opset12"}},
 #endif
+      {"mask_rcnn_keras", "this model currently has an invalid contrib op version set to 10", {}}};
 
-  };
+  // Some EPs may fail to pass some specific testcases.
+  // For example, the TensorRT EP may fail on FLOAT16-related testcases if the GPU doesn't support float16.
+  // Instead of listing all these testcases, we can use the following keyword set to filter out testcases which
+  // contain a specific keyword.
+  std::set<std::string> broken_tests_keyword_set = {};
 
-#ifdef DISABLE_ML_OPS
-  auto starts_with = [](const std::string& find_in, const std::string& find_what) {
-    return find_in.compare(0, find_what.size(), find_what) == 0;
-  };
-  for (const auto& test_ptr : owned_tests) {
-    const std::string& test_name = test_ptr->GetTestCaseName();
-    if (starts_with(test_name, "XGBoost_") ||
-        starts_with(test_name, "coreml_") ||
-        starts_with(test_name, "scikit_") ||
-        starts_with(test_name, "libsvm_")) {
-      broken_tests.insert({test_name, "Traditional ML ops are disabled in this build."});
-    }
-  }
+  if (provider_name == "cuda") {
+#ifdef _WIN32
+    broken_tests.insert({"LSTM_Seq_lens_unpacked", "this test fails with new image since Aug 25."});
+    broken_tests.insert({"bidaf", "this test fails with new image since Aug 25."});
+    broken_tests.insert({"Candy", "Flaky test, need to investigate", {"opset9"}});
+#else
+    broken_tests.insert({"bidaf", "this test should be recovered when multi-gpu pipeline deprecates NV12", {"opset9"}});
 #endif
-
-  if (enable_openvino) {
-    broken_tests.insert({"operator_permute2", "Disabled temporariliy"});
-    broken_tests.insert({"operator_repeat", "Disabled temporariliy"});
-    broken_tests.insert({"operator_repeat_dim_overflow", "Disabled temporariliy"});
-    broken_tests.insert({"mlperf_ssd_resnet34_1200", "Disabled temporariliy"});
-    broken_tests.insert({"candy", "Results mismatch: 1 of 150528"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "OpenVino does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "OpenVino does not support 5D+ tensors"});
-  }
-
-  if (enable_dnnl) {
-    broken_tests.insert({"tf_mobilenet_v2_1.0_224", "result mismatch"});
-    broken_tests.insert({"tf_mobilenet_v2_1.4_224", "result mismatch"});
-    broken_tests.insert({"tf_mobilenet_v1_1.0_224", "result mismatch"});
-    broken_tests.insert({"mobilenetv2-1.0", "result mismatch"});
-    broken_tests.insert({"candy", "result mismatch"});
-    broken_tests.insert({"range_float_type_positive_delta_expanded", "get unknown exception from DNNL EP"});
-    broken_tests.insert({"range_int32_type_negative_delta_expanded", "get unknown exception from DNNL EP"});
-    broken_tests.insert({"averagepool_2d_ceil", "maxpool ceiling not supported"});
-    broken_tests.insert({"maxpool_2d_ceil", "maxpool ceiling not supported"});
-    broken_tests.insert({"maxpool_2d_dilations", "maxpool dilations not supported"});
-    broken_tests.insert({"mlperf_ssd_resnet34_1200", "test pass on dev box but fails on CI build"});
-    broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"});
-    broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"});
-    broken_tests.insert({"maxpool_2d_uint8", "Does not work on DNNL, NNAPI"});
   }
 
-  if (enable_nnapi) {
+  if (provider_name == "nnapi") {
     broken_tests.insert({"scan9_sum", "Error with the extra graph"});
     broken_tests.insert({"scan_sum", "Error with the extra graph"});
     broken_tests.insert({"mvn_expanded", "Failed to find kernel for MemcpyFromHost(1) (node Memcpy_1)"});
@@ -952,61 +962,101 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
     broken_tests.insert({"maxpool_2d_uint8", "result mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC_expanded", "shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum_expanded", "shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean_expanded", "shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum_expanded", "shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean_expanded", "shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_expanded", "shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean_expanded", "shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_expanded", "shape mismatch"});
     // Disable based on George Wu's recommendation.
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index_expanded", "shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index_expanded",
+         "shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_iinput_shape_is_NCd1_weight_ignore_index_expanded", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NC", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_expanded", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
+    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded",
+                         "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1_weight_expanded", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index", "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_no_weight_reduction_mean_ignore_index_expanded",
+         "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_mean", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_reduction_sum", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_mean", "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum", "Shape mismatch"});
+    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2_with_weight_reduction_sum_ignore_index",
+                         "Shape mismatch"});
+    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
+                         "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
+         "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
+    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded",
+                         "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
     broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1_mean_weight_negative_ignore_index_log_prob_expanded",
+         "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "Shape mismatch"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
+                         "Shape mismatch"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
+                         "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded",
+         "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob", "Shape mismatch"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_sum_weight_high_ignore_index_log_prob_expanded",
+                         "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "Shape mismatch"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "Shape mismatch"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_mean", "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_mean_3d", "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_mean_3d_expanded", "Shape mismatch"});
@@ -1055,68 +1105,64 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
     broken_tests.insert({"softmax_cross_entropy_sum_expanded", "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_sum_log_prob", "Shape mismatch"});
     broken_tests.insert({"softmax_cross_entropy_sum_log_prob_expanded", "Shape mismatch"});
-    broken_tests.insert({"nllloss_NCd1_ignore_index", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_ignore_index_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ignore_index", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ignore_index_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_ignore_index", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_ignore_index_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ignore_index", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ignore_index_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_with_weight_reduction_mean", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_with_weight_reduction_mean_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2d3d4d5_mean_weight", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2d3d4d5_mean_weight_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_ii", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_ii_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ii", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_mean_weight_negative_ii_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_ii", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1_weight_ii_expanded", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ii", "wait for investigation"});
-    broken_tests.insert({"nllloss_NCd1d2_no_weight_reduction_mean_ii_expanded", "wait for investigation"});
   }
 
-  if (enable_tensorrt) {
-    broken_tests.insert({"fp16_shufflenet", "TRT EP bug"});
-    broken_tests.insert({"fp16_inception_v1", "TRT EP bug"});
-    broken_tests.insert({"fp16_tiny_yolov2", "TRT EP bug"});
-    broken_tests.insert({"tf_inception_v3", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_mobilenet_v1_1.0_224", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_mobilenet_v2_1.0_224", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_mobilenet_v2_1.4_224", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v1_101", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v1_152", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v1_50", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v2_101", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v2_152", "TRT Engine couldn't be created"});
-    broken_tests.insert({"tf_resnet_v2_50", "TRT Engine couldn't be created"});
-    broken_tests.insert({"convtranspose_1d", "1d convtranspose not supported yet"});
-    broken_tests.insert({"convtranspose_3d", "3d convtranspose not supported yet"});
-  }
+  if (provider_name == "tensorrt") {
+    broken_tests.insert({"convtranspose_with_kernel", "It causes segmentation fault"});
+    broken_tests.insert({"convtranspose_pad", "It causes segmentation fault"});
+    broken_tests.insert({"convtranspose_kernel_shape", "It causes segmentation fault"});
+    broken_tests.insert({"dynamicquantizelinear_expanded", "It causes segmentation fault"});
+    broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "It causes segmentation fault"});
+    broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "It causes segmentation fault"});
 
-  if (enable_cuda) {
-    broken_tests.insert({"candy", "result mismatch"});
-    broken_tests.insert({"tinyyolov3", "The parameter is incorrect"});
-    broken_tests.insert({"mlperf_ssd_mobilenet_300", "unknown error"});
-    broken_tests.insert({"mlperf_ssd_resnet34_1200", "unknown error"});
-    broken_tests.insert({"tf_inception_v1", "flaky test"});  // TODO: Investigate cause for flakiness
-    broken_tests.insert({"faster_rcnn", "Linux: faster_rcnn:output=6383:shape mismatch, expect {77} got {57}"});
-    broken_tests.insert({"split_zero_size_splits", "alloc failed"});
+    broken_tests.insert({"basic_conv_with_padding",
+                         "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
+                         "engine for fused node"});
+    broken_tests.insert({"basic_conv_without_padding",
+                         "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
+                         "engine for fused node"});
+    broken_tests.insert({"conv_with_strides_no_padding",
+                         "Cannot set more than one input unless network has Q/DQ layers. TensorRT EP could not build "
+                         "engine for fused node"});
+
+    broken_tests.insert({"conv_with_autopad_same",
+                         "Internal Error (node_of_y: Cannot set more than one input unless network has Q/DQ layers.)"});
+
+    // Tests that have been unsupported since opset 16.
+    broken_tests.insert({"sequence_map_add_2_sequences", "not supported by TensorRT EP"});
+    broken_tests.insert({"sequence_map_extract_shapes", "not supported by TensorRT EP."});
+    broken_tests.insert({"sequence_map_add_1_sequence_1_tensor", "not supported by TensorRT EP."});
+    broken_tests.insert({"sequence_map_identity_1_sequence", "not supported by TensorRT EP."});
+    broken_tests.insert({"sequence_map_identity_2_sequences", "not supported by TensorRT EP."});
+    broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor", "not supported by TensorRT EP."});
+    broken_tests.insert({"leakyrelu_expanded", "not supported by TensorRT EP."});
+    broken_tests.insert({"leakyrelu_default_expanded", "not supported by TensorRT EP."});
+    broken_tests.insert({"leakyrelu_example_expanded", "not supported by TensorRT EP."});
+    broken_tests.insert({"prelu_broadcast_expanded", "not supported by TensorRT EP."});
+    broken_tests.insert({"prelu_example_expanded", "not supported by TensorRT EP."});
+    broken_tests_keyword_set.insert({"scatternd_add"});
+    broken_tests_keyword_set.insert({"scatternd_multiply"});
+    broken_tests_keyword_set.insert({"scatter_elements_with_duplicate_indices"});
+
+    // sce op is not supported
+    broken_tests_keyword_set.insert({"sce"});
+
+    // The TensorRT EP CI runs on an NVIDIA Tesla M60, which doesn't support fp16.
+    broken_tests_keyword_set.insert({"FLOAT16"});
   }
 
-  if (enable_dml) {
+  if (provider_name == "dml") {
     broken_tests.insert({"tinyyolov3", "The parameter is incorrect"});
     broken_tests.insert({"PixelShuffle", "Test requires 6D Reshape, which isn't supported by DirectML"});
     broken_tests.insert({"operator_permute2", "Test requires 6D Transpose, which isn't supported by DirectML"});
-    broken_tests.insert({"resize_downsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
-    broken_tests.insert({"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
-    broken_tests.insert({"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
+    broken_tests.insert({"resize_downsample_linear",
+                         "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
+    broken_tests.insert(
+        {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
+    broken_tests.insert(
+        {"resize_upsample_linear", "ORT 0.4 uses asymmetric but will conform to half_pixel in the next ONNX version."});
 
     // These tests are temporarily disabled pending investigation
-    broken_tests.insert({"dynamicquantizelinear", "Temporarily disabled pending investigation"});
     broken_tests.insert({"dynamicquantizelinear_expanded", "Temporarily disabled pending investigation"});
     broken_tests.insert({"dynamicquantizelinear_max_adjusted_expanded", "Temporarily disabled pending investigation"});
     broken_tests.insert({"dynamicquantizelinear_min_adjusted_expanded", "Temporarily disabled pending investigation"});
@@ -1128,109 +1174,56 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
     broken_tests.insert({"BERT_Squad", "Temporarily disabled pending investigation"});
     broken_tests.insert({"LSTM_Seq_lens_unpacked", "The parameter is incorrect"});
 
-    broken_tests.insert({"resize_downsample_scales_linear", "DML uses half_pixel and this test assumed \"asymmetric\" but does not include \"mode\""});
-    broken_tests.insert({"resize_downsample_sizes_linear_pytorch_half_pixel", "DML does not support downsampling by such a large factor - skips input pixels"});
-    broken_tests.insert({"resize_downsample_sizes_nearest", "DML uses pixel centers for nearest, rounding 1 value off for the middle column"});
-    broken_tests.insert({"resize_upsample_sizes_nearest", "DML uses pixel centers for nearest, which makes more sense (the 3rd row mismatches)"});
+    broken_tests.insert({"resize_downsample_scales_linear",
+                         "DML uses half_pixel and this test assumed \"asymmetric\" but does not include \"mode\""});
+    broken_tests.insert({"resize_downsample_sizes_linear_pytorch_half_pixel",
+                         "DML does not support downsampling by such a large factor - skips input pixels"});
+    broken_tests.insert({"resize_downsample_sizes_nearest",
+                         "DML uses pixel centers for nearest, rounding 1 value off for the middle column"});
+    broken_tests.insert({"resize_upsample_sizes_nearest",
+                         "DML uses pixel centers for nearest, which makes more sense (the 3rd row mismatches)"});
     broken_tests.insert({"unsqueeze_three_axes", "DML does not support 6D tensors"});
     broken_tests.insert({"unsqueeze_unsorted_axes", "DMLdoes not support 6D tensors"});
 
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob", "DML does not support 5D+ tensors"});
-    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded", "DML does not support 5D+ tensors"});
-
-    // TODO: Remove identity tests when fixed #42638109
-    broken_tests.insert({"identity_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_add_1_sequence_1_tensor_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_add_1_sequence_1_tensor_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_add_2_sequences_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_add_2_sequences_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_extract_shapes_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_extract_shapes_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_1_tensor_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_1_sequence_expanded_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_2_sequences_cpu", "Optional type not yet supported for identity-16."});
-    broken_tests.insert({"sequence_map_identity_2_sequences_expanded_cpu", "Optional type not yet supported for identity-16."});
+    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
+         "DML does not support 5D+ tensors"});
+    broken_tests.insert(
+        {"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
+    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert({"negative_log_likelihood_loss_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_expanded",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3_none_no_weight_negative_ignore_index_log_prob_expanded",
+         "DML does not support 5D+ tensors"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight", "DML does not support 5D+ tensors"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_expanded", "DML does not support 5D+ tensors"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob", "DML does not support 5D+ tensors"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_mean_weight_log_prob_expanded",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert(
+        {"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight", "DML does not support 5D+ tensors"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_expanded",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob",
+                         "DML does not support 5D+ tensors"});
+    broken_tests.insert({"softmax_cross_entropy_input_shape_is_NCd1d2d3d4d5_none_no_weight_log_prob_expanded",
+                         "DML does not support 5D+ tensors"});
   }
-  if (enable_qnn) {
-    broken_tests.insert({"gemm_default_no_bias", "result differs"});
-    broken_tests.insert({"resize_downsample_scales_linear", "result differs"});
-    broken_tests.insert({"resize_downsample_scales_linear_antialias", "result differs"});
-    broken_tests.insert({"resize_downsample_sizes_linear_antialias", "result differs"});
-    broken_tests.insert({"sce_NCd1_mean_weight_negative_ii", "result differs"});
-    broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_expanded", "result differs"});
-    broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_log_prob", "result differs"});
-    broken_tests.insert({"sce_NCd1_mean_weight_negative_ii_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean", "result differs"});
-    broken_tests.insert({"sce_mean_3d", "result differs"});
-    broken_tests.insert({"sce_mean_3d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_3d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_3d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_3d", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_3d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_3d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_3d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_4d", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_4d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_4d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_4d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_no_weight_ii_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight", "result differs"});
-    broken_tests.insert({"sce_mean_weight_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_3d", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_3d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_3d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_3d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_4d", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_4d_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_4d_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_4d_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_weight_ii_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_mean_weight_log_prob", "result differs"});
-    broken_tests.insert({"sce_mean_weight_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_none", "result differs"});
-    broken_tests.insert({"sce_none_expanded", "result differs"});
-    broken_tests.insert({"sce_none_log_prob", "result differs"});
-    broken_tests.insert({"sce_none_log_prob_expanded", "result differs"});
-    broken_tests.insert({"sce_sum", "result differs"});
-    broken_tests.insert({"sce_sum_expanded", "result differs"});
-    broken_tests.insert({"sce_sum_log_prob", "result differs"});
-    broken_tests.insert({"sce_sum_log_prob_expanded", "result differs"});
-    broken_tests.insert({"gridsample_reflection_padding", "result differs"});
-    broken_tests.insert({"spacetodepth", "result differs"});
-  }
-#if defined(_WIN32) && !defined(_WIN64)
-  broken_tests.insert({"vgg19", "failed: bad allocation"});
-#endif
-
-  // Disable mask_rcnn_keras as this model currently has an invalid contrib op version set to 10
-  broken_tests.insert({"mask_rcnn_keras", "This model uses contrib ops."});
 
 #ifdef DISABLE_CONTRIB_OPS
   broken_tests.insert({"coreml_SqueezeNet_ImageNet", "This model uses contrib ops."});
@@ -1281,6 +1274,17 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
   broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_1", "This model uses contrib ops."});
   broken_tests.insert({"cdist_float64_sqeuclidean_1000_2000_500", "This model uses contrib ops."});
   broken_tests.insert({"cdist_float64_sqeuclidean_1_1_1", "This model uses contrib ops."});
+  broken_tests.insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."});
+  broken_tests.insert({"bidaf", "This model uses contrib ops."});
+  broken_tests.insert({"fp16_test_tiny_yolov2", "This model uses contrib ops."});
+  broken_tests.insert({"fp16_coreml_FNS-Candy", "This model uses contrib ops."});
+  broken_tests.insert({"keras2coreml_Repeat_ImageNet", "This model uses contrib ops."});
+  broken_tests.insert({"keras2coreml_BiDirectional_ImageNet", "This model uses contrib ops."});
+  broken_tests.insert({"fp16_coreml_LinearRegression_NYCTaxi", "This model uses contrib ops."});
+  broken_tests.insert({"keras2coreml_Average_ImageNet", "This model uses contrib ops."});
+  broken_tests.insert({"keras2coreml_GRU_ImageNet", "This model uses contrib ops."});
+  broken_tests.insert({"keras2coreml_SimpleRNN_ImageNet", "This model uses contrib ops."});
+  broken_tests.insert({"keras2coreml_Dot_imageNet", "This model uses contrib ops."});
 #endif
 
   int result = 0;