diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
index 83dfb245f263e..71e808160163d 100644
--- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
+++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h
@@ -826,14 +826,6 @@ class QLinearMatMulHelper : public MatMulHelperBase
     QLinearMatMulHelper(const Info_t& info, const Shape_t& shape) : MatMulHelperBase(info, shape, 0, 3) {}
 };
 
-class MatMulIntegerToFloatHelper : public MatMulHelperBase
-{
-public:
-    template<typename Info_t, typename Shape_t>
-    MatMulIntegerToFloatHelper(const Info_t& info, const Shape_t& shape) : MatMulHelperBase(info, shape, 0, 1) {}
-};
-
-
 class TopKHelper
 {
     void Initialize(
diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
index 51d9a57b5e447..0d5dab35826c1 100644
--- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
+++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
@@ -24,28 +24,66 @@ namespace onnxruntime {
 namespace test {
 
 template <typename IType, typename WType, typename OType>
-void TestMatMulIntegerToFloat(const std::vector<int64_t>& A_dims,
-                              std::vector<int64_t> B_dims,
-                              const std::string& reference_model,
-                              bool is_matrix_b_constant,
+static void CalculateMatMulIntegerToFloat(const int64_t M, const int64_t N, const int64_t K,
+                                          const std::vector<IType>& A_data, const std::vector<OType>& A_scale,
+                                          const std::vector<IType>& A_zero_point, const std::vector<WType>& B_data,
+                                          std::vector<OType>& B_scale, std::vector<WType>& B_zero_point,
+                                          const std::vector<OType>& Bias, std::vector<float>& Y_data,
+                                          bool per_column, bool has_zp, bool has_bias) {
+  if (!per_column) {  // per-tensor B params: replicate element 0 so the loops below can index by column n
+    B_zero_point.resize(N, B_zero_point[0]);  // resizes the caller's vector in place (non-const reference)
+    B_scale.resize(N, B_scale[0]);
+  }
+
+  for (int64_t m = 0; m < M; m++) {  // Y[m][n] = sum over k of dequant(A[m][k]) * dequant(B[k][n]), plus Bias[n]
+    for (int64_t n = 0; n < N; n++) {
+      float sum = 0.0f;
+      for (int64_t k = 0; k < K; k++) {
+        float A_dequantized = has_zp ?  // A uses only A_scale[0] and A_zero_point[0]
+                              (static_cast<int>(A_data[m * K + k]) - static_cast<int>(A_zero_point[0])) * A_scale[0] :
+                              A_data[m * K + k] * A_scale[0];
+        float B_dequantized = has_zp ?  // B uses the scale / zero point of column n
+                              (static_cast<int>(B_data[k * N + n]) - static_cast<int>(B_zero_point[n])) * B_scale[n] :
+                              B_data[k * N + n] * B_scale[n];
+
+        sum += A_dequantized * B_dequantized;
+      }
+      if (has_bias) {
+        sum += Bias[n];  // Bias is read only when has_bias is set
+      }
+      Y_data[m * N + n] = static_cast<OType>(sum);  // cast to OType first, then stored as float; Y_data must hold M * N
+    }
+  }
+}
+
+template <typename IType, typename WType, typename OType>
+void TestMatMulIntegerToFloat(bool is_matrix_b_constant,
                               bool per_column = false,
                               bool has_zp = true,
                               bool has_bias = false) {
   // create rand inputs
   RandomValueGenerator random{};
-
+  int64_t M = 4;
+  int64_t N = 128;
+  int64_t K = 128;
+  std::vector<int64_t> A_dims{M, K};
+  std::vector<int64_t> B_dims{K, N};
+  std::vector<int64_t> Y_dims{M, N};  // (M x K) * (K x N) yields M x N
   std::vector<IType> A_data;
-  std::vector<int> tmp_A_data = random.Uniform<int32_t>(A_dims,
-                                                        std::numeric_limits<WType>::lowest(),
-                                                        std::numeric_limits<WType>::max());
-  std::transform(tmp_A_data.begin(), tmp_A_data.end(), std::back_inserter(A_data), [](int32_t v) -> WType {
+  std::vector<IType> tmp_A_data = random.Uniform<IType>(A_dims,
+                                                        std::numeric_limits<IType>::lowest(),
+                                                        std::numeric_limits<IType>::max());
+  std::transform(tmp_A_data.begin(), tmp_A_data.end(), std::back_inserter(A_data), [](int32_t v) -> IType {
     return static_cast<IType>(v);
   });
 
   std::vector<WType> B_data;
-  std::vector<int> tmp_B_data = random.Uniform<int32_t>(B_dims,
-                                                        std::numeric_limits<WType>::lowest(),
-                                                        std::numeric_limits<WType>::max());
+
+  std::vector<WType> tmp_B_data;
+  tmp_B_data = random.Uniform<WType>(B_dims,
+                                     // Half of the weight range: lowest() / 2 is -64 for int8_t and 0 for uint8_t.
+                                     std::numeric_limits<WType>::lowest() / 2,
+                                     std::numeric_limits<WType>::max() / 2);
   std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> WType {
     return static_cast<WType>(v);
   });
@@ -60,9 +98,9 @@ void TestMatMulIntegerToFloat(const std::vector<int64_t>& A_dims,
   std::for_each(B_zero_point.begin(),
                 B_zero_point.end(),
                 [&random](WType& zp) {
-                  zp = static_cast<WType>(random.Uniform<int32_t>(std::array<int64_t, 1>{1},
-                                                                  std::numeric_limits<WType>::lowest(),
-                                                                  std::numeric_limits<WType>::max())[0]);
+                  zp = static_cast<WType>(random.Uniform<WType>(std::array<int64_t, 1>{1},
+                                                                std::numeric_limits<WType>::lowest(),
+                                                                std::numeric_limits<WType>::max())[0]);
                 });
 
   std::vector<OType> Bias = random.Uniform<OType>(AsSpan({B_dims.back()}), -0.1f, 0.1f);
@@ -77,7 +115,7 @@ void TestMatMulIntegerToFloat(const std::vector<int64_t>& A_dims,
     test.AddInput<IType>("a_zero_point", {1}, A_zero_point);
     test.AddInput<WType>("b_zero_point", {b_scale_zp_size}, B_zero_point);
   } else {
-    test.AddOptionalInputEdge<WType>();
+    test.AddOptionalInputEdge<IType>();
     test.AddOptionalInputEdge<WType>();
   }
 
@@ -87,39 +125,39 @@ void TestMatMulIntegerToFloat(const std::vector<int64_t>& A_dims,
     test.AddOptionalInputEdge<OType>();
   }
 
-  test.AddReferenceOutputs(reference_model);
-#if defined(USE_DML)
-  if constexpr (std::is_same_v<OType, float>) {
-    test.SetOutputRelErr("Y", 2e-2f);
+  std::vector<float> Y_data(M * N);  // reference result computed on the host, one float per output element
+  CalculateMatMulIntegerToFloat<IType, WType, OType>(M, N, K, A_data, A_scale, A_zero_point,
+                                                     B_data, B_scale, B_zero_point, Bias, Y_data,
+                                                     per_column, has_zp, has_bias);
+
+  if constexpr (std::is_same_v<OType, float>) {  // compile-time choice of the expected-output element type
+    test.AddOutput<float>("Y", {M, N}, Y_data);
   } else {
-    test.SetOutputRelErr("Y", 2.0f);
+    test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+    test.SetOutputAbsErr("Y", 0.5f);
   }
-#else
-  test.SetOutputRelErr("Y", 1e-4f);
-#endif
 
-  if constexpr (std::is_same_v<OType, float>) {
-    test.Run();
+  // Only DML EP supports these data type combinations for now
+  if constexpr (std::is_same_v<OType, MLFloat16> ||
+                (std::is_same_v<OType, float> &&
+                 std::is_same_v<IType, int8_t> &&
+                 std::is_same_v<WType, uint8_t>)) {
+    std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+    execution_providers.push_back(DefaultDmlExecutionProvider());
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
   } else {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCpuExecutionProvider});
+    test.Run();
   }
 
 }
 
 template <typename IType, typename WType, typename OType, bool HasZeroPoint, bool HasBias>
-void RunMatMulIntegerToFloatTest(const string& model_path) {
-  std::vector<int64_t> A_dims{4, 128};
-  std::vector<int64_t> B_dims{128, 128};
-  std::vector<int64_t> Y_dims{4, 128};
-
+void RunMatMulIntegerToFloatTest() {
   TestMatMulIntegerToFloat<IType, WType, OType>(
-                                         A_dims,
-                                         B_dims,
-                                         model_path,
-                                         false,        /*is_matrix_b_constant*/
-                                         false,        /*per_column*/
-                                         HasZeroPoint, /*has_zp*/
-                                         HasBias       /*has_bias*/
+      false,        /*is_matrix_b_constant*/
+      false,        /*per_column*/
+      HasZeroPoint, /*has_zp*/
+      HasBias       /*has_bias*/
   );
 
   TestMatMulIntegerToFloat<IType, WType, OType>(
@@ -130,66 +168,274 @@ void RunMatMulIntegerToFloatTest(const string& model_path) {
                                          false,        /*per_column*/
                                          HasZeroPoint, /*has_zp*/
                                          HasBias       /*has_bias*/
+      true,         /*is_matrix_b_constant*/
+      false,        /*per_column*/
+      HasZeroPoint, /*has_zp*/
+      HasBias       /*has_bias*/
   );
 
   TestMatMulIntegerToFloat<IType, WType, OType>(
-                                         A_dims,
-                                         B_dims,
-                                         model_path,
-                                         false,        /*is_matrix_b_constant*/
-                                         true,         /*per_column*/
-                                         HasZeroPoint, /*has_zp*/
-                                         HasBias       /*has_bias*/
+      false,        /*is_matrix_b_constant*/
+      true,         /*per_column*/
+      HasZeroPoint, /*has_zp*/
+      HasBias       /*has_bias*/
   );
 
   TestMatMulIntegerToFloat<IType, WType, OType>(
-                                         A_dims,
-                                         B_dims,
-                                         model_path,
-                                         true,         /*is_matrix_b_constant*/
-                                         true,         /*per_column*/
-                                         HasZeroPoint, /*has_zp*/
-                                         HasBias       /*has_bias*/
+      true,         /*is_matrix_b_constant*/
+      true,         /*per_column*/
+      HasZeroPoint, /*has_zp*/
+      HasBias       /*has_bias*/
   );
 }
 
-#if USE_DML
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, int8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_int8.onnx");
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_uint8.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) {
+  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, true, false>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8_FP16) {
-  RunMatMulIntegerToFloatTest<uint8_t, int8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_int8_bias.onnx");
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_uint8_bias.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) {
+  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, false, true>();
 }
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, true, false>("testdata/matmul_integer_to_float16_int8_int8.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8S8) {
+  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, false, false>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8_FP16) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, MLFloat16, false, true>("testdata/matmul_integer_to_float16_int8_int8_bias.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8S8) {
+  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, true, true>();
 }
-#endif // USE_DML
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8X8) {
-  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, true, false>("testdata/matmul_integer_to_float_int8.onnx");
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, true, false>("testdata/matmul_integer_to_float_uint8.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8U8) {
+  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, true, false>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8X8) {
-  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, false, true>("testdata/matmul_integer_to_float_int8_bias.onnx");
-  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, false, true>("testdata/matmul_integer_to_float_uint8_bias.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8U8) {
+  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, false, true>();
 }
 
-TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8S8) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, true, false>("testdata/matmul_integer_to_float_int8_int8.onnx");
+TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8U8) {
+  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, false, false>();
 }
 
-TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8S8) {
-  RunMatMulIntegerToFloatTest<int8_t, int8_t, float, false, true>("testdata/matmul_integer_to_float_int8_int8_bias.onnx");
+TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8U8) {
+  RunMatMulIntegerToFloatTest<uint8_t, uint8_t, float, true, true>();  // uint8 A, uint8 B, zero points and bias
+}
+
+TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_U8S8) {
+  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, true, false>();
+}
+
+TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_U8S8) {
+  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, false, true>();
+}
+
+TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_U8S8) {
+  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, false, false>();
+}
+
+TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) {
+  RunMatMulIntegerToFloatTest<uint8_t, int8_t, float, true, true>();
+}
+
+// DML EP supports Float16 output type and Signed A Matrix and Unsigned B Matrix for Float32 output
+#if defined(USE_DML)
+
+TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) {
+  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, true, false>();
+}
+
+TEST(MatMulIntegerToFloat, NoZeroPoint_HasBias_test_S8U8) {
+  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, false, true>();
+}
+
+TEST(MatMulIntegerToFloat, NoZeroPoint_NoBias_test_S8U8) {
+  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, false, false>();
+}
+
+TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_S8U8) {
+  RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, true, true>();  // signed A, unsigned B, as the test name states
+}
+
+TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8U8) {  // uint8 A and B, MLFloat16 scales and output, no bias
+  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
+  int64_t M = 5;
+  int64_t N = 5;
+  int64_t K = 2;
+
+  std::vector<uint8_t> A_data = {1, 5, 2, 1, 9,  // 10 values, registered below with dims {M, K} = {5, 2}
+                                 1, 1, 3, 7, 2};
+  std::vector<uint8_t> B_data = {3, 7, 2, 1, 1,  // 10 values, registered below with dims {K, N} = {2, 5}
+                                 2, 1, 9, 1, 1};
+  std::vector<MLFloat16> A_scale = ToFloat16({3.0f});
+  std::vector<MLFloat16> B_scale = ToFloat16({2.0f});
+  test.AddInput<uint8_t>("A", {M, K}, A_data);
+  test.AddInput<uint8_t>("B", {K, N}, B_data);
+  std::vector<uint8_t> A_zero_point = {1};
+  std::vector<uint8_t> B_zero_point = {1};
+
+  test.AddInput<MLFloat16>("a_scale", {1}, A_scale);
+  test.AddInput<MLFloat16>("b_scale", {1}, B_scale);
+  test.AddInput<uint8_t>("a_zero_point", {1}, A_zero_point);
+  test.AddInput<uint8_t>("b_zero_point", {1}, B_zero_point);
+
+  std::vector<float> Y_data(M * N);  // expected output, filled by the host-side reference below
+  CalculateMatMulIntegerToFloat<uint8_t, uint8_t, MLFloat16>(M, N, K, A_data, A_scale, A_zero_point,
+                                                             B_data, B_scale, B_zero_point, {}, Y_data,
+                                                             false, true, false);  // per_column, has_zp, has_bias
+
+  test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultDmlExecutionProvider());  // the list handed to Run holds just this provider
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_U8S8) {  // uint8 A, int8 B, MLFloat16 scales and output
+  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
+  int64_t M = 5;
+  int64_t N = 5;
+  int64_t K = 2;
+
+  std::vector<uint8_t> A_data = {3, 7, 2, 1, 1,  // 10 values, registered below with dims {M, K} = {5, 2}
+                                 2, 1, 9, 1, 1};
+  std::vector<int8_t> B_data = {2, -1, -9, 1, 1,  // 10 values, registered below with dims {K, N} = {2, 5}
+                                -1, 0, -3, 1, -4};
+  std::vector<MLFloat16> A_scale = ToFloat16({-4.0f});
+  std::vector<MLFloat16> B_scale = ToFloat16({2.0f});
+  test.AddInput<uint8_t>("A", {M, K}, A_data);
+  test.AddInput<int8_t>("B", {K, N}, B_data);
+  std::vector<uint8_t> A_zero_point = {1};
+  std::vector<int8_t> B_zero_point = {3};
+  // No bias tensor in this case: none is registered as an input and has_bias is false in the reference call.
+
+  test.AddInput<MLFloat16>("a_scale", {1}, A_scale);
+  test.AddInput<MLFloat16>("b_scale", {1}, B_scale);
+  test.AddInput<uint8_t>("a_zero_point", {1}, A_zero_point);
+  test.AddInput<int8_t>("b_zero_point", {1}, B_zero_point);
+
+  std::vector<float> Y_data(M * N);  // expected output, filled by the host-side reference below
+  CalculateMatMulIntegerToFloat<uint8_t, int8_t, MLFloat16>(M, N, K, A_data, A_scale, A_zero_point,
+                                                            B_data, B_scale, B_zero_point, {}, Y_data,
+                                                            false, true, false);  // per_column, has_zp, has_bias
+
+  test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultDmlExecutionProvider());  // the list handed to Run holds just this provider
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8S8) {  // int8 A and B, MLFloat16 scales, bias and output
+  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
+  int64_t M = 5;
+  int64_t N = 5;
+  int64_t K = 2;
+
+  std::vector<int8_t> A_data = {3, 7, -2, 1, 1,  // 10 values, registered below with dims {M, K} = {5, 2}
+                                2, -1, -9, 1, 1};
+  std::vector<int8_t> B_data = {2, -1, -9, 1, 1,  // 10 values, registered below with dims {K, N} = {2, 5}
+                                -1, 0, -3, 1, -4};
+  std::vector<MLFloat16> A_scale = ToFloat16({-4.0f});
+  std::vector<MLFloat16> B_scale = ToFloat16({2.0f});
+  test.AddInput<int8_t>("A", {M, K}, A_data);
+  test.AddInput<int8_t>("B", {K, N}, B_data);
+  std::vector<int8_t> A_zero_point = {-1};
+  std::vector<int8_t> B_zero_point = {3};
+  std::vector<MLFloat16> Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f});  // one value per output column (N = 5)
+
+  test.AddInput<MLFloat16>("a_scale", {1}, A_scale);
+  test.AddInput<MLFloat16>("b_scale", {1}, B_scale);
+  test.AddInput<int8_t>("a_zero_point", {1}, A_zero_point);
+  test.AddInput<int8_t>("b_zero_point", {1}, B_zero_point);
+  test.AddInput<MLFloat16>("bias", {N}, Bias);
+
+  std::vector<float> Y_data(M * N);  // expected output, filled by the host-side reference below
+  CalculateMatMulIntegerToFloat<int8_t, int8_t, MLFloat16>(M, N, K, A_data, A_scale, A_zero_point,
+                                                           B_data, B_scale, B_zero_point, Bias, Y_data,
+                                                           false, true, true);  // per_column, has_zp, has_bias
+
+  test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultDmlExecutionProvider());  // the list handed to Run holds just this provider
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16_S8U8) {  // int8 A, uint8 B, MLFloat16 scales, bias and output
+  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
+  int64_t M = 5;
+  int64_t N = 5;
+  int64_t K = 2;
+
+  std::vector<int8_t> A_data = {3, 7, -2, 1, 1,  // 10 values, registered below with dims {M, K} = {5, 2}
+                                2, -1, -9, 1, 1};
+  std::vector<uint8_t> B_data = {3, 7, 2, 1, 1,  // 10 values, registered below with dims {K, N} = {2, 5}
+                                 2, 1, 9, 1, 1};
+  std::vector<MLFloat16> A_scale = ToFloat16({-4.0f});
+  std::vector<MLFloat16> B_scale = ToFloat16({2.0f});
+  test.AddInput<int8_t>("A", {M, K}, A_data);
+  test.AddInput<uint8_t>("B", {K, N}, B_data);
+  std::vector<int8_t> A_zero_point = {-1};
+  std::vector<uint8_t> B_zero_point = {1};
+  std::vector<MLFloat16> Bias = ToFloat16({11.0f, -17.0f, 1.0f, -3.0f, 12.0f});  // one value per output column (N = 5)
+
+  test.AddInput<MLFloat16>("a_scale", {1}, A_scale);
+  test.AddInput<MLFloat16>("b_scale", {1}, B_scale);
+  test.AddInput<int8_t>("a_zero_point", {1}, A_zero_point);
+  test.AddInput<uint8_t>("b_zero_point", {1}, B_zero_point);
+  test.AddInput<MLFloat16>("bias", {N}, Bias);
+
+  std::vector<float> Y_data(M * N);  // expected output, filled by the host-side reference below
+  CalculateMatMulIntegerToFloat<int8_t, uint8_t, MLFloat16>(M, N, K, A_data, A_scale, A_zero_point,
+                                                            B_data, B_scale, B_zero_point, Bias, Y_data,
+                                                            false, true, true);  // per_column, has_zp, has_bias
+
+  test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultDmlExecutionProvider());  // the list handed to Run holds just this provider
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+TEST(MatMulIntegerToFloat, MatMulIntegerToFloat_FP16) {  // int8 A and B with per-column B scale / zero point and bias
+  OpTester test("MatMulIntegerToFloat", 1, kMSDomain);
+  int64_t M = 2;
+  int64_t N = 2;
+  int64_t K = 3;
+
+  std::vector<int8_t> A_data = {11, -2, 5,  // registered below with dims {M, K} = {2, 3}
+                                -1, 3, 10};
+  std::vector<int8_t> B_data = {-13, -2,  // registered below with dims {K, N} = {3, 2}
+                                9, 55,
+                                -1, 23};
+  std::vector<MLFloat16> A_scale = ToFloat16({0.910f});
+  std::vector<MLFloat16> B_scale = ToFloat16({1.10f, 1.123f});  // one scale per column of B (N = 2)
+
+  std::vector<int8_t> A_zero_point = {113};
+  std::vector<int8_t> B_zero_point = {98, 71};  // one zero point per column of B (N = 2)
+
+  std::vector<MLFloat16> Bias = ToFloat16({0.10f, 1.123f});
+
+  test.AddInput<int8_t>("A", {M, K}, A_data);
+  test.AddInput<int8_t>("B", {K, N}, B_data);
+
+  test.AddInput<MLFloat16>("a_scale", {}, {A_scale});  // empty dims here, unlike the {1} used by the other FP16 tests
+  test.AddInput<MLFloat16>("b_scale", {N}, B_scale);
+  test.AddInput<int8_t>("a_zero_point", {}, {A_zero_point});  // empty dims, matching a_scale
+  test.AddInput<int8_t>("b_zero_point", {N}, B_zero_point);
+  test.AddInput<MLFloat16>("bias", {N}, Bias);
+
+  std::vector<float> Y_data(M * N);  // expected output, filled by the host-side reference below
+  CalculateMatMulIntegerToFloat<int8_t, int8_t, MLFloat16>(M, N, K, A_data, A_scale, A_zero_point,
+                                                           B_data, B_scale, B_zero_point, Bias, Y_data,
+                                                           true, true, true);  // per_column, has_zp, has_bias
+
+  test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
+  test.SetOutputRelErr("Y", 2e-2f);  // the only FP16 test here that sets an explicit relative tolerance
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(DefaultDmlExecutionProvider());  // the list handed to Run holds just this provider
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+#endif
 
 TEST(MatMulIntegerToFloat, MatMulInteger_With_ZeroPoint) {
   auto test_case = [&](const std::vector<int64_t>& input_shape,
diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py
index 206a8514253c5..37db93a288b08 100644
--- a/onnxruntime/test/testdata/matmul_integer_to_float.py
+++ b/onnxruntime/test/testdata/matmul_integer_to_float.py
@@ -65,13 +65,6 @@ def GenerateModel(model_name, sign_i, sign_w, output_type_fp16, has_zp=True, bia
 
 if __name__ == "__main__":
     GenerateModel("matmul_integer_to_float16_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=True)
-    GenerateModel("matmul_integer_to_float16_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=True)
-    GenerateModel("matmul_integer_to_float16_int8_bias.onnx", sign_i=False, sign_w=True, output_type_fp16=True, has_zp=False, bias=True)
-    GenerateModel("matmul_integer_to_float16_uint8_bias.onnx", sign_i=False, sign_w=False, output_type_fp16=True, has_zp=False, bias=True)
-
-    GenerateModel("matmul_integer_to_float16_int8_int8.onnx", sign_i=True, sign_w=True, output_type_fp16=True)
-    GenerateModel("matmul_integer_to_float16_int8_int8_bias.onnx", sign_i=True, sign_w=True, output_type_fp16=True, has_zp=False, bias=True)
-
     GenerateModel("matmul_integer_to_float_int8.onnx", sign_i=False, sign_w=True, output_type_fp16=False)
     GenerateModel("matmul_integer_to_float_uint8.onnx", sign_i=False, sign_w=False, output_type_fp16=False)
     GenerateModel("matmul_integer_to_float_int8_bias.onnx", sign_i=False, sign_w=True, output_type_fp16=False, has_zp=False, bias=True)