diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
index cc7a892d1c445..7783d3b3f36b7 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
@@ -60,6 +60,7 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
           {"HardSwish", {}},
           {"Sigmoid", {}},
           {"Slice", {}},
+          {"LogSoftmax", {}},
           {"Softmax", {}},
           {"Sqrt", {}},
           {"Atan", {}},
@@ -72,7 +73,10 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
           {"Log", {}},
           {"LRN", {}},
           {"Ceil", {}},
+          {"Floor", {}},
+          {"Round", {}},
           {"Abs", {}},
+          {"Neg", {}},
           {"DepthToSpace", {}},
           {"SpaceToDepth", {}}};
 }
@@ -82,10 +86,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() {
           {"Mul", {}},
           {"Pow", {}},
           {"Sub", {}},
+          {"PRelu", {}},
           {"GridSample", {}}};
 }
 static const OpVersionsAndSelector::OpVersionsMap GetVariadicOpVersionsMap() {
-  return {{"Concat", {}}};
+  return {{"Concat", {}},
+          {"Max", {}},
+          {"Min", {}}};
 }
 static const OpVersionsAndSelector::OpVersionsMap GetConvOpVersionsMap() {
   return {{"Conv", {}}};
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
index ca18c051a9922..8abb847b20b46 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@@ -29,26 +29,37 @@ class SimpleOpBuilder : public BaseOpBuilder {
                                      bool do_op_validation) const override ORT_MUST_USE_RESULT;
 
  private:
-  Status ExplictOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
+  Status ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
 
   static constexpr std::array<std::string_view, 2> gridsample_supported_modes = {"bilinear", "nearest"};
   static constexpr std::array<std::string_view, 3> gridsample_supported_padding_modes = {"zeros", "border", "reflection"};
 };
 
-Status SimpleOpBuilder::ExplictOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
-  // QNN Softmax only supports an axis value equal to input_rank - 1 (i.e., same as -1).
-  if (node_unit.OpType() == "Softmax") {
-    int32_t axis = node_unit.SinceVersion() < 13 ? 1 : -1;  // Default axis changed from 1 to -1 in opset 13.
+static int32_t GetDefaultAxisAttribute(const std::string& op_type, int opset_version) {
+  // Only Softmax and LogSoftmax have an opset-dependent default here; every other op type gets 0.
+  const bool is_softmax_like = (op_type == "Softmax" || op_type == "LogSoftmax");
+  if (!is_softmax_like) {
+    return 0;
+  }
+  return opset_version < 13 ? 1 : -1;  // Default axis changed from 1 to -1 in opset 13.
+}
+
+Status SimpleOpBuilder::ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
+  const std::string& op_type = node_unit.OpType();
+
+  // QNN Softmax and LogSoftmax only support an axis value equal to input_rank - 1 (i.e., same as -1).
+  if (op_type == "Softmax" || op_type == "LogSoftmax") {
+    int32_t axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
     Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
     ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
     std::vector<uint32_t> input_shape;
     ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
-                      "QNN EP: Cannot get shape for Softmax input");
+                      "QNN EP: Cannot get shape for ", op_type.c_str(), " input");
     ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
-                  "QNN Softmax only supports an `axis` attribute equal to input_rank-1 (or -1)");
+                  "QNN ", op_type.c_str(), " only supports an `axis` attribute equal to input_rank-1 (or -1)");
   }
 
-  if (node_unit.OpType() == "GridSample") {
+  if (op_type == "GridSample") {
     NodeAttrHelper node_helper(node_unit);
     std::string mode = node_helper.Get("mode", "linear");
     ORT_RETURN_IF_NOT(utils::ArrayHasString(gridsample_supported_modes, mode), "GridSample does not support mode ",
@@ -58,6 +69,13 @@ Status SimpleOpBuilder::ExplictOpCheck(const QnnModelWrapper& qnn_model_wrapper,
                       padding_mode.c_str());
   }
 
+  // ONNX's Min and Max operators accept a variable number of inputs (i.e., variadic).
+  // However, QNN's Min and Max operators must take in exactly two inputs.
+  if (op_type == "Min" || op_type == "Max") {
+    ORT_RETURN_IF_NOT(node_unit.Inputs().size() == 2,
+                      "QNN EP only supports Min and Max operators with exactly 2 inputs.");
+  }
+
   return Status::OK();
 }
 
@@ -207,7 +225,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
   const std::string& op_type = node_unit.OpType();
 
   if (do_op_validation) {
-    ORT_RETURN_IF_ERROR(ExplictOpCheck(qnn_model_wrapper, node_unit));
+    ORT_RETURN_IF_ERROR(ExplicitOpCheck(qnn_model_wrapper, node_unit));
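-    // Skip the op validation for DepthToSpace & SpaceToDepth if it's not NHWC data layout
+    // Skip the op validation for DepthToSpace, SpaceToDepth & GridSample if it's not NHWC data layout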
     if (node_unit.Domain() != kMSInternalNHWCDomain && (op_type == "DepthToSpace" || op_type == "SpaceToDepth" || op_type == "GridSample")) {
       return Status::OK();
@@ -217,7 +235,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
   std::vector<std::string> param_tensor_names;
   // Add attribute
   if (op_type == "LogSoftmax" || op_type == "Softmax" || op_type == "Concat") {
-    int32_t default_axis = ("Softmax" == op_type) ? -1 : 0;
+    int32_t default_axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
     Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
     ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
     QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
diff --git a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
index e579e3274e699..eaeebba5bea5c 100644
--- a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
+++ b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
@@ -43,7 +43,7 @@ static GetTestQDQModelFn<QType> BuildQDQArgMxxTestCase(const std::string& op_typ
   return [op_type, input_def, attrs](ModelTestBuilder& builder,
                                      std::vector<QuantParams<QType>>& output_qparams) {
     ORT_UNUSED_PARAMETER(output_qparams);
-    QuantParams<QType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<QType> input_qparams = GetTestInputQuantParams<QType>(input_def);
 
     auto* input = MakeTestInput(builder, input_def);
 
@@ -205,7 +205,7 @@ TEST_F(QnnHTPBackendTests, ArgMaxMin_AsGraphOutputUnsupported) {
   auto model_builder_func = [](const std::string& op_type, const TestInputDef<float>& input_def,
                                const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) -> GetTestModelFn {
     return [op_type, input_def, attrs](ModelTestBuilder& builder) {
-      QuantParams<uint8_t> input_qparams = GetTestInputQuantParams(input_def);
+      QuantParams<uint8_t> input_qparams = GetTestInputQuantParams<uint8_t>(input_def);
 
       auto* input = MakeTestInput(builder, input_def);
       auto* output = builder.MakeOutput();
diff --git a/onnxruntime/test/providers/qnn/average_pool_test.cc b/onnxruntime/test/providers/qnn/average_pool_test.cc
index 114802d56cfd3..79ec07796c0e8 100644
--- a/onnxruntime/test/providers/qnn/average_pool_test.cc
+++ b/onnxruntime/test/providers/qnn/average_pool_test.cc
@@ -5,7 +5,9 @@
 
 #include <string>
 #include <unordered_map>
+#include <vector>
 
+#include "core/graph/node_attr_utils.h"
 #include "test/optimizer/qdq_test_utils.h"
 #include "test/providers/qnn/qnn_test_utils.h"
 
@@ -16,87 +18,11 @@
 namespace onnxruntime {
 namespace test {
 
-// Returns a function that creates a graph with a single AveragePool operator.
-static GetTestModelFn BuildAveragePoolTestCase(const TestInputDef<float>& input_def,
-                                               const std::vector<int64_t>& kernel_shape,
-                                               const std::vector<int64_t>& strides,
-                                               const std::vector<int64_t>& pads,
-                                               int64_t count_include_pad,
-                                               const std::string& auto_pad = "NOTSET") {
-  return [input_def, kernel_shape, strides, pads,
-          count_include_pad, auto_pad](ModelTestBuilder& builder) {
-    auto* input = MakeTestInput(builder, input_def);
-
-    auto* output = builder.MakeOutput();
-    Node& pool_node = builder.AddNode("AveragePool", {input}, {output});
-
-    pool_node.AddAttribute("kernel_shape", kernel_shape);
-
-    if (!strides.empty()) {
-      pool_node.AddAttribute("strides", strides);
-    }
-
-    pool_node.AddAttribute("auto_pad", auto_pad);
-
-    if (!pads.empty() && auto_pad == "NOTSET") {
-      pool_node.AddAttribute("pads", pads);
-    }
-
-    if (count_include_pad > 0) {
-      pool_node.AddAttribute("count_include_pad", count_include_pad);
-    }
-  };
-}
-
-// Returns a function that creates a graph with a QDQ AveragePool operator.
-template <typename QuantType>
-GetTestQDQModelFn<QuantType> BuildAveragePoolQDQTestCase(const TestInputDef<float>& input_def,
-                                                         const std::vector<int64_t>& kernel_shape,
-                                                         const std::vector<int64_t>& strides,
-                                                         const std::vector<int64_t>& pads,
-                                                         int64_t count_include_pad,
-                                                         const std::string& auto_pad = "NOTSET") {
-  return [input_def, kernel_shape, strides, pads,
-          count_include_pad, auto_pad](ModelTestBuilder& builder,
-                                       std::vector<QuantParams<QuantType>>& output_qparams) {
-    auto* input_arg = MakeTestInput(builder, input_def);
-
-    // add QDQ + AveragePool
-    QuantParams<QuantType> input_qparams = GetTestInputQuantParams(input_def);
-    auto* dq_output = AddQDQNodePair<QuantType>(builder, input_arg, input_qparams.scale, input_qparams.zero_point);
-    auto* averagepool_output = builder.MakeIntermediate();
-    Node& pool_node = builder.AddNode("AveragePool", {dq_output}, {averagepool_output});
-
-    pool_node.AddAttribute("kernel_shape", kernel_shape);
-
-    if (!strides.empty()) {
-      pool_node.AddAttribute("strides", strides);
-    }
-
-    pool_node.AddAttribute("auto_pad", auto_pad);
-
-    if (!pads.empty() && auto_pad == "NOTSET") {
-      pool_node.AddAttribute("pads", pads);
-    }
-
-    if (count_include_pad > 0) {
-      pool_node.AddAttribute("count_include_pad", count_include_pad);
-    }
-
-    // op_output -> Q -> DQ -> output
-    AddQDQNodePairWithOutputAsGraphOutput<QuantType>(builder, averagepool_output,
-                                                     output_qparams[0].scale, output_qparams[0].zero_point);
-  };
-}
-
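-// Runs an AveragePool model on the QNN CPU backend. Checks the graph node assignment, and that inference
-// outputs for QNN and CPU match.
+// Runs a single-operator model (AveragePool or GlobalAveragePool, selected by op_type) on the QNN CPU backend.
+// Checks the graph node assignment, and that inference outputs for QNN and CPU match.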
-static void RunAveragePoolOpTest(const TestInputDef<float>& input_def,
-                                 const std::vector<int64_t>& kernel_shape,
-                                 const std::vector<int64_t>& strides,
-                                 const std::vector<int64_t>& pads,
-                                 int64_t count_include_pad,
-                                 const std::string& auto_pad,
+static void RunAveragePoolOpTest(const std::string& op_type,
+                                 const std::vector<TestInputDef<float>>& input_defs,
+                                 const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
                                  ExpectedEPNodeAssignment expected_ep_assignment,
                                  int opset = 18) {
   ProviderOptions provider_options;
@@ -106,7 +32,7 @@ static void RunAveragePoolOpTest(const TestInputDef<float>& input_def,
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
 
-  RunQnnModelTest(BuildAveragePoolTestCase(input_def, kernel_shape, strides, pads, count_include_pad, auto_pad),
+  RunQnnModelTest(BuildOpTestCase(op_type, input_defs, attrs),
                   provider_options,
                   opset,
                   expected_ep_assignment);
@@ -115,14 +41,11 @@ static void RunAveragePoolOpTest(const TestInputDef<float>& input_def,
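-// Runs a QDQ AveragePool model on the QNN HTP backend. Checks the graph node assignment, and that accuracy
-// on QNN EP is at least as good as on CPU EP.
+// Runs a QDQ single-operator model (AveragePool or GlobalAveragePool, selected by op_type) on the QNN HTP
+// backend. Checks the graph node assignment, and that accuracy on QNN EP is at least as good as on CPU EP.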
 template <typename QuantType>
-static void RunQDQAveragePoolOpTest(const TestInputDef<float>& input_def,
-                                    const std::vector<int64_t>& kernel_shape,
-                                    const std::vector<int64_t>& strides,
-                                    const std::vector<int64_t>& pads,
-                                    int64_t count_include_pad,
-                                    const std::string& auto_pad,
+static void RunQDQAveragePoolOpTest(const std::string& op_type,
+                                    const std::vector<TestInputDef<float>>& input_defs,
+                                    const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
                                     ExpectedEPNodeAssignment expected_ep_assignment,
-                                    int opset = 18, float fp32_abs_err = 1e-5f) {
+                                    int opset = 18) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -130,13 +53,11 @@ static void RunQDQAveragePoolOpTest(const TestInputDef<float>& input_def,
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
 
-  TestQDQModelAccuracy(BuildAveragePoolTestCase(input_def, kernel_shape, strides, pads, count_include_pad, auto_pad),
-                       BuildAveragePoolQDQTestCase<QuantType>(input_def, kernel_shape, strides, pads, count_include_pad,
-                                                              auto_pad),
+  TestQDQModelAccuracy(BuildOpTestCase(op_type, input_defs, attrs),
+                       BuildQDQOpTestCase<QuantType>(op_type, input_defs, attrs),
                        provider_options,
                        opset,
-                       expected_ep_assignment,
-                       fp32_abs_err);
+                       expected_ep_assignment);
 }
 
 //
@@ -144,46 +65,48 @@ static void RunQDQAveragePoolOpTest(const TestInputDef<float>& input_def,
 //
 
 // AveragePool with kernel size equal to the spatial dimension of input tensor.
-TEST_F(QnnCPUBackendTests, AveragePool_Global) {
-  RunAveragePoolOpTest(TestInputDef<float>({1, 2, 3, 3}, false, -10.0f, 10.0f),  // random input
-                       {3, 3},                                                   // kernel_shape
-                       {3, 3},                                                   // strides
-                       {0, 0, 0, 0},                                             // pads
-                       0,                                                        // count_include_pad
-                       "NOTSET",
+TEST_F(QnnCPUBackendTests, AveragePool_AsGlobal) {
+  RunAveragePoolOpTest("AveragePool",
+                       {TestInputDef<float>({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))},
+                       {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{3, 3}),
+                        utils::MakeAttribute("strides", std::vector<int64_t>{3, 3})},
+                       ExpectedEPNodeAssignment::All);
+}
+
+// Test GlobalAveragePool on QNN CPU backend.
+TEST_F(QnnCPUBackendTests, GlobalAveragePool) {
+  RunAveragePoolOpTest("GlobalAveragePool",
+                       {TestInputDef<float>({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))},
+                       {},
                        ExpectedEPNodeAssignment::All);
 }
 
 // AveragePool that counts padding.
 TEST_F(QnnCPUBackendTests, AveragePool_CountIncludePad) {
-  RunAveragePoolOpTest(TestInputDef<float>({1, 2, 3, 3}, false, -10.0f, 10.0f),  // random input
-                       {1, 1},                                                   // kernel_shape
-                       {1, 1},                                                   // strides
-                       {0, 0, 0, 0},                                             // pads
-                       1,                                                        // count_include_pad
-                       "NOTSET",
+  RunAveragePoolOpTest("AveragePool",
+                       {TestInputDef<float>({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))},
+                       {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
+                        utils::MakeAttribute("count_include_pad", static_cast<int64_t>(1))},
                        ExpectedEPNodeAssignment::All);
 }
 
 // AveragePool that use auto_pad 'SAME_UPPER'.
 TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameUpper) {
-  RunAveragePoolOpTest(TestInputDef<float>({1, 2, 3, 3}, false, -10.0f, 10.0f),  // random input
-                       {1, 1},                                                   // kernel_shape
-                       {1, 1},                                                   // strides
-                       {},                                                       // pads
-                       1,                                                        // count_include_pad
-                       "SAME_UPPER",
+  RunAveragePoolOpTest("AveragePool",
+                       {TestInputDef<float>({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))},
+                       {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
+                        utils::MakeAttribute("count_include_pad", static_cast<int64_t>(1)),
+                        utils::MakeAttribute("auto_pad", "SAME_UPPER")},
                        ExpectedEPNodeAssignment::All);
 }
 
 // AveragePool that use auto_pad 'SAME_LOWER'.
 TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameLower) {
-  RunAveragePoolOpTest(TestInputDef<float>({1, 2, 3, 3}, false, -10.0f, 10.0f),  // random input
-                       {1, 1},                                                   // kernel_shape
-                       {1, 1},                                                   // strides
-                       {},                                                       // pads
-                       1,                                                        // count_include_pad
-                       "SAME_LOWER",
+  RunAveragePoolOpTest("AveragePool",
+                       {TestInputDef<float>({1, 2, 3, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 18))},
+                       {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
+                        utils::MakeAttribute("count_include_pad", static_cast<int64_t>(1)),
+                        utils::MakeAttribute("auto_pad", "SAME_LOWER")},
                        ExpectedEPNodeAssignment::All);
 }
 
@@ -193,15 +116,23 @@ TEST_F(QnnCPUBackendTests, AveragePool_AutopadSameLower) {
 //
 
 // QDQ AveragePool with kernel size equal to the spatial dimension of input tensor.
-TEST_F(QnnHTPBackendTests, AveragePool_Global_HTP) {
+TEST_F(QnnHTPBackendTests, AveragePool_AsGlobal) {
   std::vector<float> input = {32.1289f, -59.981f, -17.2799f, 62.7263f, 33.6205f, -19.3515f, -54.0113f, 37.5648f, 61.5357f,
                               -52.5769f, 27.3637f, -9.01382f, -65.5612f, 19.9497f, -47.9228f, 26.9813f, 83.064f, 0.362503f};
-  RunQDQAveragePoolOpTest<uint8_t>(TestInputDef<float>({1, 2, 3, 3}, false, input),
-                                   {3, 3},        // kernel_shape
-                                   {3, 3},        // strides
-                                   {0, 0, 0, 0},  // pads
-                                   0,             // count_include_pad
-                                   "NOTSET",
+  RunQDQAveragePoolOpTest<uint8_t>("AveragePool",
+                                   {TestInputDef<float>({1, 2, 3, 3}, false, input)},
+                                   {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{3, 3}),
+                                    utils::MakeAttribute("strides", std::vector<int64_t>{3, 3})},
+                                   ExpectedEPNodeAssignment::All);
+}
+
+// Test accuracy for 8-bit QDQ GlobalAveragePool with input of rank 4.
+TEST_F(QnnHTPBackendTests, GlobalAveragePool) {
+  std::vector<float> input = GetFloatDataInRange(-32.0f, 32.0f, 18);
+
+  RunQDQAveragePoolOpTest<uint8_t>("GlobalAveragePool",
+                                   {TestInputDef<float>({1, 2, 3, 3}, false, input)},
+                                   {},
                                    ExpectedEPNodeAssignment::All);
 }
 
@@ -210,12 +141,10 @@ TEST_F(QnnHTPBackendTests, AveragePool_CountIncludePad_HTP_u8) {
   std::vector<float> input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f,
                               1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
 
-  RunQDQAveragePoolOpTest<uint8_t>(TestInputDef<float>({1, 2, 3, 3}, false, input),
-                                   {1, 1},        // kernel_shape
-                                   {1, 1},        // strides
-                                   {0, 0, 0, 0},  // pads
-                                   1,             // count_include_pad
-                                   "NOTSET",
+  RunQDQAveragePoolOpTest<uint8_t>("AveragePool",
+                                   {TestInputDef<float>({1, 2, 3, 3}, false, input)},
+                                   {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
+                                    utils::MakeAttribute("count_include_pad", static_cast<int64_t>(1))},
                                    ExpectedEPNodeAssignment::All,
                                    18);
 }
@@ -225,12 +154,10 @@ TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameUpper_HTP_u8) {
   std::vector<float> input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f,
                               1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
 
-  RunQDQAveragePoolOpTest<uint8_t>(TestInputDef<float>({1, 2, 3, 3}, false, input),
-                                   {1, 1},  // kernel_shape
-                                   {1, 1},  // strides
-                                   {},      // pads
-                                   0,       // count_include_pad
-                                   "SAME_UPPER",
+  RunQDQAveragePoolOpTest<uint8_t>("AveragePool",
+                                   {TestInputDef<float>({1, 2, 3, 3}, false, input)},
+                                   {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
+                                    utils::MakeAttribute("auto_pad", "SAME_UPPER")},
                                    ExpectedEPNodeAssignment::All,
                                    18);
 }
@@ -240,12 +167,10 @@ TEST_F(QnnHTPBackendTests, AveragePool_AutopadSameLower_HTP_u8) {
   std::vector<float> input = {-9.0f, -7.33f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f,
                               1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
 
-  RunQDQAveragePoolOpTest<uint8_t>(TestInputDef<float>({1, 2, 3, 3}, false, input),
-                                   {1, 1},  // kernel_shape
-                                   {1, 1},  // strides
-                                   {},      // pads
-                                   0,       // count_include_pad
-                                   "SAME_LOWER",
+  RunQDQAveragePoolOpTest<uint8_t>("AveragePool",
+                                   {TestInputDef<float>({1, 2, 3, 3}, false, input)},
+                                   {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{1, 1}),
+                                    utils::MakeAttribute("auto_pad", "SAME_LOWER")},
                                    ExpectedEPNodeAssignment::All,
                                    18);
 }
diff --git a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
index 8e4a07e66624e..9b65ca7bda3e2 100644
--- a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
@@ -114,15 +114,15 @@ GetTestQDQModelFn<InputQType> BuildQDQBatchNormTestCase(const TestInputDef<float
     const int64_t num_channels = input_shape[1];
 
     NodeArg* input = MakeTestInput(builder, input_def);
-    QuantParams<InputQType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<InputQType> input_qparams = GetTestInputQuantParams<InputQType>(input_def);
     NodeArg* input_qdq = AddQDQNodePair<InputQType>(builder, input, input_qparams.scale, input_qparams.zero_point);
 
     NodeArg* scale = MakeTestInput(builder, scale_def);
-    QuantParams<ScaleQType> scale_qparams = GetTestInputQuantParams(scale_def);
+    QuantParams<ScaleQType> scale_qparams = GetTestInputQuantParams<ScaleQType>(scale_def);
     NodeArg* scale_qdq = AddQDQNodePair<ScaleQType>(builder, scale, scale_qparams.scale, scale_qparams.zero_point);
 
     NodeArg* bias = MakeTestInput(builder, bias_def);
-    QuantParams<BiasQType> bias_qparams = GetTestInputQuantParams(bias_def);
+    QuantParams<BiasQType> bias_qparams = GetTestInputQuantParams<BiasQType>(bias_def);
     NodeArg* bias_qdq = AddQDQNodePair<BiasQType>(builder, bias, bias_qparams.scale, bias_qparams.zero_point);
 
     std::vector<float> mean_vals(num_channels);
diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc
index c6ebaaf7ab7e4..b66d86f24af4e 100644
--- a/onnxruntime/test/providers/qnn/conv_test.cc
+++ b/onnxruntime/test/providers/qnn/conv_test.cc
@@ -156,13 +156,13 @@ static GetTestQDQModelFn<InputQType> BuildQDQConvTestCase(const std::string& con
 
     // input -> Q/DQ ->
     auto* input = MakeTestInput(builder, input_def);
-    QuantParams<InputQType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<InputQType> input_qparams = GetTestInputQuantParams<InputQType>(input_def);
     auto* input_qdq = AddQDQNodePair<InputQType>(builder, input, input_qparams.scale, input_qparams.zero_point);
     conv_inputs.push_back(input_qdq);
 
     // weights -> Q/DQ ->
     auto* weights = MakeTestInput(builder, weights_def);
-    QuantParams<InputQType> weights_qparams = GetTestInputQuantParams(weights_def);
+    QuantParams<InputQType> weights_qparams = GetTestInputQuantParams<InputQType>(weights_def);
     auto* weights_qdq = AddQDQNodePair<InputQType>(builder, weights, weights_qparams.scale, weights_qparams.zero_point);
     conv_inputs.push_back(weights_qdq);
 
diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
index d2ca9d8ff71e0..5b05b39f34a27 100644
--- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
@@ -37,7 +37,7 @@ static GetTestQDQModelFn<QuantType> BuildQDQGatherOpTestCase(const TestInputDef<
   return [input_def, indices_def, axis](ModelTestBuilder& builder,
                                         std::vector<QuantParams<QuantType>>& output_qparams) {
     NodeArg* input = MakeTestInput(builder, input_def);
-    QuantParams<QuantType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
     NodeArg* input_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale, input_qparams.zero_point);
 
     NodeArg* indices = MakeTestInput(builder, indices_def);
diff --git a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
index 683c4d49fa99d..594973e37ef0b 100644
--- a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
@@ -45,12 +45,12 @@ static GetTestQDQModelFn<QuantType> BuildQDQInstanceNormTestCase(const TestInput
                                                  std::vector<QuantParams<QuantType>>& output_qparams) {
     // input => Q => DQ =>
     NodeArg* input = MakeTestInput(builder, input_def);
-    QuantParams<QuantType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
     NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point);
 
     // scale => Q => DQ =>
     NodeArg* scale = MakeTestInput(builder, scale_def);
-    QuantParams<QuantType> scale_qparams = GetTestInputQuantParams(scale_def);
+    QuantParams<QuantType> scale_qparams = GetTestInputQuantParams<QuantType>(scale_def);
     NodeArg* scale_qdq = AddQDQNodePair(builder, scale, scale_qparams.scale, scale_qparams.zero_point);
 
     // bias (as int32) => DQ =>
diff --git a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
index 772476cb0d245..a8237817c71df 100644
--- a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
@@ -33,7 +33,7 @@ static GetTestQDQModelFn<QuantType> BuildQDQLeakyReluOpTestCase(const TestInputD
                             std::vector<QuantParams<QuantType>>& output_qparams) {
     // input => Q => DQ =>
     NodeArg* input = MakeTestInput(builder, input_def);
-    QuantParams<QuantType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
     NodeArg* input_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale, input_qparams.zero_point);
 
     // LeakryRelu
diff --git a/onnxruntime/test/providers/qnn/lrn_op_test.cc b/onnxruntime/test/providers/qnn/lrn_op_test.cc
index 82f7b246aa5e4..4f64b4a7e0d3f 100644
--- a/onnxruntime/test/providers/qnn/lrn_op_test.cc
+++ b/onnxruntime/test/providers/qnn/lrn_op_test.cc
@@ -39,7 +39,7 @@ static GetTestQDQModelFn<InputQType> BuildQDQLRNTestCase(const TestInputDef<floa
                                               std::vector<QuantParams<InputQType>>& output_qparams) {
     // input -> Q -> DQ ->
     NodeArg* input = MakeTestInput(builder, input_def);
-    QuantParams<InputQType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<InputQType> input_qparams = GetTestInputQuantParams<InputQType>(input_def);
     NodeArg* input_qdq = AddQDQNodePair<InputQType>(builder, input, input_qparams.scale, input_qparams.zero_point);
 
     // LRN
diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp
index 00ba7bd7858c3..6edb6ecdcfb1a 100644
--- a/onnxruntime/test/providers/qnn/matmul_test.cpp
+++ b/onnxruntime/test/providers/qnn/matmul_test.cpp
@@ -34,12 +34,12 @@ static GetTestQDQModelFn<QuantType> BuildMatMulOpQDQTestCase(const TestInputDef<
                                   std::vector<QuantParams<QuantType>>& output_qparams) {
     // input1 -> Q -> DQ ->
     NodeArg* input1 = MakeTestInput(builder, input1_def);
-    QuantParams<QuantType> input1_qparams = GetTestInputQuantParams(input1_def);
+    QuantParams<QuantType> input1_qparams = GetTestInputQuantParams<QuantType>(input1_def);
     auto* input1_qdq = AddQDQNodePair<QuantType>(builder, input1, input1_qparams.scale, input1_qparams.zero_point);
 
     // input2 -> Q -> DQ ->
     NodeArg* input2 = MakeTestInput(builder, input2_def);
-    QuantParams<QuantType> input2_qparams = GetTestInputQuantParams(input2_def);
+    QuantParams<QuantType> input2_qparams = GetTestInputQuantParams<QuantType>(input2_def);
     auto* input2_qdq = AddQDQNodePair<QuantType>(builder, input2, input2_qparams.scale, input2_qparams.zero_point);
 
     // MatMul
@@ -108,9 +108,9 @@ TEST_F(QnnCPUBackendTests, MatMulOp) {
 // Test MatMul broadcasting
 // Note slight inaccuracy in CPU backend:
 // Expected: contains 896 values, where each value and its corresponding value in 16-byte object
-// <80-03 00-00 00-00 00-00 40-00 34-F0 5B-01 00-00> are an almost-equal pair
-// Actual: 16-byte object <80-03 00-00 00-00 00-00 40-00 23-F0 5B-01 00-00>,
-// where the value pair (148.536011, 148.536255) at index #4 don't match, which is 0.000244141 from 148.536
+// <80-03 00-00 00-00 00-00 40-00 34-DD F7-01 00-00> are an almost-equal pair
+// Actual: 16-byte object <80-03 00-00 00-00 00-00 40-00 23-DD F7-01 00-00>,
+// where the value pair (73.68116, 73.680809) at index #80 don't match, which is -0.000350952 from 73.6812
 TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) {
   // Create two matrices with element values in the range [-10.0, 10.0].
   std::vector<float> input_a = GetFloatDataInRange(-10.0f, 10.0f, 28 * 64);
@@ -118,7 +118,7 @@ TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) {
 
   RunMatMulOpOpTest(TestInputDef<float>({28, 1, 64}, false, input_a),
                     TestInputDef<float>({64, 32}, false, input_b),
-                    ExpectedEPNodeAssignment::All, 18, 0.00026f);
+                    ExpectedEPNodeAssignment::All, 18, 0.0004f);
 }
 
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
diff --git a/onnxruntime/test/providers/qnn/max_min_op_test.cc b/onnxruntime/test/providers/qnn/max_min_op_test.cc
new file mode 100644
index 0000000000000..09ea71e5f03eb
--- /dev/null
+++ b/onnxruntime/test/providers/qnn/max_min_op_test.cc
@@ -0,0 +1,135 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#if !defined(ORT_MINIMAL_BUILD)
+
+#include <string>
+
+#include "test/providers/qnn/qnn_test_utils.h"
+
+#include "onnx/onnx_pb.h"
+#include "gtest/gtest.h"
+
+namespace onnxruntime {
+namespace test {
+
+// Runs a Max/Min model on the QNN CPU backend. Checks the graph node assignment, and that inference
+// outputs for QNN EP and CPU EP match.
+static void RunCPUMinOrMaxOpTest(const std::string& op_type,
+                                 const std::vector<TestInputDef<float>>& input_defs,
+                                 ExpectedEPNodeAssignment expected_ep_assignment,
+                                 int opset = 13) {
+  ProviderOptions provider_options;
+
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnCpu.dll";
+#else
+  provider_options["backend_path"] = "libQnnCpu.so";
+#endif
+
+  RunQnnModelTest(BuildOpTestCase(op_type, input_defs, {}, kOnnxDomain),
+                  provider_options,
+                  opset,
+                  expected_ep_assignment);
+}
+
+// Runs a QDQ Max/Min model on the QNN (HTP) EP and the ORT CPU EP. Checks the graph node assignment, and that inference
+// running the QDQ model on QNN EP is at least as accurate as on ORT CPU EP (when compared to the baseline float32 model).
+template <typename QType = uint8_t>
+static void RunQDQMinOrMaxOpTest(const std::string& op_type,
+                                 const std::vector<TestInputDef<float>>& input_defs,
+                                 ExpectedEPNodeAssignment expected_ep_assignment,
+                                 int opset = 13) {
+  ProviderOptions provider_options;
+
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+
+  TestQDQModelAccuracy(BuildOpTestCase(op_type, input_defs, {}, kOnnxDomain),            // baseline float32 model
+                       BuildQDQOpTestCase<QType>(op_type, input_defs, {}, kOnnxDomain),  // QDQ model
+                       provider_options,
+                       opset,
+                       expected_ep_assignment,
+                       1e-4f);
+}
+
+//
+// CPU tests:
+//
+
+// Test that Min with 1 input is *NOT* supported on CPU backend.
+TEST_F(QnnCPUBackendTests, Min_1Input_NotSupported) {
+  RunCPUMinOrMaxOpTest("Min",
+                       {TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f)},
+                       ExpectedEPNodeAssignment::None, 13);
+}
+
+// Test that Max with 1 input is *NOT* supported on CPU backend.
+TEST_F(QnnCPUBackendTests, Max_1Input_NotSupported) {
+  RunCPUMinOrMaxOpTest("Max",
+                       {TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f)},
+                       ExpectedEPNodeAssignment::None, 13);
+}
+
+// Test Min with 2 inputs on CPU backend.
+TEST_F(QnnCPUBackendTests, Min_2Inputs) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunCPUMinOrMaxOpTest("Min",
+                       {TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                        TestInputDef<float>({1, 3, 4, 4}, false, input_data)},
+                       ExpectedEPNodeAssignment::All, 13);
+}
+
+// Test Max with 2 inputs on CPU backend.
+TEST_F(QnnCPUBackendTests, Max_2Inputs) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunCPUMinOrMaxOpTest("Max",
+                       {TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                        TestInputDef<float>({1, 3, 4, 4}, false, input_data)},
+                       ExpectedEPNodeAssignment::All, 13);
+}
+
+#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
+//
+// HTP tests:
+//
+
+// Test that Min with 1 input is *NOT* supported on HTP backend.
+TEST_F(QnnHTPBackendTests, Min_1Input_NotSupported) {
+  RunQDQMinOrMaxOpTest("Min",
+                       {TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f)},
+                       ExpectedEPNodeAssignment::None, 13);
+}
+
+// Test that Max with 1 input is *NOT* supported on HTP backend.
+TEST_F(QnnHTPBackendTests, Max_1Input_NotSupported) {
+  RunQDQMinOrMaxOpTest("Max",
+                       {TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f)},
+                       ExpectedEPNodeAssignment::None, 13);
+}
+
+// Test accuracy of 8-bit Q/DQ Min with 2 inputs on HTP backend.
+TEST_F(QnnHTPBackendTests, Min_2Inputs) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQMinOrMaxOpTest<uint8_t>("Min",
+                                {TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                                 TestInputDef<float>({1, 3, 4, 4}, false, input_data)},
+                                ExpectedEPNodeAssignment::All, 13);
+}
+
+// Test accuracy of 8-bit Q/DQ Max with 2 inputs on HTP backend.
+TEST_F(QnnHTPBackendTests, Max_2Inputs) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
+  RunQDQMinOrMaxOpTest<uint8_t>("Max",
+                                {TestInputDef<float>({1, 3, 4, 4}, false, input_data),
+                                 TestInputDef<float>({1, 3, 4, 4}, false, input_data)},
+                                ExpectedEPNodeAssignment::All, 13);
+}
+
+#endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
+}  // namespace test
+}  // namespace onnxruntime
+#endif  // !defined(ORT_MINIMAL_BUILD)
diff --git a/onnxruntime/test/providers/qnn/pool_op_test.cpp b/onnxruntime/test/providers/qnn/pool_op_test.cpp
index c6e8a032ca7f4..1c73eae1468ff 100644
--- a/onnxruntime/test/providers/qnn/pool_op_test.cpp
+++ b/onnxruntime/test/providers/qnn/pool_op_test.cpp
@@ -41,7 +41,7 @@ GetTestQDQModelFn<QuantType> BuildPoolQDQTestCase(const std::string& op_type,
                                      std::vector<QuantParams<QuantType>>& output_qparams) {
     // input -> Q -> DQ ->
     NodeArg* input = MakeTestInput(builder, input_def);
-    QuantParams<QuantType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
     NodeArg* input_qdq = AddQDQNodePair<QuantType>(builder, input, input_qparams.scale, input_qparams.zero_point);
 
     // MaxPool
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
index feacdc54226b6..548f80675a622 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
@@ -21,19 +21,21 @@ std::vector<float> GetFloatDataInRange(float min_val, float max_val, size_t num_
     return {};
   }
 
+  if (num_elems == 1) {
+    return {min_val};
+  }
+
   std::vector<float> data;
   data.reserve(num_elems);
 
-  const float step_size = (max_val - min_val) / static_cast<float>(num_elems);
+  const float step_size = (max_val - min_val) / static_cast<float>(num_elems - 1);
   float val = min_val;
   for (size_t i = 0; i < num_elems; i++) {
     data.push_back(val);
     val += step_size;
   }
 
-  // Try to ensure that 0.0 and max_val are also included in the array.
-  // If num_elems is less than 3, then not all of min_val, 0, and max_val will be present.
-  data[num_elems / 2] = 0.0f;
+  // Set the last element to exactly max_val (accumulating step_size in floating point may not land on it).
   data[num_elems - 1] = max_val;
 
   return data;
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h
index dd5e6fc23670a..1b0b85319918f 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.h
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h
@@ -199,7 +199,7 @@ struct TestInputDef {
   std::pair<T, T> range_override_;
 };
 
-template <typename QType = uint8_t>
+template <typename QType>
 inline QuantParams<QType> GetTestInputQuantParams(const TestInputDef<float>& input_def) {
   const std::pair<float, float> frange = input_def.GetRange();
   return QuantParams<QType>::Compute(frange.first, frange.second);
@@ -239,10 +239,10 @@ void InferenceModel(const std::string& model_data, const char* log_id,
  * \param fp32_abs_err Small tolerance used for floating-point comparisons.
  * \param log_severity The logger's severity setting.
  */
-template <typename QuantType = uint8_t>
+template <typename QuantType>
 inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTestQDQModelFn<QuantType>& qdq_model_fn,
                                  const ProviderOptions& qnn_options, int opset_version,
-                                 ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err,
+                                 ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err = 1e-4f,
                                  logging::Severity log_severity = logging::Severity::kERROR) {
   // Add kMSDomain to cover contrib op like Gelu
   const std::unordered_map<std::string, int> domain_to_version = {{"", opset_version}, {kMSDomain, 1}};
@@ -314,7 +314,8 @@ inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTe
 
     // limit the error message count in case test with large data failed
     size_t max_error_count = 10;
-    int error_count = 0;
+    size_t error_count = 0;
+
     // Compare accuracy of QDQ results with float model.
     // QNN EP must be at least as accurate as CPU EP when running the QDQ model.
     for (size_t i = 0; i < num_outputs; i++) {
@@ -433,6 +434,79 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef<bool
 // i.e., initial bias => manual quantization (int32) => DQ => final float bias
 NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef<float>& bias_def, float bias_scale);
 
+/**
+ * Returns a function that builds a model with a single operator with N inputs of the same element type.
+ *
+ * \param op_type The operator to instantiate.
+ * \param input_defs List of input definitions.
+ * \param attrs List of operator attributes.
+ * \param op_domain The operator's domain. Defaults to the ONNX domain (i.e., "").
+ * \returns A model building function.
+ */
+template <typename InputType>
+inline GetTestModelFn BuildOpTestCase(const std::string& op_type,
+                                      const std::vector<TestInputDef<InputType>>& input_defs,
+                                      const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                      const std::string& op_domain = kOnnxDomain) {
+  return [op_type, input_defs, attrs, op_domain](ModelTestBuilder& builder) {
+    std::vector<NodeArg*> op_inputs;
+    op_inputs.reserve(input_defs.size());
+
+    for (const auto& input_def : input_defs) {
+      NodeArg* input = MakeTestInput<InputType>(builder, input_def);
+      op_inputs.push_back(input);
+    }
+
+    auto* output = builder.MakeOutput();
+    Node& onnx_node = builder.AddNode(op_type, op_inputs, {output}, op_domain);
+
+    for (const auto& attr : attrs) {
+      onnx_node.AddAttributeProto(attr);
+    }
+  };
+}
+
+/**
+ * Returns a function that builds a model with a single QDQ operator with N inputs of the same element type.
+ *
+ * \param op_type The operator to instantiate.
+ * \param input_defs List of input definitions.
+ * \param attrs List of operator attributes.
+ * \param op_domain The operator's domain. Defaults to the ONNX domain (i.e., "").
+ * \returns A model building function.
+ */
+template <typename InputQType>
+inline GetTestQDQModelFn<InputQType> BuildQDQOpTestCase(const std::string& op_type,
+                                                        const std::vector<TestInputDef<float>>& input_defs,
+                                                        const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
+                                                        const std::string& op_domain = kOnnxDomain) {
+  return [op_type, input_defs, attrs, op_domain](ModelTestBuilder& builder,
+                                                 std::vector<QuantParams<InputQType>>& output_qparams) {
+    std::vector<NodeArg*> op_inputs;
+    op_inputs.reserve(input_defs.size());
+
+    for (const auto& input_def : input_defs) {
+      NodeArg* input = MakeTestInput<float>(builder, input_def);
+      QuantParams<InputQType> input_qparams = GetTestInputQuantParams<InputQType>(input_def);
+      NodeArg* input_after_qdq = AddQDQNodePair<InputQType>(builder, input, input_qparams.scale,
+                                                            input_qparams.zero_point);
+      op_inputs.push_back(input_after_qdq);
+    }
+
+    // Op -> op_output
+    auto* op_output = builder.MakeIntermediate();
+    Node& onnx_node = builder.AddNode(op_type, op_inputs, {op_output}, op_domain);
+
+    for (const auto& attr : attrs) {
+      onnx_node.AddAttributeProto(attr);
+    }
+
+    // op_output -> Q -> DQ -> output
+    AddQDQNodePairWithOutputAsGraphOutput<InputQType>(builder, op_output, output_qparams[0].scale,
+                                                      output_qparams[0].zero_point);
+  };
+}
+
 /**
  * Runs a test model on the QNN EP. Checks the graph node assignment, and that inference
  * outputs for QNN and CPU match.
diff --git a/onnxruntime/test/providers/qnn/reduce_op_test.cc b/onnxruntime/test/providers/qnn/reduce_op_test.cc
index 755f6b094df07..c3c2b578a1bd0 100644
--- a/onnxruntime/test/providers/qnn/reduce_op_test.cc
+++ b/onnxruntime/test/providers/qnn/reduce_op_test.cc
@@ -366,7 +366,7 @@ static void RunReduceOpQDQTest(const std::string& op_type,
                                bool keepdims,
                                int opset,
                                ExpectedEPNodeAssignment expected_ep_assignment,
-                               float fp32_abs_err = 1e-5f) {
+                               float fp32_abs_err = 1e-4f) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
index 4e7702bd84270..49122c9dacdb1 100644
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -18,149 +18,16 @@ namespace onnxruntime {
 namespace test {
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
-using UInt8Limits = std::numeric_limits<uint8_t>;
-
-template <typename InputType = float>
-static GetTestModelFn BuildUnaryOpTestCase(const std::string& op_type, const TestInputDef<InputType>& input0_def,
-                                           const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                           const std::string& domain = kOnnxDomain) {
-  return [op_type, input0_def, attrs, domain](ModelTestBuilder& builder) {
-    NodeArg* input0 = MakeTestInput(builder, input0_def);
-
-    auto* output = builder.MakeOutput();
-    auto& op_node = builder.AddNode(op_type, {input0}, {output}, domain);
-    for (const auto& attr : attrs) {
-      op_node.AddAttributeProto(attr);
-    }
-  };
-}
-
-// Creates the graph:
-//                       _______________________
-//                      |                       |
-//    input_u8 -> DQ -> |       SimpleOp        | -> Q -> output_u8
-//                      |_______________________|
-//
-// Currently used to test QNN EP.
-template <typename InputQType>
-GetTestQDQModelFn<InputQType> BuildQDQUnaryOpTestCase(const TestInputDef<float>& input_def,
-                                                      const std::string& op_type,
-                                                      const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                                                      const std::string& domain = kOnnxDomain) {
-  return [input_def, op_type, attrs, domain](ModelTestBuilder& builder,
-                                             std::vector<QuantParams<InputQType>>& output_qparams) {
-    auto* input = MakeTestInput(builder, input_def);
-    QuantParams<InputQType> input_qparams = GetTestInputQuantParams(input_def);
-    auto* input_qdq = AddQDQNodePair<InputQType>(builder, input, input_qparams.scale, input_qparams.zero_point);
-
-    auto* op_output = builder.MakeIntermediate();
-    auto& op_node = builder.AddNode(op_type, {input_qdq}, {op_output}, domain);
-
-    for (const auto& attr : attrs) {
-      op_node.AddAttributeProto(attr);
-    }
-
-    // op_output -> Q -> DQ -> output
-    AddQDQNodePairWithOutputAsGraphOutput<InputQType>(builder, op_output, output_qparams[0].scale, output_qparams[0].zero_point);
-  };
-}
-
-/**
- * Runs an Simple Op model on the QNN HTP backend. Checks the graph node assignment, and that inference
- * outputs for QNN and CPU match.
- *
- * \param input_shape The input's shape.
- * \param test_description Description of the test for error reporting.
- * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None).
- * \param num_modes_in_graph The number of expected nodes in the graph.
- */
-template <typename InputQType = uint8_t>
-static void RunQDQUnaryOpTest(const TestInputDef<float>& input_def, const std::string& op_type,
-                              const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
-                              int opset_version,
-                              ExpectedEPNodeAssignment expected_ep_assignment,
-                              const std::string& domain = kOnnxDomain) {
-  ProviderOptions provider_options;
-#if defined(_WIN32)
-  provider_options["backend_path"] = "QnnHtp.dll";
-#else
-  provider_options["backend_path"] = "libQnnHtp.so";
-#endif
-
-  // Runs model with DQ-> Op -> Q and compares the outputs of the CPU and QNN EPs.
-  TestQDQModelAccuracy(BuildUnaryOpTestCase<float>(op_type, input_def, attrs, domain),
-                       BuildQDQUnaryOpTestCase<InputQType>(input_def, op_type, attrs, domain),
-                       provider_options,
-                       opset_version,
-                       expected_ep_assignment,
-                       1e-5f);
-}
-
-// TODO: share with other op tests
-// Creates the graph with two inputs and attributes
-template <typename InputType>
-static GetTestModelFn BuildOpTestCase(const std::string& op_type,
-                                      const TestInputDef<InputType>& input0_def,
-                                      const TestInputDef<InputType>& input1_def,
-                                      const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
-  return [op_type, input0_def, input1_def, attrs](ModelTestBuilder& builder) {
-    NodeArg* input0 = MakeTestInput(builder, input0_def);
-    NodeArg* input1 = MakeTestInput(builder, input1_def);
-
-    auto* output = builder.MakeOutput();
-    Node& onnx_node = builder.AddNode(op_type, {input0, input1}, {output});
-
-    for (const auto& attr : attrs) {
-      onnx_node.AddAttributeProto(attr);
-    }
-  };
-}
-
-// Creates the graph with two inputs and attributes
-//                       _______________________
-//                      |                       |
-//   input0_u8 -> DQ -> |       SimpleOp        | -> Q -> output_u8
-//   input1_u8 -> DQ -> |_______________________|
-//
-// Currently used to test QNN EP.
-template <typename InputQType>
-static GetTestQDQModelFn<InputQType> BuildQDQOpTestCase(const std::string& op_type,
-                                                        const TestInputDef<float>& input0_def,
-                                                        const TestInputDef<float>& input1_def,
-                                                        const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
-  return [op_type, input0_def, input1_def, attrs](ModelTestBuilder& builder,
-                                                  std::vector<QuantParams<InputQType>>& output_qparams) {
-    NodeArg* input0 = MakeTestInput(builder, input0_def);
-    NodeArg* input1 = MakeTestInput(builder, input1_def);
-
-    // input -> Q -> DQ -> Op
-    QuantParams<InputQType> input0_qparams = GetTestInputQuantParams(input0_def);
-    auto* qdq0_output = AddQDQNodePair<InputQType>(builder, input0, input0_qparams.scale, input0_qparams.zero_point);
-
-    QuantParams<InputQType> input1_qparams = GetTestInputQuantParams(input1_def);
-    auto* qdq1_output = AddQDQNodePair<InputQType>(builder, input1, input1_qparams.scale, input1_qparams.zero_point);
-
-    // Op -> op_output
-    auto* op_output = builder.MakeIntermediate();
-    Node& onnx_node = builder.AddNode(op_type, {qdq0_output, qdq1_output}, {op_output});
-
-    for (const auto& attr : attrs) {
-      onnx_node.AddAttributeProto(attr);
-    }
-
-    // op_output -> Q -> DQ -> output
-    AddQDQNodePairWithOutputAsGraphOutput<InputQType>(builder, op_output, output_qparams[0].scale,
-                                                      output_qparams[0].zero_point);
-  };
-}
-
+// Tests the accuracy of a QDQ model on QNN EP by comparing to CPU EP, which runs both the fp32 model
+// and the QDQ model.
 template <typename InputQType = uint8_t>
 static void RunQDQOpTest(const std::string& op_type,
-                         const TestInputDef<float>& input0_def,
-                         const TestInputDef<float>& input1_def,
+                         const std::vector<TestInputDef<float>>& input_defs,
                          const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
                          int opset_version,
-                         ExpectedEPNodeAssignment expected_ep_assignment) {
+                         ExpectedEPNodeAssignment expected_ep_assignment,
+                         const std::string& op_domain = kOnnxDomain,
+                         float fp32_abs_err = 1e-4f) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -168,21 +35,22 @@ static void RunQDQOpTest(const std::string& op_type,
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
 
-  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, input0_def, input1_def, attrs),
-                       BuildQDQOpTestCase<InputQType>(op_type, input0_def, input1_def, attrs),
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, input_defs, attrs, op_domain),
+                       BuildQDQOpTestCase<InputQType>(op_type, input_defs, attrs, op_domain),
                        provider_options,
                        opset_version,
                        expected_ep_assignment,
-                       1e-5f);
+                       fp32_abs_err);
 }
 
+// Runs a non-QDQ model on HTP and compares output to CPU EP.
 template <typename InputType = float>
 static void RunOpTest(const std::string& op_type,
-                      const TestInputDef<InputType>& input0_def,
-                      const TestInputDef<InputType>& input1_def,
+                      const std::vector<TestInputDef<InputType>>& input_defs,
                       const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
                       int opset_version,
-                      ExpectedEPNodeAssignment expected_ep_assignment) {
+                      ExpectedEPNodeAssignment expected_ep_assignment,
+                      const std::string& op_domain = kOnnxDomain) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -191,151 +59,307 @@ static void RunOpTest(const std::string& op_type,
 #endif
 
   // Runs model with a Q/DQ binary op and compares the outputs of the CPU and QNN EPs.
-  RunQnnModelTest(BuildOpTestCase<InputType>(op_type, input0_def, input1_def, attrs),
+  RunQnnModelTest(BuildOpTestCase<InputType>(op_type, input_defs, attrs, op_domain),
                   provider_options,
                   opset_version,
                   expected_ep_assignment);
 }
 
+// Test the accuracy of QDQ Sigmoid.
+TEST_F(QnnHTPBackendTests, UnaryOp_Sigmoid) {
+  RunQDQOpTest<uint8_t>("Sigmoid",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test the accuracy of QDQ Tanh.
+TEST_F(QnnHTPBackendTests, UnaryOp_Tanh) {
+  RunQDQOpTest<uint8_t>("Tanh",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
 // Check that QNN compiles DQ -> Gelu -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Gelu) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -10.0f, 10.0f),  // Input range [-10.0, 10.0f]
-                    "Gelu",
-                    {},
-                    11,
-                    ExpectedEPNodeAssignment::All,
-                    kMSDomain);  // GeLu is a contrib op.
+  RunQDQOpTest<uint8_t>("Gelu",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        11,
+                        ExpectedEPNodeAssignment::All,
+                        kMSDomain);  // GeLu is a contrib op.
 }
 
 // Check that QNN compiles DQ -> Elu -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Elu) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -10.0f, 10.0f),  // Input range [-10.0, 10.0f]
-                    "Elu",
-                    {},
-                    11,
-                    ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Elu",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        11,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Tests accuracy of QDQ Relu
+// TODO: Enable when fixed. On QNN EP, the QDQ Relu does not set negative values to zero!
+// Could be due to ORT's ReluQuantFusion!
+//
+// Inaccuracy detected for output 'output', element 0.
+// Output quant params: scale=0.039215687662363052, zero_point=0.
+// Expected val: 0
+// QNN QDQ val: -10 (err 10)
+// CPU QDQ val: 0 (err 0)
+TEST_F(QnnHTPBackendTests, DISABLED_UnaryOp_Relu) {
+  RunQDQOpTest<uint8_t>("Relu",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        14,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> HardSwish -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_HardSwish) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -10.0f, 10.0f),  // Input range [-10.0, 10.0f]
-                    "HardSwish",
-                    {},
-                    14,
-                    ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("HardSwish",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        14,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Atan -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Atan) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -10.0f, 10.0f),  // Input range [-10.0, 10.0f]
-                    "Atan",
-                    {},
-                    14,
-                    ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Atan",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        14,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Asin -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Asin) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -0.5f, 0.5f),  // input range -0.5 to 0.5
-                    "Asin", {},
-                    13, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Asin",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-0.5, 0.5, 6))},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Sign -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Sign) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -10.0f, 10.0f),
-                    "Sign", {},
-                    13, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Sign",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Sin -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Sin) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -3.14159f, 3.14159f),
-                    "Sin", {},
-                    11, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Sin",
+                        {TestInputDef<float>({1, 2, 3}, false, -3.14159f, 3.14159f)},
+                        {},
+                        11,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Cos -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Cos) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, {-3.14159f, -1.5f, -0.5f, 0.0f, 1.5, 3.14159f}),
-                    "Cos", {},
-                    11, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Cos",
+                        {TestInputDef<float>({1, 2, 3}, false, {-3.14159f, -1.5f, -0.5f, 0.0f, 1.5, 3.14159f})},
+                        {},
+                        11,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Cos -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Cos_Inaccurate) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, {-3.14159f, -1.88436f, -0.542863f, 0.0f, 1.05622f, 3.14159f}),
-                    "Cos", {},
-                    11, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Cos",
+                        {TestInputDef<float>({1, 2, 3}, false, {-3.14159f, -1.88436f, -0.542863f, 0.0f, 1.05622f, 3.14159f})},
+                        {},
+                        11,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Log -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, UnaryOp_Log) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, {3.14159f, 100.88436f, 10.542863f, 9.1f, 1.05622f, 3.14159f}),
-                    "Log", {},
-                    11, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Log",
+                        {TestInputDef<float>({1, 2, 3}, false, {3.14159f, 100.88436f, 10.542863f, 9.1f, 1.05622f, 3.14159f})},
+                        {},
+                        11, ExpectedEPNodeAssignment::All);
+}
+
+// Test accuracy of 8-bit QDQ Exp
+TEST_F(QnnHTPBackendTests, UnaryOp_Exp) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 6);
+  RunQDQOpTest<uint8_t>("Exp",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test accuracy of 8-bit QDQ Sqrt
+TEST_F(QnnHTPBackendTests, UnaryOp_Sqrt) {
+  std::vector<float> input_data = GetFloatDataInRange(0.0f, 20.0f, 9);
+  RunQDQOpTest<uint8_t>("Sqrt",
+                        {TestInputDef<float>({1, 3, 3}, false, input_data)},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test accuracy of 8-bit QDQ Neg
+TEST_F(QnnHTPBackendTests, UnaryOp_Neg) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 6);
+  RunQDQOpTest<uint8_t>("Neg",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test Not operator on HTP backend.
+TEST_F(QnnHTPBackendTests, UnaryOp_Not) {
+  RunOpTest<bool>("Not",
+                  {TestInputDef<bool>({1, 4}, false, {false, false, true, true})},
+                  {},
+                  17,
+                  ExpectedEPNodeAssignment::All);
+}
+
+// Test accuracy of 8-bit QDQ Round
+TEST_F(QnnHTPBackendTests, UnaryOp_Round) {
+  std::vector<float> input_data = GetFloatDataInRange(-9.0f, 9.0f, 6);
+  RunQDQOpTest<uint8_t>("Round",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {},
+                        11,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
 // Test that the default axis (-1) for SoftMax opset 13 works.
 TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f),
-                    "Softmax",
-                    {},  // Uses default axis of -1 for opset 13
-                    13, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Softmax",
+                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {},  // Uses default axis of -1 for opset 13
+                        13,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
 // Test that an axis != -1 is not supported.
 TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f),
-                    "Softmax",
-                    {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
-                    13, ExpectedEPNodeAssignment::None);
+  RunQDQOpTest<uint8_t>("Softmax",
+                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                        13,
+                        ExpectedEPNodeAssignment::None);
 }
 
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
 // Test that the default axis (1) for SoftMax opset < 13 does not work.
 TEST_F(QnnHTPBackendTests, UnaryOp_Softmax11_DefaultAxisFails) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f),
-                    "Softmax",
-                    {},  // Uses default axis of 1 for opset < 13.
-                    11, ExpectedEPNodeAssignment::None);
+  RunQDQOpTest<uint8_t>("Softmax",
+                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {},  // Uses default axis of 1 for opset < 13.
+                        11,
+                        ExpectedEPNodeAssignment::None);
 }
 
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
 // Test that setting an axis value of -1 works for Softmax opset < 13.
 TEST_F(QnnHTPBackendTests, UnaryOp_Softmax11_SetValidAxis) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f),
-                    "Softmax",
-                    {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
-                    11, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Softmax",
+                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
+                        11,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
+// Test that the default axis (-1) for LogSoftmax opset 13 works.
+TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_DefaultAxis) {
+  std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
+  RunQDQOpTest<uint8_t>("LogSoftmax",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {},  // Uses default axis of -1 for opset 13
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
+// Test that an axis != -1 is not supported.
+TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_UnsupportedAxis) {
+  std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
+  RunQDQOpTest<uint8_t>("LogSoftmax",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                        13,
+                        ExpectedEPNodeAssignment::None);
+}
+
+// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
+// Test that the default axis (1) for LogSoftmax opset < 13 does not work.
+TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax11_DefaultAxisFails) {
+  std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
+  RunQDQOpTest<uint8_t>("LogSoftmax",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {},  // Uses default axis of 1 for opset < 13.
+                        11,
+                        ExpectedEPNodeAssignment::None);
+}
+
+// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
+// Test that setting an axis value of -1 works for LogSoftmax opset < 13.
+TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax11_SetValidAxis) {
+  std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
+  RunQDQOpTest<uint8_t>("LogSoftmax",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
+                        11,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Test QDQ Abs op.
 TEST_F(QnnHTPBackendTests, UnaryOp_Abs) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -10.0f, 10.0f),
-                    "Abs",
-                    {},
-                    13, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("Abs",
+                        {TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Test QDQ Ceil op.
 TEST_F(QnnHTPBackendTests, UnaryOp_Ceil) {
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 3}, false, -100.0f, 100.0f),
-                    "Ceil",
-                    {},
-                    13, ExpectedEPNodeAssignment::All);
+  const std::vector<float> input_data = GetFloatDataInRange(-12.0f, 12.0f, 6);
+  RunQDQOpTest<uint8_t>("Ceil",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test QDQ Floor op.
+TEST_F(QnnHTPBackendTests, UnaryOp_Floor) {
+  const std::vector<float> input_data = GetFloatDataInRange(-12.0f, 12.0f, 6);
+  RunQDQOpTest<uint8_t>("Floor",
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
+                        {},
+                        13,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Test QDQ DepthToSpace.
@@ -348,11 +372,12 @@ TEST_F(QnnHTPBackendTests, DepthToSpaceOp_CRD) {
                                 21., 22., 23.,
                                 27., 28., 29.,
                                 30., 31., 32.};
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 4, 2, 3}, false, X),
-                    "DepthToSpace",
-                    {utils::MakeAttribute("blocksize", static_cast<int64_t>(2)),
-                     utils::MakeAttribute("mode", "CRD")},
-                    11, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("DepthToSpace",
+                        {TestInputDef<float>({1, 4, 2, 3}, false, X)},
+                        {utils::MakeAttribute("blocksize", static_cast<int64_t>(2)),
+                         utils::MakeAttribute("mode", "CRD")},
+                        11,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Test QDQ DepthToSpace.
@@ -365,11 +390,12 @@ TEST_F(QnnHTPBackendTests, DepthToSpaceOp_DCR) {
                                 21., 22., 23.,
                                 27., 28., 29.,
                                 30., 31., 32.};
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 4, 2, 3}, false, X),
-                    "DepthToSpace",
-                    {utils::MakeAttribute("blocksize", static_cast<int64_t>(2)),
-                     utils::MakeAttribute("mode", "DCR")},
-                    11, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("DepthToSpace",
+                        {TestInputDef<float>({1, 4, 2, 3}, false, X)},
+                        {utils::MakeAttribute("blocksize", static_cast<int64_t>(2)),
+                         utils::MakeAttribute("mode", "DCR")},
+                        11,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Test QDQ SpaceToDepth.
@@ -379,10 +405,11 @@ TEST_F(QnnHTPBackendTests, SpaceToDepthOp) {
 
                                 2.0f, 2.1f, 2.2f, 2.3f,
                                 3.0f, 3.1f, 3.2f, 3.3f};
-  RunQDQUnaryOpTest(TestInputDef<float>({1, 2, 2, 4}, false, X),
-                    "SpaceToDepth",
-                    {utils::MakeAttribute("blocksize", static_cast<int64_t>(2))},
-                    11, ExpectedEPNodeAssignment::All);
+  RunQDQOpTest<uint8_t>("SpaceToDepth",
+                        {TestInputDef<float>({1, 2, 2, 4}, false, X)},
+                        {utils::MakeAttribute("blocksize", static_cast<int64_t>(2))},
+                        11,
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Run QDQ model on HTP twice
@@ -404,23 +431,21 @@ TEST_F(QnnHTPBackendTests, ContextBinaryCacheTest) {
 
   // Runs model with DQ-> Atan-> Q and compares the outputs of the CPU and QNN EPs.
   // 1st run will generate the Qnn context cache binary file
-  TestQDQModelAccuracy(BuildUnaryOpTestCase<float>(op_type, input_def, {}),
-                       BuildQDQUnaryOpTestCase<uint8_t>(input_def, op_type, {}),
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def}, {}),
                        provider_options,
                        14,
-                       ExpectedEPNodeAssignment::All,
-                       1e-5f);
+                       ExpectedEPNodeAssignment::All);
 
   // Make sure the Qnn context cache binary file is generated
   EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
 
   // 2nd run will load and run from Qnn context cache binary file
-  TestQDQModelAccuracy(BuildUnaryOpTestCase<float>(op_type, input_def, {}),
-                       BuildQDQUnaryOpTestCase<uint8_t>(input_def, op_type, {}),
+  TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}),
+                       BuildQDQOpTestCase<uint8_t>(op_type, {input_def}, {}),
                        provider_options,
                        14,
-                       ExpectedEPNodeAssignment::All,
-                       1e-5f);
+                       ExpectedEPNodeAssignment::All);
 }
 
 TEST_F(QnnHTPBackendTests, QuantAccuracyTest) {
@@ -439,7 +464,7 @@ TEST_F(QnnHTPBackendTests, QuantAccuracyTest) {
 
     // input -> Q -> Transpose -> DQ -> output
     NodeArg* input0 = MakeTestInput(builder, input0_def);
-    QuantParams<uint8_t> qparams = GetTestInputQuantParams(input0_def);
+    QuantParams<uint8_t> qparams = GetTestInputQuantParams<uint8_t>(input0_def);
 
     auto* quant_input = builder.MakeIntermediate();
     builder.AddQuantizeLinearNode<uint8_t>(input0, qparams.scale, qparams.zero_point, quant_input);
@@ -462,8 +487,8 @@ TEST_F(QnnHTPBackendTests, QuantAccuracyTest) {
 // Test QDQ Add
 TEST_F(QnnHTPBackendTests, BinaryOp_Add4D) {
   RunQDQOpTest<uint8_t>("Add",
-                        TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
-                        TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
+                        {TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
+                         TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f)},
                         {},
                         17,
                         ExpectedEPNodeAssignment::All);
@@ -472,8 +497,8 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Add4D) {
 // Test QDQ Sub
 TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D) {
   RunQDQOpTest<uint8_t>("Sub",
-                        TestInputDef<float>({1, 3, 8, 8}, false, -10.0f, 10.0f),
-                        TestInputDef<float>({1, 3, 8, 8}, false, -10.0f, 10.0f),
+                        {TestInputDef<float>({1, 3, 8, 8}, false, -10.0f, 10.0f),
+                         TestInputDef<float>({1, 3, 8, 8}, false, -10.0f, 10.0f)},
                         {},
                         17,
                         ExpectedEPNodeAssignment::All);
@@ -481,8 +506,8 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D) {
 
 TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D_LargeInputs) {
   RunQDQOpTest<uint8_t>("Sub",
-                        TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
-                        TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
+                        {TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
+                         TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f)},
                         {},
                         17,
                         ExpectedEPNodeAssignment::All);
@@ -490,17 +515,65 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D_LargeInputs) {
 
 TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D_Broadcast) {
   RunQDQOpTest<uint8_t>("Sub",
-                        TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
-                        TestInputDef<float>({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}),
+                        {TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
+                         TestInputDef<float>({3, 1, 1}, true, {1.0f, 0.5f, -0.3f})},
                         {},
                         17,
                         ExpectedEPNodeAssignment::All);
 }
 
+// Test accuracy of QDQ Pow
+#if defined(__linux__)
+// TODO: This fails on Linux (HTP emulation). Works on Windows ARM64.
+// Inaccuracy detected for output 'output', element 0.
+// Output quant params: scale=0.051073111593723297, zero_point=2.
+// Expected val: 0.0099999997764825821
+// QNN QDQ val: 12.921497344970703 (err 12.911497116088867)
+// CPU QDQ val: -0.10214622318744659 (err 0.11214622110128403)
+TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Pow) {
+#else
+TEST_F(QnnHTPBackendTests, BinaryOp_Pow) {
+#endif
+  std::vector<float> bases_input = {-10.0f, -8.0f, -6.0f, 1.0f, 2.0f, 3.0f, 5.5f, 10.0f};
+  std::vector<float> exponents_input = {-2.0f, -1.0f, 0.0f, 0.5f, 1.0f, 2.0f, 1.5f, 0.2f};
+  RunQDQOpTest<uint8_t>("Pow",
+                        {TestInputDef<float>({1, 2, 2, 2}, false, bases_input),
+                         TestInputDef<float>({1, 2, 2, 2}, false, exponents_input)},
+                        {},
+                        15,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test accuracy of QDQ PRelu with dynamic slopes.
+TEST_F(QnnHTPBackendTests, BinaryOp_PRelu_DynamicSlopes) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  std::vector<float> slopes_data = GetFloatDataInRange(-1.0f, 1.0f, 8);
+  RunQDQOpTest<uint8_t>("PRelu",
+                        {TestInputDef<float>({1, 2, 2, 2}, false, input_data),
+                         TestInputDef<float>({1, 2, 2, 2}, false, slopes_data)},
+                        {},
+                        16,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test accuracy of QDQ PRelu with static slope weights.
+TEST_F(QnnHTPBackendTests, BinaryOp_PRelu_StaticSlopes) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
+  std::vector<float> slopes_data = GetFloatDataInRange(-1.0f, 1.0f, 8);
+  RunQDQOpTest<uint8_t>("PRelu",
+                        {TestInputDef<float>({1, 2, 2, 2}, false, input_data),
+                         TestInputDef<float>({1, 2, 2, 2}, true, slopes_data)},
+                        {},
+                        16,
+                        ExpectedEPNodeAssignment::All);
+}
+
 TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_SmallInputs) {
+  std::vector<float> input0_data = {-10.0f, -8.0f, -1.0f, 0.0f, 1.0f, 2.1f, 8.0f, 10.0f};
+  std::vector<float> input1_data = {5.0f, 4.0f, 1.0f, 1.0f, 1.0f, 4.0f, 4.0f, 5.0f};
   RunQDQOpTest<uint8_t>("Div",
-                        TestInputDef<float>({1, 2, 2, 2}, false, {-10.0f, -8.0f, -1.0f, 0.0f, 1.0f, 2.1f, 8.0f, 10.0f}),
-                        TestInputDef<float>({1, 2, 2, 2}, false, {5.0f, 4.0f, 1.0f, 1.0f, 1.0f, 4.0f, 4.0f, 5.0f}),
+                        {TestInputDef<float>({1, 2, 2, 2}, false, input0_data),
+                         TestInputDef<float>({1, 2, 2, 2}, false, input1_data)},
                         {},
                         17,
                         ExpectedEPNodeAssignment::All);
@@ -514,8 +587,8 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_SmallInputs) {
 // CPU QDQ val: -516716.71875 (err 238759.40625)
 TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Div4D_LargeInputs) {
   RunQDQOpTest<uint8_t>("Div",
-                        TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
-                        TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
+                        {TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
+                         TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f)},
                         {},
                         17,
                         ExpectedEPNodeAssignment::All);
@@ -523,8 +596,8 @@ TEST_F(QnnHTPBackendTests, DISABLED_BinaryOp_Div4D_LargeInputs) {
 
 TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_Broadcast) {
   RunQDQOpTest<uint8_t>("Div",
-                        TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
-                        TestInputDef<float>({3, 1, 1}, true, {1.0f, 0.5f, -0.3f}),
+                        {TestInputDef<float>({1, 3, 768, 1152}, false, -1.0f, 1.0f),
+                         TestInputDef<float>({3, 1, 1}, true, {1.0f, 0.5f, -0.3f})},
                         {},
                         17,
                         ExpectedEPNodeAssignment::All);
@@ -532,29 +605,30 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_Broadcast) {
 
 // Test QDQ Mul
 TEST_F(QnnHTPBackendTests, BinaryOp_Mul4D) {
+  std::vector<float> input_data = GetFloatDataInRange(-10.0f, 10.0f, 8);
   RunQDQOpTest<uint8_t>("Mul",
-                        TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
-                        TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
+                        {TestInputDef<float>({1, 2, 2, 2}, false, input_data),
+                         TestInputDef<float>({1, 2, 2, 2}, false, input_data)},
                         {},
                         17,
                         ExpectedEPNodeAssignment::All);
 }
 
 // Test And
-TEST_F(QnnCPUBackendTests, BinaryOp_And4D) {
+TEST_F(QnnHTPBackendTests, BinaryOp_And4D) {
   RunOpTest<bool>("And",
-                  TestInputDef<bool>({1, 4}, false, {false, false, true, true}),
-                  TestInputDef<bool>({1, 4}, false, {false, true, false, true}),
+                  {TestInputDef<bool>({1, 4}, false, {false, false, true, true}),
+                   TestInputDef<bool>({1, 4}, false, {false, true, false, true})},
                   {},
                   17,
                   ExpectedEPNodeAssignment::All);
 }
 
-// Test that Or is not yet supported on CPU backend.
+// Test that Or is not yet supported on HTP backend.
-TEST_F(QnnCPUBackendTests, BinaryOp_HTP_Or_Unsupported) {
+TEST_F(QnnHTPBackendTests, BinaryOp_HTP_Or_Unsupported) {
   RunOpTest<bool>("Or",
-                  TestInputDef<bool>({1, 4}, false, {false, false, true, true}),
-                  TestInputDef<bool>({1, 4}, false, {false, true, false, true}),
+                  {TestInputDef<bool>({1, 4}, false, {false, false, true, true}),
+                   TestInputDef<bool>({1, 4}, false, {false, true, false, true})},
                   {},
                   17,
                   ExpectedEPNodeAssignment::None);
@@ -563,8 +637,8 @@ TEST_F(QnnCPUBackendTests, BinaryOp_HTP_Or_Unsupported) {
 // Test QDQ GridSample with bilinear
 TEST_F(QnnHTPBackendTests, GridSample_Bilinear) {
   RunQDQOpTest<uint8_t>("GridSample",
-                        TestInputDef<float>({1, 1, 3, 2}, false, -10.0f, 10.0f),
-                        TestInputDef<float>({1, 2, 4, 2}, false, -10.0f, 10.0f),
+                        {TestInputDef<float>({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)),
+                         TestInputDef<float>({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))},
                         {utils::MakeAttribute("align_corners", static_cast<int64_t>(0)),
                          utils::MakeAttribute("mode", "bilinear"),
                          utils::MakeAttribute("padding_mode", "zeros")},
@@ -575,8 +649,8 @@ TEST_F(QnnHTPBackendTests, GridSample_Bilinear) {
 // Test QDQ GridSample with align corners
 TEST_F(QnnHTPBackendTests, GridSample_AlignCorners) {
   RunQDQOpTest<uint8_t>("GridSample",
-                        TestInputDef<float>({1, 1, 3, 2}, false, -10.0f, 10.0f),
-                        TestInputDef<float>({1, 2, 4, 2}, false, -10.0f, 10.0f),
+                        {TestInputDef<float>({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)),
+                         TestInputDef<float>({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))},
                         {utils::MakeAttribute("align_corners", static_cast<int64_t>(1)),
                          utils::MakeAttribute("mode", "bilinear"),
                          utils::MakeAttribute("padding_mode", "zeros")},
@@ -592,8 +666,8 @@ TEST_F(QnnHTPBackendTests, GridSample_AlignCorners) {
 // CPU QDQ val: 3.3850328922271729 (err 0.022981882095336914)
 TEST_F(QnnHTPBackendTests, DISABLED_GridSample_BorderPadding) {
   RunQDQOpTest<uint8_t>("GridSample",
-                        TestInputDef<float>({1, 1, 3, 2}, false, -10.0f, 10.0f),
-                        TestInputDef<float>({1, 2, 4, 2}, false, -10.0f, 10.0f),
+                        {TestInputDef<float>({1, 1, 3, 2}, false, -10.0f, 10.0f),
+                         TestInputDef<float>({1, 2, 4, 2}, false, -10.0f, 10.0f)},
                         {utils::MakeAttribute("mode", "bilinear"),
                          utils::MakeAttribute("padding_mode", "border")},
                         17,
@@ -603,8 +677,8 @@ TEST_F(QnnHTPBackendTests, DISABLED_GridSample_BorderPadding) {
 // Test QDQ GridSample with nearest mode
 TEST_F(QnnHTPBackendTests, GridSample_Nearest) {
   RunQDQOpTest<uint8_t>("GridSample",
-                        TestInputDef<float>({1, 1, 3, 2}, false, -10.0f, 10.0f),
-                        TestInputDef<float>({1, 2, 4, 2}, false, -10.0f, 10.0f),
+                        {TestInputDef<float>({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)),
+                         TestInputDef<float>({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))},
                         {utils::MakeAttribute("mode", "nearest")},
                         17,
                         ExpectedEPNodeAssignment::All);
@@ -618,13 +692,33 @@ TEST_F(QnnHTPBackendTests, GridSample_Nearest) {
 // CPU QDQ val: 3.2036216259002686 (err 0.0092642307281494141)
 TEST_F(QnnHTPBackendTests, DISABLED_GridSample_ReflectionPaddingMode) {
   RunQDQOpTest<uint8_t>("GridSample",
-                        TestInputDef<float>({1, 1, 3, 2}, false, -10.0f, 10.0f),
-                        TestInputDef<float>({1, 2, 4, 2}, false, -10.0f, 10.0f),
+                        {TestInputDef<float>({1, 1, 3, 2}, false, -10.0f, 10.0f),
+                         TestInputDef<float>({1, 2, 4, 2}, false, -10.0f, 10.0f)},
                         {utils::MakeAttribute("padding_mode", "reflection")},
                         17,
                         ExpectedEPNodeAssignment::All);
 }
 
+// Test QDQ Concat: 3 inputs concatenated along the last axis.
+TEST_F(QnnHTPBackendTests, VariadicOp_Concat_3Inputs_LastAxis) {
+  RunQDQOpTest<uint8_t>("Concat",
+                        {TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
+                         TestInputDef<float>({1, 2, 2, 3}, false, -1.0f, 1.0f),
+                         TestInputDef<float>({1, 2, 2, 1}, false, -2.0f, 2.0f)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test QDQ Concat: 2 inputs concatenated along the second axis (axis = 1).
+TEST_F(QnnHTPBackendTests, VariadicOp_Concat_2Inputs_2ndAxis) {
+  RunQDQOpTest<uint8_t>("Concat",
+                        {TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
+                         TestInputDef<float>({1, 3, 2, 2}, false, -2.0f, 2.0f)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
 }  // namespace test
diff --git a/onnxruntime/test/providers/qnn/slice_htp_test.cc b/onnxruntime/test/providers/qnn/slice_htp_test.cc
index 23d817a69b89b..f7163f04736a5 100644
--- a/onnxruntime/test/providers/qnn/slice_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/slice_htp_test.cc
@@ -45,7 +45,7 @@ static GetTestQDQModelFn<QuantType> BuildQDQSliceTestCase(const TestInputDef<flo
   return [data_def, starts_def, ends_def, axes_def, steps_def](ModelTestBuilder& builder,
                                                                std::vector<QuantParams<QuantType>>& output_qparams) {
     NodeArg* data = MakeTestInput(builder, data_def);
-    QuantParams<QuantType> data_qparams = GetTestInputQuantParams(data_def);
+    QuantParams<QuantType> data_qparams = GetTestInputQuantParams<QuantType>(data_def);
     NodeArg* data_qdq = AddQDQNodePair(builder, data, data_qparams.scale, data_qparams.zero_point);
 
     NodeArg* starts = MakeTestInput(builder, starts_def);
diff --git a/onnxruntime/test/providers/qnn/transpose_htp_test.cc b/onnxruntime/test/providers/qnn/transpose_htp_test.cc
index adc0e7104b136..8d8c1ebb0fd15 100644
--- a/onnxruntime/test/providers/qnn/transpose_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/transpose_htp_test.cc
@@ -38,7 +38,7 @@ static GetTestQDQModelFn<QuantType> BuildQDQTransposeTestCase(const TestInputDef
                                                               const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
   return [input_def, attrs](ModelTestBuilder& builder, std::vector<QuantParams<QuantType>>& output_qparams) {
     NodeArg* input = MakeTestInput(builder, input_def);
-    QuantParams<QuantType> input_qparams = GetTestInputQuantParams(input_def);
+    QuantParams<QuantType> input_qparams = GetTestInputQuantParams<QuantType>(input_def);
     NodeArg* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point);
 
     auto* output = builder.MakeIntermediate();
diff --git a/onnxruntime/test/providers/qnn/where_htp_test.cc b/onnxruntime/test/providers/qnn/where_htp_test.cc
index 02238dad1c5dd..49f3ef0fd983a 100644
--- a/onnxruntime/test/providers/qnn/where_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/where_htp_test.cc
@@ -42,12 +42,12 @@ static GetTestQDQModelFn<QuantType> BuildQDQWhereTestCase(const TestInputDef<boo
 
     // x => Q => DQ =>
     NodeArg* x = MakeTestInput(builder, x_def);
-    QuantParams<QuantType> x_qparams = GetTestInputQuantParams(x_def);
+    QuantParams<QuantType> x_qparams = GetTestInputQuantParams<QuantType>(x_def);
     NodeArg* x_qdq = AddQDQNodePair(builder, x, x_qparams.scale, x_qparams.zero_point);
 
     // y => Q => DQ =>
     NodeArg* y = MakeTestInput(builder, y_def);
-    QuantParams<QuantType> y_qparams = GetTestInputQuantParams(y_def);
+    QuantParams<QuantType> y_qparams = GetTestInputQuantParams<QuantType>(y_def);
     NodeArg* y_qdq = AddQDQNodePair(builder, y, y_qparams.scale, y_qparams.zero_point);
 
     // Where operator.