diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
index 9b57bc4fa1c0f..4dd2e089dd19f 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
@@ -7,7 +7,6 @@
 #include "core/providers/qnn/builder/qnn_model_wrapper.h"
 #include "core/providers/qnn/builder/op_builder_factory.h"
 #include "core/common/safeint.h"
-#include "onnx/defs/data_type_utils.h"
 
 #include "base_op_builder.h"
 
@@ -24,6 +23,12 @@ class SoftmaxOpBuilder : public BaseOpBuilder {
                        const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;
 
  protected:
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
   Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                      const NodeUnit& node_unit,
                                      std::vector<std::string>&& input_names,
@@ -31,31 +36,125 @@ class SoftmaxOpBuilder : public BaseOpBuilder {
                                      bool do_op_validation) const override ORT_MUST_USE_RESULT;
 };
 
-static int32_t GetDefaultAxisAttribute(const std::string& op_type, int opset_version) {
-  if (op_type == "Softmax" || op_type == "LogSoftmax") {
-    // Default axis changed from 1 to -1 in opset 13.
-    return opset_version < 13 ? 1 : -1;
-  }
-
-  return 0;
+constexpr int32_t GetDefaultAxisAttribute(int opset_version) {
+  // Default axis changed from 1 to -1 in opset 13.
+  return opset_version < 13 ? 1 : -1;
 }
 
 Status SoftmaxOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
                                        const NodeUnit& node_unit,
                                        const logging::Logger& logger) const {
   ORT_UNUSED_PARAMETER(logger);
-  const std::string& op_type = node_unit.OpType();
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  const int opset_version = node_unit.SinceVersion();
+
+  // The QNN HTP backend only supports an `axis` attribute that refers to the last input dimension.
+  // QNN EP is able to support arbitrary axis attributes by wrapping the QNN operator with transposes.
+  // However, the exception is Softmax/LogSoftmax with opset < 13. For these older ONNX operators, only
+  // axis == input_rank - 1 is supported.
+  if (is_npu_backend && opset_version < 13) {
+    const std::string& op_type = node_unit.OpType();
+
+    int32_t axis = GetDefaultAxisAttribute(opset_version);
+    Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
+    ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
+    std::vector<uint32_t> input_shape;
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
+                      "QNN EP: Cannot get shape for Softmax input");
+    ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
+                  "QNN ", op_type.c_str(),
+                  " only supports an `axis` attribute equal to input_rank-1 (or -1) for ONNX opset < 13");
+  }
+
+  return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
+}
+
+static std::vector<uint32_t> GetTransposePermToUseLastAxis(uint32_t input_rank, uint32_t axis) {
+  assert(axis < input_rank);
+  std::vector<uint32_t> transpose_perm;
+  transpose_perm.reserve(input_rank);
+
+  for (uint32_t dim = 0; dim < input_rank; dim++) {
+    transpose_perm.push_back(dim);
+  }
 
-  int32_t axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
+  // Swap axis dim with last dim.
+  transpose_perm[axis] = input_rank - 1;
+  transpose_perm[input_rank - 1] = axis;
+
+  return transpose_perm;
+}
+
+Status SoftmaxOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                       const NodeUnit& node_unit,
+                                       const logging::Logger& logger,
+                                       std::vector<std::string>& input_names,
+                                       bool do_op_validation) const {
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  const auto& inputs = node_unit.Inputs();
+  assert(inputs.size() == 1);
+
+  int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
   Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
   ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
-  std::vector<uint32_t> input_shape;
-  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
-                    "QNN EP: Cannot get shape for Softmax input");
-  ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
-                "QNN ", op_type.c_str(), " only supports an `axis` attribute equal to input_rank-1 (or -1)");
 
-  return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
+  OnnxInputInfo input_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input_info));
+  const size_t input_rank = input_info.shape.size();
+
+  // If the axis attribute refers to the last dimension (or this is not the NPU backend), then process the input as normal.
+  if (!is_npu_backend || axis == static_cast<int32_t>(input_rank) - 1) {
+    return ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names);
+  }
+
+  //
+  // The axis does **not** refer to the last input dimension. Must wrap transposes around the operator to be able to use
+  // QNN's Softmax operator, which always uses an axis value that refers to the last dimension.
+  //
+
+  std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(input_rank),
+                                                                       static_cast<uint32_t>(axis));
+
+  const std::string& input_name = inputs[0].node_arg.Name();
+  std::string op_input_name = input_info.is_initializer ? input_name : input_name + "_ort_qnn_ep_transpose";
+  input_names.push_back(op_input_name);
+
+  std::vector<uint32_t> op_input_shape = input_info.shape;
+  op_input_shape[input_rank - 1] = input_info.shape[axis];
+  op_input_shape[axis] = input_info.shape[input_rank - 1];
+
+  std::vector<uint8_t> initializer_bytes;
+  if (input_info.is_initializer) {  // Input is an initializer, so transpose initializer bytes.
+    std::vector<size_t> perm_size_t;
+    perm_size_t.reserve(transpose_perm.size());
+
+    for (auto p : transpose_perm) {
+      perm_size_t.push_back(static_cast<size_t>(p));
+    }
+
+    ORT_RETURN_IF_ERROR(TransposeInitializer(qnn_model_wrapper, *input_info.initializer_tensor, perm_size_t,
+                                             initializer_bytes));
+  } else {  // Input is dynamic, so add transpose node before input.
+    const bool is_graph_input = qnn_model_wrapper.IsGraphInput(input_name);
+
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
+                                                           input_name,
+                                                           op_input_name,
+                                                           input_info.shape,
+                                                           transpose_perm,
+                                                           op_input_shape,
+                                                           input_info.qnn_data_type,
+                                                           input_info.quant_param,
+                                                           do_op_validation,
+                                                           is_graph_input));
+  }
+
+  Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, op_input_name);
+  QnnTensorWrapper input_tensorwrapper(op_input_name, tensor_type, input_info.qnn_data_type, input_info.quant_param,
+                                       std::move(op_input_shape), std::move(initializer_bytes));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+
+  return Status::OK();
 }
 
 Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
@@ -63,21 +162,80 @@ Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_
                                                      std::vector<std::string>&& input_names,
                                                      const logging::Logger& logger,
                                                      bool do_op_validation) const {
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
   const std::string& op_type = node_unit.OpType();
+  const auto& outputs = node_unit.Outputs();
+  assert(outputs.size() == 1);
 
-  int32_t default_axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
+  int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
   Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
-  ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
+  ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
+
+  OnnxInputInfo output_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
+  const size_t output_rank = output_info.shape.size();
+  const bool axis_is_last_dim = static_cast<size_t>(axis) == output_rank - 1;
+
+  // If axis refers to the last dimension (or this is not the NPU backend), process the outputs as usual.
+  if (!is_npu_backend || axis_is_last_dim) {
+    QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
+
+    std::vector<std::string> param_tensor_names;
+    param_tensor_names.push_back(axis_param.GetParamTensorName());
+    qnn_model_wrapper.AddParamWrapper(std::move(axis_param));
+
+    return ProcessOutputs(qnn_model_wrapper, node_unit,
+                          std::move(input_names),
+                          std::move(param_tensor_names),
+                          logger, do_op_validation, GetQnnOpType(op_type));
+  }
+
+  //
+  // The axis does **not** refer to the last dimension. Must wrap the operator with Transposes to be able to use
+  // QNN's Softmax operator, which only supports an axis that refers to the last dimension.
+  //
+
+  axis_qnn_scalar.uint32Value = static_cast<uint32_t>(output_rank - 1);  // NOTE: override axis.
   QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
 
   std::vector<std::string> param_tensor_names;
   param_tensor_names.push_back(axis_param.GetParamTensorName());
   qnn_model_wrapper.AddParamWrapper(std::move(axis_param));
 
-  return ProcessOutputs(qnn_model_wrapper, node_unit,
-                        std::move(input_names),
-                        std::move(param_tensor_names),
-                        logger, do_op_validation, GetQnnOpType(op_type));
+  const std::string& orig_output_name = outputs[0].node_arg.Name();
+  std::string op_output_name = orig_output_name + "_ort_qnn_ep_transpose";
+
+  std::vector<uint32_t> op_output_shape = output_info.shape;
+  op_output_shape[output_rank - 1] = output_info.shape[axis];
+  op_output_shape[axis] = output_info.shape[output_rank - 1];
+
+  QnnTensorWrapper output_tensorwrapper(op_output_name, QNN_TENSOR_TYPE_NATIVE, output_info.qnn_data_type, output_info.quant_param,
+                                        std::vector<uint32_t>(op_output_shape));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
+                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                    GetQnnOpType(node_unit.OpType()),
+                                                    std::move(input_names),
+                                                    {op_output_name},
+                                                    std::move(param_tensor_names)),
+                    "Failed to add node.");
+
+  const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(orig_output_name);
+  std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(output_rank),
+                                                                       static_cast<uint32_t>(axis));
+
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
+                                                         op_output_name,
+                                                         orig_output_name,
+                                                         op_output_shape,
+                                                         transpose_perm,
+                                                         output_info.shape,
+                                                         output_info.qnn_data_type,
+                                                         output_info.quant_param,
+                                                         do_op_validation,
+                                                         is_graph_output));
+
+  return Status::OK();
 }
 
 void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
index be8afa7636b3d..e024eafcd6572 100644
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -447,8 +447,9 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Log_U16) {
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
 // Test that the default axis (-1) for SoftMax opset 13 works.
 TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) {
+  const std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
   RunQDQOpTest<uint8_t>("Softmax",
-                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
                         {},  // Uses default axis of -1 for opset 13
                         13,
                         ExpectedEPNodeAssignment::All);
@@ -466,14 +467,43 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_DefaultAxis) {
                          true);  // Use com.microsoft domain for Q/DQ ops
 }
 
-// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
-// Test that an axis != -1 is not supported.
-TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) {
+// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis) {
+  const std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 10.0f, 11.0f, 12.0f, 100.0f, 110.0f, 120.0f,
+                                         1.0856307f, 0.99734545f, 0.2829785f, 1.5062947f, 0.5786002f, 1.6514366f,
+                                         2.4266791f, 0.42891264f, 1.2659363f};
   RunQDQOpTest<uint8_t>("Softmax",
-                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {TestInputDef<float>({1, 2, 3, 3}, false, input_data)},
                         {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
                         13,
-                        ExpectedEPNodeAssignment::None);
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+// This is a configuration used in one of our partner's models.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis_LargeInput) {
+  const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);
+  RunQDQOpTest<uint8_t>("Softmax",
+                        {TestInputDef<float>({1, 124, 1}, false, input_data)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test that 16-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+// This is a configuration used in one of our partner's models.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_NonLastAxis_LargeInput) {
+  const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);
+  RunQDQOpTest<uint16_t>("Softmax",
+                         {TestInputDef<float>({1, 124, 1}, false, input_data)},
+                         {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                         13,
+                         ExpectedEPNodeAssignment::All,
+                         kOnnxDomain,
+                         true);
 }
 
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
@@ -507,15 +537,15 @@ TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_DefaultAxis) {
                         ExpectedEPNodeAssignment::All);
 }
 
-// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
-// Test that an axis != -1 is not supported.
-TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_UnsupportedAxis) {
+// Test that 8-bit QDQ LogSoftmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_NonLastAxis) {
   std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
   RunQDQOpTest<uint8_t>("LogSoftmax",
                         {TestInputDef<float>({1, 2, 3}, false, input_data)},
                         {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
                         13,
-                        ExpectedEPNodeAssignment::None);
+                        ExpectedEPNodeAssignment::All);
 }
 
 // Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.