Support Softmax/LogSoftmax (opset >= 13) with arbitrary axis attributes
adrianlizarraga committed Oct 10, 2023
1 parent bf83f04 commit e3b27a2
Showing 2 changed files with 220 additions and 32 deletions.
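
Editorial note (not part of the commit): ONNX Softmax/LogSoftmax with opset 13+ reduce along an arbitrary `axis`, while QNN's Softmax on the HTP backend only reduces along the last dimension. The change below therefore lowers a non-last-axis Softmax to Transpose -> Softmax(last axis) -> Transpose. The standalone C++ sketch below checks that identity numerically for a 2x3 input and axis 0; all names in it are illustrative and do not come from the ORT/QNN code.

// Standalone sketch: softmax along a non-last axis equals
// transpose -> softmax(last axis) -> transpose.
#include <cmath>
#include <cstdio>

int main() {
  // Input with shape (2, 3); we want Softmax along axis 0 (a non-last axis).
  const float x[2][3] = {{0.f, 1.f, 2.f}, {3.f, 4.f, 5.f}};

  // (a) Softmax directly over axis 0.
  float direct[2][3];
  for (int c = 0; c < 3; ++c) {
    const float denom = std::exp(x[0][c]) + std::exp(x[1][c]);
    direct[0][c] = std::exp(x[0][c]) / denom;
    direct[1][c] = std::exp(x[1][c]) / denom;
  }

  // (b) Transpose to (3, 2), Softmax over the (new) last axis.
  float xt[3][2];
  for (int r = 0; r < 2; ++r)
    for (int c = 0; c < 3; ++c) xt[c][r] = x[r][c];

  float via_transpose[3][2];
  for (int c = 0; c < 3; ++c) {
    const float denom = std::exp(xt[c][0]) + std::exp(xt[c][1]);
    via_transpose[c][0] = std::exp(xt[c][0]) / denom;
    via_transpose[c][1] = std::exp(xt[c][1]) / denom;
  }

  // The results match element-for-element (via_transpose is read with swapped indices,
  // i.e. "transposed back").
  for (int r = 0; r < 2; ++r)
    for (int c = 0; c < 3; ++c)
      std::printf("[%d][%d] direct=%f via_transpose=%f\n", r, c, direct[r][c], via_transpose[c][r]);
  return 0;
}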
202 changes: 180 additions & 22 deletions onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
@@ -7,7 +7,6 @@
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/common/safeint.h"
#include "onnx/defs/data_type_utils.h"

#include "base_op_builder.h"

[cpplint] softmax_op_builder.cc:11: Include the directory when naming header files [build/include_subdir] [4]

@@ -24,60 +23,219 @@ class SoftmaxOpBuilder : public BaseOpBuilder {
const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;

[cpplint] softmax_op_builder.cc:23: "override" is redundant since function is already declared as "final" [readability/inheritance] [4]

protected:
Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
std::vector<std::string>& input_names,
bool do_op_validation) const override ORT_MUST_USE_RESULT;

Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool do_op_validation) const override ORT_MUST_USE_RESULT;
};

static int32_t GetDefaultAxisAttribute(const std::string& op_type, int opset_version) {
if (op_type == "Softmax" || op_type == "LogSoftmax") {
// Default axis changed from 1 to -1 in opset 13.
return opset_version < 13 ? 1 : -1;
}

return 0;
constexpr int32_t GetDefaultAxisAttribute(int opset_version) {
// Default axis changed from 1 to -1 in opset 13.
return opset_version < 13 ? 1 : -1;
}

Status SoftmaxOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger) const {
ORT_UNUSED_PARAMETER(logger);
const std::string& op_type = node_unit.OpType();
const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
const int opset_version = node_unit.SinceVersion();

// The QNN HTP backend only supports an `axis` attribute that refers to the last input dimension.
// QNN EP is able to support arbitrary axis attributes by wrapping the QNN operator with transposes.
// However, the exception is Softmax/LogSoftmax with opset < 13. For these older ONNX operators, only
// axis == input_rank - 1 is supported.
if (is_npu_backend && opset_version < 13) {
const std::string& op_type = node_unit.OpType();

int32_t axis = GetDefaultAxisAttribute(opset_version);
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
std::vector<uint32_t> input_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
"QNN EP: Cannot get shape for Softmax input");
ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
"QNN ", op_type.c_str(),
" only supports an `axis` attribute equal to input_rank-1 (or -1) for ONNX opset < 13");
}

return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
}

static std::vector<uint32_t> GetTransposePermToUseLastAxis(uint32_t input_rank, uint32_t axis) {
assert(axis < input_rank);
std::vector<uint32_t> transpose_perm;
transpose_perm.reserve(input_rank);

for (uint32_t dim = 0; dim < input_rank; dim++) {
transpose_perm.push_back(dim);
}

int32_t axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
// Swap axis dim with last dim.
transpose_perm[axis] = input_rank - 1;
transpose_perm[input_rank - 1] = axis;

return transpose_perm;
}
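
Editorial aside (not part of the committed file): because the permutation above only swaps `axis` with the last dimension, it is its own inverse. That is why the same perm vector can be used both for the Transpose inserted before the QNN Softmax (in ProcessInputs) and for the Transpose inserted after it (in ProcessAttributesAndOutputs). A small self-contained check, with an illustrative re-implementation of the helper:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Mirrors GetTransposePermToUseLastAxis: identity permutation with `axis`
// and the last dimension swapped.
static std::vector<uint32_t> PermToUseLastAxis(uint32_t rank, uint32_t axis) {
  std::vector<uint32_t> perm(rank);
  for (uint32_t d = 0; d < rank; ++d) perm[d] = d;
  std::swap(perm[axis], perm[rank - 1]);
  return perm;
}

int main() {
  const auto perm = PermToUseLastAxis(4, 1);  // {0, 3, 2, 1}
  // Composing the permutation with itself gives back the identity {0, 1, 2, 3}.
  for (uint32_t d = 0; d < 4; ++d)
    std::printf("%u -> %u -> %u\n", d, perm[d], perm[perm[d]]);
  return 0;
}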

Status SoftmaxOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
std::vector<std::string>& input_names,
bool do_op_validation) const {
const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
const auto& inputs = node_unit.Inputs();
assert(inputs.size() == 1);

int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
std::vector<uint32_t> input_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
"QNN EP: Cannot get shape for Softmax input");
ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
"QNN ", op_type.c_str(), " only supports an `axis` attribute equal to input_rank-1 (or -1)");

return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
OnnxInputInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input_info));
const size_t input_rank = input_info.shape.size();

// If the axis attribute refers to the last dimension (or the backend is not NPU), process the input as normal.
if (!is_npu_backend || axis == static_cast<int32_t>(input_rank) - 1) {
return ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names);
}

//
// The axis does **not** refer to the last input dimension. Must wrap transposes around the operator to be able to use
// QNN's Softmax operator, which always uses an axis value that refers to the last dimension.
//

std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(input_rank),
static_cast<uint32_t>(axis));

const std::string& input_name = inputs[0].node_arg.Name();
std::string op_input_name = input_info.is_initializer ? input_name : input_name + "_ort_qnn_ep_transpose";
input_names.push_back(op_input_name);

std::vector<uint32_t> op_input_shape = input_info.shape;
op_input_shape[input_rank - 1] = input_info.shape[axis];
op_input_shape[axis] = input_info.shape[input_rank - 1];

std::vector<uint8_t> initializer_bytes;
if (input_info.is_initializer) { // Input is an initializer, so transpose initializer bytes.
std::vector<size_t> perm_size_t;
perm_size_t.reserve(transpose_perm.size());

for (auto p : transpose_perm) {
perm_size_t.push_back(static_cast<size_t>(p));
}

ORT_RETURN_IF_ERROR(TransposeInitializer(qnn_model_wrapper, *input_info.initializer_tensor, perm_size_t,
initializer_bytes));
} else { // Input is dynamic, so add transpose node before input.
const bool is_graph_input = qnn_model_wrapper.IsGraphInput(input_name);

ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
input_name,
op_input_name,
input_info.shape,
transpose_perm,
op_input_shape,
input_info.qnn_data_type,
input_info.quant_param,
do_op_validation,
is_graph_input));
}

Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, op_input_name);
QnnTensorWrapper input_tensorwrapper(op_input_name, tensor_type, input_info.qnn_data_type, input_info.quant_param,
std::move(op_input_shape), std::move(initializer_bytes));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");

return Status::OK();
}

Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool do_op_validation) const {
const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
const std::string& op_type = node_unit.OpType();
const auto& outputs = node_unit.Outputs();
assert(outputs.size() == 1);

int32_t default_axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));

OnnxInputInfo output_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
const size_t output_rank = output_info.shape.size();
const bool axis_is_last_dim = static_cast<size_t>(axis) == output_rank - 1;

// If axis refers to the last dimension (or not NPU), process outputs as usual.
if (!is_npu_backend || axis_is_last_dim) {
QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);

std::vector<std::string> param_tensor_names;
param_tensor_names.push_back(axis_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(axis_param));

return ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, do_op_validation, GetQnnOpType(op_type));
}

//
// The axis does **not** refer to the last dimension. Must wrap the operator with Transposes to be able to use
// QNN's Softmax operator, which only supports an axis that refers to the last dimension.
//

axis_qnn_scalar.uint32Value = static_cast<uint32_t>(output_rank - 1); // NOTE: override axis.
QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);

std::vector<std::string> param_tensor_names;
param_tensor_names.push_back(axis_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(axis_param));

return ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, do_op_validation, GetQnnOpType(op_type));
const std::string& orig_output_name = outputs[0].node_arg.Name();
std::string op_output_name = orig_output_name + "_ort_qnn_ep_transpose";

std::vector<uint32_t> op_output_shape = output_info.shape;
op_output_shape[output_rank - 1] = output_info.shape[axis];
op_output_shape[axis] = output_info.shape[output_rank - 1];

QnnTensorWrapper output_tensorwrapper(op_output_name, QNN_TENSOR_TYPE_NATIVE, output_info.qnn_data_type, output_info.quant_param,
std::vector<uint32_t>(op_output_shape));
[cpplint] softmax_op_builder.cc:212: Lines should be <= 120 characters long [whitespace/line_length] [2]
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
QNN_OP_PACKAGE_NAME_QTI_AISW,
GetQnnOpType(node_unit.OpType()),
std::move(input_names),
{op_output_name},
std::move(param_tensor_names)),
"Failed to add node.");

const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(orig_output_name);
std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(output_rank),
static_cast<uint32_t>(axis));

ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
op_output_name,
orig_output_name,
op_output_shape,
transpose_perm,
output_info.shape,
output_info.qnn_data_type,
output_info.quant_param,
do_op_validation,
is_graph_output));

return Status::OK();
}
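
Editorial aside (not part of the committed file): putting ProcessInputs and ProcessAttributesAndOutputs together, a non-last-axis Softmax is lowered to Transpose -> Softmax(axis = rank - 1) -> Transpose, with the intermediate tensors taking the axis-swapped shape. A small sketch of that shape bookkeeping, using the (1, 2, 3, 3), axis = 1 configuration exercised by the UnaryOp_Softmax13_NonLastAxis test below; no ORT/QNN types are used:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // ONNX node: Softmax(axis = 1) over an input of shape (1, 2, 3, 3).
  std::vector<uint32_t> shape = {1, 2, 3, 3};
  const uint32_t rank = static_cast<uint32_t>(shape.size());
  const uint32_t axis = 1;

  // Shape of the tensor fed to (and produced by) the QNN Softmax: the axis dim is
  // swapped with the last dim, as in op_input_shape / op_output_shape above.
  std::vector<uint32_t> swapped = shape;
  std::swap(swapped[axis], swapped[rank - 1]);

  std::printf("Transpose   : (1,2,3,3) -> (%u,%u,%u,%u)\n", swapped[0], swapped[1], swapped[2], swapped[3]);
  std::printf("QNN Softmax : axis overridden to %u (last dim)\n", rank - 1);
  std::printf("Transpose   : (%u,%u,%u,%u) -> (1,2,3,3)\n", swapped[0], swapped[1], swapped[2], swapped[3]);
  return 0;
}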

void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
50 changes: 40 additions & 10 deletions onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -447,8 +447,9 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Log_U16) {
// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
// Test that the default axis (-1) for SoftMax opset 13 works.
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) {
const std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
RunQDQOpTest<uint8_t>("Softmax",
{TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
{TestInputDef<float>({1, 2, 3}, false, input_data)},
{}, // Uses default axis of -1 for opset 13
13,
ExpectedEPNodeAssignment::All);
@@ -466,14 +467,43 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_DefaultAxis) {
true); // Use com.microsoft domain for Q/DQ ops
}

// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
// Test that an axis != -1 is not supported.
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) {
// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
// QNN EP will wrap the operator with transposes.
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis) {
const std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 10.0f, 11.0f, 12.0f, 100.0f, 110.0f, 120.0f,
1.0856307f, 0.99734545f, 0.2829785f, 1.5062947f, 0.5786002f, 1.6514366f,
2.4266791f, 0.42891264f, 1.2659363f};
RunQDQOpTest<uint8_t>("Softmax",
{TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
{TestInputDef<float>({1, 2, 3, 3}, false, input_data)},
{utils::MakeAttribute("axis", static_cast<int64_t>(1))},
13,
ExpectedEPNodeAssignment::None);
ExpectedEPNodeAssignment::All);
}

// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
// QNN EP will wrap the operator with transposes.
// This is a configuration used in one of our partner's models.
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis_LargeInput) {
const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);
RunQDQOpTest<uint8_t>("Softmax",
{TestInputDef<float>({1, 124, 1}, false, input_data)},
{utils::MakeAttribute("axis", static_cast<int64_t>(1))},
13,
ExpectedEPNodeAssignment::All);
}

// Test that 16-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
// QNN EP will wrap the operator with transposes.
// This is a configuration used in one of our partner's models.
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_NonLastAxis_LargeInput) {
const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);
RunQDQOpTest<uint16_t>("Softmax",
{TestInputDef<float>({1, 124, 1}, false, input_data)},
{utils::MakeAttribute("axis", static_cast<int64_t>(1))},
13,
ExpectedEPNodeAssignment::All,
kOnnxDomain,
true);
}

// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
@@ -507,15 +537,15 @@ TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_DefaultAxis) {
ExpectedEPNodeAssignment::All);
}

// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
// Test that an axis != -1 is not supported.
TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_UnsupportedAxis) {
// Test that 8-bit QDQ LogSoftmax (opset 13) with axis != -1 is supported by QNN EP.
// QNN EP will wrap the operator with transposes.
TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_NonLastAxis) {
std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
RunQDQOpTest<uint8_t>("LogSoftmax",
{TestInputDef<float>({1, 2, 3}, false, input_data)},
{utils::MakeAttribute("axis", static_cast<int64_t>(1))},
13,
ExpectedEPNodeAssignment::None);
ExpectedEPNodeAssignment::All);
}

// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
