Enable QDQ Clip on QNN HTP backend. Add unit tests.
adrianlizarraga committed Sep 19, 2023
1 parent d9d79cd commit 2091545
Showing 4 changed files with 104 additions and 106 deletions.
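For context, the sketch below is not part of this commit; it shows one way an application could run a QDQ model containing a Clip node group on the QNN HTP backend once this support lands. It assumes the ONNX Runtime C++ API's generic AppendExecutionProvider registration for the "QNN" provider; the model filename is a placeholder, and the backend_path values mirror the ones used in the unit tests below.

#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "qnn_clip_example");
  Ort::SessionOptions session_options;

  // Register the QNN execution provider and point it at the HTP backend library.
#if defined(_WIN32)
  session_options.AppendExecutionProvider("QNN", {{"backend_path", "QnnHtp.dll"}});
#else
  session_options.AppendExecutionProvider("QNN", {{"backend_path", "libQnnHtp.so"}});
#endif

  // Placeholder model path: a QDQ model whose DQ -> Clip -> Q node group can now be
  // assigned to QNN EP.
  Ort::Session session(env, ORT_TSTR("qdq_model_with_clip.onnx"), session_options);
  return 0;
}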
@@ -78,7 +78,8 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
{"Abs", {}},
{"Neg", {}},
{"DepthToSpace", {}},
{"SpaceToDepth", {}}};
{"SpaceToDepth", {}},
{"Clip", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() {
return {{"Add", {}},
122 changes: 53 additions & 69 deletions onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc
@@ -33,8 +33,6 @@ class ClipOpBuilder : public BaseOpBuilder {

private:
Status ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
mutable float min_value_ = std::numeric_limits<float>::lowest();
mutable float max_value_ = std::numeric_limits<float>::max();
};

Status ClipOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
@@ -61,82 +59,68 @@ Status ClipOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
if (do_op_validation) {
ORT_RETURN_IF_ERROR(ExplictOpCheck(qnn_model_wrapper, node_unit));
}
Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;

auto inputs = node_unit.Inputs();
for (size_t input_i = 0; input_i < inputs.size(); ++input_i) {
Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
bool is_quantized_tensor = inputs[input_i].quant_param.has_value();
utils::InitializeQuantizeParam(quantize_param, is_quantized_tensor);

auto& input_name = inputs[input_i].node_arg.Name();
if (input_name.empty()) {
// Ignore unspecified/unused optional input
continue;
}
if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) {
LOGS(logger, VERBOSE) << "Tensor already added or the input is not named, skip it: " << input_name;
input_names.push_back(input_name);
continue;
}

const auto* type_proto = inputs[input_i].node_arg.TypeAsProto();
ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, type_proto, qnn_data_type));

std::vector<uint32_t> input_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input_shape), "Cannot get shape");

ORT_RETURN_IF_NOT(qnn_model_wrapper.ProcessQuantizationParameter(inputs[input_i].quant_param,
quantize_param.scaleOffsetEncoding.scale,
quantize_param.scaleOffsetEncoding.offset),
"Cannot get quantization parameter");

float* ini_data = nullptr;
std::vector<uint8_t> unpacked_tensor;
bool is_initializer_input = qnn_model_wrapper.IsInitializerInput(input_name);
if (is_initializer_input) {
const auto& input_tensor = qnn_model_wrapper.GetInitializerTensors().at(input_name);
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_tensor, unpacked_tensor));
ini_data = reinterpret_cast<float*>(unpacked_tensor.data());
if (input_i == 1) {
min_value_ = *ini_data;
continue;
} else if (input_i == 2) {
max_value_ = *ini_data;
continue;
}
}
ORT_ENFORCE(input_i == 0, "QNN ReluMinMax operator expects only one input. Min and max are expected to be parameters, ie. initializer inputs in ONNX model");

Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, input_name);
QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_param,
std::move(input_shape), std::move(unpacked_tensor));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
input_names.push_back(input_name);
}

return Status::OK();
return ProcessInput(qnn_model_wrapper, node_unit.Inputs()[0], logger, input_names);
}

Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool do_op_validation) const {
const auto& inputs = node_unit.Inputs();
const size_t num_inputs = inputs.size();

const Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
std::vector<std::string> param_tensor_names;
Qnn_Scalar_t min_qnn_scalar = QNN_SCALAR_INIT;
min_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32;
min_qnn_scalar.floatValue = min_value_;
QnnParamWrapper min_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MIN_VALUE, min_qnn_scalar);
param_tensor_names.push_back(min_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(min_value_param));

Qnn_Scalar_t max_qnn_scalar = QNN_SCALAR_INIT;
max_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32;
max_qnn_scalar.floatValue = max_value_;
QnnParamWrapper max_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MAX_VALUE, max_qnn_scalar);
param_tensor_names.push_back(max_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(max_value_param));

auto get_f32_from_bytes = [](const std::vector<uint8_t>& bytes, float default_val) -> float {
return bytes.empty() ? default_val : *reinterpret_cast<const float*>(bytes.data());
};

// Set the 'min' parameter.
{
std::vector<uint8_t> min_val_bytes;

if (num_inputs > 1 && !inputs[1].node_arg.Name().empty()) {
OnnxInputInfo min_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], min_input_info));
ORT_RETURN_IF_NOT(min_input_info.qnn_data_type == qnn_data_type,
"QNN EP: The 'min' input of the Clip operator must be of type float32.");
ORT_RETURN_IF_NOT(min_input_info.is_initializer, "QNN EP: The Clip operator's 'min' input must be an initializer.");
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*min_input_info.initializer_tensor, min_val_bytes));
}

Qnn_Scalar_t min_qnn_scalar = QNN_SCALAR_INIT;
min_qnn_scalar.dataType = qnn_data_type;
min_qnn_scalar.floatValue = get_f32_from_bytes(min_val_bytes, std::numeric_limits<float>::lowest());
QnnParamWrapper min_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MIN_VALUE,
min_qnn_scalar);
param_tensor_names.push_back(min_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(min_value_param));
}

// Set the 'max' parameter.
{
std::vector<uint8_t> max_val_bytes;

if (num_inputs > 2 && !inputs[2].node_arg.Name().empty()) {
OnnxInputInfo max_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[2], max_input_info));
ORT_RETURN_IF_NOT(max_input_info.qnn_data_type == qnn_data_type,
"QNN EP: The 'max' input of the Clip operator must of type float32.");
ORT_RETURN_IF_NOT(max_input_info.is_initializer, "QNN EP: The Clip operator's 'max' input must be an initializer.");
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*max_input_info.initializer_tensor, max_val_bytes));
}

Qnn_Scalar_t max_qnn_scalar = QNN_SCALAR_INIT;
max_qnn_scalar.dataType = qnn_data_type;
max_qnn_scalar.floatValue = get_f32_from_bytes(max_val_bytes, std::numeric_limits<float>::max());
QnnParamWrapper max_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MAX_VALUE,
max_qnn_scalar);
param_tensor_names.push_back(max_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(max_value_param));
}

ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
81 changes: 47 additions & 34 deletions onnxruntime/test/providers/qnn/clip_op_test.cc
@@ -120,54 +120,67 @@ static void RunQDQClipTestOnHTP(const std::vector<TestInputDef<float>>& input_defs,
BuildQDQClipTestCase<QType>(input_defs), // QDQ model
provider_options,
opset,
expected_ep_assignment,
1e-4f, logging::Severity::kVERBOSE);
expected_ep_assignment);
}

// Runs a model with a non-QDQ Clip operator on the QNN HTP backend. Checks the graph node assignment
// and that inference outputs for QNN EP and CPU EP match.
template <typename DataType>
static void RunClipTestOnHTP(const std::vector<TestInputDef<DataType>>& input_defs,
ExpectedEPNodeAssignment expected_ep_assignment,
int opset = 13) {
ProviderOptions provider_options;

#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
#else
provider_options["backend_path"] = "libQnnHtp.so";
#endif

RunQnnModelTest(BuildOpTestCase("Clip", input_defs, {}),
provider_options,
opset,
expected_ep_assignment);
}

// Test QDQ Clip with default min/max. (Fused with QuantizeLinear by optimizer).
TEST_F(QnnHTPBackendTests, Clip_4D_DefaultMinMax) {
// Test QDQ Clip with default min/max.
// NOTE: The Clip operator is *optimized* away during L1 optimizations, so QNN EP does not get a graph with a Clip op.
// Instead, QNN EP will get a graph with a Q -> DQ.
// - Original sequence: Q1 -> DQ1 -> Clip -> Q2 -> DQ2
// - ClipQuantFusion: Fuses Clip -> QuantizeLinear resulting in Q1 -> DQ1 -> Q2' -> DQ2
// - DoubleQDQPairsRemover: Simplifies remaining Q1 -> DQ1 -> Q2' -> DQ2 sequence to Q1 -> DQ2.
TEST_F(QnnHTPBackendTests, Clip_U8_DefaultMinMax_Rank4) {
RunQDQClipTestOnHTP<uint8_t>({TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48))},
ExpectedEPNodeAssignment::All);
}

// Test QDQ Clip with non-default min and max inputs.
TEST_F(QnnHTPBackendTests, Clip_4D) {
// Test QDQ Clip with non-default min and max inputs. QNN EP will get a graph with a Clip operator.
TEST_F(QnnHTPBackendTests, Clip_U8_Rank4) {
RunQDQClipTestOnHTP<uint8_t>({TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
TestInputDef<float>({}, true, {-5.0f}),
TestInputDef<float>({}, true, {5.0f})},
ExpectedEPNodeAssignment::All);
}
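
For reference, here is a rough sketch of the kind of QDQ graph these tests exercise: standalone Q/DQ at the model input and output wrapping the DQ -> Clip -> Q group. This is illustrative only, not the actual BuildQDQClipTestCase implementation, and the scale/zero-point values are placeholders rather than the ones the helper computes.

// Illustrative sketch only (placeholder scale/zero-point values); the real graph is
// built by BuildQDQClipTestCase<QType>.
GetTestModelFn qdq_clip_sketch = [](ModelTestBuilder& builder) {
  // input (f32) -> Q -> DQ ->
  NodeArg* input = builder.MakeInput<float>({1, 3, 4, 4}, GetFloatDataInRange(-10.0f, 10.0f, 48));
  NodeArg* input_q = builder.MakeIntermediate();
  NodeArg* input_dq = builder.MakeIntermediate();
  builder.AddQuantizeLinearNode<uint8_t>(input, 0.08f, 128, input_q);
  builder.AddDequantizeLinearNode<uint8_t>(input_q, 0.08f, 128, input_dq);

  // Clip(min, max) ->
  NodeArg* min_input = builder.MakeScalarInitializer(-5.0f);
  NodeArg* max_input = builder.MakeScalarInitializer(5.0f);
  NodeArg* clip_output = builder.MakeIntermediate();
  builder.AddNode("Clip", {input_dq, min_input, max_input}, {clip_output});

  // Q -> DQ -> output (f32)
  NodeArg* output_q = builder.MakeIntermediate();
  NodeArg* output = builder.MakeOutput();
  builder.AddQuantizeLinearNode<uint8_t>(clip_output, 0.04f, 128, output_q);
  builder.AddDequantizeLinearNode<uint8_t>(output_q, 0.04f, 128, output);
};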

#if 0
// Test non-QDQ Clip with 4D input on HTP
TEST_F(QnnHTPBackendTests, Clip_NotQDQ_4D_f32) {
RunClipTestOnHTP<float>({TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
TestInputDef<float>({}, true, {-5.0f}),
TestInputDef<float>({}, true, {5.0f})},
ExpectedEPNodeAssignment::All);
}
// Test QDQ Clip of rank 5.
TEST_F(QnnHTPBackendTests, Clip_U8_Rank5) {
// We can't use the usual model-building functions because they add standalone Quantize and Dequantize nodes
// at the input and output. These Q/DQ ops get lowered to QNN's Quantize and Dequantize operators, which DO NOT
// support rank 5 tensors. Therefore, we have to create a test model that only instantiates the DQ -> Clip -> Q
// QDQ node group, which gets lowered to a single QNN Clip node.
GetTestModelFn model_fn = [](ModelTestBuilder& builder) {
// input (u8) -> DQ ->
NodeArg* quant_input = builder.MakeInput<uint8_t>({1, 1, 2, 2, 2}, {0, 1, 6, 10, 20, 100, 128, 255});
NodeArg* input_dq = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(quant_input, 1.0f, 0, input_dq); // scale = 1.0, zp = 0

// Min/Max initializers
NodeArg* min_input = builder.MakeScalarInitializer(5.0f);
NodeArg* max_input = builder.MakeScalarInitializer(100.0f);

// Clip ->
NodeArg* clip_output = builder.MakeIntermediate();
builder.AddNode("Clip", {input_dq, min_input, max_input}, {clip_output});

// Q -> output (u8)
NodeArg* output = builder.MakeOutput();
builder.AddQuantizeLinearNode<uint8_t>(clip_output, 1.0f, 0, output); // scale = 1.0, zp = 0
};

ProviderOptions provider_options;

#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
#else
provider_options["backend_path"] = "libQnnHtp.so";
#endif

RunQnnModelTest(model_fn,
provider_options,
13, // opset
ExpectedEPNodeAssignment::All);
}

#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
} // namespace test
} // namespace onnxruntime
4 changes: 2 additions & 2 deletions onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc
@@ -232,15 +232,15 @@ TEST_F(QnnHTPBackendTests, Squeeze_Rank5_Rank2_f32) {
ExpectedEPNodeAssignment::All);
}

// Test Squeeze of rank 4 -> rank 3 with a negative axes value.
// Test QDQ Squeeze of rank 4 -> rank 3 with a negative axes value.
TEST_F(QnnHTPBackendTests, Squeeze_Rank4_Rank3_NegAxes_f32) {
RunQDQSqueezeTestOnHTP<uint8_t>("Squeeze",
TestInputDef<float>({1, 3, 2, 1}, false, -10.0f, 10.0f),
TestInputDef<int64_t>({1}, true, {-1}), // Squeeze last axis => (1, 3, 2)
ExpectedEPNodeAssignment::All);
}

// Test Unsqueeze of rank 3 -> rank 5.
// Test QDQ Unsqueeze of rank 3 -> rank 5.
TEST_F(QnnHTPBackendTests, Unsqueeze_Rank3_Rank5_f32) {
// We can't use the usual model-building functions because they add standalone Quantize and Dequantize nodes
// at the input and output. These Q/DQ ops get lowered to QNN's Quantize and Dequantize operators, which DO NOT
