Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[QNN EP] Add more op unit tests (fix Clip, TopK, Tile) #17457

Merged
merged 26 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b4212b8
Add unit tests for QNN Reshape
adrianlizarraga Sep 7, 2023
1b6135a
Clean up comments
adrianlizarraga Sep 7, 2023
5e268ef
Merge latest main branch
adrianlizarraga Sep 15, 2023
06992bc
Add QNN EP tests for the ONNX Flatten op
adrianlizarraga Sep 15, 2023
ebea003
Use correct opset version for Flatten
adrianlizarraga Sep 15, 2023
d4b19fb
Remove unnecessary comments
adrianlizarraga Sep 15, 2023
8b79d15
Add QNN tests for Squeeze and Unsqueeze operators
adrianlizarraga Sep 17, 2023
6d0ab55
Fix comment
adrianlizarraga Sep 17, 2023
3bbc32e
Merge latest commits from main branch
adrianlizarraga Sep 17, 2023
5215e1e
Add QNN CPU tests for Gemm. Need HTP tests.
adrianlizarraga Sep 17, 2023
3abdf14
Add QDQ Gemm HTP tests
adrianlizarraga Sep 18, 2023
976cc8b
Start adding QNN Clip tests
adrianlizarraga Sep 18, 2023
d9d79cd
Merge latest commits from main branch
adrianlizarraga Sep 18, 2023
2091545
Enable QDQ Clip on QNN HTP backend. Add unit tests.
adrianlizarraga Sep 19, 2023
a1b3b35
Fix comment
adrianlizarraga Sep 19, 2023
1d205bb
Convert Status check into an assert
adrianlizarraga Sep 19, 2023
80c8fb4
Improve model-building helpers to accept inputs of a potentially diff…
adrianlizarraga Sep 19, 2023
e5de983
Clean up Clip tests
adrianlizarraga Sep 19, 2023
51ff43d
Add QNN EP tests for the Split operator
adrianlizarraga Sep 20, 2023
482f3ca
Add Split opset 18 unit tests
adrianlizarraga Sep 20, 2023
d491447
Fix linter warnings
adrianlizarraga Sep 20, 2023
1b66cfa
Add QNN EP unit tests for Tile operator
adrianlizarraga Sep 20, 2023
7e8f7ce
Add support for 16bit QDQ Clip. Add more 16-bit QDQ tests.
adrianlizarraga Sep 20, 2023
b1e832a
Enable QDQ TopK on QNN EP's HTP backend. Add TopK unit tests.
adrianlizarraga Sep 20, 2023
3b0cd7e
Merge latest commits from main branch
adrianlizarraga Sep 20, 2023
6694aae
Try to limit the white-space changes made by clang-format
adrianlizarraga Sep 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/optimizer/initializer.h"
#include "core/optimizer/qdq_transformer/clip_quantizelinear.h"

#include <limits>

#include "core/optimizer/initializer.h"
#include "core/optimizer/qdq_transformer/qdq_util.h"
#include "core/optimizer/utils.h"
#include "core/graph/graph_utils.h"
Expand Down Expand Up @@ -50,14 +53,26 @@ static bool GetQConstantLowerUpper(const Graph& graph, const Node& node, float&
switch (zp_initializer.data_type()) {
case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
const int8_t zero_point = zp_initializer.data<int8_t>()[0];
lower = scale * (-128 - zero_point);
upper = scale * (127 - zero_point);
lower = scale * (std::numeric_limits<int8_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<int8_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
const uint8_t zero_point = zp_initializer.data<uint8_t>()[0];
lower = scale * (0 - zero_point);
upper = scale * (255 - zero_point);
lower = scale * (std::numeric_limits<uint8_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<uint8_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
const int16_t zero_point = zp_initializer.data<int16_t>()[0];
lower = scale * (std::numeric_limits<int16_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<int16_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_UINT16: {
const uint16_t zero_point = zp_initializer.data<uint16_t>()[0];
lower = scale * (std::numeric_limits<uint16_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<uint16_t>::max() - zero_point);
break;
}
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,42 @@ bool LogicalComparisonNodeGroupSelector::Check(const GraphViewer& graph_viewer,
return dt_input_1 == dt_input_2;
}

// Decides whether a QDQ node group built around a TopK node is acceptable.
//
// The group is accepted only when all of the following hold (checked in this order):
//   1. there is exactly one DQ node,
//   2. QDQ::ValidateNodeGroupDQNodes() reports OK for the DQ nodes,
//   3. there is exactly one Q node,
//   4. the element type of the DQ node's first input equals the element type of the Q node's first output,
//   5. IsQDQPairSupported() accepts the Q/DQ pair.
//
// Returns true if the group passes every check, false otherwise.
bool TopKNodeGroupSelector::Check(const GraphViewer& graph_viewer,
                                  const Node& node,
                                  const std::vector<const Node*>& dq_nodes,
                                  const std::vector<const Node*>& q_nodes) const {
  constexpr int kExpectedDQCount = 1;
  constexpr int kExpectedQCount = 1;

  // TopK has a single DQ input node.
  const int dq_count = gsl::narrow_cast<int>(dq_nodes.size());
  if (dq_count != kExpectedDQCount) {
    return false;
  }

  const auto dq_status = QDQ::ValidateNodeGroupDQNodes(graph_viewer, node, dq_nodes);
  if (!dq_status.IsOK()) {
    return false;
  }

  // TopK has a single Q output node.
  const int q_count = gsl::narrow_cast<int>(q_nodes.size());
  if (q_count != kExpectedQCount) {
    return false;
  }

  const Node& input_dq = *dq_nodes.front();
  const Node& output_q = *q_nodes.front();

  // The quantized type entering the group must be the same as the one leaving it.
  const int32_t input_elem_type = input_dq.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
  const int32_t output_elem_type = output_q.OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
  const bool elem_types_match = (input_elem_type == output_elem_type);
  if (!elem_types_match) {
    return false;
  }

  // Lets IsQDQPairSupported() resolve constant initializers by name through the graph viewer.
  auto lookup_initializer = [&graph_viewer](const std::string& name) {
    return graph_viewer.GetConstantInitializer(name, true);
  };

  return IsQDQPairSupported(output_q, input_dq, lookup_initializer, graph_viewer.ModelPath());
}

} // namespace QDQ
} // namespace onnxruntime

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,14 @@ class LogicalComparisonNodeGroupSelector : public NodeGroupSelector {
const std::vector<const Node*>& q_nodes) const override;
};

// Node group selector for a QDQ group built around a TopK node.
// TopK has 1 DQ input node and 1 Q output node.
// Zero point and scale are constant scalars and must match
// NOTE(review): the scalar/match requirement is presumably enforced by the IsQDQPairSupported() call
// made from Check() -- confirm against that helper.
class TopKNodeGroupSelector : public NodeGroupSelector {
// Returns true only if the group has exactly one DQ node and exactly one Q node, the DQ nodes pass
// QDQ::ValidateNodeGroupDQNodes(), the DQ input and Q output element types are equal, and
// IsQDQPairSupported() accepts the Q/DQ pair.
bool Check(const GraphViewer& graph_viewer, const Node& node,
const std::vector<const Node*>& dq_nodes,
const std::vector<const Node*>& q_nodes) const override;
};

/*
* NodeSelector instances for use in the QDQ::SelectorActionTransformer.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ static const OpVersionsAndSelector::OpVersionsMap GetMiscOpVersionsMap() {
{"Resize", {}},
{"Split", {}},
{"Squeeze", {}},
{"Unsqueeze", {}}};
{"Unsqueeze", {}},
{"Tile", {}}};
}

static const OpVersionsAndSelector::OpVersionsMap GetDropDQOpVersionsMap() {
Expand Down Expand Up @@ -78,7 +79,8 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
{"Abs", {}},
{"Neg", {}},
{"DepthToSpace", {}},
{"SpaceToDepth", {}}};
{"SpaceToDepth", {}},
{"Clip", {}}};
}
static const OpVersionsAndSelector::OpVersionsMap GetBinaryOpVersionsMap() {
return {{"Add", {}},
Expand Down Expand Up @@ -127,6 +129,10 @@ static const OpVersionsAndSelector::OpVersionsMap GetPadOpVersionsMap() {
return {{"Pad", {}}};
}

// Builds the op-type -> versions map used when registering the TopK selector.
// The map holds a single "TopK" entry whose version list is empty.
static const OpVersionsAndSelector::OpVersionsMap GetTopKOpVersionsMap() {
  return OpVersionsAndSelector::OpVersionsMap{{"TopK", {}}};
}

/* Selector rules registration related */
void RegisterMiscSelectors(Selectors& qdq_selectors) {
/* register selectors for miscellaneous ops */
Expand Down Expand Up @@ -227,6 +233,13 @@ void RegisterPadSelectors(Selectors& qdq_selectors) {
std::move(selector));
}

// Registers a TopKNodeGroupSelector with `qdq_selectors` for the op types returned by
// GetTopKOpVersionsMap(). Ownership of the selector is moved into `qdq_selectors`.
void RegisterTopKSelector(Selectors& qdq_selectors) {
  std::unique_ptr<NodeGroupSelector> topk_selector{std::make_unique<TopKNodeGroupSelector>()};
  qdq_selectors.RegisterSelector(GetTopKOpVersionsMap(), std::move(topk_selector));
}

void SelectorManager::CreateSelectors() {
RegisterMiscSelectors(qdq_selectors_);
RegisterDropDQSelectors(qdq_selectors_);
Expand All @@ -242,6 +255,7 @@ void SelectorManager::CreateSelectors() {
RegisterLogicalComparisonSelectors(qdq_selectors_);
RegisterWhereSelectors(qdq_selectors_);
RegisterPadSelectors(qdq_selectors_);
RegisterTopKSelector(qdq_selectors_);
}

void SelectorManager::InitializeSelectorsMap() {
Expand Down
127 changes: 56 additions & 71 deletions onnxruntime/core/providers/qnn/builder/opbuilder/clip_op_builder.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <cassert>
#include <limits>

#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
Expand All @@ -9,8 +12,6 @@

#include "base_op_builder.h"

#include <limits>

namespace onnxruntime {
namespace qnn {
class ClipOpBuilder : public BaseOpBuilder {
Expand All @@ -33,8 +34,6 @@ class ClipOpBuilder : public BaseOpBuilder {

private:
Status ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
mutable float min_value_ = std::numeric_limits<float>::lowest();
mutable float max_value_ = std::numeric_limits<float>::max();
};

Status ClipOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
Expand All @@ -61,82 +60,68 @@ Status ClipOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
if (do_op_validation) {
ORT_RETURN_IF_ERROR(ExplictOpCheck(qnn_model_wrapper, node_unit));
}
Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;

auto inputs = node_unit.Inputs();
for (size_t input_i = 0; input_i < inputs.size(); ++input_i) {
Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
bool is_quantized_tensor = inputs[input_i].quant_param.has_value();
utils::InitializeQuantizeParam(quantize_param, is_quantized_tensor);

auto& input_name = inputs[input_i].node_arg.Name();
if (input_name.empty()) {
// Ignore unspecified/unused optional input
continue;
}
if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) {
LOGS(logger, VERBOSE) << "Tensor already added or the input is not named, skip it: " << input_name;
input_names.push_back(input_name);
continue;
}

const auto* type_proto = inputs[input_i].node_arg.TypeAsProto();
ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, type_proto, qnn_data_type));

std::vector<uint32_t> input_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(inputs[input_i].node_arg, input_shape), "Cannot get shape");

ORT_RETURN_IF_NOT(qnn_model_wrapper.ProcessQuantizationParameter(inputs[input_i].quant_param,
quantize_param.scaleOffsetEncoding.scale,
quantize_param.scaleOffsetEncoding.offset),
"Cannot get quantization parameter");

float* ini_data = nullptr;
std::vector<uint8_t> unpacked_tensor;
bool is_initializer_input = qnn_model_wrapper.IsInitializerInput(input_name);
if (is_initializer_input) {
const auto& input_tensor = qnn_model_wrapper.GetInitializerTensors().at(input_name);
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_tensor, unpacked_tensor));
ini_data = reinterpret_cast<float*>(unpacked_tensor.data());
if (input_i == 1) {
min_value_ = *ini_data;
continue;
} else if (input_i == 2) {
max_value_ = *ini_data;
continue;
}
}
ORT_ENFORCE(input_i == 0, "QNN ReluMinMax operator expects only one input. Min and max are expected to be parameters, ie. initializer inputs in ONNX model");

Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, input_name);
QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_param,
std::move(input_shape), std::move(unpacked_tensor));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
input_names.push_back(input_name);
}

return Status::OK();
return ProcessInput(qnn_model_wrapper, node_unit.Inputs()[0], logger, input_names);
}

Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool do_op_validation) const {
const auto& inputs = node_unit.Inputs();
const size_t num_inputs = inputs.size();

const Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
std::vector<std::string> param_tensor_names;
Qnn_Scalar_t min_qnn_scalar = QNN_SCALAR_INIT;
min_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32;
min_qnn_scalar.floatValue = min_value_;
QnnParamWrapper min_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MIN_VALUE, min_qnn_scalar);
param_tensor_names.push_back(min_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(min_value_param));

Qnn_Scalar_t max_qnn_scalar = QNN_SCALAR_INIT;
max_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32;
max_qnn_scalar.floatValue = max_value_;
QnnParamWrapper max_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MAX_VALUE, max_qnn_scalar);
param_tensor_names.push_back(max_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(max_value_param));

auto get_f32_from_bytes = [](const std::vector<uint8_t>& bytes, float default_val) -> float {
return bytes.empty() ? default_val : *reinterpret_cast<const float*>(bytes.data());
};

// Set the 'min' parameter.
{
std::vector<uint8_t> min_val_bytes;

if (num_inputs > 1 && !inputs[1].node_arg.Name().empty()) {
OnnxInputInfo min_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], min_input_info));
ORT_RETURN_IF_NOT(min_input_info.qnn_data_type == qnn_data_type,
"QNN EP: The 'min' input of the Clip operator must be of type float32.");
assert(min_input_info.is_initializer); // Checked by ExplicitOpCheck().
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*min_input_info.initializer_tensor, min_val_bytes));
}

Qnn_Scalar_t min_qnn_scalar = QNN_SCALAR_INIT;
min_qnn_scalar.dataType = qnn_data_type;
min_qnn_scalar.floatValue = get_f32_from_bytes(min_val_bytes, std::numeric_limits<float>::lowest());
QnnParamWrapper min_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MIN_VALUE,
min_qnn_scalar);
param_tensor_names.push_back(min_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(min_value_param));
}

// Set the 'max' parameter.
{
std::vector<uint8_t> max_val_bytes;

if (num_inputs > 2 && !inputs[2].node_arg.Name().empty()) {
OnnxInputInfo max_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[2], max_input_info));
ORT_RETURN_IF_NOT(max_input_info.qnn_data_type == qnn_data_type,
"QNN EP: The 'max' input of the Clip operator must of type float32.");
assert(max_input_info.is_initializer); // Checked by ExplicitOpCheck().
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*max_input_info.initializer_tensor, max_val_bytes));
}

Qnn_Scalar_t max_qnn_scalar = QNN_SCALAR_INIT;
max_qnn_scalar.dataType = qnn_data_type;
max_qnn_scalar.floatValue = get_f32_from_bytes(max_val_bytes, std::numeric_limits<float>::max());
QnnParamWrapper max_value_param(node_unit.Index(), node_unit.Name(), QNN_OP_RELU_MIN_MAX_PARAM_MAX_VALUE,
max_qnn_scalar);
param_tensor_names.push_back(max_value_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(max_value_param));
}

ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
Expand Down
15 changes: 13 additions & 2 deletions onnxruntime/core/providers/qnn/builder/opbuilder/topk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,20 @@ Status TopKOpBuilder::ExplictOpCheck(QnnModelWrapper& qnn_model_wrapper, const N
auto rank = input_shape.size();
auto axis = node_helper.Get("axis", -1);

if (-1 == axis && axis != static_cast<int32_t>(rank - 1)) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "QNN TopK axis is always the last dimension");
ORT_RETURN_IF_NOT(axis == -1 || axis == static_cast<int32_t>(rank - 1),
"QNN TopK's axis is always the last dimension");

// ONNX TopK outputs int64 indices, but the equivalent QNN op outputs uint32 indices.
// The QNN HTP backend does not generally support the int64 type, but QNN EP can just use the uint32 type
// for TopK ops within the graph. However, if the TopK op **generates** a graph output,
// then we cannot support it on the HTP backend.
bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
if (is_npu_backend) {
const std::string& output_name = node_unit.Outputs()[0].node_arg.Name();
ORT_RETURN_IF(qnn_model_wrapper.IsGraphOutput(output_name),
"QNN EP does not support TopK ops that generate a graph output.");
}

return Status::OK();
}

Expand Down
7 changes: 7 additions & 0 deletions onnxruntime/test/optimizer/qdq_transformer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2497,10 +2497,15 @@ TEST(QDQTransformerTests, Clip) {
epsilon);
};

constexpr int16_t int16_min = std::numeric_limits<int16_t>::min();
constexpr uint16_t uint16_min = std::numeric_limits<uint16_t>::min();

std::vector<int> opsets{12, 18, 19};
for (auto opset : opsets) {
test_case(.0235294122248888f, static_cast<int8_t>(-128), 0, opset); // [0, 6]
test_case(.0235294122248888f, static_cast<int8_t>(-128), 0, opset, true); // [0, 6] contrib qdq
test_case(9.15541313801785e-5f, int16_min, 0, opset, true); // [0, 6] contrib 16-bit qdq
test_case(0.0009f, int16_min, 1, opset, true); // [0, 58.98] contrib 16-bit qdq
test_case(.02f, static_cast<int8_t>(-128), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<int8_t>(-128), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<int8_t>(-128), 1, opset); // [0, 7.65]
Expand All @@ -2513,6 +2518,8 @@ TEST(QDQTransformerTests, Clip) {
test_case(.04f, static_cast<int8_t>(-97), 1, opset, true); // [-1.24, 8.96] contrib qdq
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset); // [0, 6]
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset, true); // [0, 6] contrib qdq
test_case(9.15541313801785e-5f, uint16_min, 0, opset, true); // [0, 6] contrib 16-bit qdq
test_case(0.0009f, uint16_min, 1, opset, true); // [0, 58.98] contrib 16-bit qdq
test_case(.02f, static_cast<uint8_t>(0), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<uint8_t>(0), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<uint8_t>(0), 1, opset); // [0, 7.65]
Expand Down
6 changes: 3 additions & 3 deletions onnxruntime/test/providers/qnn/average_pool_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static void RunAveragePoolOpTest(const std::string& op_type,
provider_options["backend_path"] = "libQnnCpu.so";
#endif

RunQnnModelTest(BuildOpTestCase(op_type, input_defs, attrs),
RunQnnModelTest(BuildOpTestCase<float>(op_type, input_defs, {}, attrs),
provider_options,
opset,
expected_ep_assignment);
Expand All @@ -53,8 +53,8 @@ static void RunQDQAveragePoolOpTest(const std::string& op_type,
provider_options["backend_path"] = "libQnnHtp.so";
#endif

TestQDQModelAccuracy(BuildOpTestCase(op_type, input_defs, attrs),
BuildQDQOpTestCase<QuantType>(op_type, input_defs, attrs),
TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, input_defs, {}, attrs),
BuildQDQOpTestCase<QuantType>(op_type, input_defs, {}, attrs),
provider_options,
opset,
expected_ep_assignment);
Expand Down
Loading