Skip to content

Commit

Permalink
Add support for 16bit QDQ Clip. Add more 16-bit QDQ tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianlizarraga committed Sep 20, 2023
1 parent 1b66cfa commit 7e8f7ce
Show file tree
Hide file tree
Showing 8 changed files with 375 additions and 172 deletions.
25 changes: 20 additions & 5 deletions onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/optimizer/initializer.h"
#include "core/optimizer/qdq_transformer/clip_quantizelinear.h"

#include <limits>

#include "core/optimizer/initializer.h"
#include "core/optimizer/qdq_transformer/qdq_util.h"
#include "core/optimizer/utils.h"
#include "core/graph/graph_utils.h"
Expand Down Expand Up @@ -50,14 +53,26 @@ static bool GetQConstantLowerUpper(const Graph& graph, const Node& node, float&
switch (zp_initializer.data_type()) {
case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
const int8_t zero_point = zp_initializer.data<int8_t>()[0];
lower = scale * (-128 - zero_point);
upper = scale * (127 - zero_point);
lower = scale * (std::numeric_limits<int8_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<int8_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
const uint8_t zero_point = zp_initializer.data<uint8_t>()[0];
lower = scale * (0 - zero_point);
upper = scale * (255 - zero_point);
lower = scale * (std::numeric_limits<uint8_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<uint8_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
const int16_t zero_point = zp_initializer.data<int16_t>()[0];
lower = scale * (std::numeric_limits<int16_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<int16_t>::max() - zero_point);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_UINT16: {
const uint16_t zero_point = zp_initializer.data<uint16_t>()[0];
lower = scale * (std::numeric_limits<uint16_t>::lowest() - zero_point);
upper = scale * (std::numeric_limits<uint16_t>::max() - zero_point);
break;
}
default:
Expand Down
50 changes: 28 additions & 22 deletions onnxruntime/test/optimizer/qdq_transformer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2501,28 +2501,34 @@ TEST(QDQTransformerTests, Clip) {
for (auto opset : opsets) {
test_case(.0235294122248888f, static_cast<int8_t>(-128), 0, opset); // [0, 6]
test_case(.0235294122248888f, static_cast<int8_t>(-128), 0, opset, true); // [0, 6] contrib qdq
test_case(.02f, static_cast<int8_t>(-128), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<int8_t>(-128), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<int8_t>(-128), 1, opset); // [0, 7.65]
test_case(.03f, static_cast<int8_t>(-128), 1, opset, true); // [0, 7.65] contrib qdq
test_case(.02f, static_cast<int8_t>(127), 1, opset); // [-5.1 , 0]
test_case(.02f, static_cast<int8_t>(127), 1, opset, true); // [-5.1 , 0] contrib qdq
test_case(.02f, static_cast<int8_t>(0), 1, opset); // [-2.56, 2.54]
test_case(.02f, static_cast<int8_t>(0), 1, opset, true); // [-2.56, 2.54] contrib qdq
test_case(.04f, static_cast<int8_t>(-97), 1, opset); // [-1.24, 8.96]
test_case(.04f, static_cast<int8_t>(-97), 1, opset, true); // [-1.24, 8.96] contrib qdq
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset); // [0, 6]
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset, true); // [0, 6] contrib qdq
test_case(.02f, static_cast<uint8_t>(0), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<uint8_t>(0), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<uint8_t>(0), 1, opset); // [0, 7.65]
test_case(.03f, static_cast<uint8_t>(0), 1, opset, true); // [0, 7.65] contrib qdq
test_case(.02f, static_cast<uint8_t>(255), 1, opset); // [-5.1, 0]
test_case(.02f, static_cast<uint8_t>(255), 1, opset, true); // [-5.1, 0] contrib qdq
test_case(.02f, static_cast<uint8_t>(128), 1, opset); // [-2.56, 2.54]
test_case(.02f, static_cast<uint8_t>(128), 1, opset, true); // [-2.56, 2.54] contrib qdq
test_case(.04f, static_cast<uint8_t>(31), 1, opset); // [-1.24, 8.96]
test_case(.04f, static_cast<uint8_t>(31), 1, opset, true); // [-1.24, 8.96] contrib qdq
test_case(9.15541313801785e-5f, static_cast<int16_t>(std::numeric_limits<int16_t>::min()), 0,
opset, true); // [0, 6] contrib 16-bit qdq
test_case(0.0009f, static_cast<int16_t>(std::numeric_limits<int16_t>::min()), 1,
opset, true); // [0, 58.98] contrib 16-bit qdq
test_case(.02f, static_cast<int8_t>(-128), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<int8_t>(-128), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<int8_t>(-128), 1, opset); // [0, 7.65]
test_case(.03f, static_cast<int8_t>(-128), 1, opset, true); // [0, 7.65] contrib qdq
test_case(.02f, static_cast<int8_t>(127), 1, opset); // [-5.1 , 0]
test_case(.02f, static_cast<int8_t>(127), 1, opset, true); // [-5.1 , 0] contrib qdq
test_case(.02f, static_cast<int8_t>(0), 1, opset); // [-2.56, 2.54]
test_case(.02f, static_cast<int8_t>(0), 1, opset, true); // [-2.56, 2.54] contrib qdq
test_case(.04f, static_cast<int8_t>(-97), 1, opset); // [-1.24, 8.96]
test_case(.04f, static_cast<int8_t>(-97), 1, opset, true); // [-1.24, 8.96] contrib qdq
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset); // [0, 6]
test_case(.02352941176f, static_cast<uint8_t>(0), 0, opset, true); // [0, 6] contrib qdq
test_case(9.15541313801785e-5f, static_cast<uint16_t>(0), 0, opset, true); // [0, 6] contrib 16-bit qdq
test_case(0.0009f, static_cast<uint16_t>(0), 1, opset, true); // [0, 58.98] contrib 16-bit qdq
test_case(.02f, static_cast<uint8_t>(0), 0, opset); // [0, 5.1]
test_case(.02f, static_cast<uint8_t>(0), 0, opset, true); // [0, 5.1] contrib qdq
test_case(.03f, static_cast<uint8_t>(0), 1, opset); // [0, 7.65]
test_case(.03f, static_cast<uint8_t>(0), 1, opset, true); // [0, 7.65] contrib qdq
test_case(.02f, static_cast<uint8_t>(255), 1, opset); // [-5.1, 0]
test_case(.02f, static_cast<uint8_t>(255), 1, opset, true); // [-5.1, 0] contrib qdq
test_case(.02f, static_cast<uint8_t>(128), 1, opset); // [-2.56, 2.54]
test_case(.02f, static_cast<uint8_t>(128), 1, opset, true); // [-2.56, 2.54] contrib qdq
test_case(.04f, static_cast<uint8_t>(31), 1, opset); // [-1.24, 8.96]
test_case(.04f, static_cast<uint8_t>(31), 1, opset, true); // [-1.24, 8.96] contrib qdq
}

// opset_version = 10
Expand Down
34 changes: 30 additions & 4 deletions onnxruntime/test/providers/qnn/clip_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ template <typename QType>
static void RunQDQClipTestOnHTP(const TestInputDef<float>& input_def,
const std::vector<TestInputDef<float>>& min_max_defs,
ExpectedEPNodeAssignment expected_ep_assignment,
int opset = 13) {
int opset = 13,
bool use_contrib_qdq = false) {
ProviderOptions provider_options;

#if defined(_WIN32)
Expand All @@ -88,7 +89,8 @@ static void RunQDQClipTestOnHTP(const TestInputDef<float>& input_def,
#endif

auto f32_model_builder = BuildOpTestCase<float, float>("Clip", {input_def}, {min_max_defs}, {});
auto qdq_model_builder = BuildQDQOpTestCase<QType, float>("Clip", {input_def}, {min_max_defs}, {});
auto qdq_model_builder = BuildQDQOpTestCase<QType, float>("Clip", {input_def}, {min_max_defs}, {},
kOnnxDomain, use_contrib_qdq);

TestQDQModelAccuracy(f32_model_builder,
qdq_model_builder,
Expand All @@ -97,7 +99,7 @@ static void RunQDQClipTestOnHTP(const TestInputDef<float>& input_def,
expected_ep_assignment);
}

// Test QDQ Clip with default min/max.
// Test 8-bit QDQ Clip with default min/max.
// NOTE: The Clip operator is *optimized* away during L1 optimizations, so QNN EP does not get a graph with a Clip op.
// Instead, QNN EP will get a graph with a Q -> DQ.
// - Original sequence: Q1 -> DQ1 -> Clip -> Q2 -> DQ2
Expand All @@ -109,14 +111,38 @@ TEST_F(QnnHTPBackendTests, Clip_U8_DefaultMinMax_Rank4) {
ExpectedEPNodeAssignment::All);
}

// Test QDQ Clip with non-default min and max inputs. QNN EP will get a graph with a Clip operator.
// Test 16-bit QDQ Clip with default min/max.
// NOTE: The Clip operator is *optimized* away during L1 optimizations, so QNN EP does not get a graph with a Clip op.
// Instead, QNN EP will get a graph with a Q -> DQ.
// - Original sequence: Q1 -> DQ1 -> Clip -> Q2 -> DQ2
// - ClipQuantFusion: Fuses Clip -> QuantizeLinear resulting in Q1 -> DQ1 -> Q2' -> DQ2
// - DoubleQDQPairsRemover: Simplifies remaining Q1 -> DQ1 -> Q2' -> DQ2 sequence to Q1 -> DQ2.
TEST_F(QnnHTPBackendTests, Clip_U16_DefaultMinMax_Rank4) {
  const auto input_data = GetFloatDataInRange(-10.0f, 10.0f, 48);
  constexpr int opset_version = 13;
  constexpr bool use_contrib_qdq = true;  // Use com.microsoft Q/DQ ops
  RunQDQClipTestOnHTP<uint16_t>(TestInputDef<float>({1, 3, 4, 4}, false, input_data),
                                {},  // No explicit min/max inputs.
                                ExpectedEPNodeAssignment::All,
                                opset_version,
                                use_contrib_qdq);
}

// Test 8-bit QDQ Clip with explicit (non-default) min and max inputs.
// QNN EP will get a graph containing an actual Clip operator.
TEST_F(QnnHTPBackendTests, Clip_U8_Rank4) {
  const TestInputDef<float> min_def({}, true, {-5.0f});
  const TestInputDef<float> max_def({}, true, {5.0f});
  RunQDQClipTestOnHTP<uint8_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
                               {min_def, max_def},
                               ExpectedEPNodeAssignment::All);
}

// Test 16-bit QDQ Clip with explicit (non-default) min and max inputs.
// QNN EP will get a graph containing an actual Clip operator.
TEST_F(QnnHTPBackendTests, Clip_U16_Rank4) {
  const TestInputDef<float> min_def({}, true, {-5.0f});
  const TestInputDef<float> max_def({}, true, {5.0f});
  constexpr int opset_version = 13;
  constexpr bool use_contrib_qdq = true;  // Use com.microsoft Q/DQ ops
  RunQDQClipTestOnHTP<uint16_t>(TestInputDef<float>({1, 3, 4, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 48)),
                                {min_def, max_def},
                                ExpectedEPNodeAssignment::All,
                                opset_version,
                                use_contrib_qdq);
}

// Test QDQ Clip of rank 5.
TEST_F(QnnHTPBackendTests, Clip_U8_Rank5) {
// We can't use the usual model-building functions because they add standalone Quantize and Dequantize nodes
Expand Down
94 changes: 58 additions & 36 deletions onnxruntime/test/providers/qnn/flatten_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,36 @@ static void RunFlattenTestOnCPU(const TestInputDef<DataType>& input_def,
expected_ep_assignment);
}

//
// CPU tests:
//

// Test that Flatten input (rank4) with axis == 0 runs on the QNN CPU backend
// with all nodes assigned to the EP.
TEST_F(QnnCPUBackendTests, Flatten_Rank4_Axis0) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  const auto axis_attr = utils::MakeAttribute("axis", static_cast<int64_t>(0));
  RunFlattenTestOnCPU(input_def, {axis_attr}, ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank4) with axis == -1 runs on the QNN CPU backend
// with all nodes assigned to the EP.
TEST_F(QnnCPUBackendTests, Flatten_Rank4_AxisNeg1) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  const auto axis_attr = utils::MakeAttribute("axis", static_cast<int64_t>(-1));
  RunFlattenTestOnCPU(input_def, {axis_attr}, ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank5) with axis == 2 runs on the QNN CPU backend
// with all nodes assigned to the EP.
TEST_F(QnnCPUBackendTests, Flatten_Rank5_Axis2) {
  const TestInputDef<float> input_def({1, 2, 3, 4, 4}, false, -10.0f, 10.0f);
  const auto axis_attr = utils::MakeAttribute("axis", static_cast<int64_t>(2));
  RunFlattenTestOnCPU(input_def, {axis_attr}, ExpectedEPNodeAssignment::All);
}

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
//
// HTP tests:
//

// Runs a model with a non-QDQ Flatten operator on the QNN HTP backend. Checks the graph node assignment
// and that inference outputs for QNN EP and CPU EP match.
template <typename DataType>
Expand Down Expand Up @@ -62,7 +92,8 @@ template <typename QType>
static void RunQDQFlattenTestOnHTP(const TestInputDef<float>& input_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
ExpectedEPNodeAssignment expected_ep_assignment,
int opset = 13) {
int opset = 13,
bool use_contrib_qdq = false) {
ProviderOptions provider_options;

#if defined(_WIN32)
Expand All @@ -71,57 +102,48 @@ static void RunQDQFlattenTestOnHTP(const TestInputDef<float>& input_def,
provider_options["backend_path"] = "libQnnHtp.so";
#endif

TestQDQModelAccuracy(BuildOpTestCase<float>("Flatten", {input_def}, {}, attrs), // baseline float32 model
BuildQDQOpTestCase<QType>("Flatten", {input_def}, {}, attrs), // QDQ model
auto f32_model_builder = BuildOpTestCase<float>("Flatten", {input_def}, {}, attrs);
auto qdq_model_builder = BuildQDQOpTestCase<QType>("Flatten", {input_def}, {}, attrs, kOnnxDomain, use_contrib_qdq);
TestQDQModelAccuracy(f32_model_builder,
qdq_model_builder,
provider_options,
opset,
expected_ep_assignment);
}

//
// CPU tests:
//

// Test that Flatten input (rank4) with axis == 0.
// NOTE(review): this capture shows this test twice — the commit moved the CPU
// tests above the HTP section; only one copy exists in the actual file.
TEST_F(QnnCPUBackendTests, Flatten_Rank4_Axis0) {
  // Expect every node in the graph to be assigned to the QNN CPU EP.
  RunFlattenTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
                      {utils::MakeAttribute("axis", static_cast<int64_t>(0))},
                      ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank4) with axis == -1.
// NOTE(review): this capture shows this test twice — the commit moved the CPU
// tests above the HTP section; only one copy exists in the actual file.
TEST_F(QnnCPUBackendTests, Flatten_Rank4_AxisNeg1) {
  // Expect every node in the graph to be assigned to the QNN CPU EP.
  RunFlattenTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
                      {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
                      ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank5) with axis == 2.
// NOTE(review): this capture shows this test twice — the commit moved the CPU
// tests above the HTP section; only one copy exists in the actual file.
TEST_F(QnnCPUBackendTests, Flatten_Rank5_Axis2) {
  // Expect every node in the graph to be assigned to the QNN CPU EP.
  RunFlattenTestOnCPU(TestInputDef<float>({1, 2, 3, 4, 4}, false, -10.0f, 10.0f),
                      {utils::MakeAttribute("axis", static_cast<int64_t>(2))},
                      ExpectedEPNodeAssignment::All);
}

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
//
// HTP tests:
//
// Test that Flatten input (rank4) with axis == 0.
// Test 8-bit QDQ Flatten input (rank4) with axis == 0 on the QNN HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Rank4_Axis0) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  RunQDQFlattenTestOnHTP<uint8_t>(input_def,
                                  {utils::MakeAttribute("axis", static_cast<int64_t>(0))},
                                  ExpectedEPNodeAssignment::All);
}

// Test that Flatten input (rank4) with axis == -1.
// Test 16-bit QDQ Flatten input (rank4) with axis == 0 on the QNN HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Rank4_Axis0_U16) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  constexpr int opset_version = 13;
  constexpr bool use_contrib_qdq = true;  // Use com.microsoft Q/DQ ops
  RunQDQFlattenTestOnHTP<uint16_t>(input_def,
                                   {utils::MakeAttribute("axis", static_cast<int64_t>(0))},
                                   ExpectedEPNodeAssignment::All,
                                   opset_version,
                                   use_contrib_qdq);
}

// Test 8-bit QDQ Flatten input (rank4) with axis == -1 on the QNN HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Rank4_AxisNeg1) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  RunQDQFlattenTestOnHTP<uint8_t>(input_def,
                                  {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
                                  ExpectedEPNodeAssignment::All);
}

// Test QDQ Flatten with an input of rank5.
// Test 16-bit QDQ Flatten input (rank4) with axis == -1 on the QNN HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Rank4_AxisNeg1_U16) {
  const TestInputDef<float> input_def({1, 3, 4, 4}, false, -10.0f, 10.0f);
  constexpr int opset_version = 13;
  constexpr bool use_contrib_qdq = true;  // Use com.microsoft Q/DQ ops
  RunQDQFlattenTestOnHTP<uint16_t>(input_def,
                                   {utils::MakeAttribute("axis", static_cast<int64_t>(-1))},
                                   ExpectedEPNodeAssignment::All,
                                   opset_version,
                                   use_contrib_qdq);
}

// Test 8-bit QDQ Flatten with an input of rank5.
TEST_F(QnnHTPBackendTests, Flatten_QDQ8bit_Rank5) {
// We can't use the usual model-building functions because they add standalone Quantize and Dequantize nodes
// at the input and output. These Q/DQ ops get lowered to QNN's Quantize and Dequantize operators, which DO NOT
Expand Down Expand Up @@ -157,7 +179,7 @@ TEST_F(QnnHTPBackendTests, Flatten_QDQ8bit_Rank5) {
ExpectedEPNodeAssignment::All);
}

// Test that rank4 int32 Flatten runs on HTP backend.
// Test that int32 non-QDQ Flatten runs on HTP backend.
TEST_F(QnnHTPBackendTests, Flatten_Int32_Rank4_Axis2) {
std::vector<int32_t> input_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
RunFlattenTestOnHTP<int32_t>(TestInputDef<int32_t>({1, 3, 2, 2}, false, input_data),
Expand Down
Loading

0 comments on commit 7e8f7ce

Please sign in to comment.