Skip to content

Commit

Permalink
DynamicQuantizeMatMul test update (#19517)
Browse files Browse the repository at this point in the history
Adding Reference output for DynamicQuantizeMatMul
```
.\onnxruntime_test_all.exe --gtest_filter="*DynamicQuantizeMatMul.*"
Note: Google Test filter = *DynamicQuantizeMatMul.*
[==========] Running 10 tests from 1 test suite.
[----------] Global test environment set-up.
[----------] 10 tests from DynamicQuantizeMatMul
[ RUN      ] DynamicQuantizeMatMul.HasZeroPoint_NoBias_test_S8
[       OK ] DynamicQuantizeMatMul.HasZeroPoint_NoBias_test_S8 (635 ms)
[ RUN      ] DynamicQuantizeMatMul.HasZeroPoint_NoBias_test_U8
[       OK ] DynamicQuantizeMatMul.HasZeroPoint_NoBias_test_U8 (514 ms)
[ RUN      ] DynamicQuantizeMatMul.NoZeroPoint_HasBias_test_S8
[       OK ] DynamicQuantizeMatMul.NoZeroPoint_HasBias_test_S8 (512 ms)
[ RUN      ] DynamicQuantizeMatMul.NoZeroPoint_HasBias_test_U8
[       OK ] DynamicQuantizeMatMul.NoZeroPoint_HasBias_test_U8 (505 ms)
[ RUN      ] DynamicQuantizeMatMul.NoZeroPoint_NoBias_test_S8
[       OK ] DynamicQuantizeMatMul.NoZeroPoint_NoBias_test_S8 (526 ms)
[ RUN      ] DynamicQuantizeMatMul.NoZeroPoint_NoBias_test_U8
[       OK ] DynamicQuantizeMatMul.NoZeroPoint_NoBias_test_U8 (504 ms)
[ RUN      ] DynamicQuantizeMatMul.HasZeroPoint_HasBias_test_S8
[       OK ] DynamicQuantizeMatMul.HasZeroPoint_HasBias_test_S8 (512 ms)
[ RUN      ] DynamicQuantizeMatMul.HasZeroPoint_HasBias_test_U8
[       OK ] DynamicQuantizeMatMul.HasZeroPoint_HasBias_test_U8 (512 ms)
[ RUN      ] DynamicQuantizeMatMul.UInt8_test_with_empty_input
[       OK ] DynamicQuantizeMatMul.UInt8_test_with_empty_input (112 ms)
[ RUN      ] DynamicQuantizeMatMul.B_PerColumn_ND
[       OK ] DynamicQuantizeMatMul.B_PerColumn_ND (348 ms)
[----------] 10 tests from DynamicQuantizeMatMul (4685 ms total)

[----------] Global test environment tear-down
[==========] 10 tests from 1 test suite ran. (4686 ms total)
[  PASSED  ] 10 tests.
memleakdbg:
----- No memory leaks detected -----
```

- CalculateDynamicQuantizeMatMul to replace CPU EP run reference
- Added more FP32 testcases to isolate all input datatype combinations
  • Loading branch information
raoanag committed Mar 4, 2024
1 parent d0fbd58 commit 91ab4a7
Showing 1 changed file with 142 additions and 62 deletions.
204 changes: 142 additions & 62 deletions onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,90 @@ namespace onnxruntime {
namespace test {

template <typename T>
void TestDynamicQuantizeMatMul(const std::vector<int64_t>& A_dims,
std::vector<int64_t> B_dims,
const std::string& reference_model,
bool is_matrix_b_constant,
static void CalculateDynamicQuantizeMatMul(const int64_t M, const int64_t N, const int64_t K,
const std::vector<float>& A_data, const std::vector<T>& B_data,
std::vector<float>& B_scale, std::vector<T>& B_zero_point,
const std::vector<float>& Bias, std::vector<float>& Y_data,
bool per_column, bool has_zp, bool has_bias) {
// DynamicQuantize Matrix A
const uint32_t num_elements = M * K;
std::vector<T> QuantA_data(num_elements);
std::vector<float> A_scale;
std::vector<T> A_zero_point;

// Get max and min
float min = std::numeric_limits<float>::max();
float max = std::numeric_limits<float>::lowest();
float qmax = static_cast<float>(std::numeric_limits<T>::max());
float qmin = static_cast<float>(std::numeric_limits<T>::lowest());

for (uint32_t i = 0; i < num_elements; ++i) {
max = std::max(A_data[i], max);
min = std::min(A_data[i], min);
}

// Adjust the maximum and minimum to include zero
max = std::max(max, 0.0f);
min = std::min(min, 0.0f);

float scale = static_cast<float>(max - min) / (qmax - qmin);
T zeroPoint = std::round(std::clamp(qmin - min / scale, qmin, qmax));

A_scale.push_back(scale);
A_zero_point.push_back(zeroPoint);

// Matrix Multiplication
for (uint32_t i = 0; i < num_elements; ++i) {
QuantA_data[i] = static_cast<T>(std::round((A_data[i] / scale) + zeroPoint));
}
if (!per_column) {
B_zero_point.resize(N, B_zero_point[0]);
B_scale.resize(N, B_scale[0]);
}

for (int64_t m = 0; m < M; m++) {
for (int64_t n = 0; n < N; n++) {
float sum = 0.0f;
for (int64_t k = 0; k < K; k++) {
float A_dequantized = (static_cast<int>(QuantA_data[m * K + k]) - static_cast<int>(A_zero_point[0]))
* A_scale[0];

float B_dequantized = has_zp ?
(static_cast<int>(B_data[k * N + n]) - static_cast<int>(B_zero_point[n])) * B_scale[n] :
B_data[k * N + n] * B_scale[n];

sum += A_dequantized * B_dequantized;
}
if (has_bias) {
sum += Bias[n];
}
Y_data[m * N + n] = sum;
}
}
}

template <typename T>
void TestDynamicQuantizeMatMul(bool is_matrix_b_constant,
bool per_column = false,
bool has_zp = true,
bool has_bias = false) {
bool has_bias = false,
bool empty_input = false) {
// create rand inputs
RandomValueGenerator random{};

int64_t M = empty_input ? 1 : 4;
int64_t N = 128;
int64_t K = 128;
std::vector<int64_t> A_dims{empty_input ? 0 : M, K};
std::vector<int64_t> B_dims{K, N};
std::vector<int64_t> Y_dims{empty_input ? 0 : M, K};
std::vector<float> A_data = random.Uniform<float>(A_dims, -1.0f, 1.0f);

std::vector<T> B_data;
std::vector<int> tmp_B_data = random.Uniform<int32_t>(B_dims, std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
std::vector<T> tmp_B_data = random.Uniform<T>(B_dims,
(std::is_same_v<T, int8_t>) ?
std::numeric_limits<int8_t>::lowest() / 2 :
std::numeric_limits<uint8_t>::lowest(),
std::numeric_limits<T>::max() / 2);
std::transform(tmp_B_data.begin(), tmp_B_data.end(), std::back_inserter(B_data), [](int32_t v) -> T {
return static_cast<T>(v);
});
Expand All @@ -47,7 +117,9 @@ void TestDynamicQuantizeMatMul(const std::vector<int64_t>& A_dims,
std::for_each(B_zero_point.begin(),
B_zero_point.end(),
[&random](T& zp) {
zp = static_cast<T>(random.Uniform<int32_t>(std::array<int64_t, 1>{1}, std::numeric_limits<T>::min(), std::numeric_limits<T>::max())[0]);
zp = static_cast<T>(random.Uniform<T>(std::array<int64_t, 1>{1},
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max())[0]);
});

std::vector<float> Bias = random.Uniform<float>(AsSpan({B_dims.back()}), -0.1f, 0.1f);
Expand All @@ -69,77 +141,85 @@ void TestDynamicQuantizeMatMul(const std::vector<int64_t>& A_dims,
test.AddOptionalInputEdge<float>();
}

test.AddReferenceOutputs(reference_model);
std::vector<float> Y_data(M * N);
CalculateDynamicQuantizeMatMul<T>(M, N, K, A_data, B_data, B_scale, B_zero_point, Bias, Y_data,
per_column, has_zp, has_bias);
test.AddOutput<float>("Y", Y_dims, Y_data);
test.SetOutputRelErr("Y", 0.02f);
test.Run();
}

template <typename Scalar, bool HasZeroPoint, bool HasBias>
void RunDynamicQuantizeMatMulTest(const string& model_path) {
std::vector<int64_t> A_dims{4, 128};
std::vector<int64_t> B_dims{128, 128};
std::vector<int64_t> Y_dims{4, 128};

TestDynamicQuantizeMatMul<Scalar>(A_dims,
B_dims,
model_path,
false, /*is_matrix_b_constant*/
false, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
template <typename T, bool HasZeroPoint, bool HasBias>
void RunDynamicQuantizeMatMulTest() {
TestDynamicQuantizeMatMul<T>(false, /*is_matrix_b_constant*/
false, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
);

TestDynamicQuantizeMatMul<Scalar>(A_dims,
B_dims,
model_path,
true, /*is_matrix_b_constant*/
false, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
TestDynamicQuantizeMatMul<T>(true, /*is_matrix_b_constant*/
false, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
);

TestDynamicQuantizeMatMul<Scalar>(A_dims,
B_dims,
model_path,
false, /*is_matrix_b_constant*/
true, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
TestDynamicQuantizeMatMul<T>(false, /*is_matrix_b_constant*/
true, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
);

TestDynamicQuantizeMatMul<Scalar>(A_dims,
B_dims,
model_path,
true, /*is_matrix_b_constant*/
true, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
TestDynamicQuantizeMatMul<T>(true, /*is_matrix_b_constant*/
true, /*per_column*/
HasZeroPoint, /*has_zp*/
HasBias /*has_bias*/
);
}

TEST(DynamicQuantizeMatMul, HasZeroPoint_NoBias_test) {
RunDynamicQuantizeMatMulTest<int8_t, true, false>("testdata/dynamic_quantize_matmul_int8.onnx");
RunDynamicQuantizeMatMulTest<uint8_t, true, false>("testdata/dynamic_quantize_matmul_uint8.onnx");
TEST(DynamicQuantizeMatMul, HasZeroPoint_NoBias_test_S8) {
RunDynamicQuantizeMatMulTest<int8_t, true, false>();
}

TEST(DynamicQuantizeMatMul, NoZeroPoint_HasBias_test) {
RunDynamicQuantizeMatMulTest<int8_t, false, true>("testdata/dynamic_quantize_matmul_int8_bias.onnx");
RunDynamicQuantizeMatMulTest<uint8_t, false, true>("testdata/dynamic_quantize_matmul_uint8_bias.onnx");
TEST(DynamicQuantizeMatMul, HasZeroPoint_NoBias_test_U8) {
RunDynamicQuantizeMatMulTest<uint8_t, true, false>();
}

TEST(DynamicQuantizeMatMul, NoZeroPoint_HasBias_test_S8) {
RunDynamicQuantizeMatMulTest<int8_t, false, true>();
}

TEST(DynamicQuantizeMatMul, NoZeroPoint_HasBias_test_U8) {
RunDynamicQuantizeMatMulTest<uint8_t, false, true>();
}

TEST(DynamicQuantizeMatMul, NoZeroPoint_NoBias_test_S8) {
RunDynamicQuantizeMatMulTest<int8_t, false, false>();
}

TEST(DynamicQuantizeMatMul, NoZeroPoint_NoBias_test_U8) {
RunDynamicQuantizeMatMulTest<uint8_t, false, false>();
}

TEST(DynamicQuantizeMatMul, HasZeroPoint_HasBias_test_S8) {
RunDynamicQuantizeMatMulTest<int8_t, true, true>();
}

TEST(DynamicQuantizeMatMul, HasZeroPoint_HasBias_test_U8) {
RunDynamicQuantizeMatMulTest<uint8_t, true, true>();
}

TEST(DynamicQuantizeMatMul, UInt8_test_with_empty_input) {
std::vector<int64_t> A_dims{0, 128};
std::vector<int64_t> B_dims{128, 128};
std::vector<int64_t> Y_dims{0, 128};

TestDynamicQuantizeMatMul<uint8_t>(A_dims,
B_dims,
"testdata/dynamic_quantize_matmul_uint8.onnx",
false /*is_matrix_b_constant*/);

TestDynamicQuantizeMatMul<uint8_t>(A_dims,
B_dims,
"testdata/dynamic_quantize_matmul_uint8.onnx",
true /*is_matrix_b_constant*/);
std::vector<int64_t> A_dims{0, 2};
std::vector<int64_t> B_dims{2, 2};
std::vector<int64_t> Y_dims{0, 2};
OpTester test("DynamicQuantizeMatMul", 1, onnxruntime::kMSDomain);
test.AddInput<float>("T1", A_dims, {});
test.AddInput<uint8_t>("T2", B_dims, {1, 6, 0, 8});
test.AddInput<float>("b_scale", {1}, {1.0f});
test.AddInput<uint8_t>("b_zero_point", {1}, {0});
test.AddOptionalInputEdge<float>();
test.AddOutput<float>("Y", {0, 2}, {});
test.Run();
}

TEST(DynamicQuantizeMatMul, B_PerColumn_ND) {
Expand Down

0 comments on commit 91ab4a7

Please sign in to comment.