Add pass to convert Uint8 to int8 across operators #2826

Closed
wants to merge 52 commits into from
Changes from 31 commits
Commits
52 commits
e757f4a
Avoid uint8 being added for dynamicQuantizeLinear
Feb 23, 2024
6c02b5a
Update dynamicquantizelinear tests
Feb 23, 2024
6eef2ca
Revert "Update dynamicquantizelinear tests"
Feb 27, 2024
182fc60
Revert "Avoid uint8 being added for dynamicQuantizeLinear"
Feb 27, 2024
92316f8
Add has_type matcher
Feb 28, 2024
669c850
Backup of pass for modifying dynamicquantizelinear
Feb 28, 2024
ce107f3
Fix format
Feb 28, 2024
b4f87d1
Split out dynamic quantize linear as a separate pass to simplify_qdq…
Feb 28, 2024
32d3c54
Cleanup move from has_type matcher
Feb 28, 2024
6bfcf22
Fix license
Feb 28, 2024
c52a116
Add pass to verify test
Feb 28, 2024
0d6094c
Cleanup text in simplify pass
Feb 28, 2024
4346acc
Add reduce axes in parse_dynamicquantizelinear
Feb 28, 2024
ce59bd1
Fix bug found in parse_dynamicquantizelinear
Feb 29, 2024
f9f515d
Fix licensing in updated dynamicquantizelinear related files
Feb 29, 2024
5c71546
Update CPU target with simplify_dynamicquantizelinear
Feb 29, 2024
f9ef07e
Format
Feb 29, 2024
f9904e7
rename x_max/x_min to be more clear
Feb 29, 2024
d166547
Add another verify test for 2d case
Feb 29, 2024
8618a97
Remove input flatten, handle serial min/max ops
Feb 29, 2024
60d8671
Make matcher more robust to target series of ops used in dynamicquant…
Mar 1, 2024
e8349fd
fix format for update matcher
Mar 1, 2024
98a646b
Fix parse tests
Mar 1, 2024
005fa53
Update matcher to remove the use of m_t
Mar 1, 2024
22d4dd6
Rename len_vec to axes for reduce operators
Mar 1, 2024
bf3f67e
Set simplify path to only target supported quantize ops
Mar 2, 2024
b05bd34
Create skip_broadcast_squeeze to handle 1d case
Mar 5, 2024
e146863
Fix format
Mar 5, 2024
fa15eac
Add same_type() back into dot operator. Silence 1d test for now
Mar 5, 2024
7472d76
Fix sanitizer error with int8 overflow in unit test. Remove same_type…
Mar 6, 2024
f45f0dc
Fix pyflakes errors
Mar 6, 2024
b5b98a1
Handle the input to the quantizelinear as part of pass
Mar 14, 2024
1034c47
Remove debug print
Mar 14, 2024
6492bf5
Merge branch 'develop' into fix_dynamic_int8_quant
TedThemistokleous Mar 14, 2024
2470273
Fix tidy errors with readability
Mar 14, 2024
6b94b6d
Adjust matcher to grab next op to our quantize ops
Mar 15, 2024
41ab319
Fix clang error - match op after quant_op
Mar 15, 2024
91fc540
Merge branch 'develop' into fix_dynamic_int8_quant
TedThemistokleous Mar 15, 2024
2373daa
Format / pyflakes fix
Mar 15, 2024
f5ef46e
Fixes to parse DynamicQuantizeLinear
Mar 18, 2024
fc2c4e8
Fix format
Mar 18, 2024
1fbe2f2
Merge branch 'fix_parse_dynamicquantizelinear' into fix_dynamic_int8_…
TedThemistokleous Mar 18, 2024
6dc8ef1
Merge branch 'develop' into fix_parse_dynamicquantizelinear
TedThemistokleous Mar 18, 2024
eebca5c
Merge branch 'fix_parse_dynamicquantizelinear' into fix_dynamic_int8_…
TedThemistokleous Mar 18, 2024
2887017
Fix format
Mar 15, 2024
de17954
Remove extra literal that slipped in when resolving conflict
Mar 18, 2024
2cde7cc
Update tests to use quant_dot to capture correct int8 output. Update …
Mar 18, 2024
ab77c68
Merge branch 'develop' into fix_dynamic_int8_quant
TedThemistokleous Apr 9, 2024
0b27323
Merge branch 'develop' into fix_dynamic_int8_quant
TedThemistokleous Apr 29, 2024
2654eae
Merge branch 'develop' into fix_dynamic_int8_quant
causten May 28, 2024
637c22a
Merge branch 'develop' into fix_dynamic_int8_quant
TedThemistokleous Aug 3, 2024
63952f2
Merge branch 'develop' into fix_dynamic_int8_quant
causten Aug 14, 2024
1 change: 1 addition & 0 deletions src/CMakeLists.txt
@@ -93,6 +93,7 @@ add_library(migraphx
replace_allocate.cpp
rewrite_reduce.cpp
simplify_qdq.cpp
simplify_dynamicquantizelinear.cpp
sqlite.cpp
rewrite_gelu.cpp
rewrite_pooling.cpp
12 changes: 12 additions & 0 deletions src/include/migraphx/matcher.hpp
@@ -689,6 +689,12 @@ inline auto var(std::string s)
});
}

inline auto has_type(shape::type_t t)
{
return make_basic_pred_matcher(
[=](instruction_ref ins) { return ins->get_shape().type() == t; });
}

inline auto name(std::string s)
{
return make_basic_pred_matcher(
@@ -858,6 +864,12 @@ auto skip_broadcasts_converts(Ms... ms)
return skip(name("broadcast", "multibroadcast", "contiguous", "convert"))(ms...);
}

template <class... Ms>
auto skip_broadcast_squeeze(Ms... ms)
Collaborator:
Just put this matcher in the .cpp file, as it's specific to the pass.

{
return skip(name("broadcast", "multibroadcast", "contiguous", "squeeze", "unsqueeze"))(ms...);
}

template <class F>
inline auto literal_value_checker(F f)
{
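
The new `has_type` and `skip_broadcast_squeeze` helpers are a predicate matcher and a skip combinator. As a rough standalone sketch of how such predicates compose — using simplified stand-in types, not the real MIGraphX API — nested matchers behave like conjoined predicates over an instruction:

```cpp
#include <cassert>
#include <functional>
#include <string>

// Hypothetical stand-ins for MIGraphX's instruction/shape types,
// for illustration only; the real matcher API lives in matcher.hpp.
enum class type_t { uint8_type, int8_type, float_type };

struct instruction
{
    std::string op_name;
    type_t type;
};

using predicate = std::function<bool(const instruction&)>;

// Analogue of match::has_type(t): matches when the output shape's type is t.
predicate has_type(type_t t)
{
    return [=](const instruction& ins) { return ins.type == t; };
}

// Analogue of match::name(s): matches when the operator name is s.
predicate name(const std::string& s)
{
    return [=](const instruction& ins) { return ins.op_name == s; };
}

// Conjunction, mimicking how nested matchers refine a match,
// e.g. match::name("convert")(match::has_type(uint8_type)).
predicate both(predicate a, predicate b)
{
    return [=](const instruction& ins) { return a(ins) && b(ins); };
}
```

In the pass below, the same idea is used to find only `convert` instructions whose output type is uint8, rather than every `convert` in the module.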
2 changes: 1 addition & 1 deletion src/include/migraphx/op/dot.hpp
@@ -39,7 +39,7 @@ struct dot
std::string name() const { return "dot"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this, true}.same_type().same_ndims().has(2);
check_shapes{inputs, *this, true}.same_ndims().has(2);
@umangyadav (Member), Mar 8, 2024:
I think they both need to be the same for the GPU backend at least. For the ref target we can allow the two types to differ, but if we are going to allow that, we had better test it. Can you explain why you made this change?

@TedThemistokleous (Collaborator, Author), Mar 14, 2024:
It was due to using add_instruction and replace_instruction as part of the pass. I've used inserts and moves instead to get the desired result for the 1-d and 2-d cases now. It also turned out that our quantizelinear op was incorrect: only the first two args (data, scale) need to be the same type, and the zero point must be assigned the desired output type, since zero point and output match.

My recent changes should have fixed this as well as the quantizelinear op.

const shape& a = inputs.at(0);
const shape& b = inputs.at(1);
auto t = a.type();
47 changes: 47 additions & 0 deletions src/include/migraphx/simplify_dynamicquantizelinear.hpp
@@ -0,0 +1,47 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_SIMPLIFY_DYNAMICQUANTIZELINEAR_HPP
#define MIGRAPHX_GUARD_RTGLIB_SIMPLIFY_DYNAMICQUANTIZELINEAR_HPP

#include <string>
#include <migraphx/config.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct module;

/**
* Replaces the uint8 min/max/convert ops generated by DynamicQuantizeLinear with int8
* equivalents so quantization proceeds in int8 rather than the default uint8.
*/
struct MIGRAPHX_EXPORT simplify_dynamicquantizelinear
{
std::string name() const { return "simplify_dynamicquantizelinear"; }
void apply(module& m) const;
};

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
45 changes: 22 additions & 23 deletions src/onnx/parse_dynamicquantizelinear.cpp
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -98,41 +98,40 @@ struct parse_dynamicquantizelinear : op_parser<parse_dynamicquantizelinear>
if(x_shape.dynamic())
MIGRAPHX_THROW("DYNAMICQUANTIZELINEAR: dynamic shapes are not supported");

auto x_reshaped =
(x_shape.lens().size() == 1)
? x
: info.add_instruction(
migraphx::make_op("reshape", {{"dims", {x_shape.elements()}}}), x);

auto lit_0 = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0}});
x_reshaped =
info.add_instruction(migraphx::make_op("concat", {{"axis", 0}}), x_reshaped, lit_0);

// 1. Computing y_scale
// Note: currently, DynamicQuantizeLinear only has uint8 quantization:
const auto x_max = std::numeric_limits<uint8_t>::max();
const auto x_min = std::numeric_limits<uint8_t>::min();

auto q_range =
info.add_literal(migraphx::literal{migraphx::shape{x_type}, {x_max - x_min}});
const auto type_max = std::numeric_limits<uint8_t>::max();
const auto type_min = std::numeric_limits<uint8_t>::min();
std::vector<size_t> axes(x_shape.lens().size());
std::iota(axes.begin(), axes.end(), 0);

// maximum(0, max(x))
auto max_x =
info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {0}}}), x_reshaped);
auto reduce_max_x =
info.add_instruction(migraphx::make_op("reduce_max", {{"axes", axes}}), x);
auto max_x = info.add_common_op("max", lit_0, reduce_max_x);

// minimum(0, min(x))
auto min_x =
info.add_instruction(migraphx::make_op("reduce_min", {{"axes", {0}}}), x_reshaped);
auto reduce_min_x =
info.add_instruction(migraphx::make_op("reduce_min", {{"axes", axes}}), x);
auto min_x = info.add_common_op("min", lit_0, reduce_min_x);

auto q_range = info.add_literal(migraphx::literal{
migraphx::shape{x_type, max_x->get_shape().lens()}, {type_max - type_min}});

auto q_min = info.add_literal(
migraphx::literal{migraphx::shape{x_type, min_x->get_shape().lens()}, {type_min}});
auto q_max = info.add_literal(
migraphx::literal{migraphx::shape{x_type, max_x->get_shape().lens()}, {type_max}});

// y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin)
auto sub0 = info.add_common_op("sub", max_x, min_x);
auto y_scale = info.add_common_op("div", sub0, q_range);

// 2. Computing y_zero_point
// intermediate_zero_point = qmin - min(x) / y_scale
auto q_min = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {x_min}});
auto q_max = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {x_max}});
auto sub1 = info.add_common_op("sub", q_min, min_x);
auto interm_zp = info.add_common_op("div", sub1, y_scale);
auto div1 = info.add_common_op("div", min_x, y_scale);
auto interm_zp = info.add_common_op("sub", q_min, div1);
// y_zero_point = cast(round(saturate(intermediate_zero_point)))
auto saturate = info.add_instruction(migraphx::make_op("clip"), interm_zp, q_min, q_max);
auto round = info.add_instruction(migraphx::make_op("nearbyint"), saturate);
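
The rewritten parser follows the ONNX DynamicQuantizeLinear reference formulas. A scalar sketch of the same arithmetic (plain C++, independent of MIGraphX) is useful for checking the order of operations — in particular that the fix computes `qmin - min(x) / y_scale` rather than `(qmin - min(x)) / y_scale`:

```cpp
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <vector>

// Reference arithmetic for ONNX DynamicQuantizeLinear (uint8 output),
// mirroring the steps in parse_dynamicquantizelinear.cpp.
struct dql_params
{
    float y_scale;
    std::uint8_t y_zero_point;
};

dql_params dynamic_quantize_params(const std::vector<float>& x)
{
    const float qmin = 0.0f;   // numeric_limits<uint8_t>::min()
    const float qmax = 255.0f; // numeric_limits<uint8_t>::max()

    // maximum(0, max(x)) and minimum(0, min(x)): zero must stay representable.
    float max_x = std::max(0.0f, *std::max_element(x.begin(), x.end()));
    float min_x = std::min(0.0f, *std::min_element(x.begin(), x.end()));

    // y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin)
    float y_scale = (max_x - min_x) / (qmax - qmin);

    // intermediate_zero_point = qmin - min(x) / y_scale
    // (the division binds before the subtraction -- the bug fixed in this PR)
    float interm_zp = qmin - min_x / y_scale;

    // y_zero_point = cast(round(saturate(intermediate_zero_point)))
    float saturated = std::min(qmax, std::max(qmin, interm_zp));
    return {y_scale, static_cast<std::uint8_t>(std::nearbyint(saturated))};
}
```

For example, with `x = {-1, 0, 1, 3}` the span is 4, so `y_scale = 4/255` and the zero point lands at `round(1 / (4/255)) = round(63.75) = 64`.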
105 changes: 105 additions & 0 deletions src/simplify_dynamicquantizelinear.cpp
@@ -0,0 +1,105 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#include <migraphx/simplify_dynamicquantizelinear.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/program.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/register_op.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

static std::unordered_set<std::string> get_quantizable_op_names()
{
static std::unordered_set<std::string> s = {"convolution", "dot"};
return s;
}

/*
* DynamicQuantizeLinear by default feeds a uint8_t-typed zero point into a quantizelinear,
* which needs to be converted to int8 in order to avoid uint8 x int8 or uint8-only
* operations occurring on the backend, as these aren't supported by MLIR nor by how we
* simplify our quantizable ops.
*/
struct match_find_dynamicquantizelinear_convert_int8_zp
Member:
@nives-vukovic this could be helpful for your problem.

Collaborator (Author):
It's not a complete solution. I've got changes built on top of this PR right now to test this out. It looks like the prequantized encoder models also have quant_dot/quant_convolution added after the fact, so I'll need to add a check for them as well.

This will be in a separate PR linked to @nives-vukovic's issue. I've got one of the encoders reading in correctly; it's just failing on compile now after I've added this updated pass.

{
auto matcher() const
{
return match::name(get_quantizable_op_names())(
match::any_arg(0, 1)(skip_broadcast_squeeze(match::name("quantizelinear")(
match::arg(0)(skip_broadcasts(match::any())),
match::arg(2)(skip_broadcasts(
match::name("convert")(
match::has_type(migraphx::shape::uint8_type),
match::arg(0)(match::name("nearbyint")(
match::arg(0)(match::name("clip").bind("saturate")))
.bind("round")))
.bind("convert")))))));
}

void apply(module& m, const match::matcher_result& r) const
{
/* Need to modify the uint8 min/max range as well as the final convert so everything is int8 */
auto convert_op = r.instructions["convert"];
// Ops to get q_min/q_max quickly
auto round_op = r.instructions["round"];
auto saturate_op = r.instructions["saturate"];
auto q_min = saturate_op->inputs().at(1);
auto q_max = saturate_op->inputs().at(2);

// get new desired range defined by int8_t
const auto x_min = std::numeric_limits<int8_t>::min();
const auto x_max = std::numeric_limits<int8_t>::max();

// Replace min/max of uint8 with min/max of int8 - q_range is identical so doesn't need to
// be modified. Need to replace other ops which also take uint8 values first.
auto x_type = q_min->get_shape().type();
auto q_min_int8 = m.add_literal(
migraphx::literal{migraphx::shape{x_type, q_min->get_shape().lens()}, {x_min}});
auto q_max_int8 = m.add_literal(
migraphx::literal{migraphx::shape{x_type, q_max->get_shape().lens()}, {x_max}});

m.replace_instruction(q_min, q_min_int8);
m.replace_instruction(q_max, q_max_int8);
m.replace_instruction(
convert_op,
migraphx::make_op("convert", {{"target_type", migraphx::shape::int8_type}}),
round_op);
}
};

void simplify_dynamicquantizelinear::apply(module& m) const
{
match::find_matches(m, match_find_dynamicquantizelinear_convert_int8_zp{});
migraphx::run_passes(m, {migraphx::dead_code_elimination{}});
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
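
Note that the pass only swaps the q_min/q_max literals and the final convert's target type; it can leave q_range alone because the uint8 and int8 ranges are both 255 wide, so the switch merely shifts the zero point by 128. A small illustrative sketch of that equivalence (helper names are ours, not MIGraphX APIs):

```cpp
#include <algorithm>
#include <cassert>
#include <cmath>

// Quantize with saturation to a target integer range [qmin, qmax].
int quantize(float x, int zero_point, float scale, int qmin, int qmax)
{
    int q = static_cast<int>(std::nearbyint(x / scale)) + zero_point;
    return std::min(qmax, std::max(qmin, q)); // saturate to the type's range
}

// Recover the real value; identical for uint8 and int8 representations
// because the zero point shifts by the same 128 as the quantized value.
float dequantize(int q, int zero_point, float scale)
{
    return (q - zero_point) * scale;
}
```

With scale 0.1, a uint8 zero point of 70 becomes an int8 zero point of -58, every quantized value drops by 128, and dequantization returns the same result — which is why only the literals and the convert need rewriting.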
2 changes: 1 addition & 1 deletion src/simplify_qdq.cpp
@@ -46,7 +46,7 @@ auto skip_post_dq_ops(Ms... ms)
match::name("broadcast", "multibroadcast", "contiguous", "transpose", "reshape"))(ms...);
}

std::unordered_set<std::string> get_quantizable_op_names()
static std::unordered_set<std::string> get_quantizable_op_names()
{
static std::unordered_set<std::string> s = {"convolution", "dot"};
return s;
2 changes: 2 additions & 0 deletions src/targets/cpu/target.cpp
@@ -44,6 +44,7 @@
#include <migraphx/schedule.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/simplify_dynamicquantizelinear.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/cpu/fuse_ops.hpp>
#include <migraphx/cpu/write_literals.hpp>
@@ -68,6 +69,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
std::set<shape::type_t> unsupported_types(shape::types().begin(), shape::types().end());
unsupported_types.erase(shape::type_t::float_type);
return {normalize_ops{},
simplify_dynamicquantizelinear{},
rewrite_quantization{},
dead_code_elimination{},
eliminate_data_type{unsupported_types, shape::type_t::float_type},
2 changes: 2 additions & 0 deletions src/targets/gpu/target.cpp
Expand Up @@ -52,6 +52,7 @@
#include <migraphx/schedule.hpp>
#include <migraphx/simplify_dyn_ops.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_dynamicquantizelinear.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/split_single_dyn_dim.hpp>
#include <migraphx/gpu/allocation_model.hpp>
@@ -127,6 +128,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{},
normalize_ops{},
dead_code_elimination{},
simplify_dynamicquantizelinear{},
simplify_qdq{},
enable_pass(not mlir_enabled(), rewrite_quantization{}),
dead_code_elimination{},