Bf16 gpu support #3630

Draft: wants to merge 80 commits into base: develop

Commits (80)
c51c1ce
first pass at integrating generic float
richagadgil Oct 10, 2024
134b408
fix namespaces
richagadgil Oct 10, 2024
d4fa6eb
fix mantissa
richagadgil Oct 10, 2024
0b60841
refactor
richagadgil Oct 11, 2024
7a646f1
refactor
richagadgil Oct 11, 2024
ebe819b
add fp
richagadgil Oct 11, 2024
379a77a
fixed generic float class
richagadgil Oct 14, 2024
174384c
add fp32 test
richagadgil Oct 14, 2024
787b651
remove import
richagadgil Oct 14, 2024
1d1fa1c
update tests
richagadgil Oct 15, 2024
1791092
fp16 tests that work
richagadgil Oct 17, 2024
a2eb005
update tests
richagadgil Oct 18, 2024
ff8ffc7
updated fp16 and fp32 tests
richagadgil Oct 18, 2024
e36fd65
half tests
richagadgil Oct 22, 2024
9ac4e2a
underflow and overflow tests
richagadgil Oct 22, 2024
f05fd31
generate map
richagadgil Oct 22, 2024
cb4d92d
add more tests
richagadgil Oct 22, 2024
0cc1946
fix names
richagadgil Oct 22, 2024
85a761b
update tests
richagadgil Oct 23, 2024
65cf9ae
remove and
richagadgil Oct 24, 2024
fbabf54
disable warning
richagadgil Oct 24, 2024
549f5e6
fix tidy warning
richagadgil Oct 24, 2024
d302e5d
migraphx py fix
richagadgil Oct 25, 2024
8d475e3
add increments
richagadgil Oct 25, 2024
a0fd055
fix warnings
richagadgil Oct 25, 2024
41379fe
disable duplicate branch warning
richagadgil Oct 25, 2024
0c29c7b
add countzero_std
richagadgil Oct 28, 2024
4b012a8
ci error
richagadgil Oct 28, 2024
dbaa3a8
simplify countl
richagadgil Oct 28, 2024
b2bd2a0
fix ci
richagadgil Oct 28, 2024
6f328f0
src
richagadgil Oct 29, 2024
e6d9763
remove flag
richagadgil Oct 29, 2024
6538050
hide abi warning
richagadgil Oct 29, 2024
4e96d4d
revert changes
richagadgil Oct 29, 2024
ef11f1f
Merge branch 'develop' into generic_float
richagadgil Oct 29, 2024
e4a25bd
change half in tests
richagadgil Oct 29, 2024
3354c6e
Update generic_float.hpp
richagadgil Oct 29, 2024
6de079b
format
richagadgil Oct 29, 2024
7750874
Merge branch 'develop' into generic_float
richagadgil Oct 29, 2024
801f485
Merge branch 'develop' into generic_float
causten Oct 30, 2024
33e2c8d
fix bug
richagadgil Oct 30, 2024
9bb7198
Merge branch 'generic_float' of github.com:ROCm/AMDMIGraphX into gene…
richagadgil Oct 30, 2024
b3c345d
fix err
richagadgil Oct 30, 2024
03df6f9
edits
richagadgil Oct 31, 2024
ad817b2
tidy and format
richagadgil Oct 31, 2024
898417b
tidy etc
richagadgil Oct 31, 2024
aa5b9c9
gf
richagadgil Oct 31, 2024
6f72370
fix tidy errs
richagadgil Nov 1, 2024
0aab1a0
bf16 changes
richagadgil Nov 4, 2024
7b965c0
add flag to trace quantization passes (#3571)
shivadbhavsar Oct 30, 2024
5f5f13d
bf16
richagadgil Oct 30, 2024
d64b124
Update bf16.cpp
richagadgil Nov 1, 2024
a064eaa
Update bf16.hpp
richagadgil Nov 2, 2024
befbd9e
Update bf16.hpp
richagadgil Nov 2, 2024
08b9511
update files with working version
richagadgil Nov 4, 2024
b9d204e
Update bf16.cpp
richagadgil Nov 4, 2024
fb6df2d
Update generic_float.hpp
richagadgil Nov 4, 2024
bb78138
Merge branch 'develop' into bf16
richagadgil Nov 8, 2024
8e1f99e
add extra common type
richagadgil Nov 8, 2024
6192970
tidy
richagadgil Nov 8, 2024
c0d6bc4
Update bf16.hpp
richagadgil Nov 11, 2024
7bfc407
Update generic_float.hpp
richagadgil Nov 11, 2024
4cb96ad
Merge branch 'develop' into bf16
richagadgil Nov 11, 2024
ffd4ba2
remove imports
richagadgil Nov 12, 2024
8a10da3
Merge branch 'develop' into bf16
richagadgil Nov 12, 2024
1565a0e
ref tests
richagadgil Nov 13, 2024
e6d1155
migraphx_py fix
richagadgil Nov 13, 2024
867e960
fix test cae by index
richagadgil Nov 13, 2024
9852da5
add rocblas type
richagadgil Nov 13, 2024
bf50653
fix tgts err
richagadgil Nov 13, 2024
0ebd220
address changes
richagadgil Nov 18, 2024
043e322
Merge branch 'develop' into bf16
richagadgil Nov 18, 2024
a3ca184
bf16 gpu support
richagadgil Nov 19, 2024
490d326
add vector types
richagadgil Nov 19, 2024
a63ac1e
rocblas
richagadgil Nov 19, 2024
94990bb
bf16 gpu testing
shivadbhavsar Nov 19, 2024
8aaae90
mlir bf16
shivadbhavsar Nov 19, 2024
208232e
fix type
richagadgil Nov 19, 2024
d4866d5
fix type
richagadgil Nov 19, 2024
59eec66
add type
richagadgil Nov 19, 2024
3 changes: 2 additions & 1 deletion src/api/include/migraphx/migraphx.h
@@ -47,7 +47,8 @@
m(uint64_type, uint64_t) \
m(fp8e4m3fnuz_type, migraphx::fp8::fp8e4m3fnuz) \
m(fp8e4m3fn_type, migraphx::fp8::fp8e4m3fn) \
m(fp8e5m2_type, migraphx::fp8::fp8e5m2)
m(fp8e5m2_type, migraphx::fp8::fp8e5m2) \
m(bf16_type, bf16)
// clang-format on

#ifdef __cplusplus
15 changes: 15 additions & 0 deletions src/driver/main.cpp
@@ -482,6 +482,7 @@ struct compiler
compiler_target ct;
compile_options co;
bool to_fp16 = false;
bool to_bf16 = false;
bool to_fp8 = false;
bool to_int8 = false;
bool to_int4 = false;
@@ -506,9 +507,11 @@ struct compiler
ap.help("Exhaustively search for best tuning parameters for kernels"),
ap.set_value(true));
ap(to_fp16, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(true));
ap(to_bf16, {"--bf16"}, ap.help("Quantize for bf16"), ap.set_value(true));
ap(to_int8, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(true));
ap(to_fp8, {"--fp8"}, ap.help("Quantize for fp8"), ap.set_value(true));
ap(to_int4, {"--int4-weights"}, ap.help("Quantize weights for int4"), ap.set_value(true));
}

auto params(const program& p)
@@ -555,6 +558,10 @@
{
quantize_fp16(p);
}
if(to_bf16)
{
quantize_bf16(p);
}
if(to_int8)
{
quantize_int8(p, t, {host_params(p)});
Expand All @@ -567,6 +574,10 @@ struct compiler
{
quantize_int4_weights(p);
}
p.compile(t, co);
l.save(p);
return p;
@@ -639,6 +650,10 @@ struct verify : command<verify>
{
vo.quantize = precision::fp16;
}
if(c.to_bf16)
{
vo.quantize = precision::bf16;
}
if(c.to_int8)
{
vo.quantize = precision::int8;
1 change: 1 addition & 0 deletions src/driver/precision.hpp
@@ -32,6 +32,7 @@ enum class precision
{
fp32,
fp16,
bf16,
int8
};

4 changes: 4 additions & 0 deletions src/driver/verify.cpp
@@ -100,6 +100,10 @@ std::vector<argument> run_target(program p,
{
quantize_fp16(p);
}
if(vo.quantize == precision::bf16)
{
quantize_bf16(p);
}
p.compile(t, options);

parameter_map m;
39 changes: 39 additions & 0 deletions src/include/migraphx/bf16.hpp
@@ -0,0 +1,39 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#ifndef MIGRAPHX_GUARD_RTGLIB_BF16_HPP
#define MIGRAPHX_GUARD_RTGLIB_BF16_HPP

#include <migraphx/generic_float.hpp>
#include <migraphx/config.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

using bf16 = migraphx::generic_float<7, 8>;

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
4 changes: 3 additions & 1 deletion src/include/migraphx/generic_float.hpp
@@ -104,6 +104,8 @@ struct float32_parts
unsigned int exponent : 8;
unsigned int sign : 1;

static constexpr unsigned int exponent_width() { return 8; }

static constexpr unsigned int mantissa_width() { return 23; }

static constexpr unsigned int max_exponent() { return all_ones<8>(); }
Expand Down Expand Up @@ -152,7 +154,7 @@ struct __attribute__((packed, may_alias)) generic_float
float32_parts f{};
f.sign = sign;

if(exponent == 0) // subnormal fps
if(exponent == 0 and ExponentSize != float32_parts::exponent_width()) // subnormal fps
{

if(mantissa == 0)
3 changes: 3 additions & 0 deletions src/include/migraphx/quantization.hpp
@@ -51,6 +51,9 @@ quantize_fp8(program& prog, const target& t, const std::vector<parameter_map>& c

MIGRAPHX_EXPORT void quantize_int4_weights(program& prog);

MIGRAPHX_EXPORT void quantize_bf16(program& prog,
const std::vector<std::string>& ins_names = {"all"});

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

6 changes: 4 additions & 2 deletions src/include/migraphx/shape.hpp
@@ -34,6 +34,7 @@
#include <migraphx/functional.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/half.hpp>
#include <migraphx/bf16.hpp>
#include <migraphx/float8.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/config.hpp>
@@ -64,8 +65,9 @@ struct MIGRAPHX_EXPORT shape
m(uint64_type, uint64_t) \
m(fp8e4m3fnuz_type, migraphx::fp8::fp8e4m3fnuz) \
m(fp8e4m3fn_type, migraphx::fp8::fp8e4m3fn) \
m(fp8e5m2_type, migraphx::fp8::fp8e5m2)
// clang-format on
m(fp8e5m2_type, migraphx::fp8::fp8e5m2) \
m(bf16_type, bf16)
// clang-format on

#define MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES(x, t) x,
enum type_t
5 changes: 5 additions & 0 deletions src/include/migraphx/type_traits.hpp
@@ -27,6 +27,7 @@

#include <type_traits>
#include <migraphx/half.hpp>
#include <migraphx/bf16.hpp>
#include <migraphx/config.hpp>
#include <migraphx/float8.hpp>

@@ -53,6 +54,10 @@ MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, half)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_signed, half)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, half)

MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, bf16)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_signed, bf16)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, bf16)

MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, migraphx::fp8::fp8e4m3fnuz)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_signed, migraphx::fp8::fp8e4m3fnuz)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, migraphx::fp8::fp8e4m3fnuz)
15 changes: 15 additions & 0 deletions src/py/migraphx_py.cpp
@@ -180,6 +180,17 @@ struct npy_format_descriptor<migraphx::fp8::fp8e5m2>
static constexpr auto name() { return _("fp8e5m2"); }
};

template <>
struct npy_format_descriptor<migraphx::bf16>
{
static std::string format()
{
// TODO: no standard format in numpy for bf16
return "z";
}
static constexpr auto name() { return _("bf16"); }
};

} // namespace detail
} // namespace pybind11

@@ -623,6 +634,10 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
},
"Auto-convert FP8 parameters and return values to Float for MIGraphX Program",
py::arg("prog"));
m.def("quantize_bf16",
&migraphx::quantize_bf16,
py::arg("prog"),
py::arg("ins_names") = std::vector<std::string>{"all"});

#ifdef HAVE_GPU
m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);
10 changes: 10 additions & 0 deletions src/quantization.cpp
@@ -74,6 +74,16 @@
quant_tracer());
}

void quantize_bf16(program& prog, const std::vector<std::string>& ins_names)

{
run_passes(prog,
{normalize_ops{},
optimize_module{{"quantizelinear", "dequantizelinear"}},
truncate_float_pass{ins_names, shape::bf16_type},
optimize_module{{"quantizelinear", "dequantizelinear"}}},

quant_tracer());
}


void quantize_8bits(program& prog,
const target& t,
shape::type_t precision,
7 changes: 5 additions & 2 deletions src/targets/gpu/fuse_mlir.cpp
@@ -361,6 +361,7 @@ bool is_pointwise_op_supported_by_mlir(const instruction& i)
const auto& name = i.name();
const auto result_type = i.get_shape().type();
const std::initializer_list<type_t> allowed_types = {type_t::float_type,
type_t::bf16_type,
type_t::half_type,
type_t::fp8e4m3fnuz_type,
type_t::fp8e4m3fn_type,
@@ -407,6 +408,7 @@ bool is_pointwise_op_supported_by_mlir(const instruction& i)
};
std::set<shape::type_t> float_types = {type_t::float_type,
type_t::half_type,
type_t::bf16_type,
type_t::fp8e4m3fnuz_type,
type_t::fp8e4m3fn_type,
type_t::fp8e5m2_type};
@@ -426,7 +428,7 @@ bool is_pointwise_op_supported_by_mlir(const instruction& i)
return false;
} // else
return std::all_of(i.inputs().begin(), i.inputs().end(), [](const auto& arg) {
return contains({type_t::float_type, type_t::half_type}, arg->get_shape().type());
return contains({type_t::float_type, type_t::half_type, type_t::bf16_type}, arg->get_shape().type());
});
}
return false;
@@ -438,7 +440,7 @@ bool is_reduce_op_supported_by_mlir(const instruction& i)
const auto& name = i.name();
const auto result_type = i.get_shape().type();
const std::initializer_list<type_t> allowed_types = {
type_t::float_type, type_t::half_type, type_t::fp8e4m3fnuz_type};
type_t::float_type, type_t::half_type, type_t::bf16_type, type_t::fp8e4m3fnuz_type};
// Preliminary type check.
if(not contains(allowed_types, result_type))
{
@@ -695,6 +697,7 @@ struct find_mlir_standalone_op
if(std::any_of(gemm_based_op->inputs().begin(), gemm_based_op->inputs().end(), [&](auto i) {
return not contains({shape::type_t::float_type,
shape::type_t::half_type,
shape::type_t::bf16_type,
shape::type_t::int8_type,
shape::type_t::fp8e4m3fnuz_type,
shape::type_t::fp8e4m3fn_type,
3 changes: 2 additions & 1 deletion src/targets/gpu/gemm_impl.cpp
@@ -72,6 +72,7 @@ rocblas_datatype get_type(shape::type_t type)
case shape::int16_type:
case shape::int64_type:
case shape::uint64_type: MIGRAPHX_THROW("ROCBLAS_GEMM: data type not supported!");
case shape::bf16_type: return rocblas_datatype_bf16_r;
}

MIGRAPHX_THROW("ROCBLAS_GEMM: data type not supported!");
@@ -221,7 +222,7 @@ struct gemm_impl
compute_type = rb_compute_type{output_type};
if(compute_fp32)
{
if(arg_type == rocblas_datatype_f16_r)
if(arg_type == rocblas_datatype_f16_r or arg_type == rocblas_datatype_bf16_r)
compute_type = rocblas_datatype_f32_r;
}
if(arg_type == rocblas_datatype_f8_r)
1 change: 1 addition & 0 deletions src/targets/gpu/hip_gemm_impl.cpp
@@ -84,6 +84,7 @@ hipDataType get_type_hipblas(shape::type_t type)
case shape::int16_type:
case shape::int64_type:
case shape::uint64_type: MIGRAPHX_THROW("HIPBLAS_GEMM: data type not supported!");
case shape::bf16_type: return HIP_R_16BF;
}

MIGRAPHX_THROW("HIPBLAS_GEMM: data type not supported!");
2 changes: 2 additions & 0 deletions src/targets/gpu/kernels/include/migraphx/kernels/hip.hpp
@@ -27,7 +27,9 @@
#ifndef MIGRAPHX_USE_HIPRTC
#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
#include <hip/hip_bf16.h>
#include <hip/math_functions.h>

Check warning on line 32 in src/targets/gpu/kernels/include/migraphx/kernels/hip.hpp (GitHub Actions / tidy): duplicate include [readability-duplicate-include,-warnings-as-errors]
#endif

#endif // MIGRAPHX_GUARD_KERNELS_HIP_HPP
@@ -76,6 +76,7 @@ using vec = T __attribute__((ext_vector_type(N)));

using half = _Float16;
using half2 = migraphx::vec<half, 2>;
using bf16 = __bf16;

} // namespace migraphx

2 changes: 2 additions & 0 deletions src/targets/gpu/mlir.cpp
@@ -312,6 +312,8 @@ struct mlir_program
result = mlirF32TypeGet(ctx.get());
else if(as.type_enum() == shape::half_type)
result = mlirF16TypeGet(ctx.get());
else if(as.type_enum() == shape::bf16_type)
result = mlirBF16TypeGet(ctx.get());
else if(as.type_enum() == shape::fp8e4m3fnuz_type)
result = mlirFloat8E4M3FNUZTypeGet(ctx.get());
else if(as.type_enum() == shape::fp8e4m3fn_type)
1 change: 1 addition & 0 deletions src/targets/gpu/target.cpp
@@ -99,6 +99,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
unsupported_types.erase(shape::type_t::uint8_type);
unsupported_types.erase(shape::type_t::int32_type);
unsupported_types.erase(shape::type_t::tuple_type);
unsupported_types.erase(shape::type_t::bf16_type);

// whitelist supported Ops for the FP8 types
// different between fp8e4m3fnuz and OCP types because rocBLAS only has