[mlir] Adding quant convolution fusion as anchor op (#1683)

Exposed the mlir_enabled() call the decide for lowering pipeline's enablement Disabled the rewrite quantization pipeline in mlir compilation Added quant convolution as anchor ops Fixed the return type expectations Added the fall back hip implementation for quantizelinear and dequantizelinear Will need advises to improve the implementation for quantizelinear

[mlir] Adding quant convolution fusion as anchor op (#1683)
Exposed the mlir_enabled() call the decide for lowering pipeline's enablement Disabled the rewrite quantization pipeline in mlir compilation Added quant convolution as anchor ops Fixed the return type expectations Added the fall back hip implementation for quantizelinear and dequantizelinear Will need advises to improve the implementation for quantizelinear
7f105952 · Zhuoran Yin · GitHub · 0ff00ef6 · 7f105952 · 7f105952
Unverified Commit 7f105952 authored May 03, 2023 by Zhuoran Yin Committed by GitHub May 03, 2023
9 changed files
--- a/src/cpp_generator.cpp
+++ b/src/cpp_generator.cpp
@@ -179,6 +179,8 @@ std::string cpp_generator::generate_point_op(const operation& op,
        else if(with_char(::isdigit)(key[0]))
        {
            auto i = std::stoul(key);
+            if(i >= args.size())
+                MIGRAPHX_THROW("Invalid argument index: " + key);
            return args.at(i);
        }
        else if(v.contains(key))

--- a/src/fuse_pointwise.cpp
+++ b/src/fuse_pointwise.cpp
@@ -69,7 +69,6 @@ static void create_pointwise_modules(module_pass_manager& mpm)
            continue;
        if(ins->get_operator().name() == "layout")
            continue;
-        assert(ins->get_operator().attributes().contains("point_op"));
        auto* pm = mpm.create_module(mpm.get_module().name() + ":pointwise" + std::to_string(n++));
        pm->set_bypass();


--- a/src/include/migraphx/op/dequantizelinear.hpp
+++ b/src/include/migraphx/op/dequantizelinear.hpp
@@ -37,6 +37,15 @@ namespace op {

 struct dequantizelinear
 {
+
+    value attributes() const
+    {
+        // Note: point_op attribute is not used in this op. Instead, in
+        // gpu compilation pipeline, rewrite_quantization will be invoked
+        // from generate_pointwise() to rewrite this op.
+        return {{"pointwise", true}};
+    }
+
    std::string name() const { return "dequantizelinear"; }
    shape compute_shape(std::vector<shape> inputs) const
    {

--- a/src/include/migraphx/op/quantizelinear.hpp
+++ b/src/include/migraphx/op/quantizelinear.hpp
@@ -38,6 +38,15 @@ namespace op {
 struct quantizelinear
 {
    std::string name() const { return "quantizelinear"; }
+
+    value attributes() const
+    {
+        // Note: point_op attribute is not used in this op. Instead, in
+        // gpu compilation pipeline, rewrite_quantization will be invoked
+        // from generate_pointwise() to rewrite this op.
+        return {{"pointwise", true}};
+    }
+
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.same_dims().has(2, 3);

--- a/src/targets/gpu/compile_gen.cpp
+++ b/src/targets/gpu/compile_gen.cpp
@@ -29,6 +29,7 @@
 #include <migraphx/module.hpp>
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/eliminate_common_subexpression.hpp>
+#include <migraphx/rewrite_quantization.hpp>
 #include <migraphx/cpp_generator.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/instruction.hpp>
@@ -171,7 +172,8 @@ std::string make_transformer_args(std::vector<std::string> transformers)
 void generate_pointwise(cpp_generator& gg, const module& pm, const std::string& name)
 {
    module m = pm;
-    run_passes(m, {eliminate_common_subexpression{}, dead_code_elimination{}});
+    run_passes(m,
+               {rewrite_quantization{}, eliminate_common_subexpression{}, dead_code_elimination{}});
    cpp_generator g;
    g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
    g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");

--- a/src/targets/gpu/fuse_mlir.cpp
+++ b/src/targets/gpu/fuse_mlir.cpp
@@ -38,6 +38,27 @@ namespace gpu {

 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_MLIR);

+bool mlir_enabled()
+{
+#ifdef MIGRAPHX_MLIR
+    const bool mlir_enabled = enabled(MIGRAPHX_ENABLE_MLIR{});
+    if(mlir_enabled)
+    {
+        return true;
+    }
+    else
+    {
+
+        std::cerr << "WARNING: MIGraphX built with MLIR but it is not enabled. Please set the env "
+                     "var MIGRAPHX_ENABLE_MLIR to use MLIR kernel generator."
+                  << std::endl;
+        return false;
+    }
+#else
+    return false;
+#endif
+}
+
 #ifdef MIGRAPHX_MLIR

 struct mlir_op
@@ -58,8 +79,11 @@ struct mlir_op
            MIGRAPHX_THROW("should have one submodule.");
        if(inputs.size() < 2)
            MIGRAPHX_THROW("should have at least two inputs.");
-        auto n = inputs.size();
-        return op.compute_shape({inputs[n - 2], inputs[n - 1]});
+        auto n     = inputs.size();
+        auto* pm   = mods.front();
+        auto type  = pm->get_output_shapes().front().type();
+        auto shape = op.compute_shape({inputs[n - 2], inputs[n - 1]});
+        return shape.with_type(type);
    }
 };
 MIGRAPHX_REGISTER_OP(mlir_op);
@@ -68,7 +92,7 @@ namespace {

 MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
 {
-    if(ins->name() != "convolution")
+    if(ins->name() != "convolution" and ins->name() != "quant_convolution")
        return false;
    value v    = ins->get_operator().to_value();
    auto group = v.at("group").to<int>();
@@ -98,14 +122,25 @@ struct find_mlir_op
        auto names         = pm->get_parameter_names();
        // Whitelist pointwise operators
        if(std::any_of(pm->begin(), pm->end(), [](const auto& i) {
-               return not contains(
-                   {"@literal", "@param", "@return", "convolution", "dot", "add", "relu"},
-                   i.name());
+               return not contains({"@literal",
+                                    "@param",
+                                    "@return",
+                                    "convolution",
+                                    "quant_convolution",
+                                    "dot",
+                                    "add",
+                                    "relu",
+                                    "dequantizelinear",
+                                    "quantizelinear"},
+                                   i.name());
           }))
            return;
-        // Only fuse with fp32/fp16
+        // Only fuse with fp32/fp16/int8/int32
        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
-               return not contains({shape::type_t::float_type, shape::type_t::half_type},
+               return not contains({shape::type_t::float_type,
+                                    shape::type_t::half_type,
+                                    shape::type_t::int8_type,
+                                    shape::type_t::int32_type},
                                   i->get_shape().type());
           }))
            return;
@@ -148,17 +183,7 @@ struct find_mlir_op
 void fuse_mlir::apply(module_pass_manager& mpm) const
 {
 #ifdef MIGRAPHX_MLIR
-    const bool mlir_enabled = enabled(MIGRAPHX_ENABLE_MLIR{});
-    if(mlir_enabled)
-    {
-        match::find_matches(mpm, find_mlir_op{});
-    }
-    else
-    {
-        std::cerr << "WARNING: MIGraphX built with MLIR but it is not enabled. Please set the env "
-                     "var MIGRAPHX_ENABLE_MLIR to use MLIR kernel generator."
-                  << std::endl;
-    }
+    match::find_matches(mpm, find_mlir_op{});
 #else
    (void)mpm;
 #endif

--- a/src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp
@@ -34,6 +34,8 @@ struct module_pass_manager;

 namespace gpu {

+bool mlir_enabled();
+
 struct fuse_mlir
 {
    context* ctx = nullptr;

--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -106,7 +106,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
        normalize_ops{},
        dead_code_elimination{},
        simplify_qdq{},
-        rewrite_quantization{},
+        enable_pass(not mlir_enabled(), rewrite_quantization{}),
        dead_code_elimination{},
        eliminate_data_type{unsupported_types, shape::type_t::float_type},
        simplify_reshapes{},
@@ -132,7 +132,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
        dead_code_elimination{},
        enable_pass(not enabled(MIGRAPHX_DISABLE_REDUCE_FUSION{}), fuse_reduce{}),
        dead_code_elimination{},
-        fuse_mlir{&ctx},
+        enable_pass(mlir_enabled(), fuse_mlir{&ctx}),
        dead_code_elimination{},
        lowering{&ctx, options.offload_copy},
        eliminate_contiguous{"gpu::contiguous"},

--- a/test/gpu/quantization.cpp
+++ b/test/gpu/quantization.cpp
@@ -23,6 +23,7 @@
 */
 #include <iostream>
 #include <vector>
+#include <migraphx/gpu/fuse_mlir.hpp>
 #include <migraphx/operators.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/quantization.hpp>
@@ -110,7 +111,16 @@ TEST_CASE(int8_quantization)
        migraphx::target gpu_t = migraphx::make_target("gpu");
        run_prog(p, gpu_t, m, gpu_result);

-        EXPECT(migraphx::verify_range(ref_result, gpu_result));
+        // Note: the tolerance for mlir_enabled result is temporarily bumped
+        // higher because the lowering pipeline between mlir fallback and
+        // regular non-mlir pipeline diverged. MLIR fallback uses the
+        // rewrite_quantization at the very end of the pipeline, whereas
+        // the regular pipeline uses the rewrite_quantization in the much
+        // earlier stage.
+        if(migraphx::gpu::mlir_enabled())
+            EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
+        else
+            EXPECT(migraphx::verify_range(ref_result, gpu_result));
    }
 }