Add a general optimize pass (#1491)

* Add general optimize pass
* Fuse gemm multiplies by scalar
* Handle zero epsilon

Add a general optimize pass (#1491)
* Add general optimize pass
* Fuse gemm multiplies by scalar
* Handle zero epsilon
a4b82653 · Paul Fultz II · GitHub · 02b39dc6 · a4b82653 · a4b82653
Unverified commit a4b82653, authored Jan 30, 2023 by Paul Fultz II; committed by GitHub on Jan 30, 2023.
7 changed files
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -64,6 +64,7 @@ add_library(migraphx
    normalize_ops.cpp
    op_enums.cpp
    operation.cpp
+    optimize_module.cpp
    opt/memory_coloring.cpp
    opt/memory_coloring_impl.cpp
    pad_calc.cpp

--- a/src/include/migraphx/match/layernorm.hpp
+++ b/src/include/migraphx/match/layernorm.hpp
@@ -48,10 +48,11 @@ struct layernorm_matcher
    auto layernorm_onnx() const
    {
-        return f("div")(arg(0)(x_minus_mean()),
+        auto add_eps = f("add")(either_arg(0, 1)(variance(), is_constant().bind("eps")));
+        return f("div")(
+            arg(0)(x_minus_mean()),
-                        arg(1)(skip_broadcasts(f("sqrt")(arg(0)(
+            arg(1)(skip_broadcasts(f("sqrt")(arg(0)(match::any_of(add_eps, variance()))))));
-                            f("add")(either_arg(0, 1)(variance(), is_constant().bind("eps"))))))));
    }
    auto matcher() const { return layernorm_onnx(); }

--- a/src/include/migraphx/optimize_module.hpp
+++ b/src/include/migraphx/optimize_module.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_OPTIMIZE_MODULE_HPP
+#define MIGRAPHX_GUARD_RTGLIB_OPTIMIZE_MODULE_HPP
+#include <string>
+#include <migraphx/instruction_ref.hpp>
+#include <migraphx/config.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+struct module_pass_manager;
+/**
+ * Pass that runs several other passes in a loop.
+ *
+ * Only the interface is given here: name() is defined inline, while apply()
+ * is declared without a body, so the list of passes and the number of loop
+ * iterations are determined by its out-of-line definition. The struct has no
+ * data members.
+ */
+struct optimize_module
+{
+    std::string name() const { return "optimize_module"; } // fixed name string of this pass
+    void apply(module_pass_manager& mpm) const; // declaration only; takes the pass manager by reference
+};
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/optimize_module.cpp
+++ b/src/optimize_module.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/optimize_module.hpp>
+#include <migraphx/pass_manager.hpp>
+#include <migraphx/simplify_reshapes.hpp>
+#include <migraphx/simplify_algebra.hpp>
+#include <migraphx/eliminate_common_subexpression.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/propagate_constant.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+void optimize_module::apply(module_pass_manager& mpm) const // runs the fixed pass sequence below through mpm
+{
+    for(int i = 0; i < 2; i++) // the whole sequence runs exactly twice; presumably so the second round can use what the first exposed (confirm)
+    {
+        mpm.run_pass(simplify_reshapes{});
+        mpm.run_pass(simplify_algebra{});
+        mpm.run_pass(eliminate_common_subexpression{});
+        mpm.run_pass(dead_code_elimination{}); // first dead_code_elimination, right after eliminate_common_subexpression
+        mpm.run_pass(propagate_constant{});
+        mpm.run_pass(dead_code_elimination{}); // second dead_code_elimination, right after propagate_constant
+    }
+}
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -553,11 +553,13 @@ struct find_gemm_pointwise
 {
    auto matcher() const
    {
-        return precompile_name("pointwise")(
+        auto gemm_op   = match::name("gpu::gemm")(match::nargs(3), match::used_once()).bind("gemm");
+        auto binary_op = match::all_of(
            match::nargs(3),
            match::either_arg(0, 1)(
-                match::any_of(match::standard_shape(), match::is_constant()).bind("c"),
+                match::any_of(match::standard_shape(), match::is_constant()).bind("c"), gemm_op));
-                match::name("gpu::gemm")(match::nargs(3), match::used_once()).bind("gemm")));
+        auto unary_op = match::all_of(match::nargs(2), match::arg(0)(gemm_op));
+        return precompile_name("pointwise")(match::any_of(binary_op, unary_op));
    }
    // TODO: Move to matcher.hpp
@@ -589,61 +591,84 @@ struct find_gemm_pointwise
        return match::name("@return")(match::args(match::any_of(add, mul_add, add_mul)));
    }
+    static auto match_mul(const std::string& input) // input: parameter name that is forwarded to match_param
+    {
+        auto mul = match_mul_const(match_param(input), "alpha"); // presumably binds the constant factor as "alpha" (update_gemm reads mr.instructions["alpha"]) - confirm against match_mul_const
+        return match::name("@return")(match::args(mul)); // matcher for a "@return" instruction whose arguments match `mul`
+    }
    static float get_float(instruction_ref ins) { return ins->get_literal().at<float>(); }
    template <class Gemm>
    static bool update_gemm(Gemm& gemm, module_ref pm, unsigned input)
    {
        auto names = pm->get_parameter_names();
-        if(names.size() != 2)
-            return false;
        std::sort(names.begin(), names.end());
-        unsigned output = input == 0 ? 1 : 0;
+        if(names.size() == 1)
-        auto mr         = match::match_instruction(
+        {
-            *pm, std::prev(pm->end()), match_add(names[input], names[output]));
+            auto mr = match::match_instruction(*pm, std::prev(pm->end()), match_mul(names[input]));
-        if(mr.result == pm->end())
+            if(mr.result == pm->end())
-            return false;
+                return false;
-        if(contains(mr.instructions, "alpha_mul"))
            gemm.alpha *= get_float(mr.instructions["alpha"]);
-        else if(contains(mr.instructions, "beta_mul"))
+            return true;
-            gemm.beta *= get_float(mr.instructions["beta"]);
+        }
-        else if(contains(mr.instructions, "gamma_mul"))
+        else if(names.size() == 2)
        {
-            gemm.alpha *= get_float(mr.instructions["gamma"]);
+            unsigned output = input == 0 ? 1 : 0;
-            gemm.beta *= get_float(mr.instructions["gamma"]);
+            auto mr         = match::match_instruction(
+                *pm, std::prev(pm->end()), match_add(names[input], names[output]));
+            if(mr.result == pm->end())
+                return false;
+            if(contains(mr.instructions, "alpha_mul"))
+                gemm.alpha *= get_float(mr.instructions["alpha"]);
+            else if(contains(mr.instructions, "beta_mul"))
+                gemm.beta *= get_float(mr.instructions["beta"]);
+            else if(contains(mr.instructions, "gamma_mul"))
+            {
+                gemm.alpha *= get_float(mr.instructions["gamma"]);
+                gemm.beta *= get_float(mr.instructions["gamma"]);
+            }
+            return true;
+        }
+        else
+        {
+            return false;
        }
-        return true;
    }
    void apply(module& m, const match::matcher_result& r) const
    {
        auto ins      = r.result;
        auto gemm_ins = r.instructions["gemm"];
-        auto c_ins    = r.instructions["c"];
        auto gemm = any_cast<rocblas_gemm<op::dot>>(gemm_ins->get_operator());
        // Already fused gemm
        if(not float_equal(gemm.beta, 0))
            return;
-        gemm.beta = 1;
+        if(ins->inputs().size() == 3)
+            gemm.beta = 1;
        if(not update_gemm(
               gemm, ins->module_inputs().front(), ins->inputs().front() == gemm_ins ? 0 : 1))
            return;
-        // const-fold input if not standard shape since rocblas can't handle it
-        if(not c_ins->get_shape().standard())
-        {
-            auto c = make_op("contiguous");
-            auto l = c.compute(c.compute_shape({c_ins->get_shape()}), {c_ins->eval()});
-            c_ins  = m.add_literal(l.get_shape(), l.data());
-        }
        auto inputs = gemm_ins->inputs();
        inputs.pop_back();
-        inputs.push_back(c_ins);
+        if(ins->inputs().size() == 3)
+        {
+            auto c_ins = r.instructions["c"];
+            // const-fold input if not standard shape since rocblas can't handle it
+            if(not c_ins->get_shape().standard())
+            {
+                auto c = make_op("contiguous");
+                auto l = c.compute(c.compute_shape({c_ins->get_shape()}), {c_ins->eval()});
+                c_ins  = m.add_literal(l.get_shape(), l.data());
+            }
+            inputs.push_back(c_ins);
+        }
        inputs.push_back(ins->inputs().back());
        m.replace_instruction(ins, gemm, inputs);

--- a/src/targets/gpu/prefuse_ops.cpp
+++ b/src/targets/gpu/prefuse_ops.cpp
@@ -108,7 +108,9 @@ struct find_add_layernorm
    {
        auto ins     = r.result;
        auto add_ins = r.instructions["add"];
-        auto eps     = r.instructions["eps"]->eval().at<float>();
+        float eps    = 0;
+        if(contains(r.instructions, "eps"))
+            eps = r.instructions["eps"]->eval().at<float>();
        m.replace_instruction(ins, add_layernorm{eps}, add_ins->inputs());
    }

--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -38,6 +38,7 @@
 #include <migraphx/layout_nhwc.hpp>
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/normalize_ops.hpp>
+#include <migraphx/optimize_module.hpp>
 #include <migraphx/preallocate_param.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/register_target.hpp>
@@ -118,21 +119,13 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
        rewrite_pooling{},
        dead_code_elimination{},
        rewrite_gelu{},
-        dead_code_elimination{},
+        optimize_module{},
-        eliminate_common_subexpression{},
-        dead_code_elimination{},
-        simplify_algebra{},
-        simplify_reshapes{},
        enable_pass(enabled(MIGRAPHX_ENABLE_NHWC{}), layout_nhwc{}),
        dead_code_elimination{},
-        simplify_reshapes{},
-        simplify_algebra{},
        prefuse_ops{},
        dead_code_elimination{},
        auto_contiguous{},
-        simplify_reshapes{},
+        optimize_module{},
-        propagate_constant{},
-        dead_code_elimination{},
        enable_pass(not enabled(MIGRAPHX_DISABLE_POINTWISE_FUSION{}), fuse_pointwise{}),
        dead_code_elimination{},
        fuse_mlir{&ctx},