[6.1] Add support for dot-(mul)-softmax-dot offloads to MLIR (#2345)

ff485c7a · Manupa Karunaratne · GitHub · 6d84f7c6 · ff485c7a · ff485c7a
Unverified Commit ff485c7a authored Nov 30, 2023 by Manupa Karunaratne Committed by GitHub Nov 30, 2023
9 changed files
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -136,12 +136,14 @@ rocmtest clang_debug: rocmnode('mi100+') { cmake_build ->
    }
 }, mlir_debug: rocmnode('mi100+') { cmake_build ->
    stage('MLIR Debug') {
-        withEnv(['MIGRAPHX_ENABLE_EXTRA_MLIR=1']) {
+        withEnv(['MIGRAPHX_ENABLE_EXTRA_MLIR=1', 'MIGRAPHX_MLIR_USE_SPECIFIC_OPS=fused,attention,convolution,dot']) {
            def sanitizers = "undefined"
            // Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS.
            def debug_flags_cxx = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}"
            def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr -fno-sanitize-recover=${sanitizers}"
            def gpu_targets = getgputargets()
+            // Since the purpose of this run verify all things MLIR supports,
+            // enabling all possible types of offloads
            cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags_cxx}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DGPU_TARGETS='${gpu_targets}'")
        }
    }

--- a/requirements.txt
+++ b/requirements.txt
@@ -29,4 +29,4 @@ pybind/pybind11@d159a563383d10c821ba7b2a71905d1207db6de4 --build
 msgpack/msgpack-c@cpp-3.3.0 -DMSGPACK_BUILD_TESTS=Off
 sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 ROCmSoftwarePlatform/composable_kernel@70eefcf4f263aa5c25f3c9ff0db8f6f199ef0fb9 -DCK_BUILD_JIT_LIB=On -DCMAKE_POSITION_INDEPENDENT_CODE=On
-ROCmSoftwarePlatform/rocMLIR@13f6c2a69cfe80a575c6b241ec7353d1e953cb12 -DBUILD_FAT_LIBROCKCOMPILER=On
+ROCmSoftwarePlatform/rocMLIR@9e66e8050209f03349a41b6b497f0da2b285a53b -DBUILD_FAT_LIBROCKCOMPILER=On
--- a/src/targets/gpu/fuse_mlir.cpp
+++ b/src/targets/gpu/fuse_mlir.cpp
--- a/src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp
@@ -34,10 +34,11 @@ struct module_pass_manager;
 namespace gpu {
 MIGRAPHX_GPU_EXPORT bool mlir_enabled();
+MIGRAPHX_GPU_EXPORT bool mlir_attention_enabled();
 struct MIGRAPHX_GPU_EXPORT fuse_mlir
 {
-    context* ctx = nullptr;
+    context* ctx      = nullptr;
    bool enable_extra = false;
    std::string name() const { return "gpu::fuse_mlir"; }
    void apply(module_pass_manager& mpm) const;

--- a/src/targets/gpu/include/migraphx/gpu/gemm_softmax_gemm.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/gemm_softmax_gemm.hpp
@@ -66,6 +66,10 @@ struct gemm_softmax_gemm
    }
    static bool is_ck_supported_type(shape::type_t t) { return contains({shape::half_type}, t); }
+    static bool is_mlir_supported_type(shape::type_t t)
+    {
+        return contains({shape::type_t::float_type, shape::half_type}, t);
+    }
 };
 } // namespace gpu

--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -1032,6 +1032,15 @@ tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
    mlir_program mp;
    mp.set_gpu_properties(migraphx_ctx);
    mp.parse(m);
+    const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
+    static std::mutex mutex;
+    if(trace)
+    {
+        const std::lock_guard<std::mutex> lock(mutex);
+        auto mod_op = mlirModuleGetOperation(mp.mmodule.get());
+        std::cout << mlir_print(&mlirOperationPrint, mod_op) << std::endl;
+    }
    return mp.get_tuning_config(exhaustive);
 }

--- a/src/targets/gpu/prefuse_ops.cpp
+++ b/src/targets/gpu/prefuse_ops.cpp
@@ -31,6 +31,7 @@
 #ifdef MIGRAPHX_USE_COMPOSABLEKERNEL
 #include <migraphx/gpu/ck.hpp>
 #endif
+#include <migraphx/gpu/fuse_mlir.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -124,34 +125,55 @@ struct find_add_layernorm
    }
 };
-#ifdef MIGRAPHX_USE_COMPOSABLEKERNEL
 struct pre_gemm_softmax_gemm : gemm_softmax_gemm
 {
    std::string name() const { return "gpu::pre_gemm_softmax_gemm"; }
 };
 MIGRAPHX_REGISTER_OP(pre_gemm_softmax_gemm);
-MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
+auto is_ck_gemm()
 {
-    if(ins->name() != "dot")
+    return match::make_basic_pred_matcher([=](instruction_ref ins) {
-        return false;
+#ifdef MIGRAPHX_USE_COMPOSABLEKERNEL
-    if(not pre_gemm_softmax_gemm::is_ck_supported_type(ins->get_shape().type()))
+        if(not enabled(MIGRAPHX_ENABLE_CK{}))
+            return false;
+        if(ins->name() != "dot")
+            return false;
+        if(not pre_gemm_softmax_gemm::is_ck_supported_type(ins->get_shape().type()))
+            return false;
+        return true;
+#else
+        (void)ins;
        return false;
-    return true;
+#endif
+    });
+}
+auto is_mlir_gemm()
+{
+    return match::make_basic_pred_matcher([=](instruction_ref ins) {
+        if(not mlir_attention_enabled())
+            return false;
+        if(ins->name() != "dot")
+            return false;
+        return std::all_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
+            return pre_gemm_softmax_gemm::is_mlir_supported_type(i->get_shape().type());
+        });
+    });
 }
 struct find_gemm_softmax_gemm
 {
    auto matcher() const
    {
-        auto gemm1 =
+        auto gemm1 = match::skip(match::name("contiguous"))(
-            match::skip(match::name("contiguous"))(match::name("dot")(is_ck_gemm().bind("gemm1")));
+            match::name("dot")(match::any_of(is_ck_gemm(), is_mlir_gemm()).bind("gemm1")));
        auto mul = match::name("mul")(
            match::nargs(2), match::either_arg(0, 1)(match::is_constant().bind("scale"), gemm1));
        auto softmax = match::name("softmax")(match::arg(0)(mul)).bind("softmax");
-        return match::name("dot")(is_ck_gemm().bind("gemm2"))(match::arg(0)(softmax));
+        return match::name("dot")(match::any_of(is_ck_gemm(), is_mlir_gemm()).bind("gemm2"))(
+            match::arg(0)(softmax));
    }
    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
@@ -179,8 +201,6 @@ struct find_gemm_softmax_gemm
    }
 };
-#endif
 } // namespace
 void prefuse_ops::apply(module_pass_manager& mpm) const
@@ -188,10 +208,7 @@ void prefuse_ops::apply(module_pass_manager& mpm) const
    match::find_matches(mpm.get_module(), find_layernorm{});
    mpm.run_pass(dead_code_elimination{});
    match::find_matches(mpm.get_module(), find_add_layernorm{});
-#ifdef MIHRAPHX_USE_COMPOSABLEKERNEL
+    match::find_matches(mpm, find_gemm_softmax_gemm{});
-    if(enabled(MIGRAPHX_ENABLE_CK{}))
-        match::find_matches(mpm, find_gemm_softmax_gemm{});
-#endif
 }
 } // namespace gpu

--- a/test/gpu/fuse_mlir.cpp
+++ b/test/gpu/fuse_mlir.cpp
@@ -144,10 +144,12 @@ TEST_CASE(int_quant_dot_tanh_fails)
        auto tanh = add_pointwise(p1, "main:pointwise0", {dot}, single_pointwise("tanh"));
        mm->add_return({tanh});
    }
-    migraphx::program p2(p1);
+    // This pass should not fuse as int32_t tanh isn't supported.
-    // This pass should do nothing as int32_t tanh isn't supported.
    run_pass(p1);
-    EXPECT(p1 == p2);
+    auto* mm = p1.get_main_module();
+    bool has_pointwise =
+        std::any_of(mm->begin(), mm->end(), [&](const auto& i) { return i.name() == "pointwise"; });
+    EXPECT(has_pointwise);
 }
 int main(int argc, const char* argv[])

--- a/test/verify/gemm_softmax_gemm_relu.cpp
+++ b/test/verify/gemm_softmax_gemm_relu.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "verify_program.hpp"
+#include <migraphx/program.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/make_op.hpp>
+struct gemm_softmax_gemm_relu : verify_program<gemm_softmax_gemm_relu>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        auto* mm = p.get_main_module();
+        migraphx::shape m1_shape{migraphx::shape::half_type, {1, 12, 256, 256}};
+        migraphx::shape m2_shape{migraphx::shape::half_type, {1, 12, 256, 256}};
+        auto m2_elements = m2_shape.elements();
+        auto a           = mm->add_parameter("1", m1_shape);
+        auto b           = mm->add_parameter("2", m1_shape);
+        auto b1          = mm->add_parameter("3", m1_shape);
+        std::vector<float> eights(m2_elements, 0.125);
+        auto eight = mm->add_literal(migraphx::literal{m2_shape, eights});
+        std::vector<float> zeros(m2_elements, 0);
+        auto zero = mm->add_literal(migraphx::literal{m2_shape, zeros});
+        b = mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), b);
+        auto gemm1   = mm->add_instruction(migraphx::make_op("dot"), a, b);
+        auto scale   = mm->add_instruction(migraphx::make_op("mul"), gemm1, eight);
+        auto bias    = mm->add_instruction(migraphx::make_op("add"), scale, zero);
+        auto softmax = mm->add_instruction(migraphx::make_op("softmax", {{"axis", 3}}), bias);
+        auto gemm2   = mm->add_instruction(migraphx::make_op("dot"), softmax, b1);
+        mm->add_instruction(migraphx::make_op("relu"), gemm2);
+        return p;
+    }
+};