fuse_ck_gemm_softmax_gemm.cpp 2.99 KB
Newer Older
Alan Turner's avatar
Alan Turner committed
1
2
3
4
5
6
7
8
9
10
#include <migraphx/gpu/fuse_ck_gemm_softmax_gemm.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/env.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

Alan Turner's avatar
Alan Turner committed
11
12
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_CK_GEMM_SOFTMAX_GEMM);

Alan Turner's avatar
Alan Turner committed
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
struct module;

namespace gpu {

struct gemm_softmax_gemm_gemm
{
    operation op = make_op("dot");

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.op, "op"));
    }

    std::string name() const { return "gpu::ck_gemm_softmax_gemm"; }

    void check_gemm_shape(const shape& s) const
    {
        if(not contains(range(s.strides().rbegin(), s.strides().rbegin() + 3), 1))
            MIGRAPHX_THROW("Invalid shape for gemm_softmax_gemm_gemm");
    }

    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
    {
        check_shapes{inputs, *this}.same_ndims();
        if(inputs.size() < 2)
            MIGRAPHX_THROW("should have at least two inputs.");
        auto a  = inputs[0];
        auto b  = inputs[1];
        auto b1 = inputs[2];
        for(const auto& input : inputs)
        {
            check_gemm_shape(input);
        }
        return op.compute_shape({op.compute_shape({a, b}), b1});
    }
};
MIGRAPHX_REGISTER_OP(gemm_softmax_gemm_gemm);

namespace {

MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
{
    if(ins->name() != "dot")
        return false;
    auto a = ins->inputs().front()->get_shape();
    auto b = ins->inputs().back()->get_shape();
    if(a.lens().back() > 2048)
        return false;
    return true;
}

struct find_gemm_softmax_gemm_gemm
{
    auto matcher() const
    {
        auto gemm1 =
            match::skip(match::name("contiguous"))(match::name("dot")(is_ck_gemm().bind("gemm1")));
Alan Turner's avatar
Alan Turner committed
71
72
        auto mul     = match::name("mul")(match::any_of[match::inputs()](gemm1)).bind("scale");
        auto add     = match::name("add")(match::any_of[match::inputs()](mul));
Alan Turner's avatar
Alan Turner committed
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
        auto softmax = match::name("softmax")(match::any_of[match::inputs()](add)).bind("softmax");
        return match::name("dot")(is_ck_gemm().bind("gemm2"))(
            match::any_of[match::inputs()](softmax));
    }

    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
    {
        auto ins       = r.result;
        auto gemm2_ins = r.instructions["gemm2"];
        auto gemm1_ins = r.instructions["gemm1"];

        auto inputs = gemm1_ins->inputs();            // A, B
        inputs.push_back(gemm2_ins->inputs().back()); // B1

        mpm.get_module().replace_instruction(
            ins, gemm_softmax_gemm_gemm{gemm2_ins->get_operator()}, inputs);
    }
};

} // namespace

void fuse_ck_gemm_softmax_gemm::apply(module_pass_manager& mpm) const
{
Alan Turner's avatar
Alan Turner committed
96
97
    if(not enabled(MIGRAPHX_DISABLE_CK_GEMM_SOFTMAX_GEMM{}))
        match::find_matches(mpm, find_gemm_softmax_gemm_gemm{});
Alan Turner's avatar
Alan Turner committed
98
99
100
101
102
103
}

} // namespace gpu

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx