"src/include/Sequence.hpp" did not exist on "17f3d2d4bccebcc3a70606a916f93dc90e5eaa3a"
gemm_impl.cpp 6.74 KB
Newer Older
Paul Fultz II's avatar
Paul Fultz II committed
1
#include <rocblas.h>
2
#include <migraphx/gpu/gemm_impl.hpp>
Shucai Xiao's avatar
Shucai Xiao committed
3
4
5
6
7

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

8
/// Translates a MIGraphX element type into the matching rocBLAS datatype.
///
/// @param type element type to translate
/// @return the rocBLAS datatype for the supported floating-point and
///         8/32-bit integer types
/// @throws via MIGRAPHX_THROW for every other element type
rocblas_datatype get_type(shape::type_t type)
{
    switch(type)
    {
    // Types that map directly onto a rocBLAS datatype.
    case shape::half_type: return rocblas_datatype_f16_r;
    case shape::float_type: return rocblas_datatype_f32_r;
    case shape::double_type: return rocblas_datatype_f64_r;
    case shape::int8_type: return rocblas_datatype_i8_r;
    case shape::uint8_type: return rocblas_datatype_u8_r;
    case shape::int32_type: return rocblas_datatype_i32_r;
    case shape::uint32_type: return rocblas_datatype_u32_r;

    // Unsupported types are still listed one by one (no `default`) and simply
    // leave the switch so that the single throw below reports them.
    case shape::tuple_type:
    case shape::bool_type:
    case shape::int16_type:
    case shape::uint16_type:
    case shape::int64_type:
    case shape::uint64_type: break;
    }

    MIGRAPHX_THROW("ROCBLAS_GEMM: data type not supported!");
}

30
31
32
33
34
35
36
37
38
/// Calls `f` with the supplied arguments, padding the call when needed.
///
/// If the number of supplied arguments equals the number of parameters of
/// `f`, they are passed through as-is. Otherwise two extra `nullptr`
/// arguments are appended to the call.
/// NOTE(review): presumably this accommodates rocBLAS signatures that carry
/// two additional trailing pointer parameters - confirm against the targeted
/// rocBLAS releases.
///
/// @param f  function pointer to invoke
/// @param xs arguments forwarded (by value) to `f`
/// @return whatever `f` returns
template <class R, class... Ts, class... Us>
R rocblas_invoke(R (*f)(Ts...), Us... xs)
{
    constexpr bool arity_matches = (sizeof...(Us) == sizeof...(Ts));
    if constexpr(not arity_matches)
    {
        return f(xs..., nullptr, nullptr);
    }
    else
    {
        return f(xs...);
    }
}

39
template <class T>
Shucai Xiao's avatar
Shucai Xiao committed
40
41
42
43
44
void gemm_impl(context& ctx,
               const shape& output_shape,
               const std::vector<argument>& args,
               T alpha,
               T beta,
Khalique Ahmed's avatar
Khalique Ahmed committed
45
46
               bool int8_x4_format,
               bool compute_fp32)
Shucai Xiao's avatar
Shucai Xiao committed
47
{
48
49
50
51
52
53
54
    bool transa     = args[0].get_shape().transposed();
    bool transb     = args[1].get_shape().transposed();
    auto n_dim      = output_shape.lens().size();
    auto dim_1      = n_dim - 1;
    auto dim_0      = n_dim - 2;
    rocblas_int lda = args[0].get_shape().strides()[transa ? dim_1 : dim_0];
    rocblas_int ldb = args[1].get_shape().strides()[transb ? dim_1 : dim_0];
55
    rocblas_int ldc = args[2].get_shape().strides()[dim_0];
56

57
    bool is_3inputs = (args.size() == 4);
58
59
60
61
62
63
64
    if(!is_3inputs)
    {
        beta = 0;
    }
    rocblas_datatype arg_type = get_type(args[0].get_shape().type());
    auto output_type          = arg_type;
    if(output_type == rocblas_datatype_i8_r)
Shucai Xiao's avatar
Shucai Xiao committed
65
    {
66
        output_type = rocblas_datatype_i32_r;
Shucai Xiao's avatar
Shucai Xiao committed
67
    }
68
    auto compute_type = output_type;
Khalique Ahmed's avatar
Khalique Ahmed committed
69
70
71
72
73
    if(compute_fp32)
    {
        if(arg_type == rocblas_datatype_f16_r)
            compute_type = rocblas_datatype_f32_r;
    }
Shucai Xiao's avatar
Shucai Xiao committed
74

Shucai Xiao's avatar
Shucai Xiao committed
75
76
77
78
79
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
    rocblas_gemm_flags flag =
        int8_x4_format ? rocblas_gemm_flags_pack_int8x4 : rocblas_gemm_flags_none;
#else
    (void)int8_x4_format;
80
    int flag = 0;
Shucai Xiao's avatar
Shucai Xiao committed
81
82
#endif

Shucai Xiao's avatar
Shucai Xiao committed
83
84
85
    auto a_lens = args[0].get_shape().lens();
    auto b_lens = args[1].get_shape().lens();
    output_shape.visit_type([&](auto as) {
86
        
Khalique Ahmed's avatar
Khalique Ahmed committed
87
88
        auto alpha_r = as(alpha);
        auto beta_r  = as(beta);
Khalique Ahmed's avatar
Khalique Ahmed committed
89

90
91
92
93
94
95
96
97
98
99
100
101

        // use void pointer to select different data type if using fp32 mode
        void* alpha_v{&alpha_r};
        void* beta_v{&beta_r};

        if(compute_fp32)
        {
            alpha_v = &alpha;
            beta_v = &beta;
        }
        

Shucai Xiao's avatar
Shucai Xiao committed
102
103
104
105
        auto out_lens   = output_shape.lens();
        rocblas_int m   = out_lens[dim_0];
        rocblas_int n   = out_lens[dim_1];
        rocblas_int k   = args[0].get_shape().lens()[dim_1];
Shucai Xiao's avatar
Shucai Xiao committed
106
        auto to_pointer = [&](auto&& arg) { return as.from(arg.data()); };
Shucai Xiao's avatar
Shucai Xiao committed
107
        if(args[0].get_shape().type() == shape::int8_type and (k % 4) != 0 and int8_x4_format)
108
109
110
        {
            MIGRAPHX_THROW("ROCBLAS_GEMM: k size of int8 type input must be mutlple of 4!");
        }
Shucai Xiao's avatar
Shucai Xiao committed
111

Shucai Xiao's avatar
Shucai Xiao committed
112
113
114
115
        auto num_matrices = std::accumulate(
            out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
        if(num_matrices == 1)
        {
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
            rocblas_invoke(&rocblas_gemm_ex,
                            ctx.get_stream().get_rocblas(),
                            transb ? rocblas_operation_transpose : rocblas_operation_none,
                            transa ? rocblas_operation_transpose : rocblas_operation_none,
                            n,
                            m,
                            k,
                            alpha_v,
                            to_pointer(args.at(1)),
                            arg_type,
                            ldb,
                            to_pointer(args.at(0)),
                            arg_type,
                            lda,
                            beta_v,
                            to_pointer(args[2]),
                            output_type,
                            ldc,
                            is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
                            output_type,
                            ldc,
                            compute_type,
                            rocblas_gemm_algo_standard,
                            0,
                            flag);
Shucai Xiao's avatar
Shucai Xiao committed
141
142
143
        }
        else
        {
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
            rocblas_invoke(&rocblas_gemm_strided_batched_ex,
                            ctx.get_stream().get_rocblas(),
                            transb ? rocblas_operation_transpose : rocblas_operation_none,
                            transa ? rocblas_operation_transpose : rocblas_operation_none,
                            n,
                            m,
                            k,
                            alpha_v,
                            to_pointer(args.at(1)),
                            arg_type,
                            ldb,
                            k * n,
                            to_pointer(args.at(0)),
                            arg_type,
                            lda,
                            m * k,
                            beta_v,
                            to_pointer(args[2]),
                            output_type,
                            ldc,
                            m * n,
                            is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
                            output_type,
                            ldc,
                            m * n,
                            num_matrices,
                            compute_type,
                            rocblas_gemm_algo_standard,
                            0,
                            flag);
Shucai Xiao's avatar
Shucai Xiao committed
174
175
        }
    });
176
}
Shucai Xiao's avatar
Shucai Xiao committed
177

178
179
180
181
/// GEMM entry point taking single-precision scalars.
///
/// Forwards every argument unchanged to `gemm_impl` instantiated for `float`.
void gemm(context& ctx,
          const shape& output_shape,
          const std::vector<argument>& args,
          float alpha,
          float beta,
          bool int8_x4_format,
          bool compute_fp32)
{
    gemm_impl<float>(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32);
}

void gemm(context& ctx,
          const shape& output_shape,
          const std::vector<argument>& args,
          int32_t alpha,
Shucai Xiao's avatar
Shucai Xiao committed
193
          int32_t beta,
Khalique Ahmed's avatar
Khalique Ahmed committed
194
195
          bool int8_x4_format,
          bool compute_fp32)
196
{
Khalique Ahmed's avatar
Khalique Ahmed committed
197
    gemm_impl(ctx, output_shape, args, alpha, beta, int8_x4_format, compute_fp32);
Shucai Xiao's avatar
Shucai Xiao committed
198
199
200
201
202
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx