quant_convolution.cpp 6.37 KB
Newer Older
1
#include <migraphx/gpu/quant_convolution.hpp>
2
#include <migraphx/gpu/device/convert.hpp>
3
4
5
6
7
8
9
10
11
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
{
12
    check_shapes{inputs, *this}.has(5).standard();
13
14
15
    return op.compute_shape({inputs.at(0), inputs.at(1)});
}
argument miopen_quant_convolution::compute(context& ctx,
Shucai Xiao's avatar
Shucai Xiao committed
16
17
                                           const shape& output_shape,
                                           const std::vector<argument>& args) const
18
{
Shucai Xiao's avatar
Shucai Xiao committed
19
    auto x_desc      = make_tensor(args[0].get_shape());
20
    auto x_desc_vec4 = make_tensor(args[0].get_shape(), true);
Shucai Xiao's avatar
Shucai Xiao committed
21
    auto w_desc      = make_tensor(args[1].get_shape());
22
    auto w_desc_vec4 = make_tensor(args[1].get_shape(), true);
23
    shape tmp_output_shape{shape::float_type, output_shape.lens()};
Shucai Xiao's avatar
Shucai Xiao committed
24
    auto y_desc = make_tensor(tmp_output_shape);
25

26
27
28
29
30
    float alpha = 1;
    float beta  = 0;

    // pack input to vec4 format
    auto status = miopenTransformTensor(ctx.get_stream().get_miopen(),
Shucai Xiao's avatar
Shucai Xiao committed
31
32
33
34
35
36
37
                                        &alpha,
                                        x_desc.get(),
                                        args[0].implicit(),
                                        &beta,
                                        x_desc_vec4.get(),
                                        arg_vec4_x.implicit());
    if(status != miopenStatusSuccess)
38
    {
39
        MIGRAPHX_THROW("QUANT_CONVOLUTION: transform input tensor failed");
40
41
    }

42
    // pack input to vec4 format
43
    status = miopenTransformTensor(ctx.get_stream().get_miopen(),
Shucai Xiao's avatar
Shucai Xiao committed
44
45
46
47
48
49
50
                                   &alpha,
                                   w_desc.get(),
                                   args[1].implicit(),
                                   &beta,
                                   w_desc_vec4.get(),
                                   arg_vec4_w.implicit());
    if(status != miopenStatusSuccess)
51
    {
52
        MIGRAPHX_THROW("QUANT_CONVOLUTION: transform weight tensor failed");
53
54
    }

Shucai Xiao's avatar
Shucai Xiao committed
55
56
    status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                                      &alpha,
57
                                      x_desc_vec4.get(),
Shucai Xiao's avatar
Shucai Xiao committed
58
                                      arg_vec4_x.implicit(),
59
60
                                      w_desc_vec4.get(),
                                      arg_vec4_w.implicit(),
Shucai Xiao's avatar
Shucai Xiao committed
61
62
63
64
65
66
67
68
                                      cd.get(),
                                      algo,
                                      &beta,
                                      y_desc.get(),
                                      args[3].implicit(),
                                      args[2].implicit(),
                                      args[2].get_shape().bytes());
    if(status != miopenStatusSuccess)
69
70
71
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
    }
72
73
74

    // Add a conversion from float to int32_t
    device::convert(ctx.get_stream().get(), args[4], args[3]);
Shucai Xiao's avatar
Shucai Xiao committed
75

76
    return args[4];
77
78
79
}

shape miopen_quant_convolution::compile(context& ctx,
Shucai Xiao's avatar
Shucai Xiao committed
80
81
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
82
83
{
    shape workspace_shape{};
84
85
    auto x_desc = make_tensor(inputs[0], true);
    auto w_desc = make_tensor(inputs[1], true);
86
87
    shape tmp_output_shape{shape::float_type, output_shape.lens()};
    auto y_desc = make_tensor(tmp_output_shape);
88
89
90
91
92
93
94
95
96
97

    std::size_t workspace_size = 0;
    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                             w_desc.get(),
                                             x_desc.get(),
                                             cd.get(),
                                             y_desc.get(),
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

98
99
    arg_vec4_x     = to_gpu(generate_argument(pack_int8_shape(inputs[0])));
    arg_vec4_w     = to_gpu(generate_argument(pack_int8_shape(inputs[1])));
100
    auto y         = allocate_gpu(tmp_output_shape);
101
102
103
104
105
106
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                        x_desc.get(),
107
                                                        arg_vec4_x.implicit(),
108
                                                        w_desc.get(),
109
                                                        arg_vec4_w.implicit(),
110
111
112
113
114
115
116
117
118
119
                                                        cd.get(),
                                                        y_desc.get(),
                                                        y.implicit(),
                                                        1,
                                                        &algo_count,
                                                        &perf,
                                                        workspace.implicit(),
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
120
121
122
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: find convolution failed");
    }
123
124
125
126
127
128
    handle = ctx.get_stream().get_miopen();
    algo   = perf.fwd_algo;
    return shape{shape::int8_type, {perf.memory}};
}

void miopen_quant_convolution::finalize(context& ctx,
Shucai Xiao's avatar
Shucai Xiao committed
129
130
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
131
132
133
134
135
136
137
138
139
140
{
    if(handle == ctx.get_stream().get_miopen())
        return;
    // Check that workspace hasn't changed
    auto size = inputs.at(2).bytes();
    auto ws   = compile(ctx, output_shape, std::move(inputs));
    if(ws.bytes() > size)
        MIGRAPHX_THROW("Workspace has changed during finalization.");
}

141
142
shape miopen_quant_convolution::pack_int8_shape(shape& s)
{
Shucai Xiao's avatar
Shucai Xiao committed
143
    if(s.type() != shape::int8_type)
144
145
146
147
    {
        MIGRAPHX_THROW("PACK_INT8_SHAPE: only process int8_type");
    }

Shucai Xiao's avatar
Shucai Xiao committed
148
    auto lens    = s.lens();
149
    auto strides = s.strides();
Shucai Xiao's avatar
Shucai Xiao committed
150
151
    lens[1]      = (lens[1] + 3) / 4 * 4;
    strides[0]   = strides[1] * lens[1];
152
153
154
155

    return {s.type(), lens, strides};
}

156
157
158
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx