im2col.cpp 8.39 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
#include "tile_program.hpp"

Chao Liu's avatar
Chao Liu committed
3
4
5
6
7
8
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/cluster_descriptor.hpp"
#include "ck/tensor/tensor.hpp"

#include "ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
Chao Liu's avatar
Chao Liu committed
9
10
11
12
13
14
15
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/host_utility/device_prop.hpp"

#include "ck/library/utility/device_memory.hpp"

// program
Chao Liu's avatar
Chao Liu committed
16
17
18
19
20
21
template <ck::index_t NDimSpatial,
          typename ALayout,
          typename T,
          // tuning parameter
          ck::index_t kMPerTile,
          ck::index_t kKPerTile>
Chao Liu's avatar
Chao Liu committed
22
23
24
25
struct Im2Col
{
    __host__ __device__ void
    operator()(TileProgram& tp,
Chao Liu's avatar
Chao Liu committed
26
27
28
29
30
31
32
33
34
35
               const std::array<ck::index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths,
               const std::array<ck::index_t, NDimSpatial + 3>& a_g_n_c_wis_strides,
               const std::array<ck::index_t, NDimSpatial + 3>& b_g_k_c_xs_lengths,
               const std::array<ck::index_t, NDimSpatial + 3>& b_g_k_c_xs_strides,
               const std::array<ck::index_t, NDimSpatial + 3>& c_g_n_k_wos_lengths,
               const std::array<ck::index_t, NDimSpatial + 3>& c_g_n_k_wos_strides,
               const std::array<ck::index_t, NDimSpatial>& conv_filter_strides,
               const std::array<ck::index_t, NDimSpatial>& conv_filter_dilations,
               const std::array<ck::index_t, NDimSpatial>& input_left_pads,
               const std::array<ck::index_t, NDimSpatial>& input_right_pads,
Chao Liu's avatar
Chao Liu committed
36
               //
Chao Liu's avatar
Chao Liu committed
37
38
               const std::array<ck::index_t, 3> a_gemmg_gemmm_gemmk_lengths,
               const std::array<ck::index_t, 3> a_gemmg_gemmm_gemmk_strides,
Chao Liu's avatar
Chao Liu committed
39
40
41
42
43
44
               //
               const T* p_a_img,
               T* p_a_mtx)
    {
        using namespace ck;

Chao Liu's avatar
Chao Liu committed
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
        constexpr auto I0 = Number<0>{};
        constexpr auto I1 = Number<1>{};

        const auto a_src_desc = tensor_operation::TransformConvFwdToGemm<
            NDimSpatial,
            tensor_operation::device::ConvolutionForwardSpecialization::Default>::
            template MakeADescriptor_M_K<ALayout>(a_g_n_c_wis_lengths,
                                                  a_g_n_c_wis_strides,
                                                  b_g_k_c_xs_lengths,
                                                  b_g_k_c_xs_strides,
                                                  c_g_n_k_wos_lengths,
                                                  c_g_n_k_wos_strides,
                                                  conv_filter_strides,
                                                  conv_filter_dilations,
                                                  input_left_pads,
                                                  input_right_pads);
Chao Liu's avatar
Chao Liu committed
61
62

        const auto a_dst_desc =
Chao Liu's avatar
Chao Liu committed
63
64
65
66
67
68
            make_naive_tensor_descriptor(make_tuple(a_gemmg_gemmm_gemmk_lengths[0],
                                                    a_gemmg_gemmm_gemmk_lengths[1],
                                                    a_gemmg_gemmm_gemmk_lengths[2]),
                                         make_tuple(a_gemmg_gemmm_gemmk_strides[0],
                                                    a_gemmg_gemmm_gemmk_strides[1],
                                                    a_gemmg_gemmm_gemmk_strides[2]));
Chao Liu's avatar
Chao Liu committed
69

Chao Liu's avatar
Chao Liu committed
70
        const auto a_src = tp(make_tensor<AddressSpaceEnum::Global, true>(a_src_desc, p_a_img));
Chao Liu's avatar
Chao Liu committed
71

Chao Liu's avatar
Chao Liu committed
72
        auto a_dst = tp(make_tensor<AddressSpaceEnum::Global, true>(a_dst_desc, p_a_mtx));
Chao Liu's avatar
Chao Liu committed
73

Chao Liu's avatar
Chao Liu committed
74
75
76
        const auto num_gemmg = a_gemmg_gemmm_gemmk_lengths[0];
        const auto num_gemmm = a_gemmg_gemmm_gemmk_lengths[1];
        const auto num_gemmk = a_gemmg_gemmm_gemmk_lengths[2];
Chao Liu's avatar
Chao Liu committed
77

Chao Liu's avatar
Chao Liu committed
78
        const auto id_block = tp.get_block_1d_id();
Chao Liu's avatar
Chao Liu committed
79

Chao Liu's avatar
Chao Liu committed
80
81
        const auto num_tile_m = num_gemmm / kMPerTile;
        const auto num_tile_k = num_gemmk / kKPerTile;
Chao Liu's avatar
Chao Liu committed
82
83
84

        const auto block2tile = tp(make_cluster_descriptor(make_tuple(num_tile_m, num_tile_k)));

Chao Liu's avatar
Chao Liu committed
85
        const auto id_tile = block2tile.CalculateBottomIndex(make_tuple(id_block));
Chao Liu's avatar
Chao Liu committed
86

Chao Liu's avatar
Chao Liu committed
87
88
        const auto id_tile_m = id_tile[I0];
        const auto id_tile_k = id_tile[I1];
Chao Liu's avatar
Chao Liu committed
89

Chao Liu's avatar
Chao Liu committed
90
#if 1
Chao Liu's avatar
Chao Liu committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
        // data-based syntax: per-data solution strategy
        auto window_a_src = make_window(a_src,
                                        make_tuple(1, MPerTile, KPerTile),
                                        make_tuple(0, id_tile_m * MPerTile, id_tile_k * KPerTile),
                                        a_src_window_map_strategy);

        auto window_a_dst = make_window(a_dst,
                                        make_tuple(1, MPerTile, KPerTile),
                                        make_tuple(0, id_tile_m * MPerTile, id_tile_k * KPerTile),
                                        a_dst_window_map_strategy);

        for(ck::index_t id_gemmg = 0; id_gemmg < num_gemmg; id_gemmg++)
        {
            copy(window_a_src, window_a_dst, a_copy_strategy);

            window_a_src += make_tuple(1, 0, 0);
            window_a_dst += make_tuple(1, 0, 0);
        }
Chao Liu's avatar
Chao Liu committed
109
#else
Chao Liu's avatar
Chao Liu committed
110
111
112
113
114
115
#endif
    }
};

int main()
{
Chao Liu's avatar
Chao Liu committed
116
117
118
119
120
121
122
123
124
125
126
127
128
129
    using DataType = float;

    constexpr ck::index_t NumDimSpatial = 2;

    ck::index_t G  = 32;
    ck::index_t N  = 256;
    ck::index_t K  = 192;
    ck::index_t C  = 192;
    ck::index_t Y  = 3;
    ck::index_t X  = 3;
    ck::index_t Hi = 28;
    ck::index_t Wi = 28;
    ck::index_t Ho = 28;
    ck::index_t Wo = 28;
Chao Liu's avatar
Chao Liu committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

    std::array<ck::index_t, NumDimSpatial + 3> in_lengths{G, N, Hi, Wi, C};
    std::array<ck::index_t, NumDimSpatial + 3> in_strides{0, 0, 0, 0, 1};

    std::array<ck::index_t, NumDimSpatial + 3> wei_lengths{G, K, Y, X, C};
    std::array<ck::index_t, NumDimSpatial + 3> wei_strides{0, 0, 0, 0, 1};

    std::array<ck::index_t, NumDimSpatial + 3> out_lengths{G, N, Ho, Wo, K};
    std::array<ck::index_t, NumDimSpatial + 3> out_strides{0, 0, 0, 0, 1};

    std::partial_sum(rbegin(in_lengths),
                     std::prev(rend(in_lengths)),
                     std::next(rbegin(in_strides)),
                     std::multiplies<>{});
    std::partial_sum(rbegin(wei_lengths),
                     std::prev(rend(wei_lengths)),
                     std::next(rbegin(wei_strides)),
                     std::multiplies<>{});
    std::partial_sum(rbegin(out_lengths),
                     std::prev(rend(out_lengths)),
                     std::next(rbegin(out_strides)),
                     std::multiplies<>{});

    // transpose GNHWC/GKYXC/GNHWK to GNCHW/GKCYX/GNCHW
    std::rotate(
        rbegin(in_lengths), std::next(rbegin(in_lengths)), std::next(rbegin(in_lengths), 3));
    std::rotate(
        rbegin(in_strides), std::next(rbegin(in_strides)), std::next(rbegin(in_strides), 3));
    std::rotate(
        rbegin(wei_lengths), std::next(rbegin(wei_lengths)), std::next(rbegin(wei_lengths), 3));
    std::rotate(
        rbegin(wei_strides), std::next(rbegin(wei_strides)), std::next(rbegin(wei_strides), 3));
    std::rotate(
        rbegin(out_lengths), std::next(rbegin(out_lengths)), std::next(rbegin(out_lengths), 3));
    std::rotate(
        rbegin(out_strides), std::next(rbegin(out_strides)), std::next(rbegin(out_strides), 3));

    std::array<ck::index_t, NumDimSpatial> filter_strides{1, 1};
    std::array<ck::index_t, NumDimSpatial> filter_dilations{1, 1};
    std::array<ck::index_t, NumDimSpatial> input_left_pads{1, 1};
    std::array<ck::index_t, NumDimSpatial> input_right_pads{1, 1};

    // matrix
Chao Liu's avatar
Chao Liu committed
173
174
    std::array<ck::index_t, 3> in_mtx_lengths{G, G * Ho * Wo, C * Y * X};
    std::array<ck::index_t, 3> in_mtx_strides{0, 0, 1};
Chao Liu's avatar
Chao Liu committed
175
176
177
178
179
180

    std::partial_sum(rbegin(in_mtx_lengths),
                     std::prev(rend(in_mtx_lengths)),
                     std::next(rbegin(in_mtx_strides)),
                     std::multiplies<>{});

Chao Liu's avatar
Chao Liu committed
181
182
    DeviceMem in(sizeof(DataType) * G * N * Hi * Wi * C);
    DeviceMem in_mtx(sizeof(DataType) * G * N * Ho * Wo * C * Y * X);
Chao Liu's avatar
Chao Liu committed
183

Chao Liu's avatar
Chao Liu committed
184
    launch(Im2Col<2, ck::tensor_layout::convolution::GNHWC, float, 128, 128>{},
Chao Liu's avatar
Chao Liu committed
185
           1,
Chao Liu's avatar
Chao Liu committed
186
           256,
Chao Liu's avatar
Chao Liu committed
187
188
189
190
191
192
193
194
195
196
197
198
199
200
           in_lengths,
           in_strides,
           wei_lengths,
           wei_strides,
           out_lengths,
           out_strides,
           filter_strides,
           filter_dilations,
           input_left_pads,
           input_right_pads,
           //
           in_mtx_lengths,
           in_mtx_strides,
           //
Chao Liu's avatar
Chao Liu committed
201
202
           static_cast<DataType*>(in.GetDeviceBuffer()),
           static_cast<DataType*>(in_mtx.GetDeviceBuffer()));
Chao Liu's avatar
Chao Liu committed
203
204
205

    return 0;
}