// conv_common.hpp
// Convolution helpers: tensor layout identifiers, output tensor descriptor, and FLOP count.
#ifndef CONV_COMMON_HPP
#define CONV_COMMON_HPP
#include "dynamic_tensor_descriptor.hpp"

// Identifiers for the tensor layouts a convolution operand may use.
// The values are the same 0..4 sequence the compiler would assign implicitly;
// they are spelled out so the numbering is visible at a glance.
// NOTE(review): each name presumably spells the dimension order (N = batch,
// C = channel, H/W = spatial), and the trailing lowercase 'c' presumably marks
// a blocked channel dimension -- confirm against the code that consumes this enum.
enum ConvTensorLayout
{
    NCHW  = 0,
    NHWC  = 1,
    CHWN  = 2,
    NCHWc = 3,
    NHWCc = 4
};
// Builds the descriptor of a 2-d convolution's output tensor.
//
// Lengths are read by position:
//   - in_desc  dims 0, 2, 3 are used as N, Hi, Wi;
//   - wei_desc dims 0, 2, 3 are used as K, Y, X;
//   - dim 1 of in_desc and wei_desc must be equal (checked with assert).
//
// conv_strides, conv_dilations, left_pads and right_pads are each indexed with
// Number<0> for the H direction and Number<1> for the W direction.
//
// Returns make_dynamic_naive_tensor_descriptor_packed_v2 applied to the lengths
// (N, K, Ho, Wo), where
//   Ho = (Hi + LeftPadH + RightPadH - YEff) / stride_h + 1,  YEff = (Y - 1) * dilation_h + 1
//   Wo = (Wi + LeftPadW + RightPadW - XEff) / stride_w + 1,  XEff = (X - 1) * dilation_w + 1
//
// The checks below use assert, so they are compiled out when NDEBUG is defined.
template <typename... InDesc,
          typename... WeiDesc,
          typename ConvStrides,
          typename ConvDilations,
          typename LeftPads,
          typename RightPads>
constexpr auto get_convolution_output_default_4d_tensor_descriptor(
    const ck::DynamicTensorDescriptor<InDesc...>& in_desc,
    const ck::DynamicTensorDescriptor<WeiDesc...>& wei_desc,
    const ConvStrides& conv_strides,
    const ConvDilations& conv_dilations,
    const LeftPads& left_pads,
    const RightPads& right_pads)
{
    using namespace ck;

    // The constant 1 is kept as Number<1> (I1) in the arithmetic below rather
    // than a plain integer literal, so operand types match the original expressions.
    constexpr auto I0 = Number<0>{};
    constexpr auto I1 = Number<1>{};
    constexpr auto I2 = Number<2>{};
    constexpr auto I3 = Number<3>{};

    // Both descriptors must be 4-d and agree on dimension 1.
    assert(in_desc.GetNumOfDimension() == 4);
    assert(wei_desc.GetNumOfDimension() == 4);
    assert(in_desc.GetLength(I1) == wei_desc.GetLength(I1));

    // Input lengths.
    const auto N  = in_desc.GetLength(I0);
    const auto Hi = in_desc.GetLength(I2);
    const auto Wi = in_desc.GetLength(I3);

    // Weight (filter) lengths.
    const auto K = wei_desc.GetLength(I0);
    const auto Y = wei_desc.GetLength(I2);
    const auto X = wei_desc.GetLength(I3);

    const auto LeftPadH = left_pads[I0];
    const auto LeftPadW = left_pads[I1];

    const auto RightPadH = right_pads[I0];
    const auto RightPadW = right_pads[I1];

    // Effective filter extent once dilation is applied.
    const auto YEff = (Y - I1) * conv_dilations[I0] + I1;
    const auto XEff = (X - I1) * conv_dilations[I1] + I1;

    // Number of filter positions along each spatial direction.
    const auto Ho = (Hi + LeftPadH + RightPadH - YEff) / conv_strides[I0] + I1;
    const auto Wo = (Wi + LeftPadW + RightPadW - XEff) / conv_strides[I1] + I1;

    return make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(N, K, Ho, Wo));
}

template <class InDesc, class WeiDesc, class OutDesc>
Chao Liu's avatar
Chao Liu committed
64
constexpr std::size_t
Chao Liu's avatar
tidy  
Chao Liu committed
65
calculate_convolution_flops(const InDesc&, const WeiDesc& wei_desc, const OutDesc& out_desc)
Chao Liu's avatar
Chao Liu committed
66
{
Chao Liu's avatar
Chao Liu committed
67
68
    using namespace ck;

Chao Liu's avatar
Chao Liu committed
69
70
71
72
73
    constexpr auto I0 = Number<0>{};
    constexpr auto I1 = Number<1>{};
    constexpr auto I2 = Number<2>{};
    constexpr auto I3 = Number<3>{};

Chao Liu's avatar
Chao Liu committed
74
75
76
77
    const index_t N  = out_desc.GetLength(I0);
    const index_t K  = out_desc.GetLength(I1);
    const index_t Ho = out_desc.GetLength(I2);
    const index_t Wo = out_desc.GetLength(I3);
Chao Liu's avatar
Chao Liu committed
78

Chao Liu's avatar
Chao Liu committed
79
80
81
    const index_t C = wei_desc.GetLength(I1);
    const index_t Y = wei_desc.GetLength(I2);
    const index_t X = wei_desc.GetLength(I3);
Chao Liu's avatar
Chao Liu committed
82
83
84

    return std::size_t(2) * N * K * Ho * Wo * C * Y * X;
}
#endif