"...git@developer.sourcefind.cn:OpenDAS/mmdetection3d.git" did not exist on "106b17e7bf6f64e527babbc7a2864eb93518a79c"
// threadwise_direct_convolution.cuh
#pragma once
#include "constant_tensor_descriptor.cuh"

// Direct convolution computed serially by the calling thread (plain nested
// loops; no cooperation with other threads and no synchronization).
//
// For every output coordinate (n, k, ho, wo) in the extents of OutDesc, and for
// every (c, s, r) in the extents of WeiDesc dimensions 1, 2 and 3, performs
//
//     p_out[out(n, k, ho, wo)] += p_wei[wei(k, c, s, r)] * p_in[in(n, c, ho + s, wo + r)]
//
// where in(), wei() and out() are the descriptors' Get1dIndex() mappings.
//
// Template parameters:
//   TFloat   element type of all three buffers.
//   InDesc, WeiDesc, OutDesc
//            descriptor types; they are passed as unnamed tag arguments and only
//            their types are used (each is default-constructed as constexpr).
//            They must provide GetLength(Index<N>) and a 4-argument Get1dIndex().
//
// Parameters:
//   p_in     input buffer, only read here.
//   p_wei    weight buffer, only read here.
//   p_out    output buffer; it is accumulated into (+=), never cleared, so the
//            caller decides its initial contents.
//
// Notes:
//   - The input coordinate is exactly (ho + s, wo + r): no stride, dilation or
//     padding is applied, and no bounds check is made against InDesc.
//   - NOTE(review): dimension order looks like (batch, channel, height, width)
//     for in/out and (out-channel, in-channel, height, width) for weights --
//     confirm against the descriptor definitions.
template <class TFloat, class InDesc, class WeiDesc, class OutDesc>
__device__ void threadwise_direct_convolution(InDesc,
                                              TFloat* const __restrict__ p_in,
                                              WeiDesc,
                                              TFloat* const __restrict__ p_wei,
                                              OutDesc,
                                              TFloat* __restrict__ p_out)
{
    constexpr auto I0 = Index<0>{};
    constexpr auto I1 = Index<1>{};
    constexpr auto I2 = Index<2>{};
    constexpr auto I3 = Index<3>{};

    constexpr auto in_desc  = InDesc{};
    constexpr auto wei_desc = WeiDesc{};
    constexpr auto out_desc = OutDesc{};

#if 0
    if(threadIdx.x == 0)
    {
        print_ConstantTensorDescriptor(in_desc, "threadwise_direct_convolution: ");
        print_ConstantTensorDescriptor(wei_desc, "threadwise_direct_convolution: ");
        print_ConstantTensorDescriptor(out_desc, "threadwise_direct_convolution: ");
    }
#endif

    for(unsigned n = 0; n < out_desc.GetLength(I0); ++n)
    {
        for(unsigned k = 0; k < out_desc.GetLength(I1); ++k)
        {
            for(unsigned ho = 0; ho < out_desc.GetLength(I2); ++ho)
            {
                for(unsigned wo = 0; wo < out_desc.GetLength(I3); ++wo)
                {
                    // The output offset depends only on (n, k, ho, wo), so it is
                    // computed once here instead of once per (c, s, r) iteration.
                    const unsigned out_index = out_desc.Get1dIndex(n, k, ho, wo);

                    for(unsigned c = 0; c < wei_desc.GetLength(I1); ++c)
                    {
                        for(unsigned s = 0; s < wei_desc.GetLength(I2); ++s)
                        {
                            for(unsigned r = 0; r < wei_desc.GetLength(I3); ++r)
                            {
                                // Input coordinate touched by filter tap (s, r).
                                const unsigned hi = ho + s;
                                const unsigned wi = wo + r;

                                const unsigned in_index = in_desc.Get1dIndex(n, c, hi, wi);

                                const unsigned wei_index = wei_desc.Get1dIndex(k, c, s, r);

                                // Accumulate in place, in (c, s, r) order.
                                p_out[out_index] += p_wei[wei_index] * p_in[in_index];

#if 0
                                //   if(threadIdx.x == 0)
                                {
                                    printf("threadwise_direct_convolution: \t"
                                           "threadIdx.x %u\t"
                                           "out_index %u, p_out[out_index] %f, \t"
                                           "wei_index %u, p_wei[wei_index] %f, \t"
                                           "in_index %u, p_in[in_index] %f\n",
                                           threadIdx.x,
                                           out_index,
                                           p_out[out_index],
                                           wei_index,
                                           p_wei[wei_index],
                                           in_index,
                                           p_in[in_index]);
                                }
#endif
                            }
                        }
                    }
                }
            }
        }
    }
}