threadwise_tensor_op.cuh 4.62 KB
Newer Older
1
2
3
#pragma once
#include "constant_tensor_descriptor.cuh"

Chao Liu's avatar
Chao Liu committed
4
template <class TFloat, class Desc, class F>
Chao Liu's avatar
Chao Liu committed
5
__device__ void threadwise_4d_tensor_pointwise_op_unary(Desc, TFloat* __restrict__ p, F f)
6
{
Chao Liu's avatar
Chao Liu committed
7
8
9
10
    constexpr auto I0 = Number<0>{};
    constexpr auto I1 = Number<1>{};
    constexpr auto I2 = Number<2>{};
    constexpr auto I3 = Number<3>{};
11

Chao Liu's avatar
Chao Liu committed
12
    constexpr auto desc = Desc{};
13
14
15
16

#if 0
    if(threadIdx.x == 0)
    {
Chao Liu's avatar
Chao Liu committed
17
        print_ConstantTensorDescriptor(desc, "threadwise_4d_tensor_op_unary: ");
18
19
20
    }
#endif

Chao Liu's avatar
Chao Liu committed
21
    for(unsigned did0 = 0; did0 < desc.GetLength(I0); ++did0)
Chao Liu's avatar
Chao Liu committed
22
    {
Chao Liu's avatar
Chao Liu committed
23
        for(unsigned did1 = 0; did1 < desc.GetLength(I1); ++did1)
Chao Liu's avatar
Chao Liu committed
24
        {
Chao Liu's avatar
Chao Liu committed
25
            for(unsigned did2 = 0; did2 < desc.GetLength(I2); ++did2)
Chao Liu's avatar
Chao Liu committed
26
            {
Chao Liu's avatar
Chao Liu committed
27
                for(unsigned did3 = 0; did3 < desc.GetLength(I3); ++did3)
Chao Liu's avatar
Chao Liu committed
28
                {
Chao Liu's avatar
Chao Liu committed
29
                    const unsigned dindex = desc.Get1dIndex(did0, did1, did2, did3);
Chao Liu's avatar
Chao Liu committed
30

Chao Liu's avatar
Chao Liu committed
31
                    f(p[dindex]);
Chao Liu's avatar
Chao Liu committed
32
33
34
35
36
37
38
                }
            }
        }
    }
}

template <class TFloat, class SrcDesc, class DstDesc, class F>
Chao Liu's avatar
Chao Liu committed
39
__device__ void threadwise_4d_tensor_pointwise_op_binary(
Chao Liu's avatar
Chao Liu committed
40
41
    SrcDesc, TFloat* const __restrict__ p_src, DstDesc, TFloat* __restrict__ p_dst, F f)
{
Chao Liu's avatar
Chao Liu committed
42
43
44
45
    constexpr auto I0 = Number<0>{};
    constexpr auto I1 = Number<1>{};
    constexpr auto I2 = Number<2>{};
    constexpr auto I3 = Number<3>{};
Chao Liu's avatar
Chao Liu committed
46
47
48
49
50
51
52
53
54

    constexpr auto src_desc = SrcDesc{};
    constexpr auto dst_desc = DstDesc{};

    static_assert(is_same<decltype(src_desc.GetLengths()), decltype(dst_desc.GetLengths())>::value);

#if 0
    if(threadIdx.x == 0)
    {
Chao Liu's avatar
Chao Liu committed
55
56
        print_ConstantTensorDescriptor(src_desc, "threadwise_4d_tensor_op_binary: src_desc: ");
        print_ConstantTensorDescriptor(dst_desc, "threadwise_4d_tensor_op_binary: dst_desc: ");
Chao Liu's avatar
Chao Liu committed
57
58
59
60
61
62
63
64
65
66
67
    }
#endif

    for(unsigned did0 = 0; did0 < src_desc.GetLength(I0); ++did0)
    {
        for(unsigned did1 = 0; did1 < src_desc.GetLength(I1); ++did1)
        {
            for(unsigned did2 = 0; did2 < src_desc.GetLength(I2); ++did2)
            {
                for(unsigned did3 = 0; did3 < src_desc.GetLength(I3); ++did3)
                {
Chao Liu's avatar
Chao Liu committed
68
                    const unsigned sindex = src_desc.Get1dIndex(did0, did1, did2, did3);
Chao Liu's avatar
Chao Liu committed
69

Chao Liu's avatar
Chao Liu committed
70
                    const unsigned dindex = dst_desc.Get1dIndex(did0, did1, did2, did3);
Chao Liu's avatar
Chao Liu committed
71
72

                    f(p_src[sindex], p_dst[dindex]);
73
74
75
76
77
                }
            }
        }
    }
}
Chao Liu's avatar
Chao Liu committed
78

Chao Liu's avatar
Chao Liu committed
79
template <class TFloat, class Desc>
Chao Liu's avatar
Chao Liu committed
80
__device__ void threadwise_4d_tensor_set_zero(Desc, TFloat* __restrict__ p)
Chao Liu's avatar
Chao Liu committed
81
{
Chao Liu's avatar
Chao Liu committed
82
    auto f_set_zero = [](TFloat& v) { v = TFloat(0); };
Chao Liu's avatar
Chao Liu committed
83

Chao Liu's avatar
Chao Liu committed
84
    threadwise_4d_tensor_pointwise_op_unary<TFloat, Desc, decltype(f_set_zero)>(
Chao Liu's avatar
Chao Liu committed
85
        Desc{}, p, f_set_zero);
Chao Liu's avatar
Chao Liu committed
86
}
Chao Liu's avatar
Chao Liu committed
87

Chao Liu's avatar
Chao Liu committed
88
89
90
91
92
93
94
template <class TFloat, class SrcDesc, class DstDesc>
__device__ void threadwise_4d_tensor_copy(SrcDesc,
                                          TFloat* const __restrict__ p_src,
                                          DstDesc,
                                          TFloat* __restrict__ p_dst)
{
    auto f_copy = [](const TFloat& src, TFloat& dst) { dst = src; };
Chao Liu's avatar
Chao Liu committed
95

Chao Liu's avatar
Chao Liu committed
96
97
    threadwise_4d_tensor_pointwise_op_binary<TFloat, SrcDesc, DstDesc, decltype(f_copy)>(
        SrcDesc{}, p_src, DstDesc{}, p_dst, f_copy);
Chao Liu's avatar
Chao Liu committed
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
}

template <class TFloat, class Desc, class IDim>
__device__ void threadwise_4d_tensor_shift_down(Desc, TFloat* __restrict__ p, IDim, unsigned shift)
{
    constexpr auto I0 = Number<0>{};
    constexpr auto I1 = Number<1>{};
    constexpr auto I2 = Number<2>{};
    constexpr auto I3 = Number<3>{};

    constexpr auto desc = Desc{};

#if 0
    if(threadIdx.x == 0)
    {
        print_ConstantTensorDescriptor(desc, "threadwise_4d_tensor_shift_down: ");
    }
#endif

    const unsigned did0_end =
        is_same<decltype(I0), IDim>::value ? desc.GetLength(I0) - shift : desc.GetLength(I0);
Chao Liu's avatar
Chao Liu committed
119

Chao Liu's avatar
Chao Liu committed
120
121
    const unsigned did1_end =
        is_same<decltype(I1), IDim>::value ? desc.GetLength(I1) - shift : desc.GetLength(I1);
Chao Liu's avatar
Chao Liu committed
122

Chao Liu's avatar
Chao Liu committed
123
124
    const unsigned did2_end =
        is_same<decltype(I2), IDim>::value ? desc.GetLength(I2) - shift : desc.GetLength(I2);
Chao Liu's avatar
Chao Liu committed
125

Chao Liu's avatar
Chao Liu committed
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
    const unsigned did3_end =
        is_same<decltype(I3), IDim>::value ? desc.GetLength(I3) - shift : desc.GetLength(I3);

    for(unsigned did0 = 0; did0 < did0_end; ++did0)
    {
        for(unsigned did1 = 0; did1 < did1_end; ++did1)
        {
            for(unsigned did2 = 0; did2 < did2_end; ++did2)
            {
                for(unsigned did3 = 0; did3 < did3_end; ++did3)
                {
                    const unsigned dindex = desc.Get1dIndex(did0, did1, did2, did3);

                    const unsigned sindex = dindex + shift * desc.GetStride(IDim{});

                    p[dindex] = p[sindex];
                }
            }
        }
    }
Chao Liu's avatar
Chao Liu committed
146
}