tensor.hpp 7.59 KB
Newer Older
1
2
3
#ifndef CK_TENSOR_HPP
#define CK_TENSOR_HPP

Chao Liu's avatar
Chao Liu committed
4
5
6
#include <array>
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <numeric>
#include <stdexcept>
#include <string>
#include <thread>
#include <tuple>
#include <type_traits>
#include <vector>
Chao Liu's avatar
Chao Liu committed
7
#include <algorithm>
Chao Liu's avatar
Chao Liu committed
8
#include <utility>
Chao Liu's avatar
Chao Liu committed
9
10
#include <cassert>
#include <iostream>
Chao Liu's avatar
Chao Liu committed
11

Chao Liu's avatar
Chao Liu committed
12
// Streams every element of `range` to `os`, writing `delim` between consecutive
// elements (never before the first or after the last).
//
// os    - destination stream; each element must be streamable with operator<<.
// range - anything usable in a range-based for loop.
// delim - separator text.
// Returns `os` so calls can be chained. An empty range writes nothing.
template <class Range>
std::ostream& LogRange(std::ostream& os, Range&& range, std::string delim)
{
    bool need_delim = false;
    for(auto&& v : range)
    {
        // The separator is emitted before every element except the first one.
        if(need_delim)
            os << delim;
        need_delim = true;
        os << v;
    }
    return os;
}

Wen-Heng (Jack) Chung's avatar
Wen-Heng (Jack) Chung committed
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// Streams the elements of `range` at indices 0, stride, 2*stride, ... to `os`,
// writing `delim` between consecutive printed elements.
//
// os     - destination stream.
// range  - must provide size() and operator[].
// delim  - separator text.
// stride - index step between printed elements. A stride of 0 can never advance,
//          so it prints only element 0 (if any) instead of looping forever.
// Returns `os` so calls can be chained. An empty range writes nothing.
template <class Range>
std::ostream& LogRangeStrided(std::ostream& os, Range&& range, std::string delim, std::size_t stride)
{
    const std::size_t count = range.size();
    for(std::size_t idx = 0; idx < count; idx += stride)
    {
        // Index 0 is always the first printed element, so it gets no separator.
        if(idx != 0)
            os << delim;
        os << range[idx];

        // Guard against the non-terminating loop a zero stride would cause.
        if(stride == 0)
            break;
    }
    return os;
}

Chao Liu's avatar
Chao Liu committed
42
// Tag identifying the element type of a tensor.
// Kept as an unscoped enum so both `Float` and `DataType_t::Float` remain valid
// and the enumerators still convert implicitly to their integer values.
enum DataType_t
{
    Half  = 0,
    Float = 1,
};

// Trait mapping a C++ element type T to its DataType_t tag.
// Only the declaration of the primary template appears here; the mapping is
// supplied through explicit specializations such as the one below.
template <class T>
struct DataType;

// float maps to DataType_t::Float, i.e. DataType<float>::value == DataType_t::Float.
template <>
struct DataType<float> : std::integral_constant<DataType_t, DataType_t::Float>
{
};

Chao Liu's avatar
Chao Liu committed
55
56
57
58
59
60
61
62
63
// Helper for call_f_unpack_args: expands `args` element-by-element using the
// compile-time index pack `Is` and forwards the elements to `f`.
template <class F, class T, std::size_t... Is>
auto call_f_unpack_args_impl(F f, T args, std::index_sequence<Is...>)
{
    return f(std::get<Is>(args)...);
}

// Calls `f` with the elements of the tuple-like `args` as individual arguments.
//
// f    - callable accepting std::tuple_size<T>::value arguments.
// args - any type supporting std::tuple_size and std::get (std::tuple,
//        std::array, std::pair, ...).
// Returns whatever `f` returns.
template <class F, class T>
auto call_f_unpack_args(F f, T args)
{
    constexpr std::size_t N = std::tuple_size<T>::value;

    return call_f_unpack_args_impl(f, args, std::make_index_sequence<N>{});
}

// Helper for construct_f_unpack_args: constructs an F from the elements of
// `args`, expanded via the compile-time index pack `Is`.
template <class F, class T, std::size_t... Is>
auto construct_f_unpack_args_impl(T args, std::index_sequence<Is...>)
{
    return F(std::get<Is>(args)...);
}

// Constructs an object of type F, passing the elements of the tuple-like `args`
// as individual constructor arguments.
//
// The first (unnamed) parameter is only used to deduce F; its value is ignored.
// args - any type supporting std::tuple_size and std::get.
// Returns the newly constructed F by value.
template <class F, class T>
auto construct_f_unpack_args(F, T args)
{
    constexpr std::size_t N = std::tuple_size<T>::value;

    return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
}

Chao Liu's avatar
Chao Liu committed
83
84
85
// Describes the layout of a tensor through two parallel vectors: the length of
// each dimension (mLens) and the stride of each dimension (mStrides).
//
// The non-template members are only declared here; their definitions live
// outside this header.
struct TensorDescriptor
{
    // A descriptor always needs at least the dimension lengths.
    TensorDescriptor() = delete;

    // Lengths only / lengths plus explicit strides.
    TensorDescriptor(std::initializer_list<std::size_t> lens);
    TensorDescriptor(std::initializer_list<std::size_t> lens,
                     std::initializer_list<std::size_t> strides);
    TensorDescriptor(std::vector<std::size_t> lens, std::vector<std::size_t> strides);

    // Fills mStrides from mLens (defined out of line).
    void CalculateStrides();

    // Builds a descriptor from any range of lengths exposing begin()/end();
    // the strides are then derived by CalculateStrides().
    template <class Range>
    TensorDescriptor(const Range& lens) : mLens(lens.begin(), lens.end())
    {
        this->CalculateStrides();
    }

    // Builds a descriptor from arbitrary ranges of lengths and strides.
    // The strides are stored exactly as given; nothing is recomputed.
    template <class Range1, class Range2>
    TensorDescriptor(const Range1& lens, const Range2& strides)
        : mLens(lens.begin(), lens.end()), mStrides(strides.begin(), strides.end())
    {
    }

    std::size_t GetNumOfDimension() const;
    std::size_t GetElementSize() const;
    std::size_t GetElementSpace() const;

    const std::vector<std::size_t>& GetLengths() const;
    const std::vector<std::size_t>& GetStrides() const;

    // Converts a multi-index (one index per dimension) into a linear offset:
    // the sum over all dimensions of index * stride.
    //
    // The number of indices must equal GetNumOfDimension(). This is only
    // checked by assert, i.e. not at all when NDEBUG is defined.
    template <class... Is>
    std::size_t GetOffsetFromMultiIndex(Is... is) const
    {
        assert(sizeof...(Is) == this->GetNumOfDimension());

        // Normalise all index types to std::size_t before the dot product.
        std::initializer_list<std::size_t> iss{static_cast<std::size_t>(is)...};
        return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0});
    }

    private:
    std::vector<std::size_t> mLens;
    std::vector<std::size_t> mStrides;
};

Chao Liu's avatar
Chao Liu committed
125
// A std::thread that joins automatically instead of calling std::terminate
// when it is destroyed (or overwritten) while still joinable.
//
// Note: the destructor is not virtual, so do not destroy a joinable_thread
// through a pointer/reference to std::thread.
struct joinable_thread : std::thread
{
    // Forwards all arguments to the std::thread constructor. With no arguments
    // this yields a thread object that does not represent a thread.
    template <class... Xs>
    joinable_thread(Xs&&... xs) : std::thread(std::forward<Xs>(xs)...)
    {
    }

    joinable_thread(joinable_thread&&) = default;

    // std::thread's own move-assignment terminates the program when the target
    // is still joinable; wait for the currently owned thread first so that
    // replacing a running worker is safe.
    joinable_thread& operator=(joinable_thread&& other) noexcept
    {
        if(this != &other)
        {
            if(this->joinable())
                this->join();
            std::thread::operator=(std::move(other));
        }
        return *this;
    }

    // Blocks until the owned thread (if any) has finished.
    ~joinable_thread()
    {
        if(this->joinable())
            this->join();
    }
};
Chao Liu's avatar
Chao Liu committed
141
142
143
144
145

template <class F, class... Xs>
struct ParallelTensorFunctor
{
    F mF;
Chao Liu's avatar
Chao Liu committed
146
    static constexpr std::size_t NDIM = sizeof...(Xs);
Chao Liu's avatar
Chao Liu committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
    std::array<std::size_t, NDIM> mLens;
    std::array<std::size_t, NDIM> mStrides;
    std::size_t mN1d;

    ParallelTensorFunctor(F f, Xs... xs) : mF(f), mLens({static_cast<std::size_t>(xs)...})
    {
        mStrides.back() = 1;
        std::partial_sum(mLens.rbegin(),
                         mLens.rend() - 1,
                         mStrides.rbegin() + 1,
                         std::multiplies<std::size_t>());
        mN1d = mStrides[0] * mLens[0];
    }

Chao Liu's avatar
Chao Liu committed
161
162
163
164
165
166
167
168
169
170
171
172
173
    std::array<std::size_t, NDIM> GetNdIndices(std::size_t i) const
    {
        std::array<std::size_t, NDIM> indices;

        for(int idim = 0; idim < NDIM; ++idim)
        {
            indices[idim] = i / mStrides[idim];
            i -= indices[idim] * mStrides[idim];
        }

        return indices;
    }

Chao Liu's avatar
Chao Liu committed
174
    void operator()(std::size_t num_thread) const
Chao Liu's avatar
Chao Liu committed
175
176
177
178
179
180
181
182
    {
        std::size_t work_per_thread = (mN1d + num_thread - 1) / num_thread;

        std::vector<joinable_thread> threads(num_thread);

        for(std::size_t it = 0; it < num_thread; ++it)
        {
            std::size_t iw_begin = it * work_per_thread;
Chao Liu's avatar
Chao Liu committed
183
            std::size_t iw_end   = std::min((it + 1) * work_per_thread, mN1d);
Chao Liu's avatar
Chao Liu committed
184
185
186

            auto f = [=] {
                for(std::size_t iw = iw_begin; iw < iw_end; ++iw)
Chao Liu's avatar
Chao Liu committed
187
188
189
                {
                    call_f_unpack_args(mF, GetNdIndices(iw));
                }
Chao Liu's avatar
Chao Liu committed
190
191
192
193
194
195
            };
            threads[it] = joinable_thread(f);
        }
    }
};

Chao Liu's avatar
Chao Liu committed
196
197
template <class F, class... Xs>
auto make_ParallelTensorFunctor(F f, Xs... xs)
Chao Liu's avatar
Chao Liu committed
198
{
Chao Liu's avatar
Chao Liu committed
199
    return ParallelTensorFunctor<F, Xs...>(f, xs...);
Chao Liu's avatar
Chao Liu committed
200
201
}

Chao Liu's avatar
Chao Liu committed
202
203
template <class T>
struct Tensor
Chao Liu's avatar
Chao Liu committed
204
{
Chao Liu's avatar
Chao Liu committed
205
    template <class X>
Chao Liu's avatar
Chao Liu committed
206
    Tensor(std::initializer_list<X> lens) : mDesc(lens), mData(mDesc.GetElementSpace())
Chao Liu's avatar
Chao Liu committed
207
208
    {
    }
Chao Liu's avatar
Chao Liu committed
209

Chao Liu's avatar
Chao Liu committed
210
    template <class X>
Chao Liu's avatar
Chao Liu committed
211
    Tensor(std::vector<X> lens) : mDesc(lens), mData(mDesc.GetElementSpace())
Chao Liu's avatar
Chao Liu committed
212
213
    {
    }
Chao Liu's avatar
Chao Liu committed
214

Chao Liu's avatar
Chao Liu committed
215
216
    template <class X, class Y>
    Tensor(std::vector<X> lens, std::vector<Y> strides)
Chao Liu's avatar
Chao Liu committed
217
        : mDesc(lens, strides), mData(mDesc.GetElementSpace())
Chao Liu's avatar
Chao Liu committed
218
219
    {
    }
Chao Liu's avatar
Chao Liu committed
220

Chao Liu's avatar
Chao Liu committed
221
222
    Tensor(const TensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpace()) {}

Chao Liu's avatar
Chao Liu committed
223
224
225
    template <class G>
    void GenerateTensorValue(G g, std::size_t num_thread = 1)
    {
Chao Liu's avatar
Chao Liu committed
226
        switch(mDesc.GetNumOfDimension())
Chao Liu's avatar
Chao Liu committed
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
        {
        case 1:
        {
            auto f = [&](auto i) { (*this)(i) = g(i); };
            make_ParallelTensorFunctor(f, mDesc.GetLengths()[0])(num_thread);
            break;
        }
        case 2:
        {
            auto f = [&](auto i0, auto i1) { (*this)(i0, i1) = g(i0, i1); };
            make_ParallelTensorFunctor(f, mDesc.GetLengths()[0], mDesc.GetLengths()[1])(num_thread);
            break;
        }
        case 3:
        {
            auto f = [&](auto i0, auto i1, auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
            make_ParallelTensorFunctor(
                f, mDesc.GetLengths()[0], mDesc.GetLengths()[1], mDesc.GetLengths()[2])(num_thread);
            break;
        }
        case 4:
        {
            auto f = [&](auto i0, auto i1, auto i2, auto i3) {
                (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
            };
            make_ParallelTensorFunctor(f,
                                       mDesc.GetLengths()[0],
                                       mDesc.GetLengths()[1],
                                       mDesc.GetLengths()[2],
                                       mDesc.GetLengths()[3])(num_thread);
            break;
        }
        default: throw std::runtime_error("unspported dimension");
        }
    }

    template <class... Is>
    T& operator()(Is... is)
    {
266
        return mData[mDesc.GetOffsetFromMultiIndex(is...)];
Chao Liu's avatar
Chao Liu committed
267
268
269
270
271
    }

    template <class... Is>
    const T& operator()(Is... is) const
    {
272
        return mData[mDesc.GetOffsetFromMultiIndex(is...)];
Chao Liu's avatar
Chao Liu committed
273
274
275
276
277
278
279
280
281
282
283
284
285
    }

    typename std::vector<T>::iterator begin() { return mData.begin(); }

    typename std::vector<T>::iterator end() { return mData.end(); }

    typename std::vector<T>::const_iterator begin() const { return mData.begin(); }

    typename std::vector<T>::const_iterator end() const { return mData.end(); }

    TensorDescriptor mDesc;
    std::vector<T> mData;
};
286
287

#endif