"host/online_compile/hip_utility/binary_cache.cpp" did not exist on "f6edda6119ebbb237dfa6270797b34f960d7b190"
Array.hpp 11.9 KB
Newer Older
1
2
3
#ifndef CK_ARRAY_HPP
#define CK_ARRAY_HPP

Chao Liu's avatar
Chao Liu committed
4
5
#include "Sequence.hpp"
#include "functional2.hpp"
6

7
8
namespace ck {

Chao Liu's avatar
Chao Liu committed
9
template <class TData, index_t NSize>
10
11
struct Array
{
Chao Liu's avatar
Chao Liu committed
12
13
    using Type      = Array<TData, NSize>;
    using data_type = TData;
14

Chao Liu's avatar
Chao Liu committed
15
    static constexpr index_t nSize = NSize;
16

Chao Liu's avatar
Chao Liu committed
17
    index_t mData[nSize];
18
19

    template <class... Xs>
Chao Liu's avatar
Chao Liu committed
20
    __host__ __device__ constexpr Array(Xs... xs) : mData{static_cast<TData>(xs)...}
21
22
23
    {
    }

Chao Liu's avatar
Chao Liu committed
24
    __host__ __device__ static constexpr index_t GetSize() { return NSize; }
25

Chao Liu's avatar
Chao Liu committed
26
27
28
29
30
31
    template <index_t I>
    __host__ __device__ constexpr TData operator[](Number<I>) const
    {
        return mData[I];
    }

Chao Liu's avatar
Chao Liu committed
32
    __host__ __device__ constexpr TData operator[](index_t i) const { return mData[i]; }
33

Chao Liu's avatar
Chao Liu committed
34
35
36
37
38
39
40
    template <index_t I>
    __host__ __device__ TData& operator()(Number<I>)
    {
        return mData[I];
    }

    __host__ __device__ TData& operator()(index_t i) { return mData[i]; }
41

Chao Liu's avatar
Chao Liu committed
42
    template <index_t I>
Chao Liu's avatar
Chao Liu committed
43
    __host__ __device__ constexpr void Set(Number<I>, TData x)
Chao Liu's avatar
Chao Liu committed
44
    {
Chao Liu's avatar
Chao Liu committed
45
46
        static_assert(I < NSize, "wrong!");

Chao Liu's avatar
Chao Liu committed
47
48
49
        mData[I] = x;
    }

Chao Liu's avatar
Chao Liu committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
    __host__ __device__ constexpr void Set(index_t I, TData x) { mData[I] = x; }

    struct lambda_PushBack // emulate constexpr lambda
    {
        const Array<TData, NSize>& old_array;
        Array<TData, NSize + 1>& new_array;

        __host__ __device__ constexpr lambda_PushBack(const Array<TData, NSize>& old_array_,
                                                      Array<TData, NSize + 1>& new_array_)
            : old_array(old_array_), new_array(new_array_)
        {
        }

        template <index_t I>
        __host__ __device__ constexpr void operator()(Number<I>) const
        {
            new_array.Set(Number<I>{}, old_array[I]);
        }
    };

Chao Liu's avatar
Chao Liu committed
70
    __host__ __device__ constexpr auto PushBack(TData x) const
71
72
73
    {
        Array<TData, NSize + 1> new_array;

Chao Liu's avatar
Chao Liu committed
74
        static_for<0, NSize, 1>{}(lambda_PushBack(*this, new_array));
75

Chao Liu's avatar
Chao Liu committed
76
        new_array.Set(Number<NSize>{}, x);
77
78
79

        return new_array;
    }
80
};
81

Chao Liu's avatar
Chao Liu committed
82
83
84
// A: Array
// Picks: Sequence<...>
template <class Arr, class Picks>
Chao Liu's avatar
Chao Liu committed
85
struct ArrayElementPicker
Chao Liu's avatar
Chao Liu committed
86
{
Chao Liu's avatar
Chao Liu committed
87
88
89
    using data_type = typename Arr::data_type;

    __host__ __device__ constexpr ArrayElementPicker(Arr& array) : mData{array}
Chao Liu's avatar
Chao Liu committed
90
91
92
93
94
95
96
97
98
99
    {
        constexpr index_t imax =
            accumulate_on_sequence(Picks{}, math::maxer<index_t>{}, Number<0>{});

        static_assert(imax < Picks::GetSize(), "wrong! exceeding max id");
    }

    __host__ __device__ static constexpr index_t GetSize() { return Picks::GetSize(); }

    template <index_t I>
Chao Liu's avatar
Chao Liu committed
100
    __host__ __device__ constexpr data_type operator[](Number<I>) const
Chao Liu's avatar
Chao Liu committed
101
102
103
104
105
    {
        constexpr auto IP = Picks::Get(Number<I>{});
        return mData[IP];
    }

Chao Liu's avatar
Chao Liu committed
106
    __host__ __device__ constexpr data_type operator[](index_t i) const
Chao Liu's avatar
Chao Liu committed
107
108
109
110
111
112
    {
        constexpr index_t ip = Picks{}[i];
        return mData[ip];
    }

    template <index_t I>
Chao Liu's avatar
Chao Liu committed
113
    __host__ __device__ data_type& operator()(Number<I>)
Chao Liu's avatar
Chao Liu committed
114
115
116
117
118
    {
        constexpr auto IP = Picks::Get(Number<I>{});
        return mData[IP];
    }

Chao Liu's avatar
Chao Liu committed
119
    __host__ __device__ data_type& operator()(index_t i)
Chao Liu's avatar
Chao Liu committed
120
121
122
123
124
125
126
127
128
129
130
131
132
133
    {
        constexpr index_t ip = Picks{}[i];
        return mData[ip];
    }

    Arr& mData;
};

// Convenience factory: deduces Arr and Picks and returns an ArrayElementPicker
// that refers to `a` and selects the indices listed in Picks.
template <class Arr, class Picks>
__host__ __device__ constexpr auto pick_array_element(Arr& a, Picks)
{
    using picker_type = ArrayElementPicker<Arr, Picks>;

    return picker_type(a);
}

Chao Liu's avatar
Chao Liu committed
134
135
136
137
138
139
140
141
142
// Converts a compile-time Sequence<Is...> into an Array<index_t, N> holding the same
// values in the same order, where N is the number of values in the sequence.
template <index_t... Is>
__host__ __device__ constexpr auto sequence2array(Sequence<Is...>)
{
    using array_type = Array<index_t, sizeof...(Is)>;

    return array_type{Is...};
}

// Returns an Array<TData, NSize> whose elements are all value-initialized
// (zero for arithmetic TData).
// The previous implementation went through a Sequence of index_t and therefore always
// produced Array<index_t, NSize>, ignoring the requested TData.
template <class TData, index_t NSize>
__host__ __device__ constexpr auto make_zero_array()
{
    // Array's constructor called with no arguments list-initializes its storage with an
    // empty initializer list, which value-initializes every element.
    return Array<TData, NSize>{};
}

148
template <class TData, index_t NSize, index_t... IRs>
Chao Liu's avatar
Chao Liu committed
149
__host__ __device__ constexpr auto reorder_array_given_new2old(const Array<TData, NSize>& old_array,
Chao Liu's avatar
Chao Liu committed
150
                                                               Sequence<IRs...> /*new2old*/)
151
152
153
{
    static_assert(NSize == sizeof...(IRs), "NSize not consistent");

Chao Liu's avatar
Chao Liu committed
154
    static_assert(is_valid_sequence_map<Sequence<IRs...>>{}, "wrong! invalid reorder map");
155

156
    return Array<TData, NSize>{old_array[IRs]...};
157
158
}

Chao Liu's avatar
Chao Liu committed
159
template <class TData, index_t NSize, class MapOld2New>
Chao Liu's avatar
Chao Liu committed
160
struct lambda_reorder_array_given_old2new
Chao Liu's avatar
Chao Liu committed
161
{
Chao Liu's avatar
Chao Liu committed
162
163
    const Array<TData, NSize>& old_array;
    Array<TData, NSize>& new_array;
Chao Liu's avatar
Chao Liu committed
164

Chao Liu's avatar
Chao Liu committed
165
166
167
    __host__ __device__ constexpr lambda_reorder_array_given_old2new(
        const Array<TData, NSize>& old_array_, Array<TData, NSize>& new_array_)
        : old_array(old_array_), new_array(new_array_)
Chao Liu's avatar
Chao Liu committed
168
169
170
171
172
173
    {
    }

    template <index_t IOldDim>
    __host__ __device__ constexpr void operator()(Number<IOldDim>) const
    {
Chao Liu's avatar
Chao Liu committed
174
        TData old_data = old_array[IOldDim];
Chao Liu's avatar
Chao Liu committed
175
176
177

        constexpr index_t INewDim = MapOld2New::Get(Number<IOldDim>{});

Chao Liu's avatar
Chao Liu committed
178
        new_array.Set(Number<INewDim>{}, old_data);
Chao Liu's avatar
Chao Liu committed
179
180
181
182
183
    }
};

// Reorders an array using an old-to-new index map:
//   result[IRs[i]] = old_array[i]
// The map must have exactly NSize entries and be a valid sequence map.
template <class TData, index_t NSize, index_t... IRs>
__host__ __device__ constexpr auto reorder_array_given_old2new(const Array<TData, NSize>& old_array,
                                                               Sequence<IRs...> /*old2new*/)
{
    using old2new_map = Sequence<IRs...>;

    Array<TData, NSize> new_array;

    static_assert(sizeof...(IRs) == NSize, "NSize not consistent");
    static_assert(is_valid_sequence_map<old2new_map>::value, "wrong! invalid reorder map");

    // scatter: visit every old position and store its element at the mapped new position
    auto scatter = lambda_reorder_array_given_old2new<TData, NSize, old2new_map>(old_array, new_array);

    static_for<0, NSize, 1>{}(scatter);

    return new_array;
}
Chao Liu's avatar
Chao Liu committed
197

198
template <class TData, index_t NSize, class ExtractSeq>
Chao Liu's avatar
Chao Liu committed
199
__host__ __device__ constexpr auto extract_array(const Array<TData, NSize>& old_array, ExtractSeq)
200
201
202
203
204
205
206
{
    Array<TData, ExtractSeq::GetSize()> new_array;

    constexpr index_t new_size = ExtractSeq::GetSize();

    static_assert(new_size <= NSize, "wrong! too many extract");

Chao Liu's avatar
Chao Liu committed
207
    static_for<0, new_size, 1>{}([&](auto I) { new_array(I) = old_array[ExtractSeq::Get(I)]; });
208
209
210
211

    return new_array;
}

Chao Liu's avatar
Chao Liu committed
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
// Emulates a constexpr lambda for element-wise array math.
// Invoked with Number<I>, it computes z[I] = f(x[I], y[I]).
//   F: binary functor; X, Y: operands indexable with Number<I>; Z: output providing Set()
template <class F, class X, class Y, class Z>
struct lambda_array_math
{
    const F& f; // binary operation
    const X& x; // left operand
    const Y& y; // right operand
    Z& z;       // output

    __host__ __device__ constexpr lambda_array_math(const F& f_, const X& x_, const Y& y_, Z& z_)
        : f(f_), x(x_), y(y_), z(z_)
    {
    }

    template <index_t IDim_>
    __host__ __device__ constexpr void operator()(Number<IDim_>) const
    {
        using IDim = Number<IDim_>;

        z.Set(IDim{}, f(x[IDim{}], y[IDim{}]));
    }
};

234
// Array = Array + Array
Chao Liu's avatar
Chao Liu committed
235
template <class TData, index_t NSize>
Chao Liu's avatar
Chao Liu committed
236
__host__ __device__ constexpr auto operator+(Array<TData, NSize> a, Array<TData, NSize> b)
Chao Liu's avatar
Chao Liu committed
237
238
239
{
    Array<TData, NSize> result;

240
    auto f = math::plus<index_t>{};
Chao Liu's avatar
Chao Liu committed
241

Chao Liu's avatar
Chao Liu committed
242
243
244
    static_for<0, NSize, 1>{}(
        lambda_array_math<decltype(f), decltype(a), decltype(b), decltype(result)>(
            f, a, b, result));
Chao Liu's avatar
Chao Liu committed
245
246
247

    return result;
}
Chao Liu's avatar
Chao Liu committed
248

249
250
251
252
253
254
// Array = Array - Array
// Element-wise difference: result[i] = a[i] - b[i].
template <class TData, index_t NSize>
__host__ __device__ constexpr auto operator-(Array<TData, NSize> a, Array<TData, NSize> b)
{
    Array<TData, NSize> result;

    // operate in the element type; a functor fixed to index_t would convert
    // non-index element types before subtracting
    auto f = math::minus<TData>{};

    static_for<0, NSize, 1>{}(
        lambda_array_math<decltype(f), decltype(a), decltype(b), decltype(result)>(
            f, a, b, result));

    return result;
}

Chao Liu's avatar
Chao Liu committed
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
// Array += Array
// Adds b to a element-wise, in place, and returns a reference to a.
// The return type is spelled out as a reference: a plain `auto` return type would
// deduce a value type and hand back a copy of `a` on every call.
template <class TData, index_t NSize>
__host__ __device__ constexpr Array<TData, NSize>& operator+=(Array<TData, NSize>& a,
                                                              Array<TData, NSize> b)
{
    a = a + b;
    return a;
}

// Array -= Array
// Subtracts b from a element-wise, in place, and returns a reference to a.
// The return type is spelled out as a reference: a plain `auto` return type would
// deduce a value type and hand back a copy of `a` on every call.
template <class TData, index_t NSize>
__host__ __device__ constexpr Array<TData, NSize>& operator-=(Array<TData, NSize>& a,
                                                              Array<TData, NSize> b)
{
    a = a - b;
    return a;
}
279
280
281
282
283
284
285
286
// Array = Array + Sequence
// Element-wise sum of an array and a compile-time sequence of the same length:
// result[i] = a[i] + b[i].
template <class TData, index_t NSize, index_t... Is>
__host__ __device__ constexpr auto operator+(Array<TData, NSize> a, Sequence<Is...> b)
{
    static_assert(sizeof...(Is) == NSize, "wrong! size not the same");

    Array<TData, NSize> result;

    // operate in the array's element type; the sequence values are converted to TData
    auto f = math::plus<TData>{};

    static_for<0, NSize, 1>{}(
        lambda_array_math<decltype(f), decltype(a), decltype(b), decltype(result)>(
            f, a, b, result));

    return result;
}

// Array = Array - Sequence
// Element-wise difference of an array and a compile-time sequence of the same length:
// result[i] = a[i] - b[i].
template <class TData, index_t NSize, index_t... Is>
__host__ __device__ constexpr auto operator-(Array<TData, NSize> a, Sequence<Is...> b)
{
    static_assert(sizeof...(Is) == NSize, "wrong! size not the same");

    Array<TData, NSize> result;

    // operate in the array's element type; the sequence values are converted to TData
    auto f = math::minus<TData>{};

    static_for<0, NSize, 1>{}(
        lambda_array_math<decltype(f), decltype(a), decltype(b), decltype(result)>(
            f, a, b, result));

    return result;
}

Chao Liu's avatar
Chao Liu committed
313
314
315
316
317
318
319
320
// Array = Array * Sequence
// Element-wise product of an array and a compile-time sequence of the same length:
// result[i] = a[i] * b[i].
template <class TData, index_t NSize, index_t... Is>
__host__ __device__ constexpr auto operator*(Array<TData, NSize> a, Sequence<Is...> b)
{
    static_assert(sizeof...(Is) == NSize, "wrong! size not the same");

    Array<TData, NSize> result;

    // operate in the array's element type; the sequence values are converted to TData
    auto f = math::multiplies<TData>{};

    static_for<0, NSize, 1>{}(
        lambda_array_math<decltype(f), decltype(a), decltype(b), decltype(result)>(
            f, a, b, result));

    return result;
}
329

330
331
332
// Array = Sequence - Array
// Element-wise difference of a compile-time sequence and an array of the same length:
// result[i] = a[i] - b[i].
template <class TData, index_t NSize, index_t... Is>
__host__ __device__ constexpr auto operator-(Sequence<Is...> a, Array<TData, NSize> b)
{
    static_assert(sizeof...(Is) == NSize, "wrong! size not the same");

    Array<TData, NSize> result;

    // operate in the array's element type; the sequence values are converted to TData
    auto f = math::minus<TData>{};

    static_for<0, NSize, 1>{}(
        lambda_array_math<decltype(f), decltype(a), decltype(b), decltype(result)>(
            f, a, b, result));

    return result;
}

// Left fold over the elements of an array:
//   result = f(...f(f(init, a[0]), a[1])..., a[NSize - 1])
// Requires a non-empty array.
template <class TData, index_t NSize, class Reduce>
__host__ __device__ constexpr TData
accumulate_on_array(const Array<TData, NSize>& a, Reduce f, TData init)
{
    static_assert(NSize > 0, "wrong");

    TData acc = init;

    // visit elements in increasing index order, carrying the running value in acc
    static_for<0, NSize, 1>{}([&](auto idx) { acc = f(acc, a[idx]); });

    return acc;
}
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442

// Debug helper: prints the label `s`, the array size and all elements on one line.
// Supports sizes 1 through 10 only (enforced at compile time). Each size issues exactly
// one printf call, so the whole line is produced by a single call.
// NOTE(review): every value is printed with %u; this presumably assumes index_t and T
// are unsigned 32-bit integers -- confirm against the index_t definition.
template <class T, index_t NSize>
__host__ __device__ void print_Array(const char* s, Array<T, NSize> a)
{
    constexpr index_t len = a.GetSize();

    static_assert(len > 0 && len <= 10, "wrong!");

    // exactly one of the static_if blocks below matches len
    static_if<len == 1>{}([&](auto) {
        printf("%s size %u, {%u}\n", s, len, a[0]);
    });

    static_if<len == 2>{}([&](auto) {
        printf("%s size %u, {%u %u}\n", s, len, a[0], a[1]);
    });

    static_if<len == 3>{}([&](auto) {
        printf("%s size %u, {%u %u %u}\n", s, len, a[0], a[1], a[2]);
    });

    static_if<len == 4>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u}\n", s, len, a[0], a[1], a[2], a[3]);
    });

    static_if<len == 5>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u}\n", s, len, a[0], a[1], a[2], a[3], a[4]);
    });

    static_if<len == 6>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u}\n", s, len, a[0], a[1], a[2], a[3], a[4], a[5]);
    });

    static_if<len == 7>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u %u}\n",
               s, len,
               a[0], a[1], a[2], a[3], a[4], a[5], a[6]);
    });

    static_if<len == 8>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u %u %u}\n",
               s, len,
               a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
    });

    static_if<len == 9>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u %u %u %u}\n",
               s, len,
               a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8]);
    });

    static_if<len == 10>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u %u %u %u %u}\n",
               s, len,
               a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9]);
    });
}
443
444
445

} // namespace ck
#endif