math.hpp 6.32 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
// SPDX-License-Identifier: MIT
Illia Silin's avatar
Illia Silin committed
2
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
Chao Liu's avatar
Chao Liu committed
3

Chao Liu's avatar
Chao Liu committed
4
#pragma once
Chao Liu's avatar
Chao Liu committed
5

Chao Liu's avatar
Chao Liu committed
6
#include "ck/ck.hpp"
Chao Liu's avatar
Chao Liu committed
7
#include "integral_constant.hpp"
Chao Liu's avatar
Chao Liu committed
8
#include "number.hpp"
Chao Liu's avatar
Chao Liu committed
9
#include "type.hpp"
Chao Liu's avatar
Chao Liu committed
10
#include "enable_if.hpp"
Chao Liu's avatar
Chao Liu committed
11
12
13
14

namespace ck {
namespace math {

zjing14's avatar
zjing14 committed
15
template <typename T, T s>
Chao Liu's avatar
Chao Liu committed
16
17
18
19
20
struct scales
{
    __host__ __device__ constexpr T operator()(T a) const { return s * a; }
};

zjing14's avatar
zjing14 committed
21
template <typename T>
Chao Liu's avatar
Chao Liu committed
22
23
24
25
26
struct plus
{
    __host__ __device__ constexpr T operator()(T a, T b) const { return a + b; }
};

zjing14's avatar
zjing14 committed
27
template <typename T>
Chao Liu's avatar
Chao Liu committed
28
29
30
31
32
33
struct minus
{
    __host__ __device__ constexpr T operator()(T a, T b) const { return a - b; }
};

struct multiplies
Chao Liu's avatar
Chao Liu committed
34
35
36
37
38
39
40
41
{
    template <typename A, typename B>
    __host__ __device__ constexpr auto operator()(const A& a, const B& b) const
    {
        return a * b;
    }
};

zjing14's avatar
zjing14 committed
42
template <typename T>
Chao Liu's avatar
Chao Liu committed
43
struct maximize
Chao Liu's avatar
Chao Liu committed
44
45
46
47
{
    __host__ __device__ constexpr T operator()(T a, T b) const { return a >= b ? a : b; }
};

zjing14's avatar
zjing14 committed
48
template <typename T>
Chao Liu's avatar
Chao Liu committed
49
50
51
52
53
struct minimize
{
    __host__ __device__ constexpr T operator()(T a, T b) const { return a <= b ? a : b; }
};

zjing14's avatar
zjing14 committed
54
template <typename T>
Chao Liu's avatar
Chao Liu committed
55
56
57
58
59
60
struct integer_divide_ceiler
{
    __host__ __device__ constexpr T operator()(T a, T b) const
    {
        static_assert(is_same<T, index_t>{} || is_same<T, int>{}, "wrong type");

zjing14's avatar
zjing14 committed
61
        return (a + b - Number<1>{}) / b;
Chao Liu's avatar
Chao Liu committed
62
63
64
    }
};

zjing14's avatar
zjing14 committed
65
template <typename X, typename Y>
66
67
68
69
70
__host__ __device__ constexpr auto integer_divide_floor(X x, Y y)
{
    return x / y;
}

zjing14's avatar
zjing14 committed
71
template <typename X, typename Y>
Chao Liu's avatar
Chao Liu committed
72
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
Chao Liu's avatar
Chao Liu committed
73
{
74
    return (x + y - Number<1>{}) / y;
Chao Liu's avatar
Chao Liu committed
75
76
}

zjing14's avatar
zjing14 committed
77
template <typename X, typename Y>
Chao Liu's avatar
Chao Liu committed
78
__host__ __device__ constexpr auto integer_least_multiple(X x, Y y)
Chao Liu's avatar
Chao Liu committed
79
{
Chao Liu's avatar
Chao Liu committed
80
    return y * integer_divide_ceil(x, y);
Chao Liu's avatar
Chao Liu committed
81
82
}

zjing14's avatar
zjing14 committed
83
template <typename T>
Chao Liu's avatar
Chao Liu committed
84
85
86
87
88
__host__ __device__ constexpr T max(T x)
{
    return x;
}

zjing14's avatar
zjing14 committed
89
90
// Two-argument max for operands of the same type.
// Returns x only when x > y; otherwise returns y.
template <typename T>
__host__ __device__ constexpr T max(T x, T y)
{
    if(x > y)
    {
        return x;
    }
    return y;
}
Chao Liu's avatar
Chao Liu committed
94

zjing14's avatar
zjing14 committed
95
96
97
98
99
// max of a compile-time Number<X> and a run-time index_t; yields a plain index_t.
template <index_t X>
__host__ __device__ constexpr index_t max(Number<X>, index_t y)
{
    if(X > y)
    {
        return X;
    }
    return y;
}
Chao Liu's avatar
Chao Liu committed
100

zjing14's avatar
zjing14 committed
101
102
103
104
105
// max of a run-time index_t and a compile-time Number<Y>; yields a plain index_t.
template <index_t Y>
__host__ __device__ constexpr index_t max(index_t x, Number<Y>)
{
    if(x > Y)
    {
        return x;
    }
    return Y;
}
Chao Liu's avatar
Chao Liu committed
106

zjing14's avatar
zjing14 committed
107
108
109
110
111
112
// Variadic max: reduces the trailing arguments first, then compares the
// result against the leading argument.
template <typename X, typename... Ys>
__host__ __device__ constexpr auto max(X x, Ys... ys)
{
    static_assert(sizeof...(Ys) > 0, "not enough argument");

    const auto max_of_rest = max(ys...);
    return max(x, max_of_rest);
}

zjing14's avatar
zjing14 committed
115
template <typename T>
Chao Liu's avatar
Chao Liu committed
116
117
118
119
120
__host__ __device__ constexpr T min(T x)
{
    return x;
}

zjing14's avatar
zjing14 committed
121
122
123
124
125
126
127
128
// Two-argument min for operands of the same type.
// Returns x only when x < y; otherwise returns y.
template <typename T>
__host__ __device__ constexpr T min(T x, T y)
{
    if(x < y)
    {
        return x;
    }
    return y;
}

// min of a compile-time Number<X> and a run-time index_t; yields a plain index_t.
template <index_t X>
__host__ __device__ constexpr index_t min(Number<X>, index_t y)
{
    if(X < y)
    {
        return X;
    }
    return y;
}
Chao Liu's avatar
Chao Liu committed
132

zjing14's avatar
zjing14 committed
133
134
135
136
137
// min of a run-time index_t and a compile-time Number<Y>; yields a plain index_t.
template <index_t Y>
__host__ __device__ constexpr index_t min(index_t x, Number<Y>)
{
    if(x < Y)
    {
        return x;
    }
    return Y;
}
Chao Liu's avatar
Chao Liu committed
138

zjing14's avatar
zjing14 committed
139
140
141
142
// Variadic min: reduces the trailing arguments first, then compares the
// result against the leading argument.
template <typename X, typename... Ys>
__host__ __device__ constexpr auto min(X x, Ys... ys)
{
    static_assert(sizeof...(Ys) > 0, "not enough argument");

    const auto min_of_rest = min(ys...);
    return min(x, min_of_rest);
}

rocking5566's avatar
rocking5566 committed
147
148
149
150
151
152
// Restricts x to the range [lowerbound, upperbound]: x is first raised to at
// least lowerbound, then lowered to at most upperbound.
template <typename T>
__host__ __device__ constexpr T clamp(const T& x, const T& lowerbound, const T& upperbound)
{
    const T raised = max(x, lowerbound);
    return min(raised, upperbound);
}

Chao Liu's avatar
Chao Liu committed
153
154
155
156
// prevent implicit type casting
// Host-side exp(x). Only the primary template is declared here, without a body;
// the implementations visible in this file are the explicit specializations
// for float and double.
template <typename T>
__host__ T exp(T x);

157
158
159
// Device-side exp(x). Declared without a body; the implementations visible in
// this file are the explicit specializations for float and double.
template <typename T>
__device__ T exp(T x);

160
161
// TODO: add f16 support using v_exp_f16

162
template <>
Chao Liu's avatar
Chao Liu committed
163
inline __device__ float exp<float>(float x)
164
165
166
167
168
{
    return __expf(x);
}

template <>
Chao Liu's avatar
Chao Liu committed
169
inline __device__ double exp<double>(double x)
170
171
172
173
{
    return exp(x);
}

Chao Liu's avatar
Chao Liu committed
174
175
176
177
178
// Host exp for float.
template <>
inline __host__ float exp<float>(float x)
{
    // std::exp has a float overload, so this stays in single precision.
    // It is used instead of std::expf because not every standard library
    // declares expf inside namespace std.
    return std::exp(x);
}
179

Chao Liu's avatar
Chao Liu committed
180
181
182
183
184
// Host exp for double, forwarding to the standard library.
template <>
inline __host__ double exp<double>(double x)
{
    const double result = std::exp(x);
    return result;
}
185

186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// prevent implicit type casting
// Host-side exp2(x) (2 raised to x). Declared without a body; the
// implementations visible in this file are the explicit specializations for
// float and double.
template <typename T>
__host__ T exp2(T x);

// Device-side exp2(x). Declared without a body; implemented through explicit
// specializations for float and double.
template <typename T>
__device__ T exp2(T x);

// Device exp2 for float, forwarding to exp2f.
template <>
inline __device__ float exp2<float>(float x)
{
    const float result = exp2f(x);
    return result;
}

// Device exp2 for double.
template <>
inline __device__ double exp2<double>(double x)
{
    // The call must be qualified: inside ck::math an unqualified exp2(x) finds
    // ck::math::exp2 first (double has no associated namespaces for ADL), which
    // would make this specialization call itself forever.
    return ::exp2(x);
}

// Host exp2 for float.
template <>
inline __host__ float exp2<float>(float x)
{
    // std::exp2 has a float overload (C++11), so this stays in single precision.
    // It is used instead of std::exp2f because not every standard library
    // declares exp2f inside namespace std.
    return std::exp2(x);
}

// Host exp2 for double.
template <>
inline __host__ double exp2<double>(double x)
{
    // std::exp2 has provided a double overload since C++11, so there is no
    // need to go through the long double variant (std::exp2l).
    return std::exp2(x);
}

217
// greatest common divisor, aka highest common factor
Chao Liu's avatar
Chao Liu committed
218
__host__ __device__ constexpr index_t gcd(index_t x, index_t y)
Chao Liu's avatar
Chao Liu committed
219
{
220
221
222
223
224
225
226
227
228
    if(x < 0)
    {
        return gcd(-x, y);
    }
    else if(y < 0)
    {
        return gcd(x, -y);
    }
    else if(x == y || x == 0)
Chao Liu's avatar
Chao Liu committed
229
230
231
    {
        return y;
    }
Chao Liu's avatar
Chao Liu committed
232
    else if(y == 0)
Chao Liu's avatar
Chao Liu committed
233
234
235
    {
        return x;
    }
Chao Liu's avatar
Chao Liu committed
236
    else if(x > y)
Chao Liu's avatar
Chao Liu committed
237
    {
238
        return gcd(x % y, y);
Chao Liu's avatar
Chao Liu committed
239
    }
Chao Liu's avatar
Chao Liu committed
240
    else
Chao Liu's avatar
Chao Liu committed
241
    {
242
        return gcd(x, y % x);
Chao Liu's avatar
Chao Liu committed
243
244
245
246
    }
}

template <index_t X, index_t Y>
247
__host__ __device__ constexpr auto gcd(Number<X>, Number<Y>)
Chao Liu's avatar
Chao Liu committed
248
{
Chao Liu's avatar
Chao Liu committed
249
250
251
    constexpr auto r = gcd(X, Y);

    return Number<r>{};
Chao Liu's avatar
Chao Liu committed
252
253
}

Chao Liu's avatar
Chao Liu committed
254
template <typename X, typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false>
255
__host__ __device__ constexpr auto gcd(X x, Ys... ys)
Chao Liu's avatar
Chao Liu committed
256
{
257
    return gcd(x, gcd(ys...));
Chao Liu's avatar
Chao Liu committed
258
259
260
}

// least common multiple
Chao Liu's avatar
Chao Liu committed
261
262
template <typename X, typename Y>
__host__ __device__ constexpr auto lcm(X x, Y y)
Chao Liu's avatar
Chao Liu committed
263
{
264
    return (x * y) / gcd(x, y);
Chao Liu's avatar
Chao Liu committed
265
266
}

Chao Liu's avatar
Chao Liu committed
267
template <typename X, typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false>
Chao Liu's avatar
Chao Liu committed
268
__host__ __device__ constexpr auto lcm(X x, Ys... ys)
Chao Liu's avatar
Chao Liu committed
269
{
Chao Liu's avatar
Chao Liu committed
270
    return lcm(x, lcm(ys...));
Chao Liu's avatar
Chao Liu committed
271
272
}

zjing14's avatar
zjing14 committed
273
template <typename T>
Chao Liu's avatar
Chao Liu committed
274
275
276
277
278
struct equal
{
    __host__ __device__ constexpr bool operator()(T x, T y) const { return x == y; }
};

zjing14's avatar
zjing14 committed
279
template <typename T>
Chao Liu's avatar
Chao Liu committed
280
281
282
283
284
struct less
{
    __host__ __device__ constexpr bool operator()(T x, T y) const { return x < y; }
};

Chao Liu's avatar
Chao Liu committed
285
286
287
288
289
290
// Returns the smallest power of two that is not less than x.
// For x <= 1 the answer is 1. Results for x > 0x40000000 do not fit in
// int32_t (the shift would reach bit 31), so callers must stay at or below it.
__host__ __device__ constexpr int32_t next_power_of_two(int32_t x)
{
    // Guard first: for x <= 1, (x - 1) is zero or negative, and
    // __builtin_clz(0) has an undefined result.
    if(x <= 1)
    {
        return 1;
    }

    // 32 - clz(x - 1) is the number of bits needed to represent x - 1.
    return 1 << (32 - __builtin_clz(x - 1));
}

291
292
293
// Compile-time variant taking X as a template argument; the result is
// returned as a Number<>.
template <index_t X>
__host__ __device__ constexpr auto next_power_of_two()
{
    return Number<next_power_of_two(X)>{};
}

template <index_t X>
Chao Liu's avatar
Chao Liu committed
299
300
301
302
303
304
305
306
307
308
309
310
311
312
__host__ __device__ constexpr auto next_power_of_two(Number<X>)
{
    constexpr index_t y = next_power_of_two(X);
    return Number<y>{};
}

// Returns floor(log2(x)), i.e. the index of the highest set bit of x.
__host__ __device__ constexpr int32_t integer_log2_floor(int32_t x)
{
    // TODO: x need to be 1 ~ 0x7fffffff
    // __builtin_clz will produce unexpected result if x is 0;
    const int32_t leading_zeros = __builtin_clz(x);
    return 31 - leading_zeros;
}

// Returns true when x is a positive power of two (1, 2, 4, ...).
// Zero and negative values return false.
__host__ __device__ constexpr bool is_power_of_two_integer(int32_t x)
{
    // A positive power of two has exactly one bit set, so clearing the lowest
    // set bit with x & (x - 1) leaves zero. The x > 0 check comes first so
    // that x - 1 is never evaluated for INT32_MIN.
    return x > 0 && (x & (x - 1)) == 0;
}

Chao Liu's avatar
Chao Liu committed
318
} // namespace math
Chao Liu's avatar
Chao Liu committed
319
} // namespace ck