math.hpp 5.81 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
// SPDX-License-Identifier: MIT
Illia Silin's avatar
Illia Silin committed
2
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
Chao Liu's avatar
Chao Liu committed
3

Chao Liu's avatar
Chao Liu committed
4
#pragma once
Chao Liu's avatar
Chao Liu committed
5

Chao Liu's avatar
Chao Liu committed
6
#include "ck/ck.hpp"
Chao Liu's avatar
Chao Liu committed
7
#include "integral_constant.hpp"
Chao Liu's avatar
Chao Liu committed
8
#include "number.hpp"
Chao Liu's avatar
Chao Liu committed
9
#include "type.hpp"
Chao Liu's avatar
Chao Liu committed
10
#include "enable_if.hpp"
Chao Liu's avatar
Chao Liu committed
11
12
13
14

namespace ck {
namespace math {

zjing14's avatar
zjing14 committed
15
// Unary functor that multiplies its argument by the compile-time constant `s`.
template <typename T, T s>
struct scales
{
    // Returns s * a, converted to T.
    __host__ __device__ constexpr T operator()(T a) const
    {
        T scaled = s * a;
        return scaled;
    }
};

zjing14's avatar
zjing14 committed
21
// Binary functor returning the sum of its two operands.
template <typename T>
struct plus
{
    // Returns a + b, converted to T.
    __host__ __device__ constexpr T operator()(T a, T b) const
    {
        T sum = a + b;
        return sum;
    }
};

zjing14's avatar
zjing14 committed
27
// Binary functor returning the difference of its two operands.
template <typename T>
struct minus
{
    // Returns a - b, converted to T.
    __host__ __device__ constexpr T operator()(T a, T b) const
    {
        T difference = a - b;
        return difference;
    }
};

// Binary functor returning the product of two operands of possibly different types.
// The result type is whatever `a * b` yields for the given A and B.
struct multiplies
{
    template <typename A, typename B>
    __host__ __device__ constexpr auto operator()(const A& a, const B& b) const
    {
        auto product = a * b;
        return product;
    }
};

zjing14's avatar
zjing14 committed
42
// Binary functor returning the larger of two values.
template <typename T>
struct maximize
{
    // Returns a when a >= b holds, otherwise b.
    __host__ __device__ constexpr T operator()(T a, T b) const
    {
        if(a >= b)
        {
            return a;
        }
        return b;
    }
};

zjing14's avatar
zjing14 committed
48
// Binary functor returning the smaller of two values.
template <typename T>
struct minimize
{
    // Returns a when a <= b holds, otherwise b.
    __host__ __device__ constexpr T operator()(T a, T b) const
    {
        if(a <= b)
        {
            return a;
        }
        return b;
    }
};

zjing14's avatar
zjing14 committed
54
// Functor computing (a + b - 1) / b, i.e. a / b rounded up for non-negative a and positive b.
// Only instantiable for T == index_t or T == int (enforced by the static_assert).
template <typename T>
struct integer_divide_ceiler
{
    __host__ __device__ constexpr T operator()(T a, T b) const
    {
        static_assert(is_same<T, index_t>{} || is_same<T, int>{}, "wrong type");

        // Bias the numerator by (b - 1) so that the truncating division rounds up.
        const T biased_numerator = a + b - Number<1>{};
        return biased_numerator / b;
    }
};

zjing14's avatar
zjing14 committed
65
// Returns x / y using the operands' own division operator.
// For built-in integers this truncates toward zero, which equals the floor only when the
// quotient is non-negative.
template <typename X, typename Y>
__host__ __device__ constexpr auto integer_divide_floor(X x, Y y)
{
    auto quotient = x / y;
    return quotient;
}

zjing14's avatar
zjing14 committed
71
// Returns (x + y - 1) / y, i.e. x / y rounded up for non-negative x and positive y.
// Number<1> is used for the bias so the expression keeps whatever type the operands'
// own arithmetic operators produce.
template <typename X, typename Y>
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
{
    const auto biased_numerator = x + y - Number<1>{};
    return biased_numerator / y;
}

zjing14's avatar
zjing14 committed
77
// Returns y * integer_divide_ceil(x, y): x rounded up to a multiple of y
// (for the non-negative x / positive y case that integer_divide_ceil handles).
template <typename X, typename Y>
__host__ __device__ constexpr auto integer_least_multiple(X x, Y y)
{
    const auto chunk_count = integer_divide_ceil(x, y);
    return y * chunk_count;
}

zjing14's avatar
zjing14 committed
83
// Single-argument max: returns its argument unchanged.
// Serves as the terminating case for the variadic max overload.
template <typename T>
__host__ __device__ constexpr T max(T x)
{
    T only = x;
    return only;
}

zjing14's avatar
zjing14 committed
89
90
// Two-argument max for operands of the same type.
// Returns x only when x > y holds; otherwise returns y (so y wins on ties).
template <typename T>
__host__ __device__ constexpr T max(T x, T y)
{
    if(x > y)
    {
        return x;
    }
    return y;
}
Chao Liu's avatar
Chao Liu committed
94

zjing14's avatar
zjing14 committed
95
96
97
98
99
// Mixed max: compile-time constant X (passed as Number<X>) against a runtime index_t.
// The result is a plain index_t.
template <index_t X>
__host__ __device__ constexpr index_t max(Number<X>, index_t y)
{
    if(X > y)
    {
        return X;
    }
    return y;
}
Chao Liu's avatar
Chao Liu committed
100

zjing14's avatar
zjing14 committed
101
102
103
104
105
// Mixed max: runtime index_t against a compile-time constant Y (passed as Number<Y>).
// The result is a plain index_t.
template <index_t Y>
__host__ __device__ constexpr index_t max(index_t x, Number<Y>)
{
    if(x > Y)
    {
        return x;
    }
    return Y;
}
Chao Liu's avatar
Chao Liu committed
106

zjing14's avatar
zjing14 committed
107
108
109
110
111
112
// Variadic max: reduces the trailing arguments first, then compares the result with x.
// The static_assert rejects an empty trailing pack for this overload.
template <typename X, typename... Ys>
__host__ __device__ constexpr auto max(X x, Ys... ys)
{
    static_assert(sizeof...(Ys) > 0, "not enough argument");

    const auto tail_max = max(ys...);
    return max(x, tail_max);
}

zjing14's avatar
zjing14 committed
115
// Single-argument min: returns its argument unchanged.
// Serves as the terminating case for the variadic min overload.
template <typename T>
__host__ __device__ constexpr T min(T x)
{
    T only = x;
    return only;
}

zjing14's avatar
zjing14 committed
121
122
123
124
125
126
127
128
// Two-argument min for operands of the same type.
// Returns x only when x < y holds; otherwise returns y (so y wins on ties).
template <typename T>
__host__ __device__ constexpr T min(T x, T y)
{
    if(x < y)
    {
        return x;
    }
    return y;
}

// Mixed min: compile-time constant X (passed as Number<X>) against a runtime index_t.
// The result is a plain index_t.
template <index_t X>
__host__ __device__ constexpr index_t min(Number<X>, index_t y)
{
    if(X < y)
    {
        return X;
    }
    return y;
}
Chao Liu's avatar
Chao Liu committed
132

zjing14's avatar
zjing14 committed
133
134
135
136
137
// Mixed min: runtime index_t against a compile-time constant Y (passed as Number<Y>).
// The result is a plain index_t.
template <index_t Y>
__host__ __device__ constexpr index_t min(index_t x, Number<Y>)
{
    if(x < Y)
    {
        return x;
    }
    return Y;
}
Chao Liu's avatar
Chao Liu committed
138

zjing14's avatar
zjing14 committed
139
140
141
142
// Variadic min: reduces the trailing arguments first, then compares the result with x.
// The static_assert rejects an empty trailing pack for this overload.
template <typename X, typename... Ys>
__host__ __device__ constexpr auto min(X x, Ys... ys)
{
    static_assert(sizeof...(Ys) > 0, "not enough argument");

    const auto tail_min = min(ys...);
    return min(x, tail_min);
}

rocking5566's avatar
rocking5566 committed
147
148
149
150
151
152
// Restricts x to [lowerbound, upperbound]: first raises x to at least lowerbound,
// then caps the result at upperbound. No check is made that lowerbound <= upperbound.
template <typename T>
__host__ __device__ constexpr T clamp(const T& x, const T& lowerbound, const T& upperbound)
{
    const T raised = max(x, lowerbound);
    return min(raised, upperbound);
}

Chao Liu's avatar
Chao Liu committed
153
154
155
156
// Host-side exponential.
// Declared as a template without a generic definition to prevent implicit type casting:
// a call only links for types that have an explicit specialization (float and double
// specializations are provided further down in this file).
template <typename T>
__host__ T exp(T x);

157
158
159
// Device-side exponential; declaration only, mirroring the host-side template.
// Definitions are supplied by explicit specializations (float and double in this file).
template <typename T>
__device__ T exp(T x);

160
161
// TODO: add f16 support using v_exp_f16

162
// Device specialization of exp for float.
// Uses the fast-math intrinsic __expf, which trades accuracy for speed.
template <>
inline __device__ float exp<float>(float x)
{
    const float result = __expf(x);
    return result;
}

// Device specialization of exp for double.
// The call is deliberately qualified with `::`. Inside namespace ck::math an unqualified
// exp(x) with a double argument resolves to ck::math::exp<double> -- this very function --
// and would recurse without end. `::exp` is the global double-precision math function.
template <>
inline __device__ double exp<double>(double x)
{
    return ::exp(x);
}

Chao Liu's avatar
Chao Liu committed
174
175
176
177
178
// Host specialization of exp for float.
// Calls the float overload of std::exp rather than std::expf: the overload is available in
// every conforming <cmath>, while std::expf is missing from namespace std in some standard
// library versions. The argument is a float, so the computation stays in single precision.
template <>
inline __host__ float exp<float>(float x)
{
    return std::exp(x);
}
179

Chao Liu's avatar
Chao Liu committed
180
181
182
183
184
// Host specialization of exp for double; forwards to std::exp.
template <>
inline __host__ double exp<double>(double x)
{
    const double result = std::exp(x);
    return result;
}
185

186
// greatest common divisor, aka highest common factor
// Operates on the magnitudes of x and y, so the result is never negative.
// gcd(0, n) and gcd(n, 0) both yield |n|; gcd(0, 0) yields 0.
__host__ __device__ constexpr index_t gcd(index_t x, index_t y)
{
    // Negative inputs are replaced by their magnitudes before reducing.
    index_t a = x < 0 ? -x : x;
    index_t b = y < 0 ? -y : y;

    // Euclid's algorithm: replace (a, b) by (b, a mod b) until the remainder vanishes.
    while(b != 0)
    {
        const index_t remainder = a % b;
        a                       = b;
        b                       = remainder;
    }

    return a;
}

// Compile-time gcd: takes two Number<> constants and returns the result as a Number<>,
// so the value stays encoded in the type.
template <index_t X, index_t Y>
__host__ __device__ constexpr auto gcd(Number<X>, Number<Y>)
{
    return Number<gcd(X, Y)>{};
}

Chao Liu's avatar
Chao Liu committed
223
// Variadic gcd for three or more arguments (the enable_if requires at least two trailing
// arguments): reduces the trailing arguments first, then combines the result with x.
template <typename X, typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false>
__host__ __device__ constexpr auto gcd(X x, Ys... ys)
{
    const auto tail_gcd = gcd(ys...);
    return gcd(x, tail_gcd);
}

// least common multiple
// Computed as (x / gcd(x, y)) * y. gcd(x, y) divides x exactly, so this equals
// (x * y) / gcd(x, y) but avoids forming the full product x * y, which can overflow even
// when the least common multiple itself is representable.
// As with the product form, the divisor is gcd(x, y), so x == 0 && y == 0 is not supported.
template <typename X, typename Y>
__host__ __device__ constexpr auto lcm(X x, Y y)
{
    return (x / gcd(x, y)) * y;
}

Chao Liu's avatar
Chao Liu committed
236
// Variadic lcm for three or more arguments (the enable_if requires at least two trailing
// arguments): reduces the trailing arguments first, then combines the result with x.
template <typename X, typename... Ys, typename enable_if<sizeof...(Ys) >= 2, bool>::type = false>
__host__ __device__ constexpr auto lcm(X x, Ys... ys)
{
    const auto tail_lcm = lcm(ys...);
    return lcm(x, tail_lcm);
}

zjing14's avatar
zjing14 committed
242
// Binary predicate functor: true when its two operands compare equal with ==.
template <typename T>
struct equal
{
    __host__ __device__ constexpr bool operator()(T x, T y) const
    {
        const bool is_equal = (x == y);
        return is_equal;
    }
};

zjing14's avatar
zjing14 committed
248
// Binary predicate functor: true when the first operand is strictly less than the second.
template <typename T>
struct less
{
    __host__ __device__ constexpr bool operator()(T x, T y) const
    {
        const bool is_less = (x < y);
        return is_less;
    }
};

Chao Liu's avatar
Chao Liu committed
254
255
256
257
258
259
// Returns the smallest power of two that is >= x.
// Any x <= 1 yields 1 (2^0). Without this guard x == 1 would pass 0 to __builtin_clz,
// whose result is undefined, and x <= 0 would shift by 32 or more bits.
// TODO: results above 2^30 are not representable in int32_t, so x must not exceed 0x40000000.
__host__ __device__ constexpr int32_t next_power_of_two(int32_t x)
{
    if(x <= 1)
    {
        return 1;
    }

    // x - 1 >= 1 here, so __builtin_clz is well defined; (32 - clz) is the bit width of
    // x - 1, and 1 shifted by that width is the first power of two not below x.
    return 1 << (32 - __builtin_clz(x - 1));
}

260
261
262
// Compile-time next_power_of_two: X is supplied as a template argument and the result is
// returned as a Number<>, so the value stays encoded in the type.
template <index_t X>
__host__ __device__ constexpr auto next_power_of_two()
{
    return Number<next_power_of_two(X)>{};
}

// Compile-time next_power_of_two taking the input as a Number<X> argument; the result is
// returned as a Number<> as well.
template <index_t X>
__host__ __device__ constexpr auto next_power_of_two(Number<X>)
{
    return Number<next_power_of_two(X)>{};
}

// Returns the index of the highest set bit of x, i.e. floor(log2(x)).
__host__ __device__ constexpr int32_t integer_log2_floor(int32_t x)
{
    // TODO: x need to be 1 ~ 0x7fffffff
    // __builtin_clz will produce unexpected result if x is 0;
    const int32_t leading_zeros = __builtin_clz(x);
    return 31 - leading_zeros;
}

// Returns true when x is a positive power of two (1, 2, 4, ...), false otherwise.
// Zero and negative values are reported as "not a power of two"; the bit test below is
// well defined for them, unlike a count-leading-zeros based check.
__host__ __device__ constexpr bool is_power_of_two_integer(int32_t x)
{
    // A positive power of two has exactly one bit set, so clearing its lowest set bit
    // with x & (x - 1) leaves zero. The x > 0 test runs first, so x - 1 cannot overflow.
    return x > 0 && (x & (x - 1)) == 0;
}

Chao Liu's avatar
Chao Liu committed
287
} // namespace math
Chao Liu's avatar
Chao Liu committed
288
} // namespace ck