Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
f000fe32
"...composable_kernel_rocm.git" did not exist on "e7dce4d247d2aad9afc7695b29b4c35eaf62b9cc"
Commit
f000fe32
authored
Sep 26, 2023
by
Umang Yadav
Browse files
remove unnecesssary changes
parent
795bea35
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
408 additions
and
433 deletions
+408
-433
include/ck/utility/generic_memory_space_atomic.hpp
include/ck/utility/generic_memory_space_atomic.hpp
+0
-5
include/ck/utility/get_id.hpp
include/ck/utility/get_id.hpp
+0
-5
include/ck/utility/ignore.hpp
include/ck/utility/ignore.hpp
+0
-5
include/ck/utility/inner_product.hpp
include/ck/utility/inner_product.hpp
+0
-5
include/ck/utility/integral_constant.hpp
include/ck/utility/integral_constant.hpp
+27
-33
include/ck/utility/is_known_at_compile_time.hpp
include/ck/utility/is_known_at_compile_time.hpp
+0
-5
include/ck/utility/magic_division.hpp
include/ck/utility/magic_division.hpp
+138
-130
include/ck/utility/math.hpp
include/ck/utility/math.hpp
+149
-99
include/ck/utility/math_v2.hpp
include/ck/utility/math_v2.hpp
+94
-91
include/ck/utility/multi_index.hpp
include/ck/utility/multi_index.hpp
+0
-5
include/ck/utility/number.hpp
include/ck/utility/number.hpp
+0
-5
include/ck/utility/reduction_common.hpp
include/ck/utility/reduction_common.hpp
+0
-5
include/ck/utility/reduction_enums.hpp
include/ck/utility/reduction_enums.hpp
+0
-5
include/ck/utility/reduction_functions_accumulate.hpp
include/ck/utility/reduction_functions_accumulate.hpp
+0
-5
include/ck/utility/reduction_operator.hpp
include/ck/utility/reduction_operator.hpp
+0
-5
include/ck/utility/sequence.hpp
include/ck/utility/sequence.hpp
+0
-5
include/ck/utility/sequence_helper.hpp
include/ck/utility/sequence_helper.hpp
+0
-5
include/ck/utility/static_buffer.hpp
include/ck/utility/static_buffer.hpp
+0
-5
include/ck/utility/statically_indexed_array.hpp
include/ck/utility/statically_indexed_array.hpp
+0
-5
include/ck/utility/statically_indexed_array_multi_index.hpp
include/ck/utility/statically_indexed_array_multi_index.hpp
+0
-5
No files found.
include/ck/utility/generic_memory_space_atomic.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -124,5 +121,3 @@ __device__ float2_t atomic_max<float2_t>(float2_t* p_dst, const float2_t& x)
...
@@ -124,5 +121,3 @@ __device__ float2_t atomic_max<float2_t>(float2_t* p_dst, const float2_t& x)
}
}
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/get_id.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -29,5 +26,3 @@ __device__ index_t get_grid_size() { return gridDim.x; }
...
@@ -29,5 +26,3 @@ __device__ index_t get_grid_size() { return gridDim.x; }
__device__
index_t
get_block_size
()
{
return
blockDim
.
x
;
}
__device__
index_t
get_block_size
()
{
return
blockDim
.
x
;
}
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/ignore.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -23,5 +20,3 @@ struct ignore_t
...
@@ -23,5 +20,3 @@ struct ignore_t
inline
constexpr
detail
::
ignore_t
ignore
;
inline
constexpr
detail
::
ignore_t
ignore
;
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/inner_product.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -237,5 +234,3 @@ inner_product<int8x16_t, int8x16_t, int32_t>(const int8x16_t& a, const int8x16_t
...
@@ -237,5 +234,3 @@ inner_product<int8x16_t, int8x16_t, int32_t>(const int8x16_t& a, const int8x16_t
}
}
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/integral_constant.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -8,50 +5,47 @@
...
@@ -8,50 +5,47 @@
namespace
ck
{
namespace
ck
{
template
<
class
T
,
T
v
>
struct
integral_constant
{
template
<
class
T
,
T
v
>
static
constexpr
T
value
=
v
;
struct
integral_constant
typedef
T
value_type
;
{
typedef
integral_constant
type
;
static
constexpr
T
value
=
v
;
__host__
__device__
constexpr
operator
value_type
()
const
noexcept
{
typedef
T
value_type
;
return
value
;
typedef
integral_constant
type
;
}
__host__
__device__
constexpr
operator
value_type
()
const
noexcept
{
return
value
;
}
__host__
__device__
constexpr
value_type
operator
()()
const
noexcept
{
__host__
__device__
constexpr
value_type
operator
()()
const
noexcept
{
return
value
;
}
return
value
;
}
};
};
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
__host__
__device__
constexpr
auto
operator
+
(
integral_constant
<
TX
,
X
>
,
__host__
__device__
constexpr
auto
operator
+
(
integral_constant
<
TX
,
X
>
,
integral_constant
<
TY
,
Y
>
)
integral_constant
<
TY
,
Y
>
)
{
{
return
integral_constant
<
decltype
(
X
+
Y
),
X
+
Y
>
{};
return
integral_constant
<
decltype
(
X
+
Y
),
X
+
Y
>
{};
}
}
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
__host__
__device__
constexpr
auto
operator
-
(
integral_constant
<
TX
,
X
>
,
__host__
__device__
constexpr
auto
operator
-
(
integral_constant
<
TX
,
X
>
,
integral_constant
<
TY
,
Y
>
)
integral_constant
<
TY
,
Y
>
)
{
{
static_assert
(
Y
<=
X
,
"wrong!"
);
static_assert
(
Y
<=
X
,
"wrong!"
);
return
integral_constant
<
decltype
(
X
-
Y
),
X
-
Y
>
{};
return
integral_constant
<
decltype
(
X
-
Y
),
X
-
Y
>
{};
}
}
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
__host__
__device__
constexpr
auto
operator
*
(
integral_constant
<
TX
,
X
>
,
__host__
__device__
constexpr
auto
operator
*
(
integral_constant
<
TX
,
X
>
,
integral_constant
<
TY
,
Y
>
)
integral_constant
<
TY
,
Y
>
)
{
{
return
integral_constant
<
decltype
(
X
*
Y
),
X
*
Y
>
{};
return
integral_constant
<
decltype
(
X
*
Y
),
X
*
Y
>
{};
}
}
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
__host__
__device__
constexpr
auto
operator
/
(
integral_constant
<
TX
,
X
>
,
__host__
__device__
constexpr
auto
operator
/
(
integral_constant
<
TX
,
X
>
,
integral_constant
<
TY
,
Y
>
)
integral_constant
<
TY
,
Y
>
)
{
{
static_assert
(
Y
>
0
,
"wrong!"
);
static_assert
(
Y
>
0
,
"wrong!"
);
return
integral_constant
<
decltype
(
X
/
Y
),
X
/
Y
>
{};
return
integral_constant
<
decltype
(
X
/
Y
),
X
/
Y
>
{};
}
}
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
template
<
typename
TX
,
TX
X
,
typename
TY
,
TY
Y
>
__host__
__device__
constexpr
auto
operator
%
(
integral_constant
<
TX
,
X
>
,
__host__
__device__
constexpr
auto
operator
%
(
integral_constant
<
TX
,
X
>
,
integral_constant
<
TY
,
Y
>
)
integral_constant
<
TY
,
Y
>
)
{
{
static_assert
(
Y
>
0
,
"wrong!"
);
static_assert
(
Y
>
0
,
"wrong!"
);
return
integral_constant
<
decltype
(
X
%
Y
),
X
%
Y
>
{};
return
integral_constant
<
decltype
(
X
%
Y
),
X
%
Y
>
{};
}
}
}
// namespace ck
#pragma clang diagnostic pop
}
// namespace ck
include/ck/utility/is_known_at_compile_time.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -57,5 +54,3 @@ struct is_known_at_compile_time<Tuple<Ts...>>
...
@@ -57,5 +54,3 @@ struct is_known_at_compile_time<Tuple<Ts...>>
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/magic_division.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -18,135 +15,148 @@ namespace ck {
...
@@ -18,135 +15,148 @@ namespace ck {
// magic number division
// magic number division
// Caution:
// Caution:
// 1. For uint32_t as dividend: magic number division implementation being
// 1. For uint32_t as dividend: magic number division implementation being used would produce
// used would produce correct result if the dividend is uint32_t and its value
// correct result if the dividend is uint32_t and its value is within 31-bit value range.
// is within 31-bit value range.
// 2. For int32_t as dividendd: magic number division for int32_t dividened has not been
// 2. For int32_t as dividendd: magic number division for int32_t dividened
// implemented, the int32_t dividend would be bit-wise interpreted as uint32_t and magic number
// has not been implemented, the int32_t dividend would be bit-wise
// division implementation for uint32_t is then used. Therefore, dividend value need to be
// interpreted as uint32_t and magic number division implementation for
// non-negative.
// uint32_t is then used. Therefore, dividend value need to be non-negative.
// TODO:
// TODO:
// 1. Implement magic number divison for int32_t
// 1. Implement magic number divison for int32_t
// 2. Implement magic number divison for unit32_t with 32-bit value range
// 2. Implement magic number divison for unit32_t with 32-bit value range
struct
MagicDivision
{
struct
MagicDivision
// uint32_t
{
__host__
__device__
static
constexpr
auto
// uint32_t
CalculateMagicNumbers
(
uint32_t
divisor
)
{
__host__
__device__
static
constexpr
auto
CalculateMagicNumbers
(
uint32_t
divisor
)
// WARNING: magic division is only applicable for division inside this
{
// range. You should use the return value of CalculateMagicNumbers, if
// WARNING: magic division is only applicable for division inside this range.
// division is not inside this range. The "else" logic below is to quiet
// You should use the return value of CalculateMagicNumbers, if division is not inside this
// down run-time error.
// range. The "else" logic below is to quiet down run-time error.
if
(
divisor
>=
1
&&
divisor
<=
INT32_MAX
)
{
if
(
divisor
>=
1
&&
divisor
<=
INT32_MAX
)
uint32_t
shift
=
0
;
{
for
(
shift
=
0
;
shift
<
32
;
++
shift
)
{
uint32_t
shift
=
0
;
if
((
1U
<<
shift
)
>=
divisor
)
{
for
(
shift
=
0
;
shift
<
32
;
++
shift
)
break
;
{
if
((
1U
<<
shift
)
>=
divisor
)
{
break
;
}
}
uint64_t
one
=
1
;
uint64_t
multiplier
=
((
one
<<
32
)
*
((
one
<<
shift
)
-
divisor
))
/
divisor
+
1
;
// assert(multiplier <= 0xffffffffUL);
return
make_tuple
(
uint32_t
(
multiplier
),
shift
);
}
}
}
else
{
uint64_t
one
=
1
;
return
make_tuple
(
uint32_t
(
0
),
uint32_t
(
0
));
uint64_t
multiplier
=
}
((
one
<<
32
)
*
((
one
<<
shift
)
-
divisor
))
/
divisor
+
1
;
}
// assert(multiplier <= 0xffffffffUL);
__host__
__device__
static
constexpr
uint32_t
CalculateMagicMultiplier
(
uint32_t
divisor
)
return
make_tuple
(
uint32_t
(
multiplier
),
shift
);
{
}
else
{
auto
tmp
=
CalculateMagicNumbers
(
divisor
);
return
make_tuple
(
uint32_t
(
0
),
uint32_t
(
0
));
}
return
tmp
[
Number
<
0
>
{}];
}
}
__host__
__device__
static
constexpr
uint32_t
__host__
__device__
static
constexpr
uint32_t
CalculateMagicShift
(
uint32_t
divisor
)
CalculateMagicMultiplier
(
uint32_t
divisor
)
{
{
auto
tmp
=
CalculateMagicNumbers
(
divisor
);
auto
tmp
=
CalculateMagicNumbers
(
divisor
);
return
tmp
[
Number
<
0
>
{}];
return
tmp
[
Number
<
1
>
{}];
}
}
__host__
__device__
static
constexpr
uint32_t
// integral_constant<uint32_t, .>
CalculateMagicShift
(
uint32_t
divisor
)
{
template
<
uint32_t
Divisor
>
auto
tmp
=
CalculateMagicNumbers
(
divisor
);
__host__
__device__
static
constexpr
auto
CalculateMagicNumbers
(
integral_constant
<
uint32_t
,
Divisor
>
)
return
tmp
[
Number
<
1
>
{}];
{
}
constexpr
auto
tmp
=
CalculateMagicNumbers
(
uint32_t
{
Divisor
});
// integral_constant<uint32_t, .>
constexpr
uint32_t
multiplier
=
tmp
[
Number
<
0
>
{}];
template
<
uint32_t
Divisor
>
constexpr
uint32_t
shift
=
tmp
[
Number
<
1
>
{}];
__host__
__device__
static
constexpr
auto
CalculateMagicNumbers
(
integral_constant
<
uint32_t
,
Divisor
>
)
{
return
make_tuple
(
integral_constant
<
uint32_t
,
multiplier
>
{},
constexpr
auto
tmp
=
CalculateMagicNumbers
(
uint32_t
{
Divisor
});
integral_constant
<
uint32_t
,
shift
>
{});
}
constexpr
uint32_t
multiplier
=
tmp
[
Number
<
0
>
{}];
constexpr
uint32_t
shift
=
tmp
[
Number
<
1
>
{}];
template
<
uint32_t
Divisor
>
__host__
__device__
static
constexpr
auto
return
make_tuple
(
integral_constant
<
uint32_t
,
multiplier
>
{},
CalculateMagicMultiplier
(
integral_constant
<
uint32_t
,
Divisor
>
)
integral_constant
<
uint32_t
,
shift
>
{});
{
}
constexpr
uint32_t
multiplier
=
CalculateMagicMultiplier
(
uint32_t
{
Divisor
});
template
<
uint32_t
Divisor
>
return
integral_constant
<
uint32_t
,
multiplier
>
{};
__host__
__device__
static
constexpr
auto
}
CalculateMagicMultiplier
(
integral_constant
<
uint32_t
,
Divisor
>
)
{
constexpr
uint32_t
multiplier
=
CalculateMagicMultiplier
(
uint32_t
{
Divisor
});
template
<
uint32_t
Divisor
>
__host__
__device__
static
constexpr
auto
return
integral_constant
<
uint32_t
,
multiplier
>
{};
CalculateMagicShift
(
integral_constant
<
uint32_t
,
Divisor
>
)
}
{
constexpr
uint32_t
shift
=
CalculateMagicShift
(
uint32_t
{
Divisor
});
template
<
uint32_t
Divisor
>
__host__
__device__
static
constexpr
auto
return
integral_constant
<
uint32_t
,
shift
>
{};
CalculateMagicShift
(
integral_constant
<
uint32_t
,
Divisor
>
)
{
}
constexpr
uint32_t
shift
=
CalculateMagicShift
(
uint32_t
{
Divisor
});
// integral_constant<int32_t, .>
return
integral_constant
<
uint32_t
,
shift
>
{};
template
<
int32_t
Divisor
>
}
__host__
__device__
static
constexpr
auto
CalculateMagicNumbers
(
integral_constant
<
int32_t
,
Divisor
>
)
// integral_constant<int32_t, .>
{
template
<
int32_t
Divisor
>
return
CalculateMagicNumbers
(
integral_constant
<
uint32_t
,
Divisor
>
{});
__host__
__device__
static
constexpr
auto
}
CalculateMagicNumbers
(
integral_constant
<
int32_t
,
Divisor
>
)
{
return
CalculateMagicNumbers
(
integral_constant
<
uint32_t
,
Divisor
>
{});
template
<
int32_t
Divisor
>
}
__host__
__device__
static
constexpr
auto
CalculateMagicMultiplier
(
integral_constant
<
int32_t
,
Divisor
>
)
template
<
int32_t
Divisor
>
{
__host__
__device__
static
constexpr
auto
return
CalculateMagicMultiplier
(
integral_constant
<
uint32_t
,
Divisor
>
{});
CalculateMagicMultiplier
(
integral_constant
<
int32_t
,
Divisor
>
)
{
}
return
CalculateMagicMultiplier
(
integral_constant
<
uint32_t
,
Divisor
>
{});
}
template
<
int32_t
Divisor
>
__host__
__device__
static
constexpr
auto
template
<
int32_t
Divisor
>
CalculateMagicShift
(
integral_constant
<
int32_t
,
Divisor
>
)
__host__
__device__
static
constexpr
auto
{
CalculateMagicShift
(
integral_constant
<
int32_t
,
Divisor
>
)
{
return
CalculateMagicShift
(
integral_constant
<
uint32_t
,
Divisor
>
{});
return
CalculateMagicShift
(
integral_constant
<
uint32_t
,
Divisor
>
{});
}
}
// magic division for uint32_t
// magic division for uint32_t
__device__
static
constexpr
uint32_t
__device__
static
constexpr
uint32_t
DoMagicDivision
(
uint32_t
dividend
,
uint32_t
multiplier
,
uint32_t
shift
)
DoMagicDivision
(
uint32_t
dividend
,
uint32_t
multiplier
,
uint32_t
shift
)
{
{
uint32_t
tmp
=
__umulhi
(
dividend
,
multiplier
);
uint32_t
tmp
=
__umulhi
(
dividend
,
multiplier
);
return
(
tmp
+
dividend
)
>>
shift
;
return
(
tmp
+
dividend
)
>>
shift
;
}
}
__host__
static
constexpr
uint32_t
__host__
static
constexpr
uint32_t
DoMagicDivision
(
uint32_t
dividend
,
uint32_t
multiplier
,
uint32_t
shift
)
{
DoMagicDivision
(
uint32_t
dividend
,
uint32_t
multiplier
,
uint32_t
shift
)
uint32_t
tmp
=
static_cast
<
uint64_t
>
(
dividend
)
*
multiplier
>>
32
;
{
return
(
tmp
+
dividend
)
>>
shift
;
uint32_t
tmp
=
static_cast
<
uint64_t
>
(
dividend
)
*
multiplier
>>
32
;
}
return
(
tmp
+
dividend
)
>>
shift
;
}
// magic division for int32_t
// HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
// magic division for int32_t
// non-negative for result to be correct
// HACK: use dividend_i32 as if it's uint32_t, dividend_i32 need to be
// TODO: figure out how to do magic number divison for int32_t as dividended
// non-negative for result to be correct
__device__
static
constexpr
int32_t
// TODO: figure out how to do magic number divison for int32_t as dividended
DoMagicDivision
(
int32_t
dividend_i32
,
uint32_t
multiplier
,
uint32_t
shift
)
{
__device__
static
constexpr
int32_t
uint32_t
dividend_u32
=
bit_cast
<
uint32_t
>
(
dividend_i32
);
DoMagicDivision
(
int32_t
dividend_i32
,
uint32_t
multiplier
,
uint32_t
shift
)
uint32_t
tmp
=
__umulhi
(
dividend_u32
,
multiplier
);
{
return
(
tmp
+
dividend_u32
)
>>
shift
;
uint32_t
dividend_u32
=
bit_cast
<
uint32_t
>
(
dividend_i32
);
}
uint32_t
tmp
=
__umulhi
(
dividend_u32
,
multiplier
);
return
(
tmp
+
dividend_u32
)
>>
shift
;
__host__
static
constexpr
int32_t
}
DoMagicDivision
(
int32_t
dividend_i32
,
uint32_t
multiplier
,
uint32_t
shift
)
{
uint32_t
dividend_u32
=
bit_cast
<
uint32_t
>
(
dividend_i32
);
__host__
static
constexpr
int32_t
uint32_t
tmp
=
static_cast
<
uint64_t
>
(
dividend_u32
)
*
multiplier
>>
32
;
DoMagicDivision
(
int32_t
dividend_i32
,
uint32_t
multiplier
,
uint32_t
shift
)
return
(
tmp
+
dividend_u32
)
>>
shift
;
{
}
uint32_t
dividend_u32
=
bit_cast
<
uint32_t
>
(
dividend_i32
);
uint32_t
tmp
=
static_cast
<
uint64_t
>
(
dividend_u32
)
*
multiplier
>>
32
;
return
(
tmp
+
dividend_u32
)
>>
shift
;
}
};
};
struct
MDiv
struct
MDiv
...
@@ -222,5 +232,3 @@ struct MDiv2
...
@@ -222,5 +232,3 @@ struct MDiv2
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/math.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "enable_if.hpp"
#include "integral_constant.hpp"
#include "integral_constant.hpp"
#include "number.hpp"
#include "number.hpp"
#include "type.hpp"
#include "type.hpp"
#include "enable_if.hpp"
namespace
ck
{
namespace
ck
{
namespace
math
{
namespace
math
{
template
<
typename
T
,
T
s
>
struct
scales
{
template
<
typename
T
,
T
s
>
__host__
__device__
constexpr
T
operator
()(
T
a
)
const
{
return
s
*
a
;
}
struct
scales
{
__host__
__device__
constexpr
T
operator
()(
T
a
)
const
{
return
s
*
a
;
}
};
};
template
<
typename
T
>
struct
plus
{
template
<
typename
T
>
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
return
a
+
b
;
}
struct
plus
{
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
return
a
+
b
;
}
};
};
template
<
typename
T
>
struct
minus
{
template
<
typename
T
>
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
return
a
-
b
;
}
struct
minus
{
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
return
a
-
b
;
}
};
};
struct
multiplies
{
struct
multiplies
template
<
typename
A
,
typename
B
>
{
__host__
__device__
constexpr
auto
operator
()(
const
A
&
a
,
const
B
&
b
)
const
{
template
<
typename
A
,
typename
B
>
return
a
*
b
;
__host__
__device__
constexpr
auto
operator
()(
const
A
&
a
,
const
B
&
b
)
const
}
{
return
a
*
b
;
}
};
};
template
<
typename
T
>
struct
maximize
{
template
<
typename
T
>
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
struct
maximize
return
a
>=
b
?
a
:
b
;
{
}
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
return
a
>=
b
?
a
:
b
;
}
};
};
template
<
typename
T
>
struct
minimize
{
template
<
typename
T
>
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
struct
minimize
return
a
<=
b
?
a
:
b
;
{
}
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
return
a
<=
b
?
a
:
b
;
}
};
};
template
<
typename
T
>
struct
integer_divide_ceiler
{
template
<
typename
T
>
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
struct
integer_divide_ceiler
static_assert
(
is_same
<
T
,
index_t
>
{}
||
is_same
<
T
,
int
>
{},
"wrong type"
);
{
__host__
__device__
constexpr
T
operator
()(
T
a
,
T
b
)
const
{
static_assert
(
is_same
<
T
,
index_t
>
{}
||
is_same
<
T
,
int
>
{},
"wrong type"
);
return
(
a
+
b
-
Number
<
1
>
{})
/
b
;
return
(
a
+
b
-
Number
<
1
>
{})
/
b
;
}
}
};
};
template
<
typename
X
,
typename
Y
>
template
<
typename
X
,
typename
Y
>
__host__
__device__
constexpr
auto
integer_divide_floor
(
X
x
,
Y
y
)
{
__host__
__device__
constexpr
auto
integer_divide_floor
(
X
x
,
Y
y
)
return
x
/
y
;
{
return
x
/
y
;
}
}
template
<
typename
X
,
typename
Y
>
template
<
typename
X
,
typename
Y
>
__host__
__device__
constexpr
auto
integer_divide_ceil
(
X
x
,
Y
y
)
{
__host__
__device__
constexpr
auto
integer_divide_ceil
(
X
x
,
Y
y
)
return
(
x
+
y
-
Number
<
1
>
{})
/
y
;
{
return
(
x
+
y
-
Number
<
1
>
{})
/
y
;
}
}
template
<
typename
X
,
typename
Y
>
template
<
typename
X
,
typename
Y
>
__host__
__device__
constexpr
auto
integer_least_multiple
(
X
x
,
Y
y
)
{
__host__
__device__
constexpr
auto
integer_least_multiple
(
X
x
,
Y
y
)
return
y
*
integer_divide_ceil
(
x
,
y
);
{
return
y
*
integer_divide_ceil
(
x
,
y
);
}
}
template
<
typename
T
>
__host__
__device__
constexpr
T
max
(
T
x
)
{
return
x
;
}
template
<
typename
T
>
__host__
__device__
constexpr
T
max
(
T
x
)
{
return
x
;
}
template
<
typename
T
>
__host__
__device__
constexpr
T
max
(
T
x
,
T
y
)
{
template
<
typename
T
>
return
x
>
y
?
x
:
y
;
__host__
__device__
constexpr
T
max
(
T
x
,
T
y
)
{
return
x
>
y
?
x
:
y
;
}
}
template
<
index_t
X
>
template
<
index_t
X
>
__host__
__device__
constexpr
index_t
max
(
Number
<
X
>
,
index_t
y
)
{
__host__
__device__
constexpr
index_t
max
(
Number
<
X
>
,
index_t
y
)
return
X
>
y
?
X
:
y
;
{
return
X
>
y
?
X
:
y
;
}
}
template
<
index_t
Y
>
template
<
index_t
Y
>
__host__
__device__
constexpr
index_t
max
(
index_t
x
,
Number
<
Y
>
)
{
__host__
__device__
constexpr
index_t
max
(
index_t
x
,
Number
<
Y
>
)
return
x
>
Y
?
x
:
Y
;
{
return
x
>
Y
?
x
:
Y
;
}
}
template
<
typename
X
,
typename
...
Ys
>
template
<
typename
X
,
typename
...
Ys
>
__host__
__device__
constexpr
auto
max
(
X
x
,
Ys
...
ys
)
{
__host__
__device__
constexpr
auto
max
(
X
x
,
Ys
...
ys
)
static_assert
(
sizeof
...(
Ys
)
>
0
,
"not enough argument"
);
{
static_assert
(
sizeof
...(
Ys
)
>
0
,
"not enough argument"
);
return
max
(
x
,
max
(
ys
...));
return
max
(
x
,
max
(
ys
...));
}
}
template
<
typename
T
>
__host__
__device__
constexpr
T
min
(
T
x
)
{
return
x
;
}
template
<
typename
T
>
__host__
__device__
constexpr
T
min
(
T
x
)
{
return
x
;
}
template
<
typename
T
>
__host__
__device__
constexpr
T
min
(
T
x
,
T
y
)
{
template
<
typename
T
>
return
x
<
y
?
x
:
y
;
__host__
__device__
constexpr
T
min
(
T
x
,
T
y
)
{
return
x
<
y
?
x
:
y
;
}
}
template
<
index_t
X
>
template
<
index_t
X
>
__host__
__device__
constexpr
index_t
min
(
Number
<
X
>
,
index_t
y
)
{
__host__
__device__
constexpr
index_t
min
(
Number
<
X
>
,
index_t
y
)
return
X
<
y
?
X
:
y
;
{
return
X
<
y
?
X
:
y
;
}
}
template
<
index_t
Y
>
template
<
index_t
Y
>
__host__
__device__
constexpr
index_t
min
(
index_t
x
,
Number
<
Y
>
)
{
__host__
__device__
constexpr
index_t
min
(
index_t
x
,
Number
<
Y
>
)
return
x
<
Y
?
x
:
Y
;
{
return
x
<
Y
?
x
:
Y
;
}
}
template
<
typename
X
,
typename
...
Ys
>
template
<
typename
X
,
typename
...
Ys
>
__host__
__device__
constexpr
auto
min
(
X
x
,
Ys
...
ys
)
{
__host__
__device__
constexpr
auto
min
(
X
x
,
Ys
...
ys
)
static_assert
(
sizeof
...(
Ys
)
>
0
,
"not enough argument"
);
{
static_assert
(
sizeof
...(
Ys
)
>
0
,
"not enough argument"
);
return
min
(
x
,
min
(
ys
...));
return
min
(
x
,
min
(
ys
...));
}
}
template
<
typename
T
>
template
<
typename
T
>
__host__
__device__
constexpr
T
clamp
(
const
T
&
x
,
const
T
&
lowerbound
,
__host__
__device__
constexpr
T
clamp
(
const
T
&
x
,
const
T
&
lowerbound
,
const
T
&
upperbound
)
const
T
&
upperbound
)
{
{
return
min
(
max
(
x
,
lowerbound
),
upperbound
);
return
min
(
max
(
x
,
lowerbound
),
upperbound
);
}
}
// disallow implicit type casting
// disallow implicit type casting
template
<
typename
T
>
__device__
T
exp
(
T
x
);
template
<
typename
T
>
__device__
T
exp
(
T
x
);
// TODO: add f16 support using v_exp_f16
// TODO: add f16 support using v_exp_f16
template
<
>
__device__
float
exp
<
float
>
(
float
x
)
{
return
__expf
(
x
);
}
template
<
>
__device__
float
exp
<
float
>
(
float
x
)
{
return
__expf
(
x
);
}
template
<
>
__device__
double
exp
<
double
>
(
double
x
)
{
return
exp
(
x
);
}
template
<
>
__device__
double
exp
<
double
>
(
double
x
)
{
return
exp
(
x
);
}
//
static inline __host__ float exp(float x) { return ::expf(x); }
static
inline
__host__
float
exp
(
float
x
)
{
return
::
expf
(
x
);
}
//
static inline __host__ double exp(double x) { return std::exp(x); }
static
inline
__host__
double
exp
(
double
x
)
{
return
std
::
exp
(
x
);
}
// greatest common divisor, aka highest common factor
// greatest common divisor, aka highest common factor
__host__
__device__
constexpr
index_t
gcd
(
index_t
x
,
index_t
y
)
{
__host__
__device__
constexpr
index_t
gcd
(
index_t
x
,
index_t
y
)
if
(
x
<
0
)
{
{
return
gcd
(
-
x
,
y
);
if
(
x
<
0
)
}
else
if
(
y
<
0
)
{
{
return
gcd
(
x
,
-
y
);
return
gcd
(
-
x
,
y
);
}
else
if
(
x
==
y
||
x
==
0
)
{
}
return
y
;
else
if
(
y
<
0
)
}
else
if
(
y
==
0
)
{
{
return
x
;
return
gcd
(
x
,
-
y
);
}
else
if
(
x
>
y
)
{
}
return
gcd
(
x
%
y
,
y
);
else
if
(
x
==
y
||
x
==
0
)
}
else
{
{
return
gcd
(
x
,
y
%
x
);
return
y
;
}
}
else
if
(
y
==
0
)
{
return
x
;
}
else
if
(
x
>
y
)
{
return
gcd
(
x
%
y
,
y
);
}
else
{
return
gcd
(
x
,
y
%
x
);
}
}
}
template
<
index_t
X
,
index_t
Y
>
template
<
index_t
X
,
index_t
Y
>
__host__
__device__
constexpr
auto
gcd
(
Number
<
X
>
,
Number
<
Y
>
)
{
__host__
__device__
constexpr
auto
gcd
(
Number
<
X
>
,
Number
<
Y
>
)
constexpr
auto
r
=
gcd
(
X
,
Y
);
{
constexpr
auto
r
=
gcd
(
X
,
Y
);
return
Number
<
r
>
{};
return
Number
<
r
>
{};
}
}
template
<
typename
X
,
typename
...
Ys
,
template
<
typename
X
,
typename
...
Ys
,
typename
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
typename
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
auto
gcd
(
X
x
,
Ys
...
ys
)
__host__
__device__
constexpr
auto
gcd
(
X
x
,
Ys
...
ys
)
{
{
return
gcd
(
x
,
gcd
(
ys
...));
return
gcd
(
x
,
gcd
(
ys
...));
}
}
// least common multiple
// least common multiple
template
<
typename
X
,
typename
Y
>
template
<
typename
X
,
typename
Y
>
__host__
__device__
constexpr
auto
lcm
(
X
x
,
Y
y
)
{
__host__
__device__
constexpr
auto
lcm
(
X
x
,
Y
y
)
return
(
x
*
y
)
/
gcd
(
x
,
y
);
{
return
(
x
*
y
)
/
gcd
(
x
,
y
);
}
}
template
<
typename
X
,
typename
...
Ys
,
template
<
typename
X
,
typename
...
Ys
,
typename
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
typename
enable_if
<
sizeof
...(
Ys
)
>
=
2
,
bool
>::
type
=
false
>
__host__
__device__
constexpr
auto
lcm
(
X
x
,
Ys
...
ys
)
__host__
__device__
constexpr
auto
lcm
(
X
x
,
Ys
...
ys
)
{
{
return
lcm
(
x
,
lcm
(
ys
...));
return
lcm
(
x
,
lcm
(
ys
...));
}
}
template
<
typename
T
>
struct
equal
{
template
<
typename
T
>
__host__
__device__
constexpr
bool
operator
()(
T
x
,
T
y
)
const
{
struct
equal
return
x
==
y
;
{
}
__host__
__device__
constexpr
bool
operator
()(
T
x
,
T
y
)
const
{
return
x
==
y
;
}
};
};
template
<
typename
T
>
struct
less
{
template
<
typename
T
>
__host__
__device__
constexpr
bool
operator
()(
T
x
,
T
y
)
const
{
struct
less
return
x
<
y
;
{
}
__host__
__device__
constexpr
bool
operator
()(
T
x
,
T
y
)
const
{
return
x
<
y
;
}
};
};
template
<
index_t
X
>
template
<
index_t
X
>
...
@@ -206,5 +258,3 @@ __host__ __device__ constexpr auto next_power_of_two(Number<X> x)
...
@@ -206,5 +258,3 @@ __host__ __device__ constexpr auto next_power_of_two(Number<X> x)
}
// namespace math
}
// namespace math
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/math_v2.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -16,169 +13,177 @@
...
@@ -16,169 +13,177 @@
namespace
ck
{
namespace
ck
{
namespace
math
{
namespace
math
{
// math functions for the host, some are implemented by calling C++ std
// math functions for the host, some are implemented by calling C++ std functions
// functions
static
inline
__host__
float
abs
(
float
x
)
{
return
x
<
0
?
x
*
-
1.0
:
x
;
};
static
inline
__host__
float
abs
(
float
x
)
{
return
std
::
abs
(
x
)
;
};
static
inline
__host__
double
abs
(
double
x
)
{
return
x
<
0
?
x
*
-
1.0
:
x
;
};
static
inline
__host__
double
abs
(
double
x
)
{
return
std
::
abs
(
x
)
;
};
static
inline
__host__
int8_t
abs
(
int8_t
x
)
{
static
inline
__host__
int8_t
abs
(
int8_t
x
)
int8_t
sgn
=
x
>>
(
8
-
1
);
{
int8_t
sgn
=
x
>>
(
8
-
1
);
return
(
x
^
sgn
)
-
sgn
;
return
(
x
^
sgn
)
-
sgn
;
};
};
static
inline
__host__
int32_t
abs
(
int32_t
x
)
{
static
inline
__host__
int32_t
abs
(
int32_t
x
)
int32_t
sgn
=
x
>>
(
32
-
1
);
{
int32_t
sgn
=
x
>>
(
32
-
1
);
return
(
x
^
sgn
)
-
sgn
;
return
(
x
^
sgn
)
-
sgn
;
};
};
static
inline
__host__
half_t
abs
(
half_t
x
)
{
static
inline
__host__
half_t
abs
(
half_t
x
)
uint16_t
xx
=
ck
::
bit_cast
<
uint16_t
>
(
x
);
{
uint16_t
xx
=
ck
::
bit_cast
<
uint16_t
>
(
x
);
uint16_t
abs_xx
=
xx
&
0x7fff
;
uint16_t
abs_xx
=
xx
&
0x7fff
;
half_t
abs_x
=
ck
::
bit_cast
<
half_t
>
(
abs_xx
);
half_t
abs_x
=
ck
::
bit_cast
<
half_t
>
(
abs_xx
);
return
abs_x
;
return
abs_x
;
};
};
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
static
inline
__host__
int4_t
abs
(
int4_t
x
)
{
static
inline
__host__
int4_t
abs
(
int4_t
x
)
int4_t
sgn
=
x
>>
(
4
-
1
);
{
return
(
x
^
sgn
)
-
sgn
;
int4_t
sgn
=
x
>>
(
4
-
1
);
return
(
x
^
sgn
)
-
sgn
;
}
}
#endif
#endif
// TODO: to bit arithmetic to figure it out
static
inline
__host__
bool
isnan
(
float
x
)
{
return
std
::
isnan
(
x
);
};
static
inline
__host__
bool
isnan
(
float
x
)
{
(
void
)
x
;
return
false
;
};
static
inline
__host__
bool
isnan
(
double
x
)
{
static
inline
__host__
bool
isnan
(
double
x
)
{
return
std
::
isnan
(
x
);
};
(
void
)
x
;
return
false
;
};
static
inline
__host__
bool
isnan
(
int8_t
x
)
{
static
inline
__host__
bool
isnan
(
int8_t
x
)
(
void
)
x
;
{
return
false
;
(
void
)
x
;
return
false
;
};
};
static
inline
__host__
bool
isnan
(
int32_t
x
)
{
static
inline
__host__
bool
isnan
(
int32_t
x
)
(
void
)
x
;
{
return
false
;
(
void
)
x
;
return
false
;
};
};
static
inline
__host__
bool
isnan
(
half_t
x
)
{
static
inline
__host__
bool
isnan
(
half_t
x
)
uint16_t
xx
=
ck
::
bit_cast
<
uint16_t
>
(
x
);
{
uint16_t
xx
=
ck
::
bit_cast
<
uint16_t
>
(
x
);
return
(
xx
&
0x7FFF
)
>
0x7C00
;
return
(
xx
&
0x7FFF
)
>
0x7C00
;
};
};
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
static
inline
__host__
bool
isnan
(
int4_t
x
)
{
static
inline
__host__
bool
isnan
(
int4_t
x
)
(
void
)
x
;
{
return
false
;
(
void
)
x
;
return
false
;
};
};
#endif
#endif
// MIGRAPHX doesn't care about host compilation, just return identity values for
static
inline
__host__
half_t
sqrt
(
half_t
x
)
// now
{
return
static_cast
<
half_t
>
(
std
::
sqrt
(
static_cast
<
float
>
(
x
)));
static
inline
__host__
half_t
sqrt
(
half_t
x
)
{
return
x
;
};
};
static
inline
__host__
float
sqrt
(
float
x
)
{
return
x
;
};
static
inline
__host__
float
sqrt
(
float
x
)
{
return
std
::
sqrt
(
x
)
;
};
static
inline
__host__
double
sqrt
(
double
x
)
{
return
x
;
};
static
inline
__host__
double
sqrt
(
double
x
)
{
return
std
::
sqrt
(
x
)
;
};
static
inline
__host__
half_t
tanh
(
half_t
x
)
{
return
x
;
};
static
inline
__host__
half_t
tanh
(
half_t
x
)
{
return
static_cast
<
half_t
>
(
std
::
tanh
(
static_cast
<
float
>
(
x
)));
};
static
inline
__host__
float
tanh
(
float
x
)
{
return
x
;
};
static
inline
__host__
float
tanh
(
float
x
)
{
return
std
::
tanh
(
x
)
;
};
static
inline
__host__
double
tanh
(
double
x
)
{
return
x
;
};
static
inline
__host__
double
tanh
(
double
x
)
{
return
std
::
tanh
(
x
)
;
};
// math functions for the HIP kernel, some are implemented by calling hip
// math functions for the HIP kernel, some are implemented by calling hip builtin functions
// builtin functions
static
inline
__device__
float
abs
(
float
x
)
{
return
::
abs
(
x
);
};
static
inline
__device__
float
abs
(
float
x
)
{
return
::
abs
(
x
);
};
static
inline
__device__
double
abs
(
double
x
)
{
return
::
abs
(
x
);
};
static
inline
__device__
double
abs
(
double
x
)
{
return
::
abs
(
x
);
};
static
inline
__device__
int8_t
abs
(
int8_t
x
)
{
static
inline
__device__
int8_t
abs
(
int8_t
x
)
int8_t
sgn
=
x
>>
(
8
-
1
);
{
int8_t
sgn
=
x
>>
(
8
-
1
);
return
(
x
^
sgn
)
-
sgn
;
return
(
x
^
sgn
)
-
sgn
;
};
};
static
inline
__device__
int32_t
abs
(
int32_t
x
)
{
static
inline
__device__
int32_t
abs
(
int32_t
x
)
int32_t
sgn
=
x
>>
(
32
-
1
);
{
int32_t
sgn
=
x
>>
(
32
-
1
);
return
(
x
^
sgn
)
-
sgn
;
return
(
x
^
sgn
)
-
sgn
;
};
};
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
static
inline
__device__
int4_t
abs
(
int4_t
x
)
{
static
inline
__device__
int4_t
abs
(
int4_t
x
)
int4_t
sgn
=
x
>>
(
4
-
1
);
{
int4_t
sgn
=
x
>>
(
4
-
1
);
return
(
x
^
sgn
)
-
sgn
;
return
(
x
^
sgn
)
-
sgn
;
};
};
#endif
#endif
static
inline
__device__
half_t
abs
(
half_t
x
)
{
static
inline
__device__
half_t
abs
(
half_t
x
)
uint16_t
xx
=
ck
::
bit_cast
<
uint16_t
>
(
x
);
{
uint16_t
xx
=
ck
::
bit_cast
<
uint16_t
>
(
x
);
uint16_t
abs_xx
=
xx
&
0x7fff
;
uint16_t
abs_xx
=
xx
&
0x7fff
;
half_t
abs_x
=
ck
::
bit_cast
<
half_t
>
(
abs_xx
);
half_t
abs_x
=
ck
::
bit_cast
<
half_t
>
(
abs_xx
);
return
abs_x
;
return
abs_x
;
};
};
static
inline
__device__
bool
isnan
(
float
x
)
{
return
::
isnan
(
x
);
};
static
inline
__device__
bool
isnan
(
float
x
)
{
return
::
isnan
(
x
);
};
static
inline
__device__
bool
isnan
(
double
x
)
{
return
::
isnan
(
x
);
};
static
inline
__device__
bool
isnan
(
double
x
)
{
return
::
isnan
(
x
);
};
static
inline
__device__
bool
isnan
(
int8_t
x
)
{
static
inline
__device__
bool
isnan
(
int8_t
x
)
(
void
)
x
;
{
return
false
;
(
void
)
x
;
return
false
;
};
};
static
inline
__device__
bool
isnan
(
int32_t
x
)
{
static
inline
__device__
bool
isnan
(
int32_t
x
)
(
void
)
x
;
{
return
false
;
(
void
)
x
;
return
false
;
};
};
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
static
inline
__device__
bool
isnan
(
int4_t
x
)
{
static
inline
__device__
bool
isnan
(
int4_t
x
)
(
void
)
x
;
{
return
false
;
(
void
)
x
;
return
false
;
};
};
#endif
#endif
static
inline
__device__
bool
isnan
(
half_t
x
)
{
static
inline
__device__
bool
isnan
(
half_t
x
)
uint16_t
xx
=
ck
::
bit_cast
<
uint16_t
>
(
x
);
{
uint16_t
xx
=
ck
::
bit_cast
<
uint16_t
>
(
x
);
return
(
xx
&
0x7FFF
)
>
0x7C00
;
return
(
xx
&
0x7FFF
)
>
0x7C00
;
};
};
static
inline
__device__
half_t
sqrt
(
half_t
x
)
{
static
inline
__device__
half_t
sqrt
(
half_t
x
)
return
static_cast
<
half_t
>
(
__builtin_amdgcn_sqrtf
(
static_cast
<
float
>
(
x
)));
{
return
static_cast
<
half_t
>
(
__builtin_amdgcn_sqrtf
(
static_cast
<
float
>
(
x
)));
};
};
static
inline
__device__
float
sqrt
(
float
x
)
{
static
inline
__device__
float
sqrt
(
float
x
)
{
return
__builtin_amdgcn_sqrtf
(
x
);
};
return
__builtin_amdgcn_sqrtf
(
x
);
};
static
inline
__device__
double
sqrt
(
double
x
)
{
static
inline
__device__
double
sqrt
(
double
x
)
{
return
__builtin_amdgcn_sqrt
(
x
);
};
return
__builtin_amdgcn_sqrt
(
x
);
};
static
inline
__device__
half_t
tanh
(
half_t
x
)
{
static
inline
__device__
half_t
tanh
(
half_t
x
)
return
static_cast
<
half_t
>
(
::
tanhf
(
static_cast
<
float
>
(
x
)));
{
return
static_cast
<
half_t
>
(
::
tanhf
(
static_cast
<
float
>
(
x
)));
};
};
static
inline
__device__
float
tanh
(
float
x
)
{
return
::
tanhf
(
x
);
};
static
inline
__device__
float
tanh
(
float
x
)
{
return
::
tanhf
(
x
);
};
...
@@ -187,5 +192,3 @@ static inline __device__ double tanh(double x) { return ::tanh(x); };
...
@@ -187,5 +192,3 @@ static inline __device__ double tanh(double x) { return ::tanh(x); };
}
// namespace math
}
// namespace math
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/multi_index.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -13,5 +10,3 @@
...
@@ -13,5 +10,3 @@
#else
#else
#include "statically_indexed_array_multi_index.hpp"
#include "statically_indexed_array_multi_index.hpp"
#endif
#endif
#pragma clang diagnostic pop
include/ck/utility/number.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -19,5 +16,3 @@ using LongNumber = integral_constant<long_index_t, N>;
...
@@ -19,5 +16,3 @@ using LongNumber = integral_constant<long_index_t, N>;
}
// namespace ck
}
// namespace ck
#endif
#endif
#pragma clang diagnostic pop
include/ck/utility/reduction_common.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -29,5 +26,3 @@ struct float_equal_zero
...
@@ -29,5 +26,3 @@ struct float_equal_zero
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/reduction_enums.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -42,5 +39,3 @@ enum struct IndicesType
...
@@ -42,5 +39,3 @@ enum struct IndicesType
};
};
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/reduction_functions_accumulate.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -116,5 +113,3 @@ struct AccumulateWithIndexAndNanCheck<true, ReduceOperation, AccDataType, IndexD
...
@@ -116,5 +113,3 @@ struct AccumulateWithIndexAndNanCheck<true, ReduceOperation, AccDataType, IndexD
}
// namespace detail
}
// namespace detail
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/reduction_operator.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -294,5 +291,3 @@ struct InMemoryDataOperationSupportedOnDataType<InMemoryDataOperationEnum::Add,
...
@@ -294,5 +291,3 @@ struct InMemoryDataOperationSupportedOnDataType<InMemoryDataOperationEnum::Add,
}
// namespace reduce
}
// namespace reduce
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/sequence.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -900,5 +897,3 @@ template <index_t NSize, index_t I>
...
@@ -900,5 +897,3 @@ template <index_t NSize, index_t I>
using
uniform_sequence_gen_t
=
typename
uniform_sequence_gen
<
NSize
,
I
>::
type
;
using
uniform_sequence_gen_t
=
typename
uniform_sequence_gen
<
NSize
,
I
>::
type
;
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/sequence_helper.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -38,5 +35,3 @@ __host__ __device__ constexpr auto to_sequence(Tuple<Number<Is>...>)
...
@@ -38,5 +35,3 @@ __host__ __device__ constexpr auto to_sequence(Tuple<Number<Is>...>)
}
}
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/static_buffer.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -196,5 +193,3 @@ __host__ __device__ constexpr auto make_static_buffer(LongNumber<N>)
...
@@ -196,5 +193,3 @@ __host__ __device__ constexpr auto make_static_buffer(LongNumber<N>)
}
}
}
// namespace ck
}
// namespace ck
#pragma clang diagnostic pop
include/ck/utility/statically_indexed_array.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -106,5 +103,3 @@ struct StaticallyIndexedArray_v2
...
@@ -106,5 +103,3 @@ struct StaticallyIndexedArray_v2
}
// namespace ck
}
// namespace ck
#endif
#endif
#pragma clang diagnostic pop
include/ck/utility/statically_indexed_array_multi_index.hpp
View file @
f000fe32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
...
@@ -163,5 +160,3 @@ __host__ __device__ void print_multi_index(const Tuple<Xs...>& x)
...
@@ -163,5 +160,3 @@ __host__ __device__ void print_multi_index(const Tuple<Xs...>& x)
}
// namespace ck
}
// namespace ck
#endif
#endif
#pragma clang diagnostic pop
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment