Commit 5be87179 authored by Paul's avatar Paul
Browse files

Format

parent ce4f0940
#ifndef MIGRAPHX_GUARD_KERNELS_PP_HPP #ifndef MIGRAPHX_GUARD_KERNELS_PP_HPP
#define MIGRAPHX_GUARD_KERNELS_PP_HPP #define MIGRAPHX_GUARD_KERNELS_PP_HPP
#define MIGRAPHX_PP_PRIMITIVE_CAT(x, y) x##y #define MIGRAPHX_PP_PRIMITIVE_CAT(x, y) x##y
#define MIGRAPHX_PP_CAT(x, y) MIGRAPHX_PP_PRIMITIVE_CAT(x, y) #define MIGRAPHX_PP_CAT(x, y) MIGRAPHX_PP_PRIMITIVE_CAT(x, y)
...@@ -20,10 +19,10 @@ ...@@ -20,10 +19,10 @@
#define MIGRAPHX_PP_REPEAT9(m, ...) MIGRAPHX_PP_REPEAT8(m, __VA_ARGS__) m(9, __VA_ARGS__) #define MIGRAPHX_PP_REPEAT9(m, ...) MIGRAPHX_PP_REPEAT8(m, __VA_ARGS__) m(9, __VA_ARGS__)
#define MIGRAPHX_PP_REPEAT10(m, ...) MIGRAPHX_PP_REPEAT9(m, __VA_ARGS__) m(10, __VA_ARGS__) #define MIGRAPHX_PP_REPEAT10(m, ...) MIGRAPHX_PP_REPEAT9(m, __VA_ARGS__) m(10, __VA_ARGS__)
#define MIGRAPHX_PP_REPEAT(n, m, ...) MIGRAPHX_PP_PRIMITIVE_CAT(MIGRAPHX_PP_REPEAT, n)(m, __VA_ARGS__) #define MIGRAPHX_PP_REPEAT(n, m, ...) \
MIGRAPHX_PP_PRIMITIVE_CAT(MIGRAPHX_PP_REPEAT, n)(m, __VA_ARGS__)
namespace migraphx { namespace migraphx {
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_PP_HPP #endif // MIGRAPHX_GUARD_KERNELS_PP_HPP
...@@ -85,12 +85,12 @@ __device__ void dpp_reduce(T& in, Op op) ...@@ -85,12 +85,12 @@ __device__ void dpp_reduce(T& in, Op op)
#if 1 #if 1
#if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK) #if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK)
// NOLINTNEXTLINE // NOLINTNEXTLINE
#define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \ #define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \
template<unsigned int SubWaveSize> \ template <unsigned int SubWaveSize> \
__device__ inline void dpp_reduce(type& x, op f) \ __device__ inline void dpp_reduce(type& x, op f) \
{ \ { \
(void)f; \ (void)f; \
x = 1; \ x = 1; \
} }
#else #else
#define MIGRAPHX_DPP_IIF64(then, ...) then #define MIGRAPHX_DPP_IIF64(then, ...) then
...@@ -105,44 +105,54 @@ __device__ void dpp_reduce(T& in, Op op) ...@@ -105,44 +105,54 @@ __device__ void dpp_reduce(T& in, Op op)
#define MIGRAPHX_DPP_REDUCE_ASM4(ins) #ins " %0 %0 %0 row_bcast:15 row_mask:0xa\n" #define MIGRAPHX_DPP_REDUCE_ASM4(ins) #ins " %0 %0 %0 row_bcast:15 row_mask:0xa\n"
#define MIGRAPHX_DPP_REDUCE_ASM5(ins) #ins " %0 %0 %0 row_bcast:31 row_mask:0xc\n" #define MIGRAPHX_DPP_REDUCE_ASM5(ins) #ins " %0 %0 %0 row_bcast:31 row_mask:0xc\n"
#define MIGRAPHX_DPP_REDUCE_ASM_REPEAT(i, ins) MIGRAPHX_PP_CAT(MIGRAPHX_DPP_REDUCE_ASM, i)(ins) "s_nop 1\n" #define MIGRAPHX_DPP_REDUCE_ASM_REPEAT(i, ins) \
#define MIGRAPHX_DPP_REDUCE_ASM(n, x, ins, ...) { \ MIGRAPHX_PP_CAT(MIGRAPHX_DPP_REDUCE_ASM, i)(ins) "s_nop 1\n"
__asm__ volatile("s_nop 4\n" \ #define MIGRAPHX_DPP_REDUCE_ASM(n, x, ins, ...) \
MIGRAPHX_PP_REPEAT(n, MIGRAPHX_DPP_REDUCE_ASM_REPEAT, ins) \ { \
: "=v"(x) \ __asm__ volatile("s_nop 4\n" MIGRAPHX_PP_REPEAT(n, MIGRAPHX_DPP_REDUCE_ASM_REPEAT, ins) \
: "0"(x)); __VA_ARGS__ \ : "=v"(x) \
: "0"(x)); \
__VA_ARGS__ \
} }
#if __AMDGCN_WAVEFRONT_SIZE == 64 #if __AMDGCN_WAVEFRONT_SIZE == 64
#define MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f) (void)f; #define MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f) (void)f;
#else #else
#define MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f) \ #define MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f) \
auto y = dpp_swizzle<0x1e0>(x); \ auto y = dpp_swizzle<0x1e0>(x); \
x = f(x, y); x = f(x, y);
#endif #endif
#define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \ #define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \
template<unsigned int SubWaveSize> \ template <unsigned int SubWaveSize> \
__device__ inline void dpp_reduce(type& x, op f) \ __device__ inline void dpp_reduce(type& x, op f) \
{ \ { \
if constexpr(SubWaveSize == 2) MIGRAPHX_DPP_REDUCE_ASM(0, x, ins,); \ if constexpr(SubWaveSize == 2) \
if constexpr(SubWaveSize == 4) MIGRAPHX_DPP_REDUCE_ASM(1, x, ins,); \ MIGRAPHX_DPP_REDUCE_ASM(0, x, ins, ); \
if constexpr(SubWaveSize == 8) MIGRAPHX_DPP_REDUCE_ASM(2, x, ins,); \ if constexpr(SubWaveSize == 4) \
if constexpr(SubWaveSize == 16) MIGRAPHX_DPP_REDUCE_ASM(3, x, ins,); \ MIGRAPHX_DPP_REDUCE_ASM(1, x, ins, ); \
if constexpr(SubWaveSize == 32) MIGRAPHX_DPP_REDUCE_ASM(MIGRAPHX_DPP_IF_64(__AMDGCN_WAVEFRONT_SIZE)(4, 3), x, ins,MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f)); \ if constexpr(SubWaveSize == 8) \
MIGRAPHX_DPP_WHEN_64(__AMDGCN_WAVEFRONT_SIZE)(if constexpr(SubWaveSize == 64) MIGRAPHX_DPP_REDUCE_ASM(5, x, ins,)); \ MIGRAPHX_DPP_REDUCE_ASM(2, x, ins, ); \
} if constexpr(SubWaveSize == 16) \
MIGRAPHX_DPP_REDUCE_ASM(3, x, ins, ); \
if constexpr(SubWaveSize == 32) \
MIGRAPHX_DPP_REDUCE_ASM(MIGRAPHX_DPP_IF_64(__AMDGCN_WAVEFRONT_SIZE)(4, 3), \
x, \
ins, \
MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f)); \
MIGRAPHX_DPP_WHEN_64(__AMDGCN_WAVEFRONT_SIZE) \
(if constexpr(SubWaveSize == 64) MIGRAPHX_DPP_REDUCE_ASM(5, x, ins, )); \
}
#endif #endif
// NOLINTNEXTLINE // NOLINTNEXTLINE
#define MIGRAPHX_DPP_REDUCE(op, prefix, sign) \ #define MIGRAPHX_DPP_REDUCE(op, prefix, sign) \
MIGRAPHX_DPP_REDUCE_ASM_FUN(double, op, prefix##_f64); \ MIGRAPHX_DPP_REDUCE_ASM_FUN(double, op, prefix##_f64); \
MIGRAPHX_DPP_REDUCE_ASM_FUN(float, op, prefix##_f32); \ MIGRAPHX_DPP_REDUCE_ASM_FUN(float, op, prefix##_f32); \
MIGRAPHX_DPP_REDUCE_ASM_FUN(half, op, prefix##_f16); \ MIGRAPHX_DPP_REDUCE_ASM_FUN(half, op, prefix##_f16); \
MIGRAPHX_DPP_REDUCE_ASM_FUN(int32_t, op, prefix##sign##32); \ MIGRAPHX_DPP_REDUCE_ASM_FUN(int32_t, op, prefix##sign##32); \
MIGRAPHX_DPP_REDUCE_ASM_FUN(uint32_t, op, prefix##_u32); MIGRAPHX_DPP_REDUCE_ASM_FUN(uint32_t, op, prefix##_u32);
// Note: when max and min are in int32_t, signed version of instruction needs to be used. // Note: when max and min are in int32_t, signed version of instruction needs to be used.
MIGRAPHX_DPP_REDUCE(op::sum, v_add, _u) MIGRAPHX_DPP_REDUCE(op::sum, v_add, _u)
MIGRAPHX_DPP_REDUCE(op::product, v_mul, _u) MIGRAPHX_DPP_REDUCE(op::product, v_mul, _u)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment