Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
5be87179
Commit
5be87179
authored
Dec 07, 2023
by
Paul
Browse files
Format
parent
ce4f0940
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
41 additions
and
32 deletions
+41
-32
src/targets/gpu/kernels/include/migraphx/kernels/pp.hpp
src/targets/gpu/kernels/include/migraphx/kernels/pp.hpp
+2
-3
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
+39
-29
No files found.
src/targets/gpu/kernels/include/migraphx/kernels/pp.hpp
View file @
5be87179
#ifndef MIGRAPHX_GUARD_KERNELS_PP_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_PP_HPP
#define MIGRAPHX_GUARD_KERNELS_PP_HPP
#define MIGRAPHX_GUARD_KERNELS_PP_HPP
#define MIGRAPHX_PP_PRIMITIVE_CAT(x, y) x##y
#define MIGRAPHX_PP_PRIMITIVE_CAT(x, y) x##y
#define MIGRAPHX_PP_CAT(x, y) MIGRAPHX_PP_PRIMITIVE_CAT(x, y)
#define MIGRAPHX_PP_CAT(x, y) MIGRAPHX_PP_PRIMITIVE_CAT(x, y)
...
@@ -20,10 +19,10 @@
...
@@ -20,10 +19,10 @@
#define MIGRAPHX_PP_REPEAT9(m, ...) MIGRAPHX_PP_REPEAT8(m, __VA_ARGS__) m(9, __VA_ARGS__)
#define MIGRAPHX_PP_REPEAT9(m, ...) MIGRAPHX_PP_REPEAT8(m, __VA_ARGS__) m(9, __VA_ARGS__)
#define MIGRAPHX_PP_REPEAT10(m, ...) MIGRAPHX_PP_REPEAT9(m, __VA_ARGS__) m(10, __VA_ARGS__)
#define MIGRAPHX_PP_REPEAT10(m, ...) MIGRAPHX_PP_REPEAT9(m, __VA_ARGS__) m(10, __VA_ARGS__)
#define MIGRAPHX_PP_REPEAT(n, m, ...) MIGRAPHX_PP_PRIMITIVE_CAT(MIGRAPHX_PP_REPEAT, n)(m, __VA_ARGS__)
#define MIGRAPHX_PP_REPEAT(n, m, ...) \
MIGRAPHX_PP_PRIMITIVE_CAT(MIGRAPHX_PP_REPEAT, n)(m, __VA_ARGS__)
namespace
migraphx
{
namespace
migraphx
{
}
// namespace migraphx
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_PP_HPP
#endif // MIGRAPHX_GUARD_KERNELS_PP_HPP
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
View file @
5be87179
...
@@ -86,7 +86,7 @@ __device__ void dpp_reduce(T& in, Op op)
...
@@ -86,7 +86,7 @@ __device__ void dpp_reduce(T& in, Op op)
#if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK)
#if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK)
// NOLINTNEXTLINE
// NOLINTNEXTLINE
#define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \
#define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \
template<unsigned int SubWaveSize> \
template
<unsigned int SubWaveSize>
\
__device__ inline void dpp_reduce(type& x, op f) \
__device__ inline void dpp_reduce(type& x, op f) \
{ \
{ \
(void)f; \
(void)f; \
...
@@ -105,12 +105,14 @@ __device__ void dpp_reduce(T& in, Op op)
...
@@ -105,12 +105,14 @@ __device__ void dpp_reduce(T& in, Op op)
#define MIGRAPHX_DPP_REDUCE_ASM4(ins) #ins " %0 %0 %0 row_bcast:15 row_mask:0xa\n"
#define MIGRAPHX_DPP_REDUCE_ASM4(ins) #ins " %0 %0 %0 row_bcast:15 row_mask:0xa\n"
#define MIGRAPHX_DPP_REDUCE_ASM5(ins) #ins " %0 %0 %0 row_bcast:31 row_mask:0xc\n"
#define MIGRAPHX_DPP_REDUCE_ASM5(ins) #ins " %0 %0 %0 row_bcast:31 row_mask:0xc\n"
#define MIGRAPHX_DPP_REDUCE_ASM_REPEAT(i, ins) MIGRAPHX_PP_CAT(MIGRAPHX_DPP_REDUCE_ASM, i)(ins) "s_nop 1\n"
#define MIGRAPHX_DPP_REDUCE_ASM_REPEAT(i, ins) \
#define MIGRAPHX_DPP_REDUCE_ASM(n, x, ins, ...) { \
MIGRAPHX_PP_CAT(MIGRAPHX_DPP_REDUCE_ASM, i)(ins) "s_nop 1\n"
__asm__ volatile("s_nop 4\n" \
#define MIGRAPHX_DPP_REDUCE_ASM(n, x, ins, ...) \
MIGRAPHX_PP_REPEAT(n, MIGRAPHX_DPP_REDUCE_ASM_REPEAT, ins) \
{ \
__asm__ volatile("s_nop 4\n" MIGRAPHX_PP_REPEAT(n, MIGRAPHX_DPP_REDUCE_ASM_REPEAT, ins) \
: "=v"(x) \
: "=v"(x) \
: "0"(x)); __VA_ARGS__ \
: "0"(x)); \
__VA_ARGS__ \
}
}
#if __AMDGCN_WAVEFRONT_SIZE == 64
#if __AMDGCN_WAVEFRONT_SIZE == 64
...
@@ -122,15 +124,24 @@ __device__ void dpp_reduce(T& in, Op op)
...
@@ -122,15 +124,24 @@ __device__ void dpp_reduce(T& in, Op op)
#endif
#endif
#define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \
#define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \
template<unsigned int SubWaveSize> \
template
<unsigned int SubWaveSize>
\
__device__ inline void dpp_reduce(type& x, op f) \
__device__ inline void dpp_reduce(type& x, op f) \
{ \
{ \
if constexpr(SubWaveSize == 2) MIGRAPHX_DPP_REDUCE_ASM(0, x, ins,); \
if constexpr(SubWaveSize == 2) \
if constexpr(SubWaveSize == 4) MIGRAPHX_DPP_REDUCE_ASM(1, x, ins,); \
MIGRAPHX_DPP_REDUCE_ASM(0, x, ins, ); \
if constexpr(SubWaveSize == 8) MIGRAPHX_DPP_REDUCE_ASM(2, x, ins,); \
if constexpr(SubWaveSize == 4) \
if constexpr(SubWaveSize == 16) MIGRAPHX_DPP_REDUCE_ASM(3, x, ins,); \
MIGRAPHX_DPP_REDUCE_ASM(1, x, ins, ); \
if constexpr(SubWaveSize == 32) MIGRAPHX_DPP_REDUCE_ASM(MIGRAPHX_DPP_IF_64(__AMDGCN_WAVEFRONT_SIZE)(4, 3), x, ins,MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f)); \
if constexpr(SubWaveSize == 8) \
MIGRAPHX_DPP_WHEN_64(__AMDGCN_WAVEFRONT_SIZE)(if constexpr(SubWaveSize == 64) MIGRAPHX_DPP_REDUCE_ASM(5, x, ins,)); \
MIGRAPHX_DPP_REDUCE_ASM(2, x, ins, ); \
if constexpr(SubWaveSize == 16) \
MIGRAPHX_DPP_REDUCE_ASM(3, x, ins, ); \
if constexpr(SubWaveSize == 32) \
MIGRAPHX_DPP_REDUCE_ASM(MIGRAPHX_DPP_IF_64(__AMDGCN_WAVEFRONT_SIZE)(4, 3), \
x, \
ins, \
MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f)); \
MIGRAPHX_DPP_WHEN_64(__AMDGCN_WAVEFRONT_SIZE) \
(if constexpr(SubWaveSize == 64) MIGRAPHX_DPP_REDUCE_ASM(5, x, ins, )); \
}
}
#endif
#endif
...
@@ -142,7 +153,6 @@ __device__ void dpp_reduce(T& in, Op op)
...
@@ -142,7 +153,6 @@ __device__ void dpp_reduce(T& in, Op op)
MIGRAPHX_DPP_REDUCE_ASM_FUN(int32_t, op, prefix##sign##32); \
MIGRAPHX_DPP_REDUCE_ASM_FUN(int32_t, op, prefix##sign##32); \
MIGRAPHX_DPP_REDUCE_ASM_FUN(uint32_t, op, prefix##_u32);
MIGRAPHX_DPP_REDUCE_ASM_FUN(uint32_t, op, prefix##_u32);
// Note: when max and min are in int32_t, signed version of instruction needs to be used.
// Note: when max and min are in int32_t, signed version of instruction needs to be used.
MIGRAPHX_DPP_REDUCE
(
op
::
sum
,
v_add
,
_u
)
MIGRAPHX_DPP_REDUCE
(
op
::
sum
,
v_add
,
_u
)
MIGRAPHX_DPP_REDUCE
(
op
::
product
,
v_mul
,
_u
)
MIGRAPHX_DPP_REDUCE
(
op
::
product
,
v_mul
,
_u
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment