Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
328fce97
Commit
328fce97
authored
Nov 10, 2023
by
Paul
Browse files
Improve dpp reductions on navi
parent
3c160a3f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
25 additions
and
8 deletions
+25
-8
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
+21
-7
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
+4
-1
No files found.
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
View file @
328fce97
...
@@ -49,12 +49,8 @@ constexpr unsigned int dpp_row_bcast(unsigned int x)
...
@@ -49,12 +49,8 @@ constexpr unsigned int dpp_row_bcast(unsigned int x)
return
y
;
return
y
;
}
}
template
<
unsigned
int
DppCtrl
,
template
<
class
T
,
class
F
>
unsigned
int
RowMask
=
0xf
,
__device__
T
dpp_op
(
T
&
x
,
F
f
)
unsigned
int
BankMask
=
0xf
,
bool
BoundCtrl
=
false
,
class
T
>
__device__
T
dpp_mov
(
T
&
x
)
{
{
static
const
index_int
n
=
sizeof
(
T
)
<
4
?
1
:
sizeof
(
T
)
/
4
;
static
const
index_int
n
=
sizeof
(
T
)
<
4
?
1
:
sizeof
(
T
)
/
4
;
union
type
union
type
...
@@ -68,10 +64,28 @@ __device__ T dpp_mov(T& x)
...
@@ -68,10 +64,28 @@ __device__ T dpp_mov(T& x)
input
.
data
=
x
;
input
.
data
=
x
;
for
(
index_int
i
=
0
;
i
<
n
;
i
++
)
for
(
index_int
i
=
0
;
i
<
n
;
i
++
)
{
{
output
.
reg
[
i
]
=
__hip_move_dpp
(
input
.
reg
[
i
],
DppCtrl
,
RowMask
,
BankMask
,
BoundCtrl
);
output
.
reg
[
i
]
=
f
(
input
.
reg
[
i
]
);
}
}
return
output
.
data
;
return
output
.
data
;
}
}
/// Runs `__hip_move_dpp` over `x` by way of `dpp_op`.
///
/// The callable handed to `dpp_op` forwards each value it receives to
/// `__hip_move_dpp` together with the compile-time `DppCtrl`, `RowMask`,
/// `BankMask` and `BoundCtrl` arguments, and the result of `dpp_op` is
/// returned unchanged.
///
/// @tparam DppCtrl   control value passed as the 2nd argument of `__hip_move_dpp`
/// @tparam RowMask   passed as the 3rd argument (defaults to 0xf)
/// @tparam BankMask  passed as the 4th argument (defaults to 0xf)
/// @tparam BoundCtrl passed as the 5th argument (defaults to false)
/// @param  x         value forwarded to `dpp_op`
/// @return whatever `dpp_op` produces for `x`
template <unsigned int DppCtrl,
          unsigned int RowMask  = 0xf,
          unsigned int BankMask = 0xf,
          bool BoundCtrl        = false,
          class T>
__device__ T dpp_mov(T& x)
{
    // Captureless on purpose: every DPP setting is a template argument, so the
    // intrinsic only ever sees compile-time constants.
    const auto move_piece = [](auto piece) {
        return __hip_move_dpp(piece, DppCtrl, RowMask, BankMask, BoundCtrl);
    };
    return dpp_op(x, move_piece);
}
/// Runs `__hip_ds_swizzle` over `x` by way of `dpp_op`.
///
/// The callable handed to `dpp_op` forwards each value it receives to
/// `__hip_ds_swizzle` with the compile-time `Mask` as the second argument,
/// and the result of `dpp_op` is returned unchanged.
///
/// @tparam Mask value passed as the 2nd argument of `__hip_ds_swizzle`
/// @param  x    value forwarded to `dpp_op`
/// @return whatever `dpp_op` produces for `x`
template <unsigned int Mask, class T>
__device__ T dpp_swizzle(T& x)
{
    // Captureless on purpose: the swizzle pattern is a template argument, so
    // the intrinsic only ever sees a compile-time constant.
    const auto swizzle_piece = [](auto piece) { return __hip_ds_swizzle(piece, Mask); };
    return dpp_op(x, swizzle_piece);
}
#endif // MIGRAPHX_HAS_DPP
#endif // MIGRAPHX_HAS_DPP
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
View file @
328fce97
...
@@ -45,7 +45,10 @@ __device__ void dpp_reduce(T& in, Op op)
...
@@ -45,7 +45,10 @@ __device__ void dpp_reduce(T& in, Op op)
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_shr
(
8
),
0xf
,
0xc
>
(
in
);
out
=
dpp_mov
<
dpp_row_shr
(
8
),
0xf
,
0xc
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
#if __AMDGCN_WAVEFRONT_SIZE == 64
#if __AMDGCN_WAVEFRONT_SIZE == 32
out
=
dpp_swizzle
<
dpp_row_bcast
(
15
)
>
(
in
);
in
=
op
(
in
,
out
);
#else
out
=
dpp_mov
<
dpp_row_bcast
(
15
),
0xa
>
(
in
);
out
=
dpp_mov
<
dpp_row_bcast
(
15
),
0xa
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_bcast
(
31
),
0xc
>
(
in
);
out
=
dpp_mov
<
dpp_row_bcast
(
31
),
0xc
>
(
in
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment