Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
6df9f47f
"library/vscode:/vscode.git/clone" did not exist on "97d8c5045ef102b700878d02ce12b79b8a1e0098"
Commit
6df9f47f
authored
Dec 08, 2023
by
Paul
Browse files
Format
parent
67c92b83
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
5 deletions
+3
-5
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
+1
-1
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
+2
-4
No files found.
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
View file @
6df9f47f
...
@@ -88,7 +88,7 @@ __device__ T dpp_swizzle(T& x)
...
@@ -88,7 +88,7 @@ __device__ T dpp_swizzle(T& x)
return
dpp_op
(
x
,
[](
auto
i
)
{
return
__hip_ds_swizzle
(
i
,
Mask
);
});
return
dpp_op
(
x
,
[](
auto
i
)
{
return
__hip_ds_swizzle
(
i
,
Mask
);
});
}
}
template
<
unsigned
int
SrcLane
,
unsigned
int
Width
,
class
T
>
template
<
unsigned
int
SrcLane
,
unsigned
int
Width
,
class
T
>
__device__
T
dpp_readlane
(
T
&
x
)
__device__
T
dpp_readlane
(
T
&
x
)
{
{
static_assert
(
is_power_of_2
(
Width
),
"Width must be a power of 2"
);
static_assert
(
is_power_of_2
(
Width
),
"Width must be a power of 2"
);
...
...
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
View file @
6df9f47f
...
@@ -170,11 +170,9 @@ __device__ auto subwave_reduce(index idx, Op op, T init, Index n, F f)
...
@@ -170,11 +170,9 @@ __device__ auto subwave_reduce(index idx, Op op, T init, Index n, F f)
using
type
=
decltype
(
index
::
invoke_loop
(
f
,
0
,
_c
<
0
>
));
using
type
=
decltype
(
index
::
invoke_loop
(
f
,
0
,
_c
<
0
>
));
type
x
=
init
;
type
x
=
init
;
idx
.
local_subwave_stride
<
SubWaveSize
>
(
idx
.
local_subwave_stride
<
SubWaveSize
>
(
n
,
[
&
](
auto
i
,
auto
d
)
{
n
,
[
&
](
auto
i
,
auto
d
)
{
x
=
op
(
x
,
index
::
invoke_loop
(
f
,
i
,
d
));
});
x
=
op
(
x
,
index
::
invoke_loop
(
f
,
i
,
d
));
});
dpp_reduce
<
SubWaveSize
>
(
x
,
op
);
dpp_reduce
<
SubWaveSize
>
(
x
,
op
);
return
dpp_readlane
<
SubWaveSize
-
1
,
SubWaveSize
>
(
x
);
return
dpp_readlane
<
SubWaveSize
-
1
,
SubWaveSize
>
(
x
);
}
}
template
<
class
Op
,
class
T
,
class
Index
,
class
F
>
template
<
class
Op
,
class
T
,
class
Index
,
class
F
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment