Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
23a18b2b
"vscode:/vscode.git/clone" did not exist on "22500e6c2ec77ef9527746ae30fdaa2f673dae5d"
Commit
23a18b2b
authored
Mar 08, 2022
by
Shucai Xiao
Browse files
fix bugs in softmax half2 implementation
parent
08818705
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
4 deletions
+4
-4
src/targets/gpu/device/softmax.cpp
src/targets/gpu/device/softmax.cpp
+4
-4
No files found.
src/targets/gpu/device/softmax.cpp
View file @
23a18b2b
...
...
@@ -66,7 +66,7 @@ softmax_kernel(void* data_in, index_int batch_item_num, index_int block_size, vo
__half2
*
in_data_reduce
=
buffer2
;
__half2
*
in_data
=
buffer2
+
batch_item_num
;
int
start
=
tid
/
block_size
*
batch_item_num
;
for
(
int
i
=
t
id
;
i
<
batch_item_num
;
i
+=
block_size
)
for
(
int
i
=
t
hreadIdx
.
x
;
i
<
batch_item_num
;
i
+=
block_size
)
{
auto
d
=
input
[
i
+
start
];
in_data
[
i
]
=
d
;
...
...
@@ -76,7 +76,7 @@ softmax_kernel(void* data_in, index_int batch_item_num, index_int block_size, vo
auto
batch_max
=
block_reduce
(
in_data_reduce
,
batch_item_num
,
threadIdx
.
x
,
block_size
,
half2_max
{});
for
(
int
i
=
t
id
;
i
<
batch_item_num
;
i
+=
block_size
)
for
(
int
i
=
t
hreadIdx
.
x
;
i
<
batch_item_num
;
i
+=
block_size
)
{
in_data
[
i
]
=
h2exp
(
__hsub2
(
in_data
[
i
],
batch_max
));
in_data_reduce
[
i
]
=
in_data
[
i
];
...
...
@@ -85,7 +85,7 @@ softmax_kernel(void* data_in, index_int batch_item_num, index_int block_size, vo
auto
batch_sum
=
block_reduce
(
in_data_reduce
,
batch_item_num
,
threadIdx
.
x
,
block_size
,
half2_sum
{});
for
(
int
i
=
t
id
;
i
<
batch_item_num
;
i
+=
block_size
)
for
(
int
i
=
t
hreadIdx
.
x
;
i
<
batch_item_num
;
i
+=
block_size
)
{
output
[
i
+
start
]
=
__h2div
(
in_data
[
i
],
batch_sum
);
}
...
...
@@ -163,7 +163,7 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
{
int
block_num
=
batch_shape
.
elements
();
int
shared_size
=
batch_item_num
*
2
*
result
.
get_shape
().
type_size
();
softmax_kernel
2
<<<
block_num
,
block_size
,
shared_size
,
stream
>>>
(
softmax_kernel
<<<
block_num
,
block_size
,
shared_size
,
stream
>>>
(
arg
.
data
(),
batch_item_num
,
block_size
,
result
.
data
());
}
else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment