Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
f7120342
Commit
f7120342
authored
Jul 31, 2024
by
Jing Zhang
Browse files
clean
parent
f5ea85f3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
13 deletions
+21
-13
include/ck/utility/amd_buffer_addressing.hpp
include/ck/utility/amd_buffer_addressing.hpp
+21
-13
No files found.
include/ck/utility/amd_buffer_addressing.hpp
View file @
f7120342
...
@@ -931,22 +931,30 @@ amd_buffer_atomic_add(const typename vector_type_maker<T, N>::type::type src_thr
...
@@ -931,22 +931,30 @@ amd_buffer_atomic_add(const typename vector_type_maker<T, N>::type::type src_thr
using
scalar_t
=
typename
scalar_type
<
vector_t
>::
type
;
using
scalar_t
=
typename
scalar_type
<
vector_t
>::
type
;
constexpr
index_t
vector_size
=
scalar_type
<
vector_t
>::
vector_size
;
constexpr
index_t
vector_size
=
scalar_type
<
vector_t
>::
vector_size
;
#if 0
if
constexpr
(
is_same
<
T
,
bhalf_t
>::
value
)
uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x80000000;
amd_buffer_atomic_add_impl<scalar_t, vector_size>(
src_thread_data, dst_wave_buffer_resource, dst_addr_shift + dst_thread_addr_offset, 0);
#else
if
(
dst_thread_element_valid
)
{
{
i
gnore
=
dst_wave_buffer_resource
;
i
f
(
dst_thread_element_valid
)
ignore
=
dst_thread_addr_offset
;
{
// amd_buffer_atomic_add_impl<scalar_t, vector_size>(
// src_thread_data, dst_wave_buffer_resource, dst_thread_addr_offset, 0);
amd_global_atomic_add_impl
<
scalar_t
,
vector_size
>
(
amd_global_atomic_add_impl
<
scalar_t
,
vector_size
>
(
src_thread_data
,
src_thread_data
,
p_dst_wave
+
dst_thread_element_offset
);
p_dst_wave
+
dst_thread_element_offset
);
}
}
}
else
{
#if CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK
uint32_t
dst_addr_shift
=
dst_thread_element_valid
?
0
:
0x80000000
;
amd_buffer_atomic_add_impl
<
scalar_t
,
vector_size
>
(
src_thread_data
,
dst_wave_buffer_resource
,
dst_addr_shift
+
dst_thread_addr_offset
,
0
);
#else
if
(
dst_thread_element_valid
)
{
amd_buffer_atomic_add_impl
<
scalar_t
,
vector_size
>
(
src_thread_data
,
dst_wave_buffer_resource
,
dst_thread_addr_offset
,
0
);
}
#endif
#endif
}
}
}
// buffer_atomic_max requires:
// buffer_atomic_max requires:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment