Commit f7120342 authored by Jing Zhang
Browse files

clean

parent f5ea85f3
...@@ -931,7 +931,18 @@ amd_buffer_atomic_add(const typename vector_type_maker<T, N>::type::type src_thr ...@@ -931,7 +931,18 @@ amd_buffer_atomic_add(const typename vector_type_maker<T, N>::type::type src_thr
using scalar_t = typename scalar_type<vector_t>::type; using scalar_t = typename scalar_type<vector_t>::type;
constexpr index_t vector_size = scalar_type<vector_t>::vector_size; constexpr index_t vector_size = scalar_type<vector_t>::vector_size;
#if 0 if constexpr(is_same<T, bhalf_t>::value)
{
if(dst_thread_element_valid)
{
amd_global_atomic_add_impl<scalar_t, vector_size>(
src_thread_data, p_dst_wave + dst_thread_element_offset);
}
}
else
{
#if CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK
uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x80000000; uint32_t dst_addr_shift = dst_thread_element_valid ? 0 : 0x80000000;
amd_buffer_atomic_add_impl<scalar_t, vector_size>( amd_buffer_atomic_add_impl<scalar_t, vector_size>(
...@@ -939,14 +950,11 @@ amd_buffer_atomic_add(const typename vector_type_maker<T, N>::type::type src_thr ...@@ -939,14 +950,11 @@ amd_buffer_atomic_add(const typename vector_type_maker<T, N>::type::type src_thr
#else #else
if(dst_thread_element_valid) if(dst_thread_element_valid)
{ {
ignore = dst_wave_buffer_resource; amd_buffer_atomic_add_impl<scalar_t, vector_size>(
ignore = dst_thread_addr_offset; src_thread_data, dst_wave_buffer_resource, dst_thread_addr_offset, 0);
// amd_buffer_atomic_add_impl<scalar_t, vector_size>(
// src_thread_data, dst_wave_buffer_resource, dst_thread_addr_offset, 0);
amd_global_atomic_add_impl<scalar_t, vector_size>(src_thread_data,
p_dst_wave + dst_thread_element_offset);
} }
#endif #endif
}
} }
// buffer_atomic_max requires: // buffer_atomic_max requires:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment