"...composable_kernel_rocm.git" did not exist on "0ef27d537d855311099e0863a47b2564898fca7d"
Commit b0f295cb authored by Jing Zhang's avatar Jing Zhang
Browse files

fixed global_atomic_add

parent 895e8c40
......@@ -568,6 +568,7 @@ __device__ void amd_global_atomic_add_impl(const typename vector_type<T, N>::typ
{
if constexpr(is_same<T, half_t>::value)
{
#if 0
if constexpr(N == 2)
{
__builtin_amdgcn_global_atomic_fadd_v2f16(addr, src_thread_data);
......@@ -586,6 +587,13 @@ __device__ void amd_global_atomic_add_impl(const typename vector_type<T, N>::typ
__builtin_amdgcn_global_atomic_fadd_v2f16(addr + i, tmp.AsType<half2_t>()[i]);
});
}
#else
static_assert(N % 2 == 0, "");
vector_type<half_t, N> tmp{src_thread_data};
static_for<0, N / 2, 1>{}([&](auto i) {
__builtin_amdgcn_global_atomic_fadd_v2f16(bit_cast<half2_t*>(addr) + i, tmp.template AsType<half2_t>()[i]);
});
#endif
}
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment