Commit 7f9e59a1 authored by Chao Liu
Browse files

Merge remote-tracking branch 'origin/improve_buffer_address_for_pad' into bwd_data_v4r1_nhwc

parents ac62d13e d8cf5e5a
......@@ -24,6 +24,7 @@
#if CK_USE_AMD_XDLOPS
#include "amd_xdlops.hpp"
#include "amd_xdlops_inline_asm.hpp"
#endif
#endif
......@@ -145,19 +145,17 @@ struct AtomicAddData
// Specialization of Run for the <AddressSpace::Vgpr, AddressSpace::Global> pair
// (presumably source in VGPRs, destination in global memory -- confirm against the
// primary template's parameter order).
//
// Issues exactly one amd_buffer_atomic_add<T, DataPerAccess> into p_dst.
//
//   p_src, src_offset : the addend is read from &p_src[src_offset] when src_valid is true.
//   src_valid         : when false, a vector_t constructed from 0 is used as the addend
//                       instead of dereferencing p_src.
//   (src_range)       : unnamed and unused by this specialization.
//   p_dst, dst_offset,
//   dst_valid,
//   dst_range         : forwarded unchanged to amd_buffer_atomic_add.
//                       NOTE(review): dst_valid / dst_range look like the inputs to the
//                       buffer instruction's out-of-bounds handling -- confirm in
//                       amd_buffer_atomic_add.
template <>
__device__ void Run<AddressSpace::Vgpr, AddressSpace::Global>(const T* p_src,
                                                              index_t src_offset,
                                                              bool src_valid,
                                                              index_t /* src_range */,
                                                              T* p_dst,
                                                              index_t dst_offset,
                                                              bool dst_valid,
                                                              index_t dst_range) const
{
    // Stand-in addend for an invalid source element.
    const auto zeros = vector_t(0);

    // Single call: pass dst_range as a value (not a declaration) and do not repeat the
    // atomic add, otherwise the contribution would be accumulated twice.
    amd_buffer_atomic_add<T, DataPerAccess>(
        src_valid ? &(p_src[src_offset]) : &zeros, p_dst, dst_offset, dst_valid, dst_range);
}
#endif
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment