Commit 7f9e59a1 authored by Chao Liu
Browse files

Merge remote-tracking branch 'origin/improve_buffer_address_for_pad' into bwd_data_v4r1_nhwc

parents ac62d13e d8cf5e5a
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#if CK_USE_AMD_XDLOPS #if CK_USE_AMD_XDLOPS
#include "amd_xdlops.hpp" #include "amd_xdlops.hpp"
#include "amd_xdlops_inline_asm.hpp"
#endif #endif
#endif #endif
...@@ -145,19 +145,17 @@ struct AtomicAddData ...@@ -145,19 +145,17 @@ struct AtomicAddData
template <> template <>
__device__ void Run<AddressSpace::Vgpr, AddressSpace::Global>(const T* p_src, __device__ void Run<AddressSpace::Vgpr, AddressSpace::Global>(const T* p_src,
index_t src_offset, index_t src_offset,
bool src_valid,
index_t /* src_range */, index_t /* src_range */,
bool src_valid T* p_dst, T* p_dst,
index_t dst_offset, index_t dst_offset,
bool dst_valid, bool dst_valid,
index_t dst_range) const index_t dst_range) const
{ {
const auto zeros = vector_t(0); const auto zeros = vector_t(0);
amd_buffer_atomic_add<T, DataPerAccess>(src_valid ? &(p_src[src_offset]) : &zeros, amd_buffer_atomic_add<T, DataPerAccess>(
p_dst, src_valid ? &(p_src[src_offset]) : &zeros, p_dst, dst_offset, dst_valid, dst_range);
dst_offset,
dst_valid,
index_t dst_range);
} }
#endif #endif
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment