Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7f9e59a1
Commit
7f9e59a1
authored
Jul 30, 2020
by
Chao Liu
Browse files
Merge remote-tracking branch 'origin/improve_buffer_address_for_pad' into bwd_data_v4r1_nhwc
parents
ac62d13e
d8cf5e5a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
6 deletions
+5
-6
composable_kernel/include/utility/common_header.hpp
composable_kernel/include/utility/common_header.hpp
+1
-0
composable_kernel/include/utility/in_memory_operation.amd.hpp.in
...ble_kernel/include/utility/in_memory_operation.amd.hpp.in
+4
-6
No files found.
composable_kernel/include/utility/common_header.hpp
View file @
7f9e59a1
...
...
@@ -24,6 +24,7 @@
#if CK_USE_AMD_XDLOPS
#include "amd_xdlops.hpp"
#include "amd_xdlops_inline_asm.hpp"
#endif
#endif
composable_kernel/include/utility/in_memory_operation.amd.hpp.in
View file @
7f9e59a1
...
...
@@ -145,19 +145,17 @@ struct AtomicAddData
template <>
__device__ void Run<AddressSpace::Vgpr, AddressSpace::Global>(const T* p_src,
index_t src_offset,
bool src_valid,
index_t /* src_range */,
bool src_valid
T* p_dst,
T* p_dst,
index_t dst_offset,
bool dst_valid,
index_t dst_range) const
{
const auto zeros = vector_t(0);
amd_buffer_atomic_add<T, DataPerAccess>(src_valid ? &(p_src[src_offset]) : &zeros,
p_dst,
dst_offset,
dst_valid,
index_t dst_range);
amd_buffer_atomic_add<T, DataPerAccess>(
src_valid ? &(p_src[src_offset]) : &zeros, p_dst, dst_offset, dst_valid, dst_range);
}
#endif
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment