Commit d5d78533 authored by Chao Liu
Browse files

sync with miopen

parent 4cd8f454
......@@ -15,6 +15,8 @@ namespace ck {
// The dimension access order can be different for src and dst.
// Will do valid mapping check on src data: Read 0 if src data has an invalid mapping
// Will do valid mapping check on dst data: No write if dst data has an invalid mapping
// BlockSize can be equal to or larger than the ThreadCluster size, which means some threads may
// not do a threadwise copy
template <index_t BlockSize,
typename BlockSrcDesc,
typename BlockDstDesc,
......
......@@ -118,7 +118,9 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
SrcDataPerRead,
SrcAddressSpace,
AddressSpace::Vgpr,
InMemoryDataOperation::Set>(
InMemoryDataOperation::Set,
SrcDataStride,
1>(
p_src, src_coord.GetOffset(), p_src_long_vector, buffer_offset);
}
}
......@@ -150,7 +152,9 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
DstDataPerWrite,
AddressSpace::Vgpr,
DstAddressSpace,
DstInMemOp>(
DstInMemOp,
1,
DstDataStride>(
p_dst_long_vector, buffer_offset, p_dst, dst_coord.GetOffset());
}
}
......
......@@ -26,8 +26,4 @@
#include "amd_xdlops.hpp"
#endif
#if CK_USE_AMD_XDLOPS_EMULATE
#include "amd_xdlops_emulate.hpp"
#endif
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment