Commit 37a347e3 (unverified), authored by Qianfeng, committed by GitHub
Browse files

Fix the use of static_for in amd_buffer_addressing.hpp (#1337)



* Add insert_dummy_dep_per_dword overload for length 64

* Fix insert_dummy_dep_per_dword and remove the overload for length 64

* Remove blank lines

---------
Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
parent acda4c5a
@@ -552,8 +552,9 @@ namespace impl{
 template<index_t N>
 CK_TILE_DEVICE void insert_dummy_dep_per_dword(array<float, N>& b)
 {
-    static_for<0, b.size(), 1>{}([&](auto i){
-        asm volatile(" " : : "v"(b.get(i)) : "memory");
+    constexpr auto kSize = remove_cvref_t<decltype(b)>::size();
+    static_for<0, kSize, 1>{}([&](auto i){
+        asm volatile(" " : : "v"(b.get(number<i>{})) : "memory");
     });
 }
 #if 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment