Commit 564e3752 (unverified), authored by Chenggang Zhao, committed by GitHub
Browse files

Fix `< PTX ISA 8.6` compatibility (#194)

parent 11a0b0e1
...@@ -309,7 +309,7 @@ __device__ __forceinline__ void tma_load_1d(const void* smem_ptr, const void* gm ...@@ -309,7 +309,7 @@ __device__ __forceinline__ void tma_load_1d(const void* smem_ptr, const void* gm
auto mbar_int_ptr = static_cast<uint32_t>(__cvta_generic_to_shared(mbar_ptr)); auto mbar_int_ptr = static_cast<uint32_t>(__cvta_generic_to_shared(mbar_ptr));
auto smem_int_ptr = static_cast<uint32_t>(__cvta_generic_to_shared(smem_ptr)); auto smem_int_ptr = static_cast<uint32_t>(__cvta_generic_to_shared(smem_ptr));
const auto cache_hint = evict_first ? kEvictFirst : kEvictNormal; const auto cache_hint = evict_first ? kEvictFirst : kEvictNormal;
asm volatile("cp.async.bulk.shared::cta.global.mbarrier::complete_tx::bytes.L2::cache_hint [%0], [%1], %2, [%3], %4;\n" asm volatile("cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes.L2::cache_hint [%0], [%1], %2, [%3], %4;\n"
:: "r"(smem_int_ptr), "l"(gmem_ptr), "r"(num_bytes), "r"(mbar_int_ptr), "l"(cache_hint) : "memory"); :: "r"(smem_int_ptr), "l"(gmem_ptr), "r"(num_bytes), "r"(mbar_int_ptr), "l"(cache_hint) : "memory");
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment