Unverified Commit b4522474 authored by sasha0552's avatar sasha0552 Committed by GitHub
Browse files

[Bugfix][Kernel] Implement acquire/release polyfill for Pascal (#8776)

parent ee777d9c
...@@ -131,15 +131,26 @@ DINLINE O downcast(array_t<float, O::size> val) { ...@@ -131,15 +131,26 @@ DINLINE O downcast(array_t<float, O::size> val) {
} }
static DINLINE void st_flag_release(FlagType* flag_addr, FlagType flag) { static DINLINE void st_flag_release(FlagType* flag_addr, FlagType flag) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
asm volatile("st.release.sys.global.u32 [%1], %0;" ::"r"(flag), asm volatile("st.release.sys.global.u32 [%1], %0;" ::"r"(flag),
"l"(flag_addr)); "l"(flag_addr));
#else
asm volatile("membar.sys; st.volatile.global.u32 [%1], %0;" ::"r"(flag),
"l"(flag_addr));
#endif
} }
static DINLINE FlagType ld_flag_acquire(FlagType* flag_addr) { static DINLINE FlagType ld_flag_acquire(FlagType* flag_addr) {
FlagType flag; FlagType flag;
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
asm volatile("ld.acquire.sys.global.u32 %0, [%1];" asm volatile("ld.acquire.sys.global.u32 %0, [%1];"
: "=r"(flag) : "=r"(flag)
: "l"(flag_addr)); : "l"(flag_addr));
#else
asm volatile("ld.volatile.global.u32 %0, [%1]; membar.gl;"
: "=r"(flag)
: "l"(flag_addr));
#endif
return flag; return flag;
} }
......
...@@ -44,7 +44,14 @@ ...@@ -44,7 +44,14 @@
} while (0) } while (0)
__global__ void dummy_kernel() { __global__ void dummy_kernel() {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
for (int i = 0; i < 100; i++) __nanosleep(1000000); // 100ms for (int i = 0; i < 100; i++) __nanosleep(1000000); // 100ms
#else
for (int i = 0; i < 100; i++) {
long long int start = clock64();
while (clock64() - start < 150000000); // approximately 98.4ms on P40
}
#endif
} }
template <typename T> template <typename T>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment