Commit 2ff6d9be authored by limm
Browse files

Add __ldg function definition

parent ceb47f1d
...@@ -16,3 +16,16 @@ __device__ __inline__ at::Half __shfl_down_sync(const unsigned mask, ...@@ -16,3 +16,16 @@ __device__ __inline__ at::Half __shfl_down_sync(const unsigned mask,
const unsigned int delta) { const unsigned int delta) {
return __shfl_down_sync(mask, var.operator __half(), delta); return __shfl_down_sync(mask, var.operator __half(), delta);
} }
#ifdef USE_ROCM
// at::Half overload of __ldg: views the pointer as `const __half*`, forwards to
// the __ldg overload for that type, and returns the loaded value as at::Half
// (converted implicitly by the return statement).
// NOTE(review): relies on at::Half and __half sharing the same object
// representation so the reinterpret_cast is valid -- confirm.
__device__ __inline__ at::Half __ldg(const at::Half* ptr) {
  const __half* raw_half_ptr = reinterpret_cast<const __half*>(ptr);
  return __ldg(raw_half_ptr);
}
// ROCm build: map the *_SYNC wrapper names onto the mask-less warp shuffle
// intrinsics (__shfl_up / __shfl_down / __shfl). The `mask` argument is
// accepted so call sites can use the same spelling as the non-ROCm branch,
// but it is dropped by the expansion and therefore never evaluated.
// Everything after `var` is variadic so that an optional trailing `width`
// argument is forwarded as well; this keeps the wrappers call-compatible with
// the plain intrinsic aliases defined in the non-ROCm branch, while the usual
// three-argument form (mask, var, delta) expands exactly as before.
#define SHFL_UP_SYNC(mask, var, ...) __shfl_up(var, __VA_ARGS__)
#define SHFL_DOWN_SYNC(mask, var, ...) __shfl_down(var, __VA_ARGS__)
#define SHFL_SYNC(mask, var, ...) __shfl(var, __VA_ARGS__)
#else
// Non-ROCm build: each wrapper is a plain alias for the corresponding *_sync
// intrinsic name, so every argument written at the call site (including the
// mask) is passed through to the intrinsic unchanged.
#define SHFL_UP_SYNC __shfl_up_sync
#define SHFL_DOWN_SYNC __shfl_down_sync
#define SHFL_SYNC __shfl_sync
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment