Commit 5437ce4f authored by Jeff Daily's avatar Jeff Daily
Browse files

fix build for rocm 7.0

parent 6453a041
...@@ -8,11 +8,6 @@ ...@@ -8,11 +8,6 @@
#if defined(__HIP_PLATFORM_AMD__) #if defined(__HIP_PLATFORM_AMD__)
// ROCm doesn't have __shfl_down_sync, only __shfl_down without mask.
// Since mask is full 0xffffffff, we can use __shfl_down instead.
#define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset)
#define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset)
// ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd // ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd
#define atomicAdd_block atomicAdd #define atomicAdd_block atomicAdd
...@@ -46,6 +41,11 @@ ...@@ -46,6 +41,11 @@
#define cudaStream_t hipStream_t #define cudaStream_t hipStream_t
#define cudaSuccess hipSuccess #define cudaSuccess hipSuccess
// ROCm 7.0 did add __shfl_down_sync et al, but the following hack still works.
// Since mask is full 0xffffffff, we can use __shfl_down instead.
#define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset)
#define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset)
// warpSize is only allowed for device code. // warpSize is only allowed for device code.
// HIP header used to define warpSize as a constexpr that was either 32 or 64 // HIP header used to define warpSize as a constexpr that was either 32 or 64
// depending on the target device, and then always set it to 64 for host code. // depending on the target device, and then always set it to 64 for host code.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment