Commit 06741cca authored by Jeff Daily's avatar Jeff Daily
Browse files

update for ROCm 7 BC-breaking change to warpSize

parent e461e868
/*!
* Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/
#pragma once
#if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIP__)
#if defined(__HIP_PLATFORM_AMD__)
// Compatibility shims that let CUDA-spelled device code build for ROCm/HIP.
//
// ROCm doesn't have __shfl_down_sync / __shfl_up_sync, only the mask-less
// __shfl_down / __shfl_up. The wrappers below discard the `mask` argument and
// forward `val` and `offset` unchanged.
// NOTE(review): this relies on callers always passing the full mask
// 0xffffffff, so that dropping it is harmless -- confirm at the call sites.
#define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset)
#define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset)
// WARPSIZE forwards to HIP's built-in warpSize, described here as a constexpr
// that is either 32 or 64 depending on the gfx arch.
// NOTE(review): WARPSIZE is defined again further down in terms of
// WARP_SIZE_INTERNAL(), whose comment says warpSize only "used to" be a
// constexpr -- confirm which of the two definitions is meant to remain.
#define WARPSIZE warpSize
// ROCm doesn't have atomicAdd_block, so the name is aliased to atomicAdd.
// NOTE(review): atomicAdd is expected to be semantically the same for this
// use -- confirm.
#define atomicAdd_block atomicAdd
// hipify
#include <hip/hip_runtime.h>
#define cudaDeviceProp hipDeviceProp_t
......@@ -41,7 +44,20 @@
#define cudaStreamDestroy hipStreamDestroy
#define cudaStream_t hipStream_t
#define cudaSuccess hipSuccess
#else // __HIP_PLATFORM_AMD__ || __HIP__
// Compile-time warp width for ROCm builds.
//
// The built-in warpSize may only be referenced from device code. Older HIP
// headers exposed it as a constexpr (32 or 64 depending on the target device)
// and pinned it to 64 when compiling host code. Rather than depend on that
// built-in, the width is selected here from the target-architecture macro:
// 64 when __GFX9__ is defined, 32 otherwise.
static inline constexpr int WARP_SIZE_INTERNAL() {
#if !defined(__GFX9__)
  return 32;
#else  // __GFX9__
  return 64;
#endif  // !__GFX9__
}
// Parenthesized so the call expands safely inside larger expressions.
#define WARPSIZE (WARP_SIZE_INTERNAL())
#else // __HIP_PLATFORM_AMD__
// CUDA's warpSize is not a constexpr, but its value is always 32.
#define WARPSIZE 32
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment