Commit 06741cca authored by Jeff Daily's avatar Jeff Daily
Browse files

update for ROCm 7 BC-breaking change to warpSize

parent e461e868
/*! /*!
* Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved. * Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
*/ */
#pragma once
#if defined(USE_CUDA) || defined(USE_ROCM) #if defined(USE_CUDA) || defined(USE_ROCM)
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIP__) #if defined(__HIP_PLATFORM_AMD__)
// ROCm doesn't have __shfl_down_sync, only __shfl_down without mask. // ROCm doesn't have __shfl_down_sync, only __shfl_down without mask.
// Since mask is full 0xffffffff, we can use __shfl_down instead. // Since mask is full 0xffffffff, we can use __shfl_down instead.
#define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset) #define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset)
#define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset) #define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset)
// ROCm warpSize is constexpr and is either 32 or 64 depending on gfx arch.
#define WARPSIZE warpSize
// ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd // ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd
#define atomicAdd_block atomicAdd #define atomicAdd_block atomicAdd
// hipify // hipify
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#define cudaDeviceProp hipDeviceProp_t #define cudaDeviceProp hipDeviceProp_t
...@@ -41,7 +44,20 @@ ...@@ -41,7 +44,20 @@
#define cudaStreamDestroy hipStreamDestroy #define cudaStreamDestroy hipStreamDestroy
#define cudaStream_t hipStream_t #define cudaStream_t hipStream_t
#define cudaSuccess hipSuccess #define cudaSuccess hipSuccess
#else // __HIP_PLATFORM_AMD__ || __HIP__
// warpSize is only allowed for device code.
// HIP header used to define warpSize as a constexpr that was either 32 or 64
// depending on the target device, and then always set it to 64 for host code.
// Compile-time warp (wavefront) width for the translation unit being built.
// Yields 64 when the compiler defines __GFX9__ and 32 in every other case.
// NOTE(review): __GFX9__ is presumably predefined only while compiling for
// GFX9-family device targets, so other passes would see 32 — confirm against
// the toolchain documentation.
static inline constexpr int WARP_SIZE_INTERNAL() {
#if !defined(__GFX9__)
  return 32;
#else   // __GFX9__
  return 64;
#endif  // !__GFX9__
}
// WARPSIZE expands to a call of the constexpr helper, so it stays usable
// wherever an integral constant expression is required.
#define WARPSIZE (WARP_SIZE_INTERNAL())
#else // __HIP_PLATFORM_AMD__
// CUDA warpSize is not a constexpr, but always 32 // CUDA warpSize is not a constexpr, but always 32
#define WARPSIZE 32 #define WARPSIZE 32
#endif #endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment