/*!
 * Copyright(C) 2023 Advanced Micro Devices, Inc. All rights reserved.
 */

#ifndef LIGHTGBM_INCLUDE_LIGHTGBM_CUDA_CUDA_ROCM_INTEROP_H_
#define LIGHTGBM_INCLUDE_LIGHTGBM_CUDA_CUDA_ROCM_INTEROP_H_

// Compatibility shims so that device code written against CUDA spellings
// (__shfl_*_sync, atomicAdd_block, a WARPSIZE constant) can be built for
// either CUDA or ROCm/HIP. Nothing below is defined unless USE_CUDA is set.
#ifdef USE_CUDA

#if defined(__HIP_PLATFORM_AMD__) || defined(__HIP__)
// ROCm doesn't have __shfl_down_sync, only __shfl_down without mask.
// Since mask is full 0xffffffff, we can use __shfl_down instead.
// NOTE: the `mask` argument is dropped by these macros and is never
// evaluated, so callers must not rely on a partial mask or on side effects
// in the mask expression.
#define __shfl_down_sync(mask, val, offset) __shfl_down(val, offset)
#define __shfl_up_sync(mask, val, offset) __shfl_up(val, offset)
// ROCm warpSize is constexpr and is either 32 or 64 depending on gfx arch.
#define WARPSIZE warpSize
// ROCm doesn't have atomicAdd_block, but it should be semantically the same as atomicAdd
#define atomicAdd_block atomicAdd
#else
// CUDA warpSize is not a constexpr, but always 32
#define WARPSIZE 32
#endif  // defined(__HIP_PLATFORM_AMD__) || defined(__HIP__)

#endif  // USE_CUDA

#endif  // LIGHTGBM_INCLUDE_LIGHTGBM_CUDA_CUDA_ROCM_INTEROP_H_