"git@developer.sourcefind.cn:OpenDAS/deepspeed.git" did not exist on "c814abdadd31d7a92ba2b77c48b2f042318a5a7f"
Unverified commit 08d18a47 authored by Xin Yao, committed by GitHub

[Build] Fix bf16/fp16 building issues for CUDA 12.2 (#6074)


Signed-off-by: Xin Yao <xiny@nvidia.com>
parent de344fa4
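
Background: before CUDA 12.2, cuda_fp16.h and cuda_bf16.h only defined arithmetic operators for __half / __nv_bfloat16 on architectures with native support (sm_53+ and sm_80+ respectively), so these headers supplied float round-trip fallbacks for older targets. CUDA 12.2 started shipping "emulated" operators for the older architectures as well, so the hand-rolled fallbacks now collide with the toolkit's definitions and break the build. The fix wraps the fallbacks in an additional CUDART_VERSION check. A minimal sketch of the resulting guard nesting (FP16 side; the guards and the operator body match the patch, the surrounding file layout is assumed):

#include <cuda_fp16.h>

#ifdef __CUDACC__
// Native FP16 arithmetic exists only on sm_53 and newer.
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
// CUDA 12.2+ already ships emulated operators for older architectures,
// so defining them here again would be a redefinition error.
#if defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
__device__ __forceinline__ __half operator+(const __half& lh, const __half& rh) {
  return __half(float(lh) + float(rh));
}
// ... the other arithmetic and comparison operators follow the same
// float round-trip pattern ...
#endif  // CUDART_VERSION < 12020
#endif  // __CUDA_ARCH__ < 530
#endif  // __CUDACC__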
@@ -46,6 +46,8 @@ min(__nv_bfloat16 a, __nv_bfloat16 b) {
 // Arithmetic BF16 operations for architecture >= 8.0 are already defined in
 // cuda_bf16.h
 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800)
+// CUDA 12.2 adds "emulated" support for older architectures.
+#if defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
 __device__ __forceinline__ __nv_bfloat16
 operator+(const __nv_bfloat16& lh, const __nv_bfloat16& rh) {
   return __nv_bfloat16(float(lh) + float(rh));  // NOLINT
@@ -138,6 +140,7 @@ __device__ __forceinline__ bool operator<=(
     const __nv_bfloat16& lh, const __nv_bfloat16& rh) {
   return float(lh) <= float(rh);  // NOLINT
 }
+#endif  // defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
 #endif  // defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800)
 #endif  // __CUDACC__
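
For illustration, a toy kernel that relies on these operators (the kernel itself is hypothetical, not part of the patch): compiled for, say, sm_70, the __nv_bfloat16 addition resolves to the fallback above on CUDA < 12.2 and to the toolkit's emulated operator on CUDA >= 12.2; both compute through float.

#include <cuda_bf16.h>

// Hypothetical example: elementwise add over bf16 buffers.
__global__ void AddBf16(const __nv_bfloat16* x, const __nv_bfloat16* y,
                        __nv_bfloat16* out, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // Uses operator+(const __nv_bfloat16&, const __nv_bfloat16&).
  if (i < n) out[i] = x[i] + y[i];
}

The same guard is applied to the FP16 header below.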
@@ -45,6 +45,8 @@ static __device__ __forceinline__ half min(half a, half b) {
 // Arithmetic FP16 operations for architecture >= 5.3 are already defined in
 // cuda_fp16.h
 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
+// CUDA 12.2 adds "emulated" support for older architectures.
+#if defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
 __device__ __forceinline__ __half
 operator+(const __half& lh, const __half& rh) {
   return __half(float(lh) + float(rh));  // NOLINT
@@ -125,7 +127,8 @@ __device__ __forceinline__ bool operator>=(const __half& lh, const __half& rh) {
 __device__ __forceinline__ bool operator<=(const __half& lh, const __half& rh) {
   return float(lh) <= float(rh);  // NOLINT
 }
-#endif  // __CUDA_ARCH__ < 530
+#endif  // defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
+#endif  // defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
 #endif  // __CUDACC__
 #endif  // DGL_ARRAY_CUDA_FP16_CUH_
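
One detail worth noting: CUDART_VERSION comes from the CUDA runtime headers and encodes the toolkit version as major * 1000 + minor * 10, so the 12020 threshold above means "CUDA 12.2". A standalone compile-time probe of that macro (a sketch, not part of the patch):

#include <cuda_runtime_api.h>  // defines CUDART_VERSION, e.g. 12020 for CUDA 12.2
#include <cstdio>

int main() {
#if defined(CUDART_VERSION) && (CUDART_VERSION >= 12020)
  // Toolkit supplies emulated half/bf16 operators for pre-sm_53/sm_80 targets.
  std::printf("CUDART_VERSION %d: toolkit operators in effect\n", CUDART_VERSION);
#else
  // Older toolkit: the project's guarded fallback operators are compiled in.
  std::printf("CUDART_VERSION %d: fallback operators in effect\n", CUDART_VERSION);
#endif
  return 0;
}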