"git@developer.sourcefind.cn:OpenDAS/deepspeed.git" did not exist on "c814abdadd31d7a92ba2b77c48b2f042318a5a7f"
Unverified commit 08d18a47 authored by Xin Yao, committed by GitHub

[Build] Fix bf16/fp16 building issues for CUDA 12.2 (#6074)


Signed-off-by: Xin Yao <xiny@nvidia.com>
parent de344fa4
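
Background: before CUDA 12.2, cuda_fp16.h and cuda_bf16.h only defined arithmetic operators for __half / __nv_bfloat16 on architectures with native support (sm_53+ and sm_80+ respectively), so these headers supplied float round-trip fallbacks for older targets. CUDA 12.2 started shipping "emulated" operators for the older architectures as well, so the hand-rolled fallbacks now collide with the toolkit's definitions and break the build. The fix wraps the fallbacks in an additional CUDART_VERSION check. A minimal sketch of the resulting guard nesting (FP16 side; the guards and the operator body match the patch, the surrounding file layout is assumed):

#include <cuda_fp16.h>

#ifdef __CUDACC__
// Native FP16 arithmetic exists only on sm_53 and newer.
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
// CUDA 12.2+ already ships emulated operators for older architectures,
// so defining them here again would be a redefinition error.
#if defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
__device__ __forceinline__ __half operator+(const __half& lh, const __half& rh) {
  return __half(float(lh) + float(rh));
}
// ... the other arithmetic and comparison operators follow the same
// float round-trip pattern ...
#endif  // CUDART_VERSION < 12020
#endif  // __CUDA_ARCH__ < 530
#endif  // __CUDACC__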
@@ -46,6 +46,8 @@ min(__nv_bfloat16 a, __nv_bfloat16 b) {
 // Arithmetic BF16 operations for architecture >= 8.0 are already defined in
 // cuda_bf16.h
 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800)
+// CUDA 12.2 adds "emulated" support for older architectures.
+#if defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
 __device__ __forceinline__ __nv_bfloat16
 operator+(const __nv_bfloat16& lh, const __nv_bfloat16& rh) {
   return __nv_bfloat16(float(lh) + float(rh));  // NOLINT
@@ -138,6 +140,7 @@ __device__ __forceinline__ bool operator<=(
     const __nv_bfloat16& lh, const __nv_bfloat16& rh) {
   return float(lh) <= float(rh);  // NOLINT
 }
+#endif  // defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
 #endif  // defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800)
 #endif  // __CUDACC__
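
For illustration, a toy kernel that relies on these operators (the kernel itself is hypothetical, not part of the patch): compiled for, say, sm_70, the __nv_bfloat16 addition resolves to the fallback above on CUDA < 12.2 and to the toolkit's emulated operator on CUDA >= 12.2; both compute through float.

#include <cuda_bf16.h>

// Hypothetical example: elementwise add over bf16 buffers.
__global__ void AddBf16(const __nv_bfloat16* x, const __nv_bfloat16* y,
                        __nv_bfloat16* out, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // Uses operator+(const __nv_bfloat16&, const __nv_bfloat16&).
  if (i < n) out[i] = x[i] + y[i];
}

The same guard is applied to the FP16 header below.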
@@ -45,6 +45,8 @@ static __device__ __forceinline__ half min(half a, half b) {
 // Arithmetic FP16 operations for architecture >= 5.3 are already defined in
 // cuda_fp16.h
 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
+// CUDA 12.2 adds "emulated" support for older architectures.
+#if defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
 __device__ __forceinline__ __half
 operator+(const __half& lh, const __half& rh) {
   return __half(float(lh) + float(rh));  // NOLINT
@@ -125,7 +127,8 @@ __device__ __forceinline__ bool operator>=(const __half& lh, const __half& rh) {
 __device__ __forceinline__ bool operator<=(const __half& lh, const __half& rh) {
   return float(lh) <= float(rh);  // NOLINT
 }
-#endif  // __CUDA_ARCH__ < 530
+#endif  // defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
+#endif  // defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
 #endif  // __CUDACC__
 #endif  // DGL_ARRAY_CUDA_FP16_CUH_
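
One detail worth noting: CUDART_VERSION comes from the CUDA runtime headers and encodes the toolkit version as major * 1000 + minor * 10, so the 12020 threshold above means "CUDA 12.2". A standalone compile-time probe of that macro (a sketch, not part of the patch):

#include <cuda_runtime_api.h>  // defines CUDART_VERSION, e.g. 12020 for CUDA 12.2
#include <cstdio>

int main() {
#if defined(CUDART_VERSION) && (CUDART_VERSION >= 12020)
  // Toolkit supplies emulated half/bf16 operators for pre-sm_53/sm_80 targets.
  std::printf("CUDART_VERSION %d: toolkit operators in effect\n", CUDART_VERSION);
#else
  // Older toolkit: the project's guarded fallback operators are compiled in.
  std::printf("CUDART_VERSION %d: fallback operators in effect\n", CUDART_VERSION);
#endif
  return 0;
}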