[Build][Tests] Enable FP16 for GPU builds in CI (#4030)

* Enable FP16 for GPU builds in CI
* Limit default GPU archs to Pascal and above
* Disable FP16 dispatching for CUDA architectures less than 60
* Fix linting
* Fix typos

[Build][Tests] Enable FP16 for GPU builds in CI (#4030)
* Enable FP16 for GPU builds in CI
* Limit default GPU archs to Pascal and above
* Disable FP16 dispatching for CUDA architectures less than 60
* Fix linting
* Fix typos
7a065a9c · nv-dlasalle · GitHub · d1124b7b · 7a065a9c · 7a065a9c
Unverified Commit 7a065a9c authored May 26, 2022 by nv-dlasalle Committed by GitHub May 26, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 22 additions and 2 deletions

src/array/cuda/atomic.cuh src/array/cuda/atomic.cuh +4 -1

src/array/cuda/utils.h src/array/cuda/utils.h +17 -0

tests/scripts/build_dgl.sh tests/scripts/build_dgl.sh +1 -1

No files found.
--- a/src/array/cuda/atomic.cuh
+++ b/src/array/cuda/atomic.cuh
@@ -234,14 +234,17 @@ __device__ __forceinline__ double AtomicAdd<double>(double* addr, double val) {
 #ifdef USE_FP16
 #if defined(CUDART_VERSION) && CUDART_VERSION >= 10000
+// make sure we have half support
+#if __CUDA_ARCH__ >= 600
 template <>
 __device__ __forceinline__ half AtomicAdd<half>(half* addr, half val) {
 #if __CUDA_ARCH__ >= 700
  return atomicAdd(addr, val);
 #else
  return *addr + val;
-#endif  // __CUDA_ARCH__
+#endif  // __CUDA_ARCH__ >= 700
 }
+#endif  // __CUDA_ARCH__ >= 600
 #endif  // defined(CUDART_VERSION) && CUDART_VERSION >= 10000
 #endif  // USE_FP16

--- a/src/array/cuda/utils.h
+++ b/src/array/cuda/utils.h
@@ -21,6 +21,7 @@ namespace cuda {
 #define CUDA_MAX_NUM_THREADS 1024
 #ifdef USE_FP16
+#if __CUDA_ARCH__ >= 600
 #define SWITCH_BITS(bits, DType, ...)                           \
  do {                                                          \
    if ((bits) == 16) {                                         \
@@ -36,6 +37,22 @@ namespace cuda {
      LOG(FATAL) << "Data type not recognized with bits " << bits; \
    }                                                           \
  } while (0)
+#else
+#define SWITCH_BITS(bits, DType, ...)                           \
+  do {                                                          \
+    if ((bits) == 16) {                                         \
+      LOG(FATAL) << "FP16 only supported on CUDA architectures >= 60"; \
+    } else if ((bits) == 32) {                                  \
+      typedef float DType;                                      \
+      { __VA_ARGS__ }                                           \
+    } else if ((bits) == 64) {                                  \
+      typedef double DType;                                     \
+      { __VA_ARGS__ }                                           \
+    } else {                                                    \
+      LOG(FATAL) << "Data type not recognized with bits " << bits; \
+    }                                                           \
+  } while (0)
+#endif  // __CUDA_ARCH__ >= 600
 #else  // USE_FP16
 #define SWITCH_BITS(bits, DType, ...)                           \
  do {                                                          \

--- a/tests/scripts/build_dgl.sh
+++ b/tests/scripts/build_dgl.sh
@@ -20,7 +20,7 @@ if [[ $arch == *"x86"* ]]; then
 fi
 if [ "$1" == "gpu" ]; then
-    CMAKE_VARS="-DUSE_CUDA=ON -DUSE_NCCL=ON $CMAKE_VARS"
+    CMAKE_VARS="-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_FP16=ON $CMAKE_VARS"
 fi
 if [ -d build ]; then