Unverified Commit c8a9ae75 authored by Zachary Streeter's avatar Zachary Streeter Committed by GitHub
Browse files

[Fix] Using PyTorch WARP_SHFL_DOWN macro for half support (#2843)

parent 6e9ee267
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#ifndef CARAFE_CUDA_KERNEL_CUH #ifndef CARAFE_CUDA_KERNEL_CUH
#define CARAFE_CUDA_KERNEL_CUH #define CARAFE_CUDA_KERNEL_CUH
#include <ATen/cuda/DeviceUtils.cuh>
#ifdef MMCV_USE_PARROTS #ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp" #include "parrots_cuda_helper.hpp"
#else #else
...@@ -56,7 +58,8 @@ template <> ...@@ -56,7 +58,8 @@ template <>
__device__ __forceinline__ phalf warpReduceSum(phalf val) { __device__ __forceinline__ phalf warpReduceSum(phalf val) {
for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2) for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
#ifdef MMCV_WITH_HIP #ifdef MMCV_WITH_HIP
__PHALF(val) += __shfl_down(val, offset); // Using PyTorch's macro for half support
__PHALF(val) += WARP_SHFL_DOWN(val, offset);
#else #else
__PHALF(val) += __PHALF(val) +=
__shfl_down_sync(FULL_MASK, __PHALF(val).operator __half(), offset); __shfl_down_sync(FULL_MASK, __PHALF(val).operator __half(), offset);
......
Markdown is supported
Attach a file by drag & drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.