Commit 32fec198 authored by xiabo
Browse files

Include is missing an increase (in the `HIP_DIFF` branches, drops the `FULL_MASK` argument from the `__shfl_down` calls)

parent 59b0fb60
......@@ -56,7 +56,7 @@ template <>
__device__ __forceinline__ phalf warpReduceSum(phalf val) {
for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
#ifdef HIP_DIFF
- __PHALF(val) += __shfl_down(FULL_MASK, val, offset);
+ __PHALF(val) += __shfl_down(val, offset);
#else
__PHALF(val) +=
__shfl_down_sync(FULL_MASK, static_cast<__half>(__PHALF(val)), offset);
......
......@@ -79,7 +79,7 @@ __global__ void correlation_forward_cuda_kernel(
// accumulate
for (int offset = 16; offset > 0; offset /= 2)
#ifdef HIP_DIFF
- prod_sum += __shfl_down(FULL_MASK, prod_sum, offset);
+ prod_sum += __shfl_down(float(prod_sum), offset);
#else
prod_sum += __shfl_down_sync(FULL_MASK, float(prod_sum), offset);
#endif
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment