Commit 758826fc authored by Thor Johnsen
Browse files

Resolve possible race condition in stride_finite_check kernel

parent 0bfb8300
......@@ -793,7 +793,8 @@ void fused_strided_check_finite(
//Determine #threads and #blocks
const int threadsPerBlock = 512;
const dim3 blocks((niter+threadsPerBlock-1)/threadsPerBlock);
//To avoid a race condition, exactly one block must be launched when the clear_overflow_first flag is set.
const dim3 blocks(clear_overflow_first ? 1 : (niter+threadsPerBlock-1)/threadsPerBlock);
AT_ASSERTM(at::cuda::detail::canUse32BitIndexMath(p_copy), "parameter tensor is too large to be indexed with int32");
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment