Commit 8437d295 authored by Michael Carilli's avatar Michael Carilli
Browse files

Fixing interaction of DDP with dynamic loss scaling

parent 74c06d87
...@@ -24,13 +24,9 @@ struct ScaleFunctor ...@@ -24,13 +24,9 @@ struct ScaleFunctor
TensorListMetadata<2>& tl, TensorListMetadata<2>& tl,
float scale) float scale)
{ {
__shared__ int noop_smem; // I'd like this kernel to propagate infs/nans.
// if(*noop_gmem == 1)
if(threadIdx.x == 0) // return;
noop_smem = *noop_gmem;
__syncthreads();
if(noop_smem == 1)
return;
int tensor_loc = tl.block_to_tensor[blockIdx.x]; int tensor_loc = tl.block_to_tensor[blockIdx.x];
int chunk_idx = tl.block_to_chunk[blockIdx.x]; int chunk_idx = tl.block_to_chunk[blockIdx.x];
...@@ -44,7 +40,7 @@ struct ScaleFunctor ...@@ -44,7 +40,7 @@ struct ScaleFunctor
n -= chunk_idx*chunk_size; n -= chunk_idx*chunk_size;
// Non-divergent exit condition for the __syncthreads // Non-divergent exit condition for __syncthreads, not necessary here
float incoming_vals[ILP]; float incoming_vals[ILP];
for(int i_start = 0; for(int i_start = 0;
i_start < n && i_start < chunk_size; i_start < n && i_start < chunk_size;
...@@ -72,17 +68,11 @@ struct ScaleFunctor ...@@ -72,17 +68,11 @@ struct ScaleFunctor
if(isfinite(incoming_vals[ii])) if(isfinite(incoming_vals[ii]))
out[i] = static_cast<out_t>(incoming_vals[ii]*scale); out[i] = static_cast<out_t>(incoming_vals[ii]*scale);
else else
{
out[i] = static_cast<out_t>(incoming_vals[ii]*scale);
*noop_gmem = 1; // Blindly fire off a write. These will race but that's ok. *noop_gmem = 1; // Blindly fire off a write. These will race but that's ok.
}
} }
// *noop_gmem = 1 is NOT guaranteed to be seen immediately by thread 0. I wonder if
// we can rig block-wide and grid-wide short-circuiting with only one syncthreads.
// It's possible we can just lean on the cache (no smem or syncs) and still be fast.
if(threadIdx.x == 0)
noop_smem = *noop_gmem;
__syncthreads();
if(noop_smem == 1)
break;
} }
} }
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment