Unverified commit a8178684, authored by Kirthi Shankar Sivamani and committed by GitHub
Browse files

Avoid amax roll for non-run modules (#825)


Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
parent 0757149d
...@@ -197,16 +197,18 @@ kernel_bulk( ...@@ -197,16 +197,18 @@ kernel_bulk(
const auto last_amax = ((amax_reduction_buffer != nullptr) const auto last_amax = ((amax_reduction_buffer != nullptr)
&& (amax_reduction_buffer[offset_in_buffer+count] != 0.0f)) ? && (amax_reduction_buffer[offset_in_buffer+count] != 0.0f)) ?
amax_reduction_buffer[offset_in_buffer+count] : amax_history[0]; amax_reduction_buffer[offset_in_buffer+count] : amax_history[0];
for (size_t off = 0; off < length; off += bsize) { if (last_amax != 0.0f) {
const size_t i = off + tid; for (size_t off = 0; off < length; off += bsize) {
float a = 0; const size_t i = off + tid;
if (i < length) { float a = 0;
a = (i < length - 1) ? amax_history[(i+1)*stride] : last_amax; if (i < length) {
amax = fmaxf(amax, a); a = (i < length - 1) ? amax_history[(i+1)*stride] : last_amax;
} amax = fmaxf(amax, a);
__syncthreads(); // Inplace roll }
if (i < length) { __syncthreads(); // Inplace roll
amax_history[i*stride] = (i > 0) ? a : 0; if (i < length) {
amax_history[i*stride] = (i > 0) ? a : 0;
}
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment