Unverified commit bc4c452c, authored by Oleg Goncharov, committed by GitHub
Browse files

[common] Removed tensor boundary checks in MXFP8 kernels (#1519)



Added constexpr checks of tensor boundaries
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>
parent fc1b91c2
......@@ -261,9 +261,15 @@ __global__ void __launch_bounds__(MXFP8_THREADS_PER_CHUNK)
}
}
in_compute[j] = elt;
if constexpr (IS_ACT || IS_DACT) {
if (!out_of_bounds) {
thread_amax = fmaxf(thread_amax, fabsf(elt));
}
} else {
// With no activation applied, an out-of-bounds elt is 0, so folding it into the amax is harmless
thread_amax = fmaxf(thread_amax, fabsf(elt));
}
}
__builtin_assume(block_amax >= 0);
......@@ -320,9 +326,14 @@ __global__ void __launch_bounds__(MXFP8_THREADS_PER_CHUNK)
}
}
in_compute[i] = elt;
if constexpr (IS_ACT || IS_DACT) {
if (!out_of_bounds) {
amax = fmaxf(amax, fabsf(elt));
}
} else {
// With no activation applied, an out-of-bounds elt is 0, so folding it into the amax is harmless
amax = fmaxf(amax, fabsf(elt));
}
}
__builtin_assume(block_amax >= 0);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment