Unverified Commit bc4c452c authored by Oleg Goncharov, committed by GitHub
Browse files

[common] Removed tensor boundary checks in MXFP8 kernels (#1519)



Added constexpr checks of tensor boundaries
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>
parent fc1b91c2
...@@ -261,7 +261,13 @@ __global__ void __launch_bounds__(MXFP8_THREADS_PER_CHUNK) ...@@ -261,7 +261,13 @@ __global__ void __launch_bounds__(MXFP8_THREADS_PER_CHUNK)
} }
} }
in_compute[j] = elt; in_compute[j] = elt;
if (!out_of_bounds) {
if constexpr (IS_ACT || IS_DACT) {
if (!out_of_bounds) {
thread_amax = fmaxf(thread_amax, fabsf(elt));
}
} else {
// If no activation, elt is 0 so we can safely do this
thread_amax = fmaxf(thread_amax, fabsf(elt)); thread_amax = fmaxf(thread_amax, fabsf(elt));
} }
} }
...@@ -320,7 +326,12 @@ __global__ void __launch_bounds__(MXFP8_THREADS_PER_CHUNK) ...@@ -320,7 +326,12 @@ __global__ void __launch_bounds__(MXFP8_THREADS_PER_CHUNK)
} }
} }
in_compute[i] = elt; in_compute[i] = elt;
if (!out_of_bounds) { if constexpr (IS_ACT || IS_DACT) {
if (!out_of_bounds) {
amax = fmaxf(amax, fabsf(elt));
}
} else {
// If no activation, elt is 0 so we can safely do this
amax = fmaxf(amax, fabsf(elt)); amax = fmaxf(amax, fabsf(elt));
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment