Unverified commit 29b84c16, authored by Oleg Goncharov, committed by GitHub

[Common] Fix NVFP4 tuned-kernel numerics (#2639)



* Fixed scaling-factor computation for FP32 to match the reference implementation.
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>

* Uncommented the tuned kernel path
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



---------
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
parent 94ba75d7
@@ -1168,10 +1168,10 @@ void quantize_transpose(const Tensor &input, const Tensor *noop, Tensor *output,
   // TODO(Frank): Is there a better way to do this?
   bool return_transpose = output->has_columnwise_data();
-  // if (!use_2d_quantization && (input.dtype() == DType::kBFloat16)) {
-  //   quantize_transpose_tuned_1D(input, noop, output, quant_config, stream);
-  //   return;
-  // }
+  if (!use_2d_quantization && (input.dtype() == DType::kBFloat16)) {
+    quantize_transpose_tuned_1D(input, noop, output, quant_config, stream);
+    return;
+  }
   constexpr bool COMPUTE_ACTIVATIONS = false;
   using ParamOP = Empty;
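The hunk above restores the early dispatch to the tuned 1D kernel that had been commented out. As a rough, standalone restatement (not Transformer Engine code; the enum below is assumed for illustration), the condition that now routes work to quantize_transpose_tuned_1D can be read as a simple predicate:

enum class DType { kBFloat16, kFloat16, kFloat32 };  // assumed mirror of the enum used in the diff

// Returns true when the tuned 1D NVFP4 quantize+transpose kernel is taken:
// a BF16 input with 1D (row-wise block) scaling. 2D-scaled or non-BF16 inputs
// fall through to the generic path later in quantize_transpose().
bool takes_tuned_1d_path(bool use_2d_quantization, DType input_dtype) {
  return !use_2d_quantization && (input_dtype == DType::kBFloat16);
}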
@@ -163,9 +163,24 @@ __device__ __forceinline__ float get_amax_of_pair(const IType2 pair) {
 template <typename SF_TYPE>
 __device__ __forceinline__ SF_TYPE
 compute_nvfp4_scaling_coefficient(const nvfp4_scale_t S_dec_block, const float S_enc) {
-  constexpr float float_max = detail::TypeExtrema<SF_TYPE>::max;
-  const float scale_rcp = fminf(S_enc / static_cast<float>(S_dec_block), float_max);
-  return static_cast<SF_TYPE>(scale_rcp);
+  NVTE_DEVICE_ERROR("Unsupported scaling-factor type. Only FP32 and BF16 are supported.");
 }
+
+template <>
+__device__ __forceinline__ float compute_nvfp4_scaling_coefficient<float>(
+    const nvfp4_scale_t S_dec_block, const float S_enc) {
+  const float S_dec = 1.0f / S_enc;
+  const float scale_rcp =
+      fminf(1.0f / (static_cast<float>(S_dec_block) * S_dec), detail::TypeExtrema<float>::max);
+  return scale_rcp;
+}
+
+template <>
+__device__ __forceinline__ bf16
+compute_nvfp4_scaling_coefficient<bf16>(const nvfp4_scale_t S_dec_block, const float S_enc) {
+  const float scale_rcp =
+      fminf(S_enc / (static_cast<float>(S_dec_block)), detail::TypeExtrema<bf16>::max);
+  return static_cast<bf16>(scale_rcp);
+}
 
 template <bool USE_STOCHASTIC_ROUNDING, bool USE_FAST_MATH>
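The FP32 specialization above is the numerics fix named in the commit message: rather than folding everything into one division (S_enc / S_dec_block), it first materializes S_dec = 1/S_enc and then takes the reciprocal of the product, matching the reference implementation. The two expressions are algebraically equivalent but each FP32 operation rounds, so they can disagree in the last ULP. A minimal host-side sketch of the comparison (illustrative only, not part of the commit; the example scale values are arbitrary):

#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  const float float_max = std::numeric_limits<float>::max();
  const float S_enc = 448.0f / 6.0f;  // hypothetical global encode scale
  const float S_dec_block = 0.8125f;  // hypothetical per-block decode scale

  // Old expression: a single rounded division, then clamp to FP32 max.
  const float old_coeff = std::fminf(S_enc / S_dec_block, float_max);

  // Reference-style expression (what the new FP32 specialization computes):
  // materialize S_dec = 1/S_enc, then take the reciprocal of the product.
  const float S_dec = 1.0f / S_enc;
  const float new_coeff = std::fminf(1.0f / (S_dec_block * S_dec), float_max);

  // The two results can differ in the last ULP for some inputs; that rounding
  // difference is what caused the tuned kernel's numerics mismatch.
  std::printf("old: %.9g  new: %.9g  equal: %d\n", old_coeff, new_coeff,
              old_coeff == new_coeff);
  return 0;
}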