"tests/spec_decode/e2e/untest_integration.py" did not exist on "1a9a61d76c00b52b9925818cb4e06e5cfc32805e"
Unverified commit 771913e4, authored by Vadim Gimpelson, committed by GitHub
Browse files

[Bugfix] Fix NVFP4+MTP crash: force unquantized mtp.fc for Qwen3.5 (#38832)


Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
parent 71a9125c
......@@ -75,13 +75,22 @@ class Qwen3_5MultiTokenPredictor(nn.Module):
config.hidden_size,
)
# Workaround: mtp.fc is stored as BF16 in NVFP4 checkpoints but is
# missing from hf_quant_config.json exclude_modules. Force unquantized.
# Ref: https://github.com/vllm-project/vllm/pull/38650
# Ref: https://github.com/NVIDIA/Model-Optimizer/pull/1124
fc_quant = (
None
if (quant_config and quant_config.get_name() == "modelopt_fp4")
else quant_config
)
self.fc = ColumnParallelLinear(
self.config.hidden_size * 2,
self.config.hidden_size,
gather_output=True,
bias=False,
return_bias=False,
quant_config=quant_config,
quant_config=fc_quant,
prefix=f"{prefix}.fc",
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment