Commit b924a846 authored by zhuwenwen
Browse files

Merge branch 'v0.11.0-dev-wm' into 'v0.11.0-dev'

[fix]解决deepseek模型cudagraph模式下精度异常问题

See merge request dcutoolkit/deeplearing/vllm!278
parents 09c2856a 14dc2b30
......@@ -181,8 +181,13 @@ class DeepseekV2MoE(nn.Module):
quant_config=None,
prefix=f"{prefix}.gate")
if config.topk_method == "noaux_tc":
self.gate.e_score_correction_bias = nn.Parameter(
torch.empty(config.n_routed_experts, dtype=torch.float32))
if envs.VLLM_ENABLE_MOE_FUSED_GATE:
# avoid moe_fused_gate precision error
self.gate.e_score_correction_bias = nn.Parameter(
torch.empty(config.n_routed_experts))
else:
self.gate.e_score_correction_bias = nn.Parameter(
torch.empty(config.n_routed_experts, dtype=torch.float32))
else:
self.gate.e_score_correction_bias = None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment