Commit b31c7251 authored by zhuwenwen
Browse files

fix run error

parent bdd33b3f
......@@ -912,6 +912,7 @@ class ModelConfig:
# imports during override detection (e.g., MXFP4 imports Triton)
"mxfp4",
"cpu_awq",
"slimquant_marlin",
"slimquant_w4a8_marlin",
"slimquant_compressed_tensors_marlin",
]
......
......@@ -371,7 +371,7 @@ class SpeculativeConfig:
tokenizer_revision=self.target_model_config.tokenizer_revision,
spec_target_max_model_len=self.target_model_config.max_model_len,
quantization=self.quantization,
enforce_eager=True if envs.VLLM_SPEC_DECODE_EAGER else self.target_model_config.enforce_eager,
enforce_eager=self.target_model_config.enforce_eager,
max_logprobs=self.target_model_config.max_logprobs,
hf_overrides=SpeculativeConfig.hf_config_override,
config_format=self.target_model_config.config_format,
......
......@@ -263,9 +263,14 @@ class DeepseekV2MoE(nn.Module):
prefix=f"{prefix}.gate",
)
if getattr(config, "topk_method", None) == "noaux_tc":
self.gate.e_score_correction_bias = nn.Parameter(
torch.empty(config.n_routed_experts, dtype=torch.float32)
)
if envs.VLLM_ENABLE_MOE_FUSED_GATE:
# avoid moe_fused_gate precision error
self.gate.e_score_correction_bias = nn.Parameter(
torch.empty(config.n_routed_experts))
else:
self.gate.e_score_correction_bias = nn.Parameter(
torch.empty(config.n_routed_experts, dtype=torch.float32)
)
else:
self.gate.e_score_correction_bias = None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment