Commit b31c7251 authored by zhuwenwen
Browse files

fix run error

parent bdd33b3f
...@@ -912,6 +912,7 @@ class ModelConfig: ...@@ -912,6 +912,7 @@ class ModelConfig:
# imports during override detection (e.g., MXFP4 imports Triton) # imports during override detection (e.g., MXFP4 imports Triton)
"mxfp4", "mxfp4",
"cpu_awq", "cpu_awq",
"slimquant_marlin",
"slimquant_w4a8_marlin", "slimquant_w4a8_marlin",
"slimquant_compressed_tensors_marlin", "slimquant_compressed_tensors_marlin",
] ]
......
...@@ -371,7 +371,7 @@ class SpeculativeConfig: ...@@ -371,7 +371,7 @@ class SpeculativeConfig:
tokenizer_revision=self.target_model_config.tokenizer_revision, tokenizer_revision=self.target_model_config.tokenizer_revision,
spec_target_max_model_len=self.target_model_config.max_model_len, spec_target_max_model_len=self.target_model_config.max_model_len,
quantization=self.quantization, quantization=self.quantization,
enforce_eager=True if envs.VLLM_SPEC_DECODE_EAGER else self.target_model_config.enforce_eager, enforce_eager=self.target_model_config.enforce_eager,
max_logprobs=self.target_model_config.max_logprobs, max_logprobs=self.target_model_config.max_logprobs,
hf_overrides=SpeculativeConfig.hf_config_override, hf_overrides=SpeculativeConfig.hf_config_override,
config_format=self.target_model_config.config_format, config_format=self.target_model_config.config_format,
......
...@@ -263,6 +263,11 @@ class DeepseekV2MoE(nn.Module): ...@@ -263,6 +263,11 @@ class DeepseekV2MoE(nn.Module):
prefix=f"{prefix}.gate", prefix=f"{prefix}.gate",
) )
if getattr(config, "topk_method", None) == "noaux_tc": if getattr(config, "topk_method", None) == "noaux_tc":
if envs.VLLM_ENABLE_MOE_FUSED_GATE:
# avoid moe_fused_gate precision error
self.gate.e_score_correction_bias = nn.Parameter(
torch.empty(config.n_routed_experts))
else:
self.gate.e_score_correction_bias = nn.Parameter( self.gate.e_score_correction_bias = nn.Parameter(
torch.empty(config.n_routed_experts, dtype=torch.float32) torch.empty(config.n_routed_experts, dtype=torch.float32)
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment