Unverified commit 0297773a authored by Gao016, committed by GitHub

A tiny fix to support DeepSeek BF16 weights (#12313)


Co-authored-by: gaochang <gaochang@U-19PX2WQ1-0350.local>
parent 587deb15
```diff
@@ -2998,7 +2998,7 @@ class DeepseekV2ForCausalLM(nn.Module):
             disable_reason = "Only Deepseek V3/R1 on NV-platform with capability >= 80 can use shared experts fusion optimization."
         elif get_moe_expert_parallel_world_size() > 1:
             disable_reason = "Deepseek V3/R1 can not use shared experts fusion optimization under expert parallelism."
-        elif self.quant_config.get_name() == "w4afp8":
+        elif self.quant_config and self.quant_config.get_name() == "w4afp8":
             disable_reason = "Deepseek V3/R1 W4AFP8 model uses different quant method for routed experts and shared experts."
         if disable_reason is not None:
```
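For context: when the model is loaded with unquantized BF16 weights, no quantization config is present, so `self.quant_config` is `None` and the original `self.quant_config.get_name()` call raised an `AttributeError`. The added truthiness check short-circuits the comparison in that case. Below is a minimal, self-contained sketch of the guard pattern; the `QuantConfig` class and `shared_experts_fusion_disable_reason` function are hypothetical stand-ins, not the actual sglang API.

```python
# Minimal sketch of the None-guard pattern from this commit.
# QuantConfig and shared_experts_fusion_disable_reason are illustrative
# stand-ins; the real logic lives inside DeepseekV2ForCausalLM in sglang.
from typing import Optional


class QuantConfig:
    """Hypothetical stand-in for a quantization config (e.g. w4afp8)."""

    def __init__(self, name: str) -> None:
        self._name = name

    def get_name(self) -> str:
        return self._name


def shared_experts_fusion_disable_reason(
    quant_config: Optional[QuantConfig],
) -> Optional[str]:
    # Before the fix: `quant_config.get_name()` raised AttributeError
    # when quant_config was None (the BF16 / unquantized-weights case).
    # The truthiness check skips the comparison entirely when no
    # quantization config exists.
    if quant_config and quant_config.get_name() == "w4afp8":
        return (
            "Deepseek V3/R1 W4AFP8 model uses different quant method "
            "for routed experts and shared experts."
        )
    return None  # BF16 weights: no quant config, fusion stays enabled


# BF16 weights no longer crash; W4AFP8 still disables the fusion.
assert shared_experts_fusion_disable_reason(None) is None
assert shared_experts_fusion_disable_reason(QuantConfig("w4afp8")) is not None
```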