Commit 46bb1d6d authored by wangmin6
Browse files

Merge branch 'v0.15.1-dev_fix_moe_bug' into 'v0.15.1-dev'

fix: 修复MOE量化tensor对于其他模型的影响

See merge request dcutoolkit/deeplearing/vllm!500
parents 8001970c 8e726b3f
...@@ -1721,11 +1721,16 @@ class FusedMoE(CustomOp): ...@@ -1721,11 +1721,16 @@ class FusedMoE(CustomOp):
hidden_states, router_logits hidden_states, router_logits
) )
else: else:
shared_output, fused_output = torch.ops.vllm.moe_forward_shared( if envs.USE_FUSED_RMS_QUANT:
hidden_states, router_logits, encode_layer_name(), shared_output, fused_output = torch.ops.vllm.moe_forward_shared(
i_q=i_q, hidden_states, router_logits, encode_layer_name(),
i_s=i_s i_q=i_q,
) i_s=i_s
)
else:
shared_output, fused_output = torch.ops.vllm.moe_forward_shared(
hidden_states, router_logits, encode_layer_name()
)
return ( return (
reduce_output(shared_output)[..., :og_hidden_states], reduce_output(shared_output)[..., :og_hidden_states],
reduce_output(fused_output)[..., :og_hidden_states], reduce_output(fused_output)[..., :og_hidden_states],
...@@ -1976,7 +1981,10 @@ class FusedMoE(CustomOp): ...@@ -1976,7 +1981,10 @@ class FusedMoE(CustomOp):
# because matrix multiply maybe modify the hidden_states. # because matrix multiply maybe modify the hidden_states.
if has_separate_shared_experts and not use_shared_experts_stream: if has_separate_shared_experts and not use_shared_experts_stream:
assert self.shared_experts is not None assert self.shared_experts is not None
shared_output = self.shared_experts(hidden_states, iqis=(i_q, i_s)) if envs.USE_FUSED_RMS_QUANT:
shared_output = self.shared_experts(hidden_states, iqis=(i_q, i_s))
else:
shared_output = self.shared_experts(hidden_states)
# NOTE: Similar with DP, PCP also needs dispatch and combine. For # NOTE: Similar with DP, PCP also needs dispatch and combine. For
# simplicity, AgRsAll2All was added separately for PCP here. Maybe # simplicity, AgRsAll2All was added separately for PCP here. Maybe
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment