Commit 46bb1d6d authored by wangmin6
Browse files

Merge branch 'v0.15.1-dev_fix_moe_bug' into 'v0.15.1-dev'

fix: 修复MOE量化tensor对于其他模型的影响

See merge request dcutoolkit/deeplearing/vllm!500
parents 8001970c 8e726b3f
......@@ -1721,11 +1721,16 @@ class FusedMoE(CustomOp):
hidden_states, router_logits
)
else:
shared_output, fused_output = torch.ops.vllm.moe_forward_shared(
hidden_states, router_logits, encode_layer_name(),
i_q=i_q,
i_s=i_s
)
if envs.USE_FUSED_RMS_QUANT:
shared_output, fused_output = torch.ops.vllm.moe_forward_shared(
hidden_states, router_logits, encode_layer_name(),
i_q=i_q,
i_s=i_s
)
else:
shared_output, fused_output = torch.ops.vllm.moe_forward_shared(
hidden_states, router_logits, encode_layer_name()
)
return (
reduce_output(shared_output)[..., :og_hidden_states],
reduce_output(fused_output)[..., :og_hidden_states],
......@@ -1976,7 +1981,10 @@ class FusedMoE(CustomOp):
# because matrix multiply maybe modify the hidden_states.
if has_separate_shared_experts and not use_shared_experts_stream:
assert self.shared_experts is not None
shared_output = self.shared_experts(hidden_states, iqis=(i_q, i_s))
if envs.USE_FUSED_RMS_QUANT:
shared_output = self.shared_experts(hidden_states, iqis=(i_q, i_s))
else:
shared_output = self.shared_experts(hidden_states)
# NOTE: Similar with DP, PCP also needs dispatch and combine. For
# simplicity, AgRsAll2All was added separately for PCP here. Maybe
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment