Commit 433c0f86 (unverified), authored by zhaozx-cn, committed by GitHub
Browse files

[Model] Fix bailing_moe accuracy problem (#28277)


Signed-off-by: zhaozx-cn <zhaozx2116@163.com>
parent 8d3748d3
......@@ -39,7 +39,6 @@ from vllm.distributed import (
get_pp_group,
get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
tensor_model_parallel_all_reduce,
)
from vllm.model_executor.layers.activation import SiluAndMul
from vllm.model_executor.layers.fused_moe import SharedFusedMoE
......@@ -330,7 +329,9 @@ class BailingMoE(nn.Module):
final_hidden_states = final_hidden_states + shared_output
if self.tp_size > 1:
final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
final_hidden_states
)
return final_hidden_states.view(num_tokens, hidden_size)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment