Unverified commit 433c0f86, authored by zhaozx-cn, committed by GitHub
Browse files

[Model] Fix bailing_moe accuracy problem (#28277)


Signed-off-by: zhaozx-cn <zhaozx2116@163.com>
parent 8d3748d3
...@@ -39,7 +39,6 @@ from vllm.distributed import ( ...@@ -39,7 +39,6 @@ from vllm.distributed import (
get_pp_group, get_pp_group,
get_tensor_model_parallel_rank, get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size, get_tensor_model_parallel_world_size,
tensor_model_parallel_all_reduce,
) )
from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.activation import SiluAndMul
from vllm.model_executor.layers.fused_moe import SharedFusedMoE from vllm.model_executor.layers.fused_moe import SharedFusedMoE
...@@ -330,7 +329,9 @@ class BailingMoE(nn.Module): ...@@ -330,7 +329,9 @@ class BailingMoE(nn.Module):
final_hidden_states = final_hidden_states + shared_output final_hidden_states = final_hidden_states + shared_output
if self.tp_size > 1: if self.tp_size > 1:
- final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
+ final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
+     final_hidden_states
+ )
return final_hidden_states.view(num_tokens, hidden_size) return final_hidden_states.view(num_tokens, hidden_size)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment