[Model] Fix bailing_moe accuracy problem (#28277)

Signed-off-by: zhaozx-cn <zhaozx2116@163.com>

[Model] Fix bailing_moe accuracy problem (#28277)
Signed-off-by: zhaozx-cn <zhaozx2116@163.com>
433c0f86 · zhaozx-cn · GitHub · 8d3748d3 · 433c0f86
Unverified commit 433c0f86, authored Nov 14, 2025 by zhaozx-cn; committed by GitHub on Nov 14, 2025
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 2 deletions

vllm/model_executor/models/bailing_moe.py +3 -2

No files found.
--- a/vllm/model_executor/models/bailing_moe.py
+++ b/vllm/model_executor/models/bailing_moe.py
@@ -39,7 +39,6 @@ from vllm.distributed import (
    get_pp_group,
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
 )
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe import SharedFusedMoE
@@ -330,7 +329,9 @@ class BailingMoE(nn.Module):
            final_hidden_states = final_hidden_states + shared_output

        if self.tp_size > 1:
-            final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
+            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
+                final_hidden_states
+            )
        return final_hidden_states.view(num_tokens, hidden_size)