Commit 4f510bc2 (unverified), authored by yiz-liu, committed by GitHub
Browse files

[Model] Removes redundant all-reduce operation in Qwen3MoeSparseMoeBlock (#23169)


Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
parent 1298c677
@@ -139,7 +139,7 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
top_k=config.num_experts_per_tok,
hidden_size=config.hidden_size,
intermediate_size=config.moe_intermediate_size,
-reduce_results=False,
+reduce_results=True,
renormalize=config.norm_topk_prob,
quant_config=quant_config,
prefix=f"{prefix}.experts",
@@ -163,10 +163,6 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
final_hidden_states = self.experts(hidden_states=hidden_states,
router_logits=router_logits)
-if self.tp_size > 1:
-final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel( # noqa E501
-final_hidden_states)
return final_hidden_states.view(orig_shape)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment