Commit 4f510bc2 (unverified), authored by yiz-liu, committed by GitHub
Browse files

[Model] Removes redundant all-reduce operation in Qwen3MoeSparseMoeBlock (#23169)


Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
parent 1298c677
...@@ -139,7 +139,7 @@ class Qwen3MoeSparseMoeBlock(nn.Module): ...@@ -139,7 +139,7 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
top_k=config.num_experts_per_tok, top_k=config.num_experts_per_tok,
hidden_size=config.hidden_size, hidden_size=config.hidden_size,
intermediate_size=config.moe_intermediate_size, intermediate_size=config.moe_intermediate_size,
reduce_results=False, reduce_results=True,
renormalize=config.norm_topk_prob, renormalize=config.norm_topk_prob,
quant_config=quant_config, quant_config=quant_config,
prefix=f"{prefix}.experts", prefix=f"{prefix}.experts",
...@@ -163,10 +163,6 @@ class Qwen3MoeSparseMoeBlock(nn.Module): ...@@ -163,10 +163,6 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
final_hidden_states = self.experts(hidden_states=hidden_states, final_hidden_states = self.experts(hidden_states=hidden_states,
router_logits=router_logits) router_logits=router_logits)
if self.tp_size > 1:
final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel( # noqa E501
final_hidden_states)
return final_hidden_states.view(orig_shape) return final_hidden_states.view(orig_shape)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment