Unverified commit 12a3f645, authored by Netanel Haber, committed by GitHub
Browse files

[Bugfix][MoE] Only unpad routed output before shared expert add or routed output transform (#40865)


Signed-off-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
parent 60cd878a
......@@ -552,6 +552,8 @@ class MoERunner(MoERunnerInterface):
# Record before `_maybe_pad_hidden_states` pads activations to match
# `moe_config.hidden_dim`, e.g. after `align_trtllm_fp4_moe_hidden_dim_for_fi`
# so routed output can be trimmed before
# shared+routed add / latent up proj if needed.
routed_hidden_dim = hidden_states.shape[-1]
hidden_states, og_hidden_dim = self._maybe_pad_hidden_states(
shared_experts_input,
......@@ -577,7 +579,9 @@ class MoERunner(MoERunnerInterface):
# Extract outputs from result
shared_output, fused_output = _unpack(result)
if hidden_dim_was_padded:
if (
shared_output is not None or self.routed_output_transform is not None
) and hidden_dim_was_padded:
fused_output = fused_output[..., :routed_hidden_dim]
# If combine kernel already reduced fused, reduce shared to match.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment