Merge branch 'v0.15.1-dev_lightop_moe_sum_mul_add' into 'v0.15.1-dev'

fix(moe): 仅在 fused moe_sum+mul+add 开启时透传 shared_output。 See merge request dcutoolkit/deeplearing/vllm!520

Merge branch 'v0.15.1-dev_lightop_moe_sum_mul_add' into 'v0.15.1-dev'
fix(moe): 仅在 fused moe_sum+mul+add 开启时透传 shared_output。 See merge request dcutoolkit/deeplearing/vllm!520
12b5bcb1 · wangmin6 · 84b9fe55 · 839dc88e · 12b5bcb1
Commit 12b5bcb1 authored Mar 20, 2026 by wangmin6
Hide whitespace changes
Inline Side-by-side

Showing with 24 additions and 4 deletions

vllm/model_executor/layers/fused_moe/layer.py vllm/model_executor/layers/fused_moe/layer.py +24 -4

No files found.
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -2060,8 +2060,18 @@ class FusedMoE(CustomOp):
                        use_nn_moe=self.use_nn_moe,
                        i_q=i_q,
                        i_s=i_s,
-                        shared_output=shared_output,
+                        shared_output=(
-                        routed_scaling_factor=routed_scaling_factor,
+                            shared_output
+                            if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
+                            and shared_output is not None
+                            else None
+                        ),
+                        routed_scaling_factor=(
+                            routed_scaling_factor
+                            if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
+                            and shared_output is not None
+                            else 1.0
+                        ),
                    )
                else:
                    final_hidden_states = self.quant_method.apply(
@@ -2070,8 +2080,18 @@ class FusedMoE(CustomOp):
                        topk_weights=topk_weights,
                        topk_ids=topk_ids,
                        use_nn_moe=self.use_nn_moe,
-                        shared_output=shared_output,
+                        shared_output=(
-                        routed_scaling_factor=routed_scaling_factor,
+                            shared_output
+                            if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
+                            and shared_output is not None
+                            else None
+                        ),
+                        routed_scaling_factor=(
+                            routed_scaling_factor
+                            if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
+                            and shared_output is not None
+                            else 1.0
+                        ),
                    )
            if has_separate_shared_experts: