Commit 12b5bcb1 authored by wangmin6
Browse files

Merge branch 'v0.15.1-dev_lightop_moe_sum_mul_add' into 'v0.15.1-dev'

fix(moe): 仅在 fused moe_sum+mul+add 开启时透传 shared_output

See merge request dcutoolkit/deeplearing/vllm!520
parents 84b9fe55 839dc88e
...@@ -2060,8 +2060,18 @@ class FusedMoE(CustomOp): ...@@ -2060,8 +2060,18 @@ class FusedMoE(CustomOp):
use_nn_moe=self.use_nn_moe, use_nn_moe=self.use_nn_moe,
i_q=i_q, i_q=i_q,
i_s=i_s, i_s=i_s,
shared_output=shared_output, shared_output=(
routed_scaling_factor=routed_scaling_factor, shared_output
if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
and shared_output is not None
else None
),
routed_scaling_factor=(
routed_scaling_factor
if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
and shared_output is not None
else 1.0
),
) )
else: else:
final_hidden_states = self.quant_method.apply( final_hidden_states = self.quant_method.apply(
...@@ -2070,8 +2080,18 @@ class FusedMoE(CustomOp): ...@@ -2070,8 +2080,18 @@ class FusedMoE(CustomOp):
topk_weights=topk_weights, topk_weights=topk_weights,
topk_ids=topk_ids, topk_ids=topk_ids,
use_nn_moe=self.use_nn_moe, use_nn_moe=self.use_nn_moe,
shared_output=shared_output, shared_output=(
routed_scaling_factor=routed_scaling_factor, shared_output
if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
and shared_output is not None
else None
),
routed_scaling_factor=(
routed_scaling_factor
if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
and shared_output is not None
else 1.0
),
) )
if has_separate_shared_experts: if has_separate_shared_experts:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment