Commit beae085a authored by yangql
Browse files

处理VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD导致的awq推理bug问题 (Fix the AWQ inference bug caused by VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD)

parent 06185134
...@@ -2073,27 +2073,23 @@ class FusedMoE(CustomOp): ...@@ -2073,27 +2073,23 @@ class FusedMoE(CustomOp):
else 1.0 else 1.0
), ),
) )
else: elif envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD and shared_output is not None:
final_hidden_states = self.quant_method.apply( final_hidden_states = self.quant_method.apply(
layer=self, layer=self,
x=x, # The type signture of this is wrong due to the hack. x=x, # The type signture of this is wrong due to the hack.
topk_weights=topk_weights, topk_weights=topk_weights,
topk_ids=topk_ids, topk_ids=topk_ids,
use_nn_moe=self.use_nn_moe, use_nn_moe=self.use_nn_moe,
shared_output=( shared_output=shared_output,
shared_output routed_scaling_factor=routed_scaling_factor,
if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
and shared_output is not None
else None
),
routed_scaling_factor=(
routed_scaling_factor
if envs.VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD
and shared_output is not None
else 1.0
),
) )
else:
final_hidden_states = self.quant_method.apply(
layer=self,
x=x, # The type signture of this is wrong due to the hack.
topk_weights=topk_weights,
topk_ids=topk_ids,
use_nn_moe=self.use_nn_moe,)
if has_separate_shared_experts: if has_separate_shared_experts:
assert self.shared_experts is not None assert self.shared_experts is not None
......
...@@ -381,6 +381,7 @@ class MoeWNA16Method(FusedMoEMethodBase): ...@@ -381,6 +381,7 @@ class MoeWNA16Method(FusedMoEMethodBase):
topk_ids: torch.Tensor, topk_ids: torch.Tensor,
use_nn_moe: bool | None = False, use_nn_moe: bool | None = False,
use_fused_gate: bool | None = False, use_fused_gate: bool | None = False,
**_
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]: ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
from vllm.model_executor.layers.fused_moe import fused_experts from vllm.model_executor.layers.fused_moe import fused_experts
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment