修复ds3 int8调用的功能

cd87548a · gaoqiong · 8f73ab36 · cd87548a
Commit cd87548a authored Apr 22, 2025 by gaoqiong
Show whitespace changes
Inline Side-by-side

Showing with 10 additions and 3 deletions

vllm/model_executor/layers/quantization/blockwise_int8.py +10 -3

No files found.
--- a/vllm/model_executor/layers/quantization/blockwise_int8.py
+++ b/vllm/model_executor/layers/quantization/blockwise_int8.py
@@ -373,18 +373,21 @@ class BlockInt8MoEMethod:
        router_logits: torch.Tensor,
        top_k: int,
        renormalize: bool,
-        use_grouped_topk: bool,
+        use_grouped_topk: bool = False,
        topk_group: Optional[int] = None,
-        use_nn_moe: Optional[bool] = False,
        num_expert_group: Optional[int] = None,
+        global_num_experts: int = -1,
+        expert_map: Optional[torch.Tensor] = None,
        custom_routing_function: Optional[Callable] = None,
        scoring_func: str = "softmax",
        e_score_correction_bias: Optional[torch.Tensor] = None,
+        apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
+        use_nn_moe: Optional[bool] = False,
    ) -> torch.Tensor:
        from vllm.model_executor.layers.fused_moe import fused_experts
-        #print("===========fused_experts========================")
        # Expert selection
        topk_weights, topk_ids = FusedMoE.select_experts(
            hidden_states=x,
@@ -409,6 +412,10 @@ class BlockInt8MoEMethod:
            topk_ids=topk_ids,
            inplace=True,
            use_int8_w8a8=True,
+            activation=activation,
+            expert_map=expert_map,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+            global_num_experts=global_num_experts,
            w1_scale=(layer.w13_weight_scale_inv),
            w2_scale=(layer.w2_weight_scale_inv),
            a1_scale=layer.w13_input_scale,