Add use_int4_w4a8 parameter to fused_experts

048f7316 · zhuwenwen · 11b94900 · 048f7316
Commit 048f7316 authored Oct 12, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 0 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py +1 -0

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1642,6 +1642,7 @@ def fused_experts(
    quant_config: Optional[FusedMoEQuantConfig] = None,
    allow_deep_gemm: bool = False,
    allow_cutlass_block_scaled_grouped_gemm: bool = False,
+    use_int4_w4a8: bool = False,
    use_nn_moe: Optional[bool] = False,
 ) -> torch.Tensor: