[ci] fix llama4 ci error (#5126)

db452760 · Xiaoyu Zhang · GitHub · 57f99608 · db452760
Unverified commit db452760, authored Apr 07, 2025 by Xiaoyu Zhang; committed by GitHub on Apr 07, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 0 deletions

python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py (+2 -0)

No files found.
--- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -285,6 +285,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
        activation: str = "silu",
        inplace: bool = True,
        no_combine: bool = False,
+        apply_router_weight_on_input: bool = False,
    ) -> torch.Tensor:
        from sglang.srt.layers.moe.fused_moe_triton import fused_experts
        from sglang.srt.layers.moe.topk import select_experts
@@ -314,6 +315,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
            w2_scale=layer.w2_weight_scale,
            a1_scale=layer.w13_input_scale,
            a2_scale=layer.w2_input_scale,
+            apply_router_weight_on_input=apply_router_weight_on_input,
        )