Follow-up fix for the Llama 4 TRT-LLM FlashInfer backend (#12314)

83a7c89c · b8zhong · GitHub · 0380ca82 · 83a7c89c
Unverified commit 83a7c89c, authored Oct 28, 2025 by b8zhong; committed by GitHub on Oct 28, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

python/sglang/srt/layers/quantization/modelopt_quant.py (+1 -1)

No files found.
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -673,7 +673,7 @@ class ModelOptFp8MoEMethod(FusedMoEMethodBase):
                    routed_scaling_factor if routed_scaling_factor is not None else 1.0
                ),
                use_routing_scales_on_input=use_routing_scales_on_input,
-                tile_tokens_dim=None,
+                tile_tokens_dim=8,  # TODO(brayden): use the FI tile calculation
                routing_method_type=routing_method_type,
            )