[Bug][MoE] Fix TRTLLM NVFP4 Routing Kernel Precision (#36725)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>

[Bug][MoE] Fix TRTLLM NVFP4 Routing Kernel Precision (#36725)
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
5bf3c42d · Robert Shaw · GitHub · 38364a7e · 5bf3c42d
Unverified Commit 5bf3c42d authored Mar 23, 2026 by Robert Shaw Committed by GitHub Mar 23, 2026
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 5 deletions

vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py +2 -5

No files found.
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py
@@ -298,10 +298,7 @@ class TrtLlmNvFp4ExpertsMonolithic(
            and self.routing_method_type != RoutingMethodType.Llama4
        )

-        # Prepare routing bias into kernel format.
-        routing_bias = e_score_correction_bias
-        if routing_bias is not None:
-            routing_bias = routing_bias.to(torch.bfloat16)
+        # Prepare router logits for kernel format.
        router_logits = (
            router_logits.to(torch.float32)
            if self.routing_method_type == RoutingMethodType.DeepSeekV3
@@ -311,7 +308,7 @@ class TrtLlmNvFp4ExpertsMonolithic(
        # Invoke kernel.
        return flashinfer.fused_moe.trtllm_fp4_block_scale_moe(
            routing_logits=router_logits,
-            routing_bias=routing_bias,
+            routing_bias=e_score_correction_bias,
            hidden_states=hidden_states,
            hidden_states_scale=a1q_scale.view(torch.float8_e4m3fn).reshape(
                *hidden_states.shape[:-1], -1