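[Bugfix] Fix passing of activation_type to trtllm fused MoE NVFP4 and FP8 (#36017)

FP8 (TrtLlmFp8Experts): use the value returned by
activation_to_flashinfer_int() directly as activation_type instead of
wrapping it in flashinfer's ActivationType enum, and drop the now-unused
ActivationType import.

NVFP4 (TrtLlmNvFp4ExpertsMonolithic): add the activation_type argument,
computed by activation_to_flashinfer_int(activation), to the call, which
previously did not pass it.

Signed-off-by: amitz-nv <203509407+amitz-nv@users.noreply.github.com>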

[Bugfix] Fix passing of activation_type to trtllm fused MoE NVFP4 and FP8 (#36017)
Signed-off-by: amitz-nv <203509407+amitz-nv@users.noreply.github.com>
d7adcadb · amitz-nv · GitHub · f678c3f6 · d7adcadb · d7adcadb
Unverified Commit d7adcadb authored Mar 05, 2026 by amitz-nv Committed by GitHub Mar 04, 2026
2 changed files
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
@@ -240,12 +240,11 @@ class TrtLlmFp8Experts(mk.FusedMoEExpertsMonolithic):
    ) -> torch.Tensor:
        # Delay import for non-CUDA.
        import flashinfer
-        from flashinfer.fused_moe.core import ActivationType
        # Confirm supported activation function.
        assert activation in [MoEActivation.SILU, MoEActivation.RELU2_NO_MUL]
-        activation_type = ActivationType(activation_to_flashinfer_int(activation))
+        activation_type = activation_to_flashinfer_int(activation)
        # Confirm Llama-4 routing is proper.
        if self.routing_method_type == RoutingMethodType.Llama4:

--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_nvfp4_moe.py
@@ -323,4 +323,5 @@ class TrtLlmNvFp4ExpertsMonolithic(
            routed_scaling_factor=routed_scaling_factor,
            routing_method_type=self.routing_method_type,
            do_finalize=True,
+            activation_type=activation_to_flashinfer_int(activation),
        )[0]