[Bugfix] Move flashinfer kernel check into ```__init__``` function of ```FusedMoE``` (#29018)

Signed-off-by: Max Hu <hyoung2991@gmail.com>

[Bugfix] Move flashinfer kernel check into ```__init__``` function of ```FusedMoE``` (#29018)
Signed-off-by: Max Hu <hyoung2991@gmail.com>
cb0a7b4b · Max Hu · GitHub · 8f4f77a7 · cb0a7b4b
Unverified Commit cb0a7b4b authored Nov 19, 2025 by Max Hu Committed by GitHub Nov 19, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 1 deletion

vllm/model_executor/layers/fused_moe/layer.py vllm/model_executor/layers/fused_moe/layer.py +4 -1

No files found.
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -574,6 +574,9 @@ class FusedMoE(CustomOp):
            is_act_and_mul=is_act_and_mul,
            is_lora_enabled=vllm_config.lora_config is not None,
        )
+        self.moe_config_use_flashinfer_cutlass_kernels = (
+            self.moe_config.use_flashinfer_cutlass_kernels
+        )

        self.quant_config = quant_config

@@ -728,7 +731,7 @@ class FusedMoE(CustomOp):
        return (
            self.moe_quant_config is not None
            and self.moe_quant_config.quant_dtype == "nvfp4"
-            and self.moe_config.use_flashinfer_cutlass_kernels
+            and self.moe_config_use_flashinfer_cutlass_kernels
        )

    @property