[Bugfix] Lazy import gpt_oss_triton_kernels_moe for mxfp4 (#23678)

Signed-off-by: mgoin <mgoin64@gmail.com>

[Bugfix] Lazy import gpt_oss_triton_kernels_moe for mxfp4 (#23678)
Signed-off-by: mgoin <mgoin64@gmail.com>
de02b07d · Michael Goin · GitHub · eb199516 · de02b07d
Unverified commit de02b07d, authored Aug 26, 2025 by Michael Goin; committed by GitHub on Aug 27, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

vllm/model_executor/layers/quantization/mxfp4.py (+2 -2)

No files found.
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -10,8 +10,6 @@ from vllm.config import get_current_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig,
                                                  FusedMoEMethodBase)
-from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
-    triton_kernel_moe_forward)
 from vllm.model_executor.layers.linear import (LinearBase,
                                               UnquantizedLinearMethod)
 from vllm.model_executor.layers.quantization import QuantizationMethods
@@ -557,6 +555,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
            )[0]
            return trtllm_gen_output
        else:
+            from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (  # noqa: E501
+                triton_kernel_moe_forward)
            return triton_kernel_moe_forward(
                hidden_states=x,
                w1=self.w13_weight_triton_tensor,