[Bugfix][Misc] Use TritonPlaceholderModule to defensively import triton (#15099)

Signed-off-by: Mengqing Cao <cmq0113@163.com>

[Bugfix][Misc] Use TritonPlaceholderModule to defensively import triton (#15099)
Signed-off-by: Mengqing Cao <cmq0113@163.com>
2f540455 · Mengqing Cao · GitHub · 5aa6efb9 · 2f540455 · 2f540455
Unverified commit 2f540455, authored Apr 25, 2025 by Mengqing Cao; committed by GitHub Apr 24, 2025.
5 changed files
--- a/benchmarks/kernels/benchmark_lora.py
+++ b/benchmarks/kernels/benchmark_lora.py
@@ -17,8 +17,14 @@ from torch.utils.benchmark import Measurement as TMeasurement
 from utils import ArgPool, Bench, CudaGraphBenchParams
 from weight_shapes import WEIGHT_SHAPES
-from vllm.lora.ops.triton_ops import LoRAKernelMeta, lora_expand, lora_shrink
+from vllm.triton_utils import HAS_TRITON
-from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT
+if HAS_TRITON:
+    from vllm.lora.ops.triton_ops import (LoRAKernelMeta, lora_expand,
+                                          lora_shrink)
+    from vllm.lora.ops.triton_ops.utils import (_LORA_A_PTR_DICT,
+                                                _LORA_B_PTR_DICT)
 from vllm.utils import FlexibleArgumentParser
 DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())

--- a/vllm/model_executor/layers/mamba/ops/mamba_ssm.py
+++ b/vllm/model_executor/layers/mamba/ops/mamba_ssm.py
@@ -10,8 +10,10 @@ from packaging import version
 from vllm import _custom_ops as ops
 from vllm.attention.backends.utils import PAD_SLOT_ID
+from vllm.triton_utils import HAS_TRITON
-TRITON3 = version.parse(triton.__version__) >= version.parse("3.0.0")
+TRITON3 = HAS_TRITON and (version.parse(triton.__version__)
+                          >= version.parse("3.0.0"))
 if TRITON3:

--- a/vllm/triton_utils/__init__.py
+++ b/vllm/triton_utils/__init__.py
@@ -2,4 +2,4 @@
 from vllm.triton_utils.importing import HAS_TRITON
 __all__ = ["HAS_TRITON"]
\ No newline at end of file
--- a/vllm/triton_utils/importing.py
+++ b/vllm/triton_utils/importing.py
 # SPDX-License-Identifier: Apache-2.0
+import sys
+import types
 from importlib.util import find_spec
 from vllm.logger import init_logger
-from vllm.platforms import current_platform
 logger = init_logger(__name__)
 HAS_TRITON = (
    find_spec("triton") is not None
-    and not current_platform.is_xpu()  # Not compatible
+    or find_spec("pytorch-triton-xpu") is not None  # Not compatible
 )
 if not HAS_TRITON:
    logger.info("Triton not installed or not compatible; certain GPU-related"
                " functions will not be available.")
+    class TritonPlaceholder(types.ModuleType):
+        def __init__(self):
+            super().__init__("triton")
+            self.jit = self._dummy_decorator("jit")
+            self.autotune = self._dummy_decorator("autotune")
+            self.heuristics = self._dummy_decorator("heuristics")
+            self.language = TritonLanguagePlaceholder()
+            logger.warning_once(
+                "Triton is not installed. Using dummy decorators. "
+                "Install it via `pip install triton` to enable kernel"
+                "compilation.")
+        def _dummy_decorator(self, name):
+            def decorator(func=None, **kwargs):
+                if func is None:
+                    return lambda f: f
+                return func
+            return decorator
+    class TritonLanguagePlaceholder(types.ModuleType):
+        def __init__(self):
+            super().__init__("triton.language")
+            self.constexpr = None
+            self.dtype = None
+    sys.modules['triton'] = TritonPlaceholder()
+    sys.modules['triton.language'] = TritonLanguagePlaceholder()
+if 'triton' in sys.modules:
+    logger.info("Triton module has been replaced with a placeholder.")
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -63,6 +63,9 @@ from torch.library import Library
 from typing_extensions import Never, ParamSpec, TypeIs, assert_never
 import vllm.envs as envs
+# NOTE: import triton_utils to make TritonPlaceholderModule work
+#       if triton is unavailable
+import vllm.triton_utils  # noqa: F401
 from vllm.logger import enable_trace_function_call, init_logger
 if TYPE_CHECKING: