Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2f540455
Unverified
Commit
2f540455
authored
Apr 25, 2025
by
Mengqing Cao
Committed by
GitHub
Apr 24, 2025
Browse files
[Bugfix][Misc] Use TritonPlaceholderModule to defensively import triton (#15099)
Signed-off-by:
Mengqing Cao
<
cmq0113@163.com
>
parent
5aa6efb9
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
53 additions
and
6 deletions
+53
-6
benchmarks/kernels/benchmark_lora.py
benchmarks/kernels/benchmark_lora.py
+8
-2
vllm/model_executor/layers/mamba/ops/mamba_ssm.py
vllm/model_executor/layers/mamba/ops/mamba_ssm.py
+3
-1
vllm/triton_utils/__init__.py
vllm/triton_utils/__init__.py
+1
-1
vllm/triton_utils/importing.py
vllm/triton_utils/importing.py
+38
-2
vllm/utils.py
vllm/utils.py
+3
-0
No files found.
benchmarks/kernels/benchmark_lora.py
View file @
2f540455
...
@@ -17,8 +17,14 @@ from torch.utils.benchmark import Measurement as TMeasurement
...
@@ -17,8 +17,14 @@ from torch.utils.benchmark import Measurement as TMeasurement
from
utils
import
ArgPool
,
Bench
,
CudaGraphBenchParams
from
utils
import
ArgPool
,
Bench
,
CudaGraphBenchParams
from
weight_shapes
import
WEIGHT_SHAPES
from
weight_shapes
import
WEIGHT_SHAPES
from
vllm.lora.ops.triton_ops
import
LoRAKernelMeta
,
lora_expand
,
lora_shrink
from
vllm.triton_utils
import
HAS_TRITON
from
vllm.lora.ops.triton_ops.utils
import
_LORA_A_PTR_DICT
,
_LORA_B_PTR_DICT
if
HAS_TRITON
:
from
vllm.lora.ops.triton_ops
import
(
LoRAKernelMeta
,
lora_expand
,
lora_shrink
)
from
vllm.lora.ops.triton_ops.utils
import
(
_LORA_A_PTR_DICT
,
_LORA_B_PTR_DICT
)
from
vllm.utils
import
FlexibleArgumentParser
from
vllm.utils
import
FlexibleArgumentParser
DEFAULT_MODELS
=
list
(
WEIGHT_SHAPES
.
keys
())
DEFAULT_MODELS
=
list
(
WEIGHT_SHAPES
.
keys
())
...
...
vllm/model_executor/layers/mamba/ops/mamba_ssm.py
View file @
2f540455
...
@@ -10,8 +10,10 @@ from packaging import version
...
@@ -10,8 +10,10 @@ from packaging import version
from
vllm
import
_custom_ops
as
ops
from
vllm
import
_custom_ops
as
ops
from
vllm.attention.backends.utils
import
PAD_SLOT_ID
from
vllm.attention.backends.utils
import
PAD_SLOT_ID
from
vllm.triton_utils
import
HAS_TRITON
TRITON3
=
version
.
parse
(
triton
.
__version__
)
>=
version
.
parse
(
"3.0.0"
)
TRITON3
=
HAS_TRITON
and
(
version
.
parse
(
triton
.
__version__
)
>=
version
.
parse
(
"3.0.0"
))
if
TRITON3
:
if
TRITON3
:
...
...
vllm/triton_utils/__init__.py
View file @
2f540455
...
@@ -2,4 +2,4 @@
...
@@ -2,4 +2,4 @@
from
vllm.triton_utils.importing
import
HAS_TRITON
from
vllm.triton_utils.importing
import
HAS_TRITON
__all__
=
[
"HAS_TRITON"
]
__all__
=
[
"HAS_TRITON"
]
\ No newline at end of file
vllm/triton_utils/importing.py
View file @
2f540455
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
sys
import
types
from
importlib.util
import
find_spec
from
importlib.util
import
find_spec
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.platforms
import
current_platform
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
HAS_TRITON
=
(
HAS_TRITON
=
(
find_spec
(
"triton"
)
is
not
None
find_spec
(
"triton"
)
is
not
None
and
not
current_platform
.
is_xpu
()
# Not compatible
or
find_spec
(
"pytorch-triton-xpu"
)
is
not
None
# Not compatible
)
)
if
not
HAS_TRITON
:
if
not
HAS_TRITON
:
logger
.
info
(
"Triton not installed or not compatible; certain GPU-related"
logger
.
info
(
"Triton not installed or not compatible; certain GPU-related"
" functions will not be available."
)
" functions will not be available."
)
class TritonPlaceholder(types.ModuleType):
    """Stand-in module object for ``triton`` when Triton is unavailable.

    The placeholder exposes ``jit``, ``autotune`` and ``heuristics`` as
    no-op decorators and a ``language`` attribute holding a
    ``TritonLanguagePlaceholder``, so that modules which decorate kernels
    at import time can still be imported. The decorated functions are
    returned untouched; nothing is compiled.
    """

    def __init__(self):
        super().__init__("triton")
        self.jit = self._dummy_decorator("jit")
        self.autotune = self._dummy_decorator("autotune")
        self.heuristics = self._dummy_decorator("heuristics")
        self.language = TritonLanguagePlaceholder()
        # Adjacent literals are concatenated verbatim, so the space before
        # "compilation" has to be written explicitly.
        logger.warning_once(
            "Triton is not installed. Using dummy decorators. "
            "Install it via `pip install triton` to enable kernel "
            "compilation.")

    def _dummy_decorator(self, name):
        """Build a decorator that leaves the decorated function unchanged.

        Args:
            name: Name of the Triton decorator being replaced. It is
                accepted for symmetry with the real API and is not used.

        Returns:
            A callable usable both bare (``@triton.jit``) and with
            arguments (``@triton.autotune(configs=..., key=...)`` or
            ``@triton.heuristics({...})``).
        """

        def decorator(func=None, *args, **kwargs):
            # Bare usage: the first argument is the function being
            # decorated, hand it back as-is.
            if callable(func):
                return func
            # Parameterised usage: whatever was passed (keyword options,
            # or positional values such as a heuristics dict) is
            # configuration, so return an identity decorator.
            return lambda f: f

        return decorator
class TritonLanguagePlaceholder(types.ModuleType):
    """Stand-in module object for ``triton.language``.

    Only the ``constexpr`` and ``dtype`` attributes are provided, both set
    to ``None``; any other attribute access raises ``AttributeError`` as
    for a regular module object.
    """

    def __init__(self):
        super().__init__("triton.language")
        self.constexpr = None
        self.dtype = None
# Register the placeholders in sys.modules so that subsequent
# `import triton` / `import triton.language` statements resolve to them.
_triton_placeholder = TritonPlaceholder()
sys.modules['triton'] = _triton_placeholder
# Reuse the instance already attached as the `language` attribute so that
# attribute access on the `triton` placeholder and the
# 'triton.language' entry refer to the same module object.
sys.modules['triton.language'] = _triton_placeholder.language
# 'triton' is always present in sys.modules at this point, so the message
# is logged unconditionally.
logger.info("Triton module has been replaced with a placeholder.")
vllm/utils.py
View file @
2f540455
...
@@ -63,6 +63,9 @@ from torch.library import Library
...
@@ -63,6 +63,9 @@ from torch.library import Library
from
typing_extensions
import
Never
,
ParamSpec
,
TypeIs
,
assert_never
from
typing_extensions
import
Never
,
ParamSpec
,
TypeIs
,
assert_never
import
vllm.envs
as
envs
import
vllm.envs
as
envs
# NOTE: import triton_utils to make TritonPlaceholderModule work
# if triton is unavailable
import
vllm.triton_utils
# noqa: F401
from
vllm.logger
import
enable_trace_function_call
,
init_logger
from
vllm.logger
import
enable_trace_function_call
,
init_logger
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment