Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f9bc5a06
Unverified
Commit
f9bc5a06
authored
May 06, 2025
by
Mengqing Cao
Committed by
GitHub
May 06, 2025
Browse files
[Bugfix] Fix triton import with local TritonPlaceholder (#17446)
Signed-off-by:
Mengqing Cao
<
cmq0113@163.com
>
parent
05e1f964
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
53 additions
and
46 deletions
+53
-46
vllm/model_executor/layers/mamba/ops/ssd_combined.py
vllm/model_executor/layers/mamba/ops/ssd_combined.py
+2
-1
vllm/model_executor/layers/mamba/ops/ssd_state_passing.py
vllm/model_executor/layers/mamba/ops/ssd_state_passing.py
+2
-2
vllm/model_executor/layers/quantization/awq_triton.py
vllm/model_executor/layers/quantization/awq_triton.py
+2
-2
vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py
...ayers/quantization/compressed_tensors/triton_scaled_mm.py
+2
-2
vllm/model_executor/layers/quantization/utils/fp8_utils.py
vllm/model_executor/layers/quantization/utils/fp8_utils.py
+1
-2
vllm/model_executor/layers/quantization/utils/int8_utils.py
vllm/model_executor/layers/quantization/utils/int8_utils.py
+1
-2
vllm/triton_utils/__init__.py
vllm/triton_utils/__init__.py
+10
-2
vllm/triton_utils/importing.py
vllm/triton_utils/importing.py
+31
-29
vllm/v1/sample/rejection_sampler.py
vllm/v1/sample/rejection_sampler.py
+1
-2
vllm/v1/spec_decode/eagle.py
vllm/v1/spec_decode/eagle.py
+1
-2
No files found.
vllm/model_executor/layers/mamba/ops/ssd_combined.py
View file @
f9bc5a06
...
...
@@ -6,10 +6,11 @@
# ruff: noqa: E501
import
torch
import
triton
from
einops
import
rearrange
from
packaging
import
version
from
vllm.triton_utils
import
triton
from
.ssd_bmm
import
_bmm_chunk_fwd
from
.ssd_chunk_scan
import
_chunk_scan_fwd
from
.ssd_chunk_state
import
(
_chunk_cumsum_fwd
,
_chunk_state_fwd
,
...
...
vllm/model_executor/layers/mamba/ops/ssd_state_passing.py
View file @
f9bc5a06
...
...
@@ -6,8 +6,8 @@
# ruff: noqa: E501
import
torch
import
triton
import
triton.language
as
tl
from
vllm.triton_utils
import
tl
,
triton
@
triton
.
autotune
(
...
...
vllm/model_executor/layers/quantization/awq_triton.py
View file @
f9bc5a06
# SPDX-License-Identifier: Apache-2.0
import
torch
import
triton
import
triton.language
as
tl
from
vllm.triton_utils
import
tl
,
triton
AWQ_TRITON_SUPPORTED_GROUP_SIZES
=
[
-
1
,
32
,
64
,
128
]
...
...
vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py
View file @
f9bc5a06
...
...
@@ -3,8 +3,8 @@
from
typing
import
Optional
,
Type
import
torch
import
triton
import
triton.language
as
tl
from
vllm.triton_utils
import
tl
,
triton
def
is_weak_contiguous
(
x
:
torch
.
Tensor
):
...
...
vllm/model_executor/layers/quantization/utils/fp8_utils.py
View file @
f9bc5a06
...
...
@@ -7,8 +7,6 @@ import os
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
,
Union
import
torch
import
triton
import
triton.language
as
tl
from
vllm
import
_custom_ops
as
ops
from
vllm.logger
import
init_logger
...
...
@@ -17,6 +15,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
from
vllm.model_executor.layers.quantization.utils.w8a8_utils
import
(
CUTLASS_BLOCK_FP8_SUPPORTED
)
from
vllm.platforms
import
current_platform
from
vllm.triton_utils
import
tl
,
triton
from
vllm.utils
import
direct_register_custom_op
logger
=
init_logger
(
__name__
)
...
...
vllm/model_executor/layers/quantization/utils/int8_utils.py
View file @
f9bc5a06
...
...
@@ -8,10 +8,9 @@ import os
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
import
torch
import
triton
import
triton.language
as
tl
from
vllm.platforms
import
current_platform
from
vllm.triton_utils
import
tl
,
triton
logger
=
logging
.
getLogger
(
__name__
)
...
...
vllm/triton_utils/__init__.py
View file @
f9bc5a06
# SPDX-License-Identifier: Apache-2.0
from
vllm.triton_utils.importing
import
HAS_TRITON
from
vllm.triton_utils.importing
import
(
HAS_TRITON
,
TritonLanguagePlaceholder
,
TritonPlaceholder
)
__all__
=
[
"HAS_TRITON"
]
if
HAS_TRITON
:
import
triton
import
triton.language
as
tl
else
:
triton
=
TritonPlaceholder
()
tl
=
TritonLanguagePlaceholder
()
__all__
=
[
"HAS_TRITON"
,
"triton"
,
"tl"
]
vllm/triton_utils/importing.py
View file @
f9bc5a06
...
...
@@ -16,32 +16,34 @@ if not HAS_TRITON:
logger
.
info
(
"Triton not installed or not compatible; certain GPU-related"
" functions will not be available."
)
class
TritonPlaceholder
(
types
.
ModuleType
):
def
__init__
(
self
):
super
().
__init__
(
"triton"
)
self
.
jit
=
self
.
_dummy_decorator
(
"jit"
)
self
.
autotune
=
self
.
_dummy_decorator
(
"autotune"
)
self
.
heuristics
=
self
.
_dummy_decorator
(
"heuristics"
)
self
.
language
=
TritonLanguagePlaceholder
()
logger
.
warning_once
(
"Triton is not installed. Using dummy decorators. "
"Install it via `pip install triton` to enable kernel"
"compilation."
)
def
_dummy_decorator
(
self
,
name
):
def
decorator
(
func
=
None
,
**
kwargs
):
if
func
is
None
:
return
lambda
f
:
f
return
func
return
decorator
class
TritonLanguagePlaceholder
(
types
.
ModuleType
):
def
__init__
(
self
):
super
().
__init__
(
"triton.language"
)
self
.
constexpr
=
None
self
.
dtype
=
None
self
.
int64
=
None
class
TritonPlaceholder
(
types
.
ModuleType
):
def
__init__
(
self
):
super
().
__init__
(
"triton"
)
self
.
jit
=
self
.
_dummy_decorator
(
"jit"
)
self
.
autotune
=
self
.
_dummy_decorator
(
"autotune"
)
self
.
heuristics
=
self
.
_dummy_decorator
(
"heuristics"
)
self
.
language
=
TritonLanguagePlaceholder
()
logger
.
warning_once
(
"Triton is not installed. Using dummy decorators. "
"Install it via `pip install triton` to enable kernel"
" compilation."
)
def
_dummy_decorator
(
self
,
name
):
def
decorator
(
*
args
,
**
kwargs
):
if
args
and
callable
(
args
[
0
]):
return
args
[
0
]
return
lambda
f
:
f
return
decorator
class
TritonLanguagePlaceholder
(
types
.
ModuleType
):
def
__init__
(
self
):
super
().
__init__
(
"triton.language"
)
self
.
constexpr
=
None
self
.
dtype
=
None
self
.
int64
=
None
vllm/v1/sample/rejection_sampler.py
View file @
f9bc5a06
...
...
@@ -3,10 +3,9 @@ from typing import Optional
import
torch
import
torch.nn
as
nn
import
triton
import
triton.language
as
tl
from
vllm.logger
import
init_logger
from
vllm.triton_utils
import
tl
,
triton
from
vllm.v1.sample.metadata
import
SamplingMetadata
from
vllm.v1.sample.ops.topk_topp_sampler
import
apply_top_k_top_p
from
vllm.v1.spec_decode.metadata
import
SpecDecodeMetadata
...
...
vllm/v1/spec_decode/eagle.py
View file @
f9bc5a06
# SPDX-License-Identifier: Apache-2.0
import
torch
import
torch.nn
as
nn
import
triton
import
triton.language
as
tl
from
vllm.config
import
CompilationLevel
,
VllmConfig
,
set_current_vllm_config
from
vllm.forward_context
import
set_forward_context
...
...
@@ -11,6 +9,7 @@ from vllm.model_executor.model_loader.loader import get_model_loader
from
vllm.model_executor.model_loader.utils
import
set_default_torch_dtype
from
vllm.model_executor.models
import
ModelRegistry
from
vllm.model_executor.models.llama_eagle3
import
Eagle3LlamaForCausalLM
from
vllm.triton_utils
import
tl
,
triton
from
vllm.v1.attention.backends.flash_attn
import
FlashAttentionMetadata
from
vllm.v1.sample.metadata
import
SamplingMetadata
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment