Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c0a4b95d
Unverified
Commit
c0a4b95d
authored
Nov 06, 2025
by
Harry Mellor
Committed by
GitHub
Nov 07, 2025
Browse files
Fix issues from #28242 (#28257)
Signed-off-by:
Harry Mellor
<
19981378+hmellor@users.noreply.github.com
>
parent
a47d94f1
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
10 additions
and
21 deletions
+10
-21
vllm/config/compilation.py
vllm/config/compilation.py
+3
-7
vllm/model_executor/models/qwen2_5_vl.py
vllm/model_executor/models/qwen2_5_vl.py
+1
-3
vllm/model_executor/models/transformers/utils.py
vllm/model_executor/models/transformers/utils.py
+0
-11
vllm/model_executor/models/vision.py
vllm/model_executor/models/vision.py
+6
-0
No files found.
vllm/config/compilation.py
View file @
c0a4b95d
...
@@ -251,13 +251,6 @@ class CompilationConfig:
...
@@ -251,13 +251,6 @@ class CompilationConfig:
disabled when running with Inductor: mode>=VLLM_COMPILE and use_inductor=True.
disabled when running with Inductor: mode>=VLLM_COMPILE and use_inductor=True.
Inductor generates (fused) Triton kernels for disabled custom ops."""
Inductor generates (fused) Triton kernels for disabled custom ops."""
splitting_ops
:
list
[
str
]
|
None
=
None
splitting_ops
:
list
[
str
]
|
None
=
None
"""
Provide control over whether to compile the multimodal encoder
such as Qwen2_5_vl
"""
compile_mm_encoder
:
bool
=
True
"""A list of ops to exclude from cudagraphs, used in piecewise compilation.
"""A list of ops to exclude from cudagraphs, used in piecewise compilation.
The behavior depends on use_inductor_graph_partition:
The behavior depends on use_inductor_graph_partition:
...
@@ -275,6 +268,9 @@ class CompilationConfig:
...
@@ -275,6 +268,9 @@ class CompilationConfig:
If None, defaults to attention ops for piecewise cudagraphs.
If None, defaults to attention ops for piecewise cudagraphs.
If empty list [], no ops are excluded (suitable for full cudagraphs)."""
If empty list [], no ops are excluded (suitable for full cudagraphs)."""
compile_mm_encoder
:
bool
=
True
"""Whether or not to compile the multimodal encoder.
Currently, this only works for `Qwen2_5_vl`."""
# Inductor capture
# Inductor capture
use_inductor
:
bool
|
None
=
None
use_inductor
:
bool
|
None
=
None
...
...
vllm/model_executor/models/qwen2_5_vl.py
View file @
c0a4b95d
...
@@ -67,9 +67,7 @@ from vllm.model_executor.layers.linear import (
...
@@ -67,9 +67,7 @@ from vllm.model_executor.layers.linear import (
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.model_executor.models.transformers.utils
import
(
from
vllm.model_executor.models.vision
import
should_torch_compile_mm_vit
should_torch_compile_mm_vit
,
)
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.evs
import
(
from
vllm.multimodal.evs
import
(
compute_mrope_for_media
,
compute_mrope_for_media
,
...
...
vllm/model_executor/models/transformers/utils.py
View file @
c0a4b95d
...
@@ -205,14 +205,3 @@ def can_enable_torch_compile(vllm_config: "VllmConfig") -> bool:
...
@@ -205,14 +205,3 @@ def can_enable_torch_compile(vllm_config: "VllmConfig") -> bool:
# Dynamic rope scaling is not compatible with torch.compile
# Dynamic rope scaling is not compatible with torch.compile
rope_scaling
:
dict
=
getattr
(
text_config
,
"rope_scaling"
,
None
)
or
{}
rope_scaling
:
dict
=
getattr
(
text_config
,
"rope_scaling"
,
None
)
or
{}
return
rope_scaling
.
get
(
"rope_type"
)
!=
"dynamic"
return
rope_scaling
.
get
(
"rope_type"
)
!=
"dynamic"
def
should_torch_compile_mm_vit
(
vllm_config
:
"VllmConfig"
)
->
bool
:
"""
Callable to be passed to `@support_torch_compile`'s `enable_if` argument.
Defaults to `True` but is disabled in the following situations:
- The model uses dynamic rope scaling.
"""
return
vllm_config
.
compilation_config
.
compile_mm_encoder
vllm/model_executor/models/vision.py
View file @
c0a4b95d
...
@@ -11,6 +11,7 @@ import torch
...
@@ -11,6 +11,7 @@ import torch
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.backends.registry
import
_Backend
from
vllm.attention.backends.registry
import
_Backend
from
vllm.config
import
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
...
@@ -100,6 +101,11 @@ def get_vit_attn_backend(
...
@@ -100,6 +101,11 @@ def get_vit_attn_backend(
return
current_platform
.
get_vit_attn_backend
(
head_size
,
dtype
)
return
current_platform
.
get_vit_attn_backend
(
head_size
,
dtype
)
def
should_torch_compile_mm_vit
(
vllm_config
:
VllmConfig
)
->
bool
:
"""Callable to be passed to `@support_torch_compile`'s `enable_if` argument."""
return
vllm_config
.
compilation_config
.
compile_mm_encoder
VisionFeatureSelectStrategyStr
=
Literal
[
"class"
,
"default"
,
"full"
]
VisionFeatureSelectStrategyStr
=
Literal
[
"class"
,
"default"
,
"full"
]
VisionFeatureSelectStrategy
:
TypeAlias
=
(
VisionFeatureSelectStrategy
:
TypeAlias
=
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment