Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d3387750
Unverified
Commit
d3387750
authored
Nov 13, 2025
by
Roger Wang
Committed by
GitHub
Nov 13, 2025
Browse files
[Misc] Turn off encoder torch compile by default (#28634)
Signed-off-by:
Roger Wang
<
hey@rogerw.io
>
parent
b230286f
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
5 deletions
+11
-5
tests/compile/test_multimodal_compile.py
tests/compile/test_multimodal_compile.py
+6
-3
tests/models/multimodal/generation/test_common.py
tests/models/multimodal/generation/test_common.py
+2
-0
vllm/config/compilation.py
vllm/config/compilation.py
+3
-2
No files found.
tests/compile/test_multimodal_compile.py
View file @
d3387750
...
...
@@ -10,8 +10,8 @@ from vllm.platforms import current_platform
def
test_compile
():
vllm_config
=
VllmConfig
()
# Default configuration compiles mm encoder
assert
vllm_config
.
compilation_config
.
compile_mm_encoder
# Default configuration does not compile mm encoder
assert
not
vllm_config
.
compilation_config
.
compile_mm_encoder
# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
...
...
@@ -39,7 +39,10 @@ def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
"Qwen/Qwen2.5-VL-3B-Instruct"
,
max_model_len
=
2048
,
gpu_memory_utilization
=
0.8
,
compilation_config
=
{
"mode"
:
CompilationMode
.
VLLM_COMPILE
},
compilation_config
=
{
"mode"
:
CompilationMode
.
VLLM_COMPILE
,
"compile_mm_encoder"
:
True
,
},
)
as
_
,
):
pass
...
...
tests/models/multimodal/generation/test_common.py
View file @
d3387750
...
...
@@ -131,6 +131,7 @@ VLM_TEST_SETTINGS = {
prompt_formatter
=
lambda
img_prompt
:
f
"<|im_start|>User
\n
{
img_prompt
}
<|im_end|>
\n
<|im_start|>assistant
\n
"
,
# noqa: E501
img_idx_to_prompt
=
lambda
idx
:
"<|vision_start|><|image_pad|><|vision_end|>"
,
video_idx_to_prompt
=
lambda
idx
:
"<|vision_start|><|video_pad|><|vision_end|>"
,
enforce_eager
=
False
,
max_model_len
=
4096
,
max_num_seqs
=
2
,
auto_cls
=
AutoModelForImageTextToText
,
...
...
@@ -160,6 +161,7 @@ VLM_TEST_SETTINGS = {
VLMTestType
.
MULTI_IMAGE
,
VLMTestType
.
VIDEO
,
),
enforce_eager
=
False
,
needs_video_metadata
=
True
,
prompt_formatter
=
lambda
img_prompt
:
f
"<|im_start|>User
\n
{
img_prompt
}
<|im_end|>
\n
<|im_start|>assistant
\n
"
,
# noqa: E501
img_idx_to_prompt
=
lambda
idx
:
"<|vision_start|><|image_pad|><|vision_end|>"
,
# noqa: E501
...
...
vllm/config/compilation.py
View file @
d3387750
...
...
@@ -320,9 +320,10 @@ class CompilationConfig:
If None, defaults to attention ops for piecewise cudagraphs.
If empty list [], no ops are excluded (suitable for full cudagraphs)."""
compile_mm_encoder
:
bool
=
True
compile_mm_encoder
:
bool
=
False
"""Whether or not to compile the multimodal encoder.
Currently, this only works for `Qwen2_5_vl`."""
Currently, this only works for `Qwen2_5_vl` on selected platforms.
Disabled by default until more models are supported/tested to work."""
# Inductor capture
use_inductor
:
bool
|
None
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment