Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0b6a8a30
Unverified
Commit
0b6a8a30
authored
Dec 09, 2025
by
Ilya Markov
Committed by
GitHub
Dec 09, 2025
Browse files
[BugFix] Fix non detected failing tests (#30277)
Signed-off-by:
ilmarkov
<
markovilya197@gmail.com
>
parent
804e3468
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
77 additions
and
38 deletions
+77
-38
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+6
-2
tests/compile/fullgraph/test_multimodal_compile.py
tests/compile/fullgraph/test_multimodal_compile.py
+0
-1
tests/compile/test_compile_ranges.py
tests/compile/test_compile_ranges.py
+6
-0
tests/compile/test_pass_manager.py
tests/compile/test_pass_manager.py
+40
-33
vllm/compilation/inductor_pass.py
vllm/compilation/inductor_pass.py
+6
-2
vllm/compilation/piecewise_backend.py
vllm/compilation/piecewise_backend.py
+19
-0
No files found.
.buildkite/test-pipeline.yaml
View file @
0b6a8a30
...
@@ -468,7 +468,9 @@ steps:
...
@@ -468,7 +468,9 @@ steps:
# tests covered elsewhere.
# tests covered elsewhere.
# Use `find` to launch multiple instances of pytest so that
# Use `find` to launch multiple instances of pytest so that
# they do not suffer from https://github.com/vllm-project/vllm/issues/28965
# they do not suffer from https://github.com/vllm-project/vllm/issues/28965
-
"
find
compile/
-maxdepth
1
-name
'test_*.py'
-exec
pytest
-s
-v
{}
\\\\
;"
# However, find does not normally propagate error codes, so we combine it with xargs
# (using -0 for proper path handling)
-
"
find
compile/
-maxdepth
1
-name
'test_*.py'
-print0
|
xargs
-0
-n1
-I{}
pytest
-s
-v
'{}'"
-
label
:
PyTorch Fullgraph Smoke Test
# 15min
-
label
:
PyTorch Fullgraph Smoke Test
# 15min
timeout_in_minutes
:
30
timeout_in_minutes
:
30
...
@@ -482,7 +484,9 @@ steps:
...
@@ -482,7 +484,9 @@ steps:
# as it is a heavy test that is covered in other steps.
# as it is a heavy test that is covered in other steps.
# Use `find` to launch multiple instances of pytest so that
# Use `find` to launch multiple instances of pytest so that
# they do not suffer from https://github.com/vllm-project/vllm/issues/28965
# they do not suffer from https://github.com/vllm-project/vllm/issues/28965
-
"
find
compile/fullgraph/
-name
'test_*.py'
-not
-name
'test_full_graph.py'
-exec
pytest
-s
-v
{}
\\\\
;"
# However, find does not normally propagate error codes, so we combine it with xargs
# (using -0 for proper path handling)
-
"
find
compile/fullgraph
-maxdepth
1
-name
'test_*.py'
-not
-name
'test_full_graph.py'
-print0
|
xargs
-0
-n1
-I{}
pytest
-s
-v
'{}'"
-
label
:
PyTorch Fullgraph Test
# 27min
-
label
:
PyTorch Fullgraph Test
# 27min
timeout_in_minutes
:
40
timeout_in_minutes
:
40
...
...
tests/compile/fullgraph/test_multimodal_compile.py
View file @
0b6a8a30
...
@@ -17,7 +17,6 @@ def test_compile():
...
@@ -17,7 +17,6 @@ def test_compile():
# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
@
pytest
.
mark
.
forked
@
pytest
.
mark
.
forked
@
pytest
.
mark
.
skipif
(
not
current_platform
.
is_cuda
(),
reason
=
"Skip if not cuda"
)
@
pytest
.
mark
.
skipif
(
not
current_platform
.
is_cuda
(),
reason
=
"Skip if not cuda"
)
@
pytest
.
mark
.
xfail
def
test_qwen2_5_vl_compilation
(
vllm_runner
,
monkeypatch
):
def
test_qwen2_5_vl_compilation
(
vllm_runner
,
monkeypatch
):
"""Test that Qwen2.5-VL vision submodules are compiled.
"""Test that Qwen2.5-VL vision submodules are compiled.
...
...
tests/compile/test_compile_ranges.py
View file @
0b6a8a30
...
@@ -80,6 +80,8 @@ def test_compile_ranges(use_fresh_inductor_cache):
...
@@ -80,6 +80,8 @@ def test_compile_ranges(use_fresh_inductor_cache):
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
scheduler_config
=
SchedulerConfig
(
scheduler_config
=
SchedulerConfig
(
max_num_batched_tokens
=
8192
,
max_num_batched_tokens
=
8192
,
max_model_len
=
8192
,
is_encoder_decoder
=
False
,
),
),
compilation_config
=
CompilationConfig
(
compilation_config
=
CompilationConfig
(
mode
=
CompilationMode
.
VLLM_COMPILE
,
mode
=
CompilationMode
.
VLLM_COMPILE
,
...
@@ -112,6 +114,8 @@ def test_compile_config_get_compile_ranges():
...
@@ -112,6 +114,8 @@ def test_compile_config_get_compile_ranges():
VllmConfig
(
VllmConfig
(
scheduler_config
=
SchedulerConfig
(
scheduler_config
=
SchedulerConfig
(
max_num_batched_tokens
=
8192
,
max_num_batched_tokens
=
8192
,
max_model_len
=
8192
,
is_encoder_decoder
=
False
,
),
),
compilation_config
=
compilation_config
,
compilation_config
=
compilation_config
,
)
)
...
@@ -134,6 +138,8 @@ def test_inductor_cache_compile_ranges(monkeypatch, use_fresh_inductor_cache):
...
@@ -134,6 +138,8 @@ def test_inductor_cache_compile_ranges(monkeypatch, use_fresh_inductor_cache):
)
)
scheduler_config
=
SchedulerConfig
(
scheduler_config
=
SchedulerConfig
(
max_num_batched_tokens
=
8192
,
max_num_batched_tokens
=
8192
,
max_model_len
=
8192
,
is_encoder_decoder
=
False
,
)
)
torch
.
set_default_device
(
"cuda"
)
torch
.
set_default_device
(
"cuda"
)
...
...
tests/compile/test_pass_manager.py
View file @
0b6a8a30
...
@@ -5,9 +5,14 @@ import copy
...
@@ -5,9 +5,14 @@ import copy
import
pytest
import
pytest
import
torch
import
torch
from
vllm.compilation.inductor_pass
import
CallableInductorPass
,
InductorPass
from
vllm.compilation.inductor_pass
import
(
CallableInductorPass
,
InductorPass
,
pass_context
,
)
from
vllm.compilation.pass_manager
import
PostGradPassManager
from
vllm.compilation.pass_manager
import
PostGradPassManager
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config.utils
import
Range
# dummy custom pass that doesn't inherit
# dummy custom pass that doesn't inherit
...
@@ -42,6 +47,8 @@ class ProperPass(InductorPass):
...
@@ -42,6 +47,8 @@ class ProperPass(InductorPass):
],
],
)
)
def
test_pass_manager_uuid
(
callable
):
def
test_pass_manager_uuid
(
callable
):
# Set the pass context as PassManager uuid uses it
with
pass_context
(
Range
(
start
=
1
,
end
=
8
)):
# Some passes need dtype to be set
# Some passes need dtype to be set
config
=
VllmConfig
(
model_config
=
ModelConfig
(
dtype
=
torch
.
bfloat16
))
config
=
VllmConfig
(
model_config
=
ModelConfig
(
dtype
=
torch
.
bfloat16
))
...
...
vllm/compilation/inductor_pass.py
View file @
0b6a8a30
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
__future__
import
annotations
import
functools
import
functools
import
hashlib
import
hashlib
import
inspect
import
inspect
...
@@ -8,15 +10,17 @@ import json
...
@@ -8,15 +10,17 @@ import json
import
types
import
types
from
collections.abc
import
Callable
from
collections.abc
import
Callable
from
contextlib
import
contextmanager
from
contextlib
import
contextmanager
from
typing
import
Any
from
typing
import
TYPE_CHECKING
,
Any
import
torch
import
torch
from
torch
import
fx
from
torch
import
fx
from
torch._subclasses.fake_tensor
import
FakeTensorMode
,
unset_fake_temporarily
from
torch._subclasses.fake_tensor
import
FakeTensorMode
,
unset_fake_temporarily
from
vllm.config.utils
import
Range
from
vllm.utils.torch_utils
import
is_torch_equal_or_newer
from
vllm.utils.torch_utils
import
is_torch_equal_or_newer
if
TYPE_CHECKING
:
from
vllm.config.utils
import
Range
if
is_torch_equal_or_newer
(
"2.6"
):
if
is_torch_equal_or_newer
(
"2.6"
):
from
torch._inductor.custom_graph_pass
import
CustomGraphPass
from
torch._inductor.custom_graph_pass
import
CustomGraphPass
else
:
else
:
...
...
vllm/compilation/piecewise_backend.py
View file @
0b6a8a30
...
@@ -53,8 +53,27 @@ class PiecewiseBackend:
...
@@ -53,8 +53,27 @@ class PiecewiseBackend:
self
.
is_last_graph
=
piecewise_compile_index
==
total_piecewise_compiles
-
1
self
.
is_last_graph
=
piecewise_compile_index
==
total_piecewise_compiles
-
1
self
.
is_full_graph
=
total_piecewise_compiles
==
1
self
.
is_full_graph
=
total_piecewise_compiles
==
1
# TODO: we need to generalize encoder compilation to other models
self
.
is_encoder_compilation
=
vllm_backend
.
prefix
in
[
"Qwen2_5_VisionPatchEmbed"
,
"Qwen2_5_VisionPatchMerger"
,
"Qwen2_5_VisionBlock"
,
]
self
.
compile_ranges
=
self
.
compilation_config
.
get_compile_ranges
()
self
.
compile_ranges
=
self
.
compilation_config
.
get_compile_ranges
()
if
self
.
is_encoder_compilation
:
# For encoder compilation we use the max int32 value
# to set the upper bound of the compile ranges
max_int32
=
2
**
31
-
1
last_compile_range
=
self
.
compile_ranges
[
-
1
]
assert
(
last_compile_range
.
end
==
vllm_config
.
scheduler_config
.
max_num_batched_tokens
)
self
.
compile_ranges
[
-
1
]
=
Range
(
start
=
last_compile_range
.
start
,
end
=
max_int32
)
log_string
=
f
"PiecewiseBackend: compile_ranges:
{
self
.
compile_ranges
}
"
log_string
=
f
"PiecewiseBackend: compile_ranges:
{
self
.
compile_ranges
}
"
logger
.
debug_once
(
log_string
)
logger
.
debug_once
(
log_string
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment