Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
71b25b0d
Unverified
Commit 71b25b0d authored Sep 26, 2025 by Isotr0py
Committed by GitHub Sep 25, 2025
Browse files
[V0 deprecation] Clean up V0 fallback in compilation config (#25675)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent
0ea80c87
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 22 additions and 73 deletions
+22
-73
vllm/config/__init__.py
vllm/config/__init__.py
+20
-70
vllm/config/compilation.py
vllm/config/compilation.py
+2
-3
No files found.
vllm/config/__init__.py
View file @
71b25b0d
...
...
@@ -384,19 +384,7 @@ class VllmConfig:
else
:
self
.
compilation_config
.
cudagraph_mode
=
CUDAGraphMode
.
NONE
if
self
.
cache_config
.
cpu_offload_gb
>
0
and
\
self
.
compilation_config
.
level
!=
CompilationLevel
.
NO_COMPILATION
\
and
not
envs
.
VLLM_USE_V1
:
logger
.
warning
(
"CPU offload is not supported with `torch.compile` in v0 yet."
" Disabling `torch.compile`."
)
self
.
compilation_config
.
level
=
CompilationLevel
.
NO_COMPILATION
if
self
.
cache_config
.
kv_sharing_fast_prefill
:
if
not
envs
.
VLLM_USE_V1
:
raise
NotImplementedError
(
"Fast prefill optimization for KV sharing is not supported "
"in V0 currently."
)
if
self
.
speculative_config
is
not
None
and
\
self
.
speculative_config
.
use_eagle
():
...
...
@@ -410,14 +398,6 @@ class VllmConfig:
"--kv-sharing-fast-prefill requires changes on model side for "
"correctness and to realize prefill savings. "
)
if
((
not
envs
.
VLLM_USE_V1
)
and
self
.
lora_config
is
not
None
and
self
.
compilation_config
.
level
!=
CompilationLevel
.
NO_COMPILATION
):
logger
.
warning
(
"LoRA for V0 is not supported with `torch.compile` yet. "
"Disabling `torch.compile`."
)
self
.
compilation_config
.
level
=
CompilationLevel
.
NO_COMPILATION
disable_chunked_prefill_reasons
:
list
[
str
]
=
[]
if
self
.
model_config
:
...
...
@@ -604,36 +584,6 @@ class VllmConfig:
"""
# calculate the default `batch_size_capture_list`
if
not
envs
.
VLLM_USE_V1
:
batch_size_capture_list
=
[]
if
self
.
scheduler_config
is
not
None
and
\
self
.
model_config
is
not
None
and
\
not
self
.
model_config
.
enforce_eager
:
possible_sizes
=
[
1
,
2
,
4
]
+
[
8
*
i
for
i
in
range
(
1
,
1025
)]
if
self
.
parallel_config
.
tensor_parallel_size
>
1
and
\
self
.
compilation_config
.
pass_config
.
enable_sequence_parallelism
:
possible_sizes
=
self
.
update_sizes_for_sequence_parallelism
(
possible_sizes
)
# find the minimum size that is larger than max_num_seqs,
# which then becomes the max_batchsize_to_capture
larger_sizes
=
[
x
for
x
in
possible_sizes
if
x
>=
self
.
scheduler_config
.
max_num_seqs
]
if
larger_sizes
:
max_batchsize_to_capture
=
larger_sizes
[
0
]
else
:
max_batchsize_to_capture
=
possible_sizes
[
-
1
]
# filter out the sizes that are
# larger than max_batchsize_to_capture
batch_size_capture_list
=
[
size
for
size
in
possible_sizes
if
size
<=
max_batchsize_to_capture
]
else
:
batch_size_capture_list
=
[]
if
self
.
model_config
is
not
None
and
\
not
self
.
model_config
.
enforce_eager
:
...
...
vllm/config/compilation.py
View file @
71b25b0d
...
...
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union
from
pydantic
import
TypeAdapter
,
field_validator
from
pydantic.dataclasses
import
dataclass
import
vllm.envs
as
envs
from
vllm.compilation.inductor_pass
import
CallableInductorPass
,
InductorPass
from
vllm.config.utils
import
config
from
vllm.logger
import
init_logger
...
...
@@ -75,11 +74,11 @@ class PassConfig:
don't all have access to full configuration - that would create a cycle as
the `PassManager` is set as a property of config."""
enable_fusion
:
bool
=
field
(
default_factory
=
lambda
:
not
envs
.
VLLM_USE_V1
)
enable_fusion
:
bool
=
False
"""Whether to enable the custom fusion (RMSNorm/SiluMul+quant) pass."""
enable_attn_fusion
:
bool
=
False
"""Whether to enable the custom attention+quant fusion pass."""
enable_noop
:
bool
=
field
(
default_factory
=
lambda
:
not
envs
.
VLLM_USE_V1
)
enable_noop
:
bool
=
False
"""Whether to enable the custom no-op elimination pass."""
enable_sequence_parallelism
:
bool
=
False
"""Whether to enable sequence parallelism."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment