Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c3ee80a0
Unverified
Commit
c3ee80a0
authored
Nov 06, 2025
by
wangxiyuan
Committed by
GitHub
Nov 06, 2025
Browse files
[V0 deprecation]clean up is_v1_supported_oracle (#28116)
Signed-off-by:
wangxiyuan
<
wangxiyuan1007@gmail.com
>
parent
3755c145
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
21 additions
and
134 deletions
+21
-134
tests/v1/test_oracle.py
tests/v1/test_oracle.py
+6
-44
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+15
-90
No files found.
tests/v1/test_oracle.py
View file @
c3ee80a0
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
import
pytest
import
pytest
import
vllm.envs
as
envs
from
vllm
import
LLM
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
MODEL
=
"meta-llama/Llama-3.2-1B-Instruct"
MODEL
=
"meta-llama/Llama-3.2-1B-Instruct"
def
test_reject_bad_config
(
monkeypatch
):
def
test_unsupported_configs
():
with
monkeypatch
.
context
()
as
m
:
with
pytest
.
raises
(
NotImplementedError
):
m
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
AsyncEngineArgs
(
def
test_unsupported_configs
(
monkeypatch
):
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
with
pytest
.
raises
(
NotImplementedError
):
AsyncEngineArgs
(
model
=
MODEL
,
speculative_config
=
{
"model"
:
MODEL
,
},
).
create_engine_config
()
def
test_enable_by_default_fallback
(
monkeypatch
):
with
monkeypatch
.
context
()
as
m
:
if
os
.
getenv
(
"VLLM_USE_V1"
,
None
):
m
.
delenv
(
"VLLM_USE_V1"
)
# Should default to V1 for supported config.
_
=
AsyncEngineArgs
(
model
=
MODEL
,
model
=
MODEL
,
enforce_eager
=
True
,
speculative_config
=
{
"model"
:
MODEL
,
},
).
create_engine_config
()
).
create_engine_config
()
assert
envs
.
VLLM_USE_V1
m
.
delenv
(
"VLLM_USE_V1"
)
def
test_v1_llm_by_default
(
monkeypatch
):
with
monkeypatch
.
context
()
as
m
:
if
os
.
getenv
(
"VLLM_USE_V1"
,
None
):
m
.
delenv
(
"VLLM_USE_V1"
)
# Should default to V1 for supported config.
llm
=
LLM
(
MODEL
,
enforce_eager
=
True
,
enable_lora
=
True
)
print
(
llm
.
generate
(
"Hello my name is"
))
assert
hasattr
(
llm
.
llm_engine
,
"engine_core"
)
m
.
delenv
(
"VLLM_USE_V1"
)
vllm/engine/arg_utils.py
View file @
c3ee80a0
...
@@ -1290,15 +1290,7 @@ class EngineArgs:
...
@@ -1290,15 +1290,7 @@ class EngineArgs:
"""
"""
Create the VllmConfig.
Create the VllmConfig.
NOTE: for autoselection of V0 vs V1 engine, we need to
NOTE: If VllmConfig is incompatible, we raise an error.
create the ModelConfig first, since ModelConfig's attrs
(e.g. the model arch) are needed to make the decision.
This function set VLLM_USE_V1=X if VLLM_USE_V1 is
unspecified by the user.
If VLLM_USE_V1 is specified by the user but the VllmConfig
is incompatible, we raise an error.
"""
"""
current_platform
.
pre_register_and_update
()
current_platform
.
pre_register_and_update
()
...
@@ -1324,22 +1316,7 @@ class EngineArgs:
...
@@ -1324,22 +1316,7 @@ class EngineArgs:
self
.
model
=
model_config
.
model
self
.
model
=
model_config
.
model
self
.
tokenizer
=
model_config
.
tokenizer
self
.
tokenizer
=
model_config
.
tokenizer
# * If VLLM_USE_V1 is unset, we enable V1 for "supported features"
self
.
_check_feature_supported
(
model_config
)
# and fall back to V0 for experimental or unsupported features.
# * If VLLM_USE_V1=1, we enable V1 for supported + experimental
# features and raise error for unsupported features.
# * If VLLM_USE_V1=0, we disable V1.
use_v1
=
False
try_v1
=
envs
.
VLLM_USE_V1
or
not
envs
.
is_set
(
"VLLM_USE_V1"
)
if
try_v1
and
self
.
_is_v1_supported_oracle
(
model_config
):
use_v1
=
True
# If user explicitly set VLLM_USE_V1, sanity check we respect it.
if
envs
.
is_set
(
"VLLM_USE_V1"
):
assert
use_v1
==
envs
.
VLLM_USE_V1
# Otherwise, set the VLLM_USE_V1 variable globally.
else
:
envs
.
set_vllm_use_v1
(
use_v1
)
# Set default arguments for V1 Engine.
# Set default arguments for V1 Engine.
self
.
_set_default_args
(
usage_context
,
model_config
)
self
.
_set_default_args
(
usage_context
,
model_config
)
...
@@ -1708,17 +1685,10 @@ class EngineArgs:
...
@@ -1708,17 +1685,10 @@ class EngineArgs:
return
config
return
config
def
_is_v1_supported_oracle
(
self
,
model_config
:
ModelConfig
)
->
bool
:
def
_check_feature_supported
(
self
,
model_config
:
ModelConfig
):
"""Oracle for whether to use V0 or V1 Engine by default."""
"""Raise an error if the feature is not supported."""
#############################################################
# Unsupported Feature Flags on V1.
if
self
.
logits_processor_pattern
!=
EngineArgs
.
logits_processor_pattern
:
if
self
.
logits_processor_pattern
!=
EngineArgs
.
logits_processor_pattern
:
_raise_or_fallback
(
_raise_unsupported_error
(
feature_name
=
"--logits-processor-pattern"
)
feature_name
=
"--logits-processor-pattern"
,
recommend_to_remove
=
False
)
return
False
# No Concurrent Partial Prefills so far.
# No Concurrent Partial Prefills so far.
if
(
if
(
...
@@ -1726,12 +1696,9 @@ class EngineArgs:
...
@@ -1726,12 +1696,9 @@ class EngineArgs:
or
self
.
max_long_partial_prefills
or
self
.
max_long_partial_prefills
!=
SchedulerConfig
.
max_long_partial_prefills
!=
SchedulerConfig
.
max_long_partial_prefills
):
):
_raise_or_fallback
(
_raise_unsupported_error
(
feature_name
=
"Concurrent Partial Prefill"
)
feature_name
=
"Concurrent Partial Prefill"
,
recommend_to_remove
=
False
)
return
False
#
V1 supports
N-gram, Medusa, and Eagle speculative decoding.
# N-gram, Medusa, and Eagle
are supported for
speculative decoding.
if
self
.
speculative_config
is
not
None
:
if
self
.
speculative_config
is
not
None
:
# speculative_config could still be a dict at this point
# speculative_config could still be a dict at this point
if
isinstance
(
self
.
speculative_config
,
dict
):
if
isinstance
(
self
.
speculative_config
,
dict
):
...
@@ -1746,35 +1713,6 @@ class EngineArgs:
...
@@ -1746,35 +1713,6 @@ class EngineArgs:
"such as ngram, medusa, eagle, or mtp."
"such as ngram, medusa, eagle, or mtp."
)
)
V1_BACKENDS
=
[
"FLASH_ATTN"
,
"PALLAS"
,
"TRITON_ATTN"
,
"TRITON_MLA"
,
"CUTLASS_MLA"
,
"FLASHMLA"
,
"FLASH_ATTN_MLA"
,
"FLASHINFER"
,
"FLASHINFER_MLA"
,
"ROCM_AITER_MLA"
,
"TORCH_SDPA"
,
"FLEX_ATTENTION"
,
"TREE_ATTN"
,
"XFORMERS"
,
"ROCM_ATTN"
,
"ROCM_AITER_UNIFIED_ATTN"
,
]
if
(
envs
.
is_set
(
"VLLM_ATTENTION_BACKEND"
)
and
envs
.
VLLM_ATTENTION_BACKEND
not
in
V1_BACKENDS
):
name
=
f
"VLLM_ATTENTION_BACKEND=
{
envs
.
VLLM_ATTENTION_BACKEND
}
"
_raise_or_fallback
(
feature_name
=
name
,
recommend_to_remove
=
True
)
return
False
#############################################################
# Experimental Features - allow users to opt in.
if
self
.
pipeline_parallel_size
>
1
:
if
self
.
pipeline_parallel_size
>
1
:
supports_pp
=
getattr
(
supports_pp
=
getattr
(
self
.
distributed_executor_backend
,
"supports_pp"
,
False
self
.
distributed_executor_backend
,
"supports_pp"
,
False
...
@@ -1790,18 +1728,10 @@ class EngineArgs:
...
@@ -1790,18 +1728,10 @@ class EngineArgs:
"executor or multiprocessing executor or external "
"executor or multiprocessing executor or external "
"launcher"
"launcher"
)
)
_raise_or_fallback
(
feature_name
=
name
,
recommend_to_remove
=
False
)
_raise_unsupported_error
(
feature_name
=
name
)
return
False
if
current_platform
.
is_cpu
()
and
model_config
.
get_sliding_window
()
is
not
None
:
if
current_platform
.
is_cpu
()
and
model_config
.
get_sliding_window
()
is
not
None
:
_raise_or_fallback
(
_raise_unsupported_error
(
feature_name
=
"sliding window (CPU backend)"
)
feature_name
=
"sliding window (CPU backend)"
,
recommend_to_remove
=
False
)
return
False
#############################################################
return
True
def
_set_default_args
(
def
_set_default_args
(
self
,
usage_context
:
UsageContext
,
model_config
:
ModelConfig
self
,
usage_context
:
UsageContext
,
model_config
:
ModelConfig
...
@@ -2000,17 +1930,12 @@ class AsyncEngineArgs(EngineArgs):
...
@@ -2000,17 +1930,12 @@ class AsyncEngineArgs(EngineArgs):
return
parser
return
parser
def
_raise_or_fallback
(
feature_name
:
str
,
recommend_to_remove
:
bool
):
def
_raise_unsupported_error
(
feature_name
:
str
):
if
envs
.
is_set
(
"VLLM_USE_V1"
)
and
envs
.
VLLM_USE_V1
:
msg
=
(
raise
NotImplementedError
(
f
"
{
feature_name
}
is not supported. We recommend to "
f
"VLLM_USE_V1=1 is not supported with
{
feature_name
}
."
f
"remove
{
feature_name
}
from your config."
)
)
msg
=
f
"
{
feature_name
}
is not supported by the V1 Engine. "
raise
NotImplementedError
(
msg
)
msg
+=
"Falling back to V0. "
if
recommend_to_remove
:
msg
+=
f
"We recommend to remove
{
feature_name
}
from your config "
msg
+=
"in favor of the V1 Engine."
logger
.
warning
(
msg
)
def
human_readable_int
(
value
):
def
human_readable_int
(
value
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment