Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c3ee80a0
Unverified
Commit
c3ee80a0
authored
Nov 06, 2025
by
wangxiyuan
Committed by
GitHub
Nov 06, 2025
Browse files
[V0 deprecation]clean up is_v1_supported_oracle (#28116)
Signed-off-by:
wangxiyuan
<
wangxiyuan1007@gmail.com
>
parent
3755c145
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
21 additions
and
134 deletions
+21
-134
tests/v1/test_oracle.py
tests/v1/test_oracle.py
+6
-44
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+15
-90
No files found.
tests/v1/test_oracle.py
View file @
c3ee80a0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
import
pytest
import
vllm.envs
as
envs
from
vllm
import
LLM
from
vllm.engine.arg_utils
import
AsyncEngineArgs
MODEL
=
"meta-llama/Llama-3.2-1B-Instruct"
def
test_reject_bad_config
(
monkeypatch
):
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
def
test_unsupported_configs
(
monkeypatch
):
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
with
pytest
.
raises
(
NotImplementedError
):
AsyncEngineArgs
(
model
=
MODEL
,
speculative_config
=
{
"model"
:
MODEL
,
},
).
create_engine_config
()
def
test_enable_by_default_fallback
(
monkeypatch
):
with
monkeypatch
.
context
()
as
m
:
if
os
.
getenv
(
"VLLM_USE_V1"
,
None
):
m
.
delenv
(
"VLLM_USE_V1"
)
# Should default to V1 for supported config.
_
=
AsyncEngineArgs
(
def
test_unsupported_configs
():
with
pytest
.
raises
(
NotImplementedError
):
AsyncEngineArgs
(
model
=
MODEL
,
enforce_eager
=
True
,
speculative_config
=
{
"model"
:
MODEL
,
},
).
create_engine_config
()
assert
envs
.
VLLM_USE_V1
m
.
delenv
(
"VLLM_USE_V1"
)
def
test_v1_llm_by_default
(
monkeypatch
):
with
monkeypatch
.
context
()
as
m
:
if
os
.
getenv
(
"VLLM_USE_V1"
,
None
):
m
.
delenv
(
"VLLM_USE_V1"
)
# Should default to V1 for supported config.
llm
=
LLM
(
MODEL
,
enforce_eager
=
True
,
enable_lora
=
True
)
print
(
llm
.
generate
(
"Hello my name is"
))
assert
hasattr
(
llm
.
llm_engine
,
"engine_core"
)
m
.
delenv
(
"VLLM_USE_V1"
)
vllm/engine/arg_utils.py
View file @
c3ee80a0
...
...
@@ -1290,15 +1290,7 @@ class EngineArgs:
"""
Create the VllmConfig.
NOTE: for autoselection of V0 vs V1 engine, we need to
create the ModelConfig first, since ModelConfig's attrs
(e.g. the model arch) are needed to make the decision.
This function set VLLM_USE_V1=X if VLLM_USE_V1 is
unspecified by the user.
If VLLM_USE_V1 is specified by the user but the VllmConfig
is incompatible, we raise an error.
NOTE: If VllmConfig is incompatible, we raise an error.
"""
current_platform
.
pre_register_and_update
()
...
...
@@ -1324,22 +1316,7 @@ class EngineArgs:
self
.
model
=
model_config
.
model
self
.
tokenizer
=
model_config
.
tokenizer
# * If VLLM_USE_V1 is unset, we enable V1 for "supported features"
# and fall back to V0 for experimental or unsupported features.
# * If VLLM_USE_V1=1, we enable V1 for supported + experimental
# features and raise error for unsupported features.
# * If VLLM_USE_V1=0, we disable V1.
use_v1
=
False
try_v1
=
envs
.
VLLM_USE_V1
or
not
envs
.
is_set
(
"VLLM_USE_V1"
)
if
try_v1
and
self
.
_is_v1_supported_oracle
(
model_config
):
use_v1
=
True
# If user explicitly set VLLM_USE_V1, sanity check we respect it.
if
envs
.
is_set
(
"VLLM_USE_V1"
):
assert
use_v1
==
envs
.
VLLM_USE_V1
# Otherwise, set the VLLM_USE_V1 variable globally.
else
:
envs
.
set_vllm_use_v1
(
use_v1
)
self
.
_check_feature_supported
(
model_config
)
# Set default arguments for V1 Engine.
self
.
_set_default_args
(
usage_context
,
model_config
)
...
...
@@ -1708,17 +1685,10 @@ class EngineArgs:
return
config
def
_is_v1_supported_oracle
(
self
,
model_config
:
ModelConfig
)
->
bool
:
"""Oracle for whether to use V0 or V1 Engine by default."""
#############################################################
# Unsupported Feature Flags on V1.
def
_check_feature_supported
(
self
,
model_config
:
ModelConfig
):
"""Raise an error if the feature is not supported."""
if
self
.
logits_processor_pattern
!=
EngineArgs
.
logits_processor_pattern
:
_raise_or_fallback
(
feature_name
=
"--logits-processor-pattern"
,
recommend_to_remove
=
False
)
return
False
_raise_unsupported_error
(
feature_name
=
"--logits-processor-pattern"
)
# No Concurrent Partial Prefills so far.
if
(
...
...
@@ -1726,12 +1696,9 @@ class EngineArgs:
or
self
.
max_long_partial_prefills
!=
SchedulerConfig
.
max_long_partial_prefills
):
_raise_or_fallback
(
feature_name
=
"Concurrent Partial Prefill"
,
recommend_to_remove
=
False
)
return
False
_raise_unsupported_error
(
feature_name
=
"Concurrent Partial Prefill"
)
#
V1 supports
N-gram, Medusa, and Eagle speculative decoding.
# N-gram, Medusa, and Eagle
are supported for
speculative decoding.
if
self
.
speculative_config
is
not
None
:
# speculative_config could still be a dict at this point
if
isinstance
(
self
.
speculative_config
,
dict
):
...
...
@@ -1746,35 +1713,6 @@ class EngineArgs:
"such as ngram, medusa, eagle, or mtp."
)
V1_BACKENDS
=
[
"FLASH_ATTN"
,
"PALLAS"
,
"TRITON_ATTN"
,
"TRITON_MLA"
,
"CUTLASS_MLA"
,
"FLASHMLA"
,
"FLASH_ATTN_MLA"
,
"FLASHINFER"
,
"FLASHINFER_MLA"
,
"ROCM_AITER_MLA"
,
"TORCH_SDPA"
,
"FLEX_ATTENTION"
,
"TREE_ATTN"
,
"XFORMERS"
,
"ROCM_ATTN"
,
"ROCM_AITER_UNIFIED_ATTN"
,
]
if
(
envs
.
is_set
(
"VLLM_ATTENTION_BACKEND"
)
and
envs
.
VLLM_ATTENTION_BACKEND
not
in
V1_BACKENDS
):
name
=
f
"VLLM_ATTENTION_BACKEND=
{
envs
.
VLLM_ATTENTION_BACKEND
}
"
_raise_or_fallback
(
feature_name
=
name
,
recommend_to_remove
=
True
)
return
False
#############################################################
# Experimental Features - allow users to opt in.
if
self
.
pipeline_parallel_size
>
1
:
supports_pp
=
getattr
(
self
.
distributed_executor_backend
,
"supports_pp"
,
False
...
...
@@ -1790,18 +1728,10 @@ class EngineArgs:
"executor or multiprocessing executor or external "
"launcher"
)
_raise_or_fallback
(
feature_name
=
name
,
recommend_to_remove
=
False
)
return
False
_raise_unsupported_error
(
feature_name
=
name
)
if
current_platform
.
is_cpu
()
and
model_config
.
get_sliding_window
()
is
not
None
:
_raise_or_fallback
(
feature_name
=
"sliding window (CPU backend)"
,
recommend_to_remove
=
False
)
return
False
#############################################################
return
True
_raise_unsupported_error
(
feature_name
=
"sliding window (CPU backend)"
)
def
_set_default_args
(
self
,
usage_context
:
UsageContext
,
model_config
:
ModelConfig
...
...
@@ -2000,17 +1930,12 @@ class AsyncEngineArgs(EngineArgs):
return
parser
def
_raise_or_fallback
(
feature_name
:
str
,
recommend_to_remove
:
bool
):
if
envs
.
is_set
(
"VLLM_USE_V1"
)
and
envs
.
VLLM_USE_V1
:
raise
NotImplementedError
(
f
"VLLM_USE_V1=1 is not supported with
{
feature_name
}
."
)
msg
=
f
"
{
feature_name
}
is not supported by the V1 Engine. "
msg
+=
"Falling back to V0. "
if
recommend_to_remove
:
msg
+=
f
"We recommend to remove
{
feature_name
}
from your config "
msg
+=
"in favor of the V1 Engine."
logger
.
warning
(
msg
)
def
_raise_unsupported_error
(
feature_name
:
str
):
msg
=
(
f
"
{
feature_name
}
is not supported. We recommend to "
f
"remove
{
feature_name
}
from your config."
)
raise
NotImplementedError
(
msg
)
def
human_readable_int
(
value
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment