Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
af826e08
Unverified
Commit
af826e08
authored
Oct 30, 2025
by
wangxiyuan
Committed by
GitHub
Oct 30, 2025
Browse files
[V0 deprecation] Remove VLLM_USE_V1 usage in config module (#27784)
Signed-off-by:
wangxiyuan
<
wangxiyuan1007@gmail.com
>
parent
e806178d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
9 additions
and
62 deletions
+9
-62
vllm/config/lora.py
vllm/config/lora.py
+0
-5
vllm/config/model.py
vllm/config/model.py
+2
-23
vllm/config/speculative.py
vllm/config/speculative.py
+0
-7
vllm/config/vllm.py
vllm/config/vllm.py
+7
-27
No files found.
vllm/config/lora.py
View file @
af826e08
...
@@ -9,7 +9,6 @@ from pydantic import ConfigDict, Field, model_validator
...
@@ -9,7 +9,6 @@ from pydantic import ConfigDict, Field, model_validator
from
pydantic.dataclasses
import
dataclass
from
pydantic.dataclasses
import
dataclass
from
typing_extensions
import
Self
from
typing_extensions
import
Self
import
vllm.envs
as
envs
from
vllm.config.utils
import
config
from
vllm.config.utils
import
config
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
...
@@ -106,10 +105,6 @@ class LoRAConfig:
...
@@ -106,10 +105,6 @@ class LoRAConfig:
return
self
return
self
def
verify_with_cache_config
(
self
,
cache_config
:
CacheConfig
):
if
cache_config
.
cpu_offload_gb
>
0
and
not
envs
.
VLLM_USE_V1
:
raise
ValueError
(
"V0 LoRA does not support CPU offload, please use V1."
)
def
verify_with_model_config
(
self
,
model_config
:
ModelConfig
):
def
verify_with_model_config
(
self
,
model_config
:
ModelConfig
):
if
self
.
lora_dtype
in
(
None
,
"auto"
):
if
self
.
lora_dtype
in
(
None
,
"auto"
):
self
.
lora_dtype
=
model_config
.
dtype
self
.
lora_dtype
=
model_config
.
dtype
...
...
vllm/config/model.py
View file @
af826e08
...
@@ -32,7 +32,6 @@ from vllm.transformers_utils.config import (
...
@@ -32,7 +32,6 @@ from vllm.transformers_utils.config import (
get_pooling_config
,
get_pooling_config
,
get_sentence_transformer_tokenizer_config
,
get_sentence_transformer_tokenizer_config
,
is_encoder_decoder
,
is_encoder_decoder
,
is_interleaved
,
try_get_dense_modules
,
try_get_dense_modules
,
try_get_generation_config
,
try_get_generation_config
,
try_get_safetensors_metadata
,
try_get_safetensors_metadata
,
...
@@ -442,15 +441,12 @@ class ModelConfig:
...
@@ -442,15 +441,12 @@ class ModelConfig:
self
.
enforce_eager
=
True
self
.
enforce_eager
=
True
# Set the default seed to 0 in V1.
# Set the default seed to 0 in V1.
# NOTE(woosuk): In V0, we set the default seed to None because the
# NOTE(woosuk): In V1, we use separate processes for workers (unless
# driver worker shares the same process as the user process, and thus
# setting a seed affects the user process as well.
# In V1, we use separate processes for workers (unless
# VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
# VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
# doesn't affect the user process. However, without a consistent seed,
# doesn't affect the user process. However, without a consistent seed,
# different tensor parallel workers would sample different tokens,
# different tensor parallel workers would sample different tokens,
# leading to inconsistent results.
# leading to inconsistent results.
if
envs
.
VLLM_USE_V1
and
self
.
seed
is
None
:
if
self
.
seed
is
None
:
self
.
seed
=
0
self
.
seed
=
0
if
not
envs
.
VLLM_ENABLE_V1_MULTIPROCESSING
:
if
not
envs
.
VLLM_ENABLE_V1_MULTIPROCESSING
:
logger
.
warning
(
logger
.
warning
(
...
@@ -703,23 +699,6 @@ class ModelConfig:
...
@@ -703,23 +699,6 @@ class ModelConfig:
revision
=
self
.
revision
,
revision
=
self
.
revision
,
)
)
# Interleaved attention is not supported by some backends in V0
if
(
not
self
.
disable_sliding_window
and
is_interleaved
(
self
.
hf_text_config
)
and
not
envs
.
VLLM_USE_V1
and
(
backend
:
=
envs
.
VLLM_ATTENTION_BACKEND
)
in
(
"XFORMERS"
,
"FLASHINFER"
)
):
logger
.
warning_once
(
"%s has interleaved attention, which is currently not "
"supported by the %s backend. Disabling sliding window and "
"capping the max length to the sliding window size (%d)."
,
self
.
hf_text_config
.
model_type
,
backend
,
self
.
hf_text_config
.
sliding_window
,
)
self
.
disable_sliding_window
=
True
self
.
original_max_model_len
=
self
.
max_model_len
self
.
original_max_model_len
=
self
.
max_model_len
self
.
max_model_len
=
self
.
get_and_verify_max_len
(
self
.
max_model_len
)
self
.
max_model_len
=
self
.
get_and_verify_max_len
(
self
.
max_model_len
)
# Init multimodal config if needed
# Init multimodal config if needed
...
...
vllm/config/speculative.py
View file @
af826e08
...
@@ -9,7 +9,6 @@ from pydantic import Field, SkipValidation, model_validator
...
@@ -9,7 +9,6 @@ from pydantic import Field, SkipValidation, model_validator
from
pydantic.dataclasses
import
dataclass
from
pydantic.dataclasses
import
dataclass
from
typing_extensions
import
Self
from
typing_extensions
import
Self
import
vllm.envs
as
envs
from
vllm.config.parallel
import
ParallelConfig
from
vllm.config.parallel
import
ParallelConfig
from
vllm.config.utils
import
config
from
vllm.config.utils
import
config
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
@@ -366,12 +365,6 @@ class SpeculativeConfig:
...
@@ -366,12 +365,6 @@ class SpeculativeConfig:
# Replace hf_config for EAGLE draft_model
# Replace hf_config for EAGLE draft_model
if
self
.
method
in
(
"eagle"
,
"eagle3"
):
if
self
.
method
in
(
"eagle"
,
"eagle3"
):
if
self
.
enable_chunked_prefill
and
not
envs
.
VLLM_USE_V1
:
raise
ValueError
(
"Chunked prefill and EAGLE are not compatible "
"when using V0."
)
from
vllm.transformers_utils.configs
import
SpeculatorsConfig
from
vllm.transformers_utils.configs
import
SpeculatorsConfig
from
vllm.transformers_utils.configs.eagle
import
EAGLEConfig
from
vllm.transformers_utils.configs.eagle
import
EAGLEConfig
...
...
vllm/config/vllm.py
View file @
af826e08
...
@@ -130,7 +130,6 @@ class VllmConfig:
...
@@ -130,7 +130,6 @@ class VllmConfig:
from
vllm
import
__version__
from
vllm
import
__version__
vllm_factors
.
append
(
__version__
)
vllm_factors
.
append
(
__version__
)
vllm_factors
.
append
(
envs
.
VLLM_USE_V1
)
if
self
.
model_config
:
if
self
.
model_config
:
vllm_factors
.
append
(
self
.
model_config
.
compute_hash
())
vllm_factors
.
append
(
self
.
model_config
.
compute_hash
())
else
:
else
:
...
@@ -306,7 +305,6 @@ class VllmConfig:
...
@@ -306,7 +305,6 @@ class VllmConfig:
self
.
cache_config
.
verify_with_parallel_config
(
self
.
parallel_config
)
self
.
cache_config
.
verify_with_parallel_config
(
self
.
parallel_config
)
if
self
.
lora_config
is
not
None
:
if
self
.
lora_config
is
not
None
:
self
.
lora_config
.
verify_with_cache_config
(
self
.
cache_config
)
self
.
lora_config
.
verify_with_model_config
(
self
.
model_config
)
self
.
lora_config
.
verify_with_model_config
(
self
.
model_config
)
if
self
.
quant_config
is
None
and
self
.
model_config
is
not
None
:
if
self
.
quant_config
is
None
and
self
.
model_config
is
not
None
:
...
@@ -332,18 +330,9 @@ class VllmConfig:
...
@@ -332,18 +330,9 @@ class VllmConfig:
# we use the default mode. The default mode depends on other
# we use the default mode. The default mode depends on other
# settings (see the below code).
# settings (see the below code).
if
self
.
compilation_config
.
mode
is
None
:
if
self
.
compilation_config
.
mode
is
None
:
if
envs
.
VLLM_USE_V1
:
if
self
.
model_config
is
not
None
and
not
self
.
model_config
.
enforce_eager
:
if
(
self
.
compilation_config
.
mode
=
CompilationMode
.
VLLM_COMPILE
self
.
model_config
is
not
None
and
not
self
.
model_config
.
enforce_eager
):
self
.
compilation_config
.
mode
=
CompilationMode
.
VLLM_COMPILE
else
:
self
.
compilation_config
.
mode
=
CompilationMode
.
NONE
else
:
else
:
# NB: Passing both --enforce-eager and a compilation mode
# in V0 means the compilation mode wins out.
self
.
compilation_config
.
mode
=
CompilationMode
.
NONE
self
.
compilation_config
.
mode
=
CompilationMode
.
NONE
else
:
else
:
assert
self
.
compilation_config
.
mode
>=
CompilationMode
.
NONE
assert
self
.
compilation_config
.
mode
>=
CompilationMode
.
NONE
...
@@ -371,10 +360,7 @@ class VllmConfig:
...
@@ -371,10 +360,7 @@ class VllmConfig:
# if cudagraph_mode is not explicitly set by users, set default
# if cudagraph_mode is not explicitly set by users, set default
# value
# value
if
self
.
compilation_config
.
cudagraph_mode
is
None
:
if
self
.
compilation_config
.
cudagraph_mode
is
None
:
if
(
if
self
.
compilation_config
.
mode
==
CompilationMode
.
VLLM_COMPILE
:
envs
.
VLLM_USE_V1
and
self
.
compilation_config
.
mode
==
CompilationMode
.
VLLM_COMPILE
):
# default to full and piecewise for most models
# default to full and piecewise for most models
self
.
compilation_config
.
cudagraph_mode
=
(
self
.
compilation_config
.
cudagraph_mode
=
(
CUDAGraphMode
.
FULL_AND_PIECEWISE
CUDAGraphMode
.
FULL_AND_PIECEWISE
...
@@ -428,7 +414,7 @@ class VllmConfig:
...
@@ -428,7 +414,7 @@ class VllmConfig:
# override related settings when enforce eager
# override related settings when enforce eager
self
.
compilation_config
.
max_cudagraph_capture_size
=
0
self
.
compilation_config
.
max_cudagraph_capture_size
=
0
self
.
compilation_config
.
cudagraph_capture_sizes
=
[]
self
.
compilation_config
.
cudagraph_capture_sizes
=
[]
el
if
envs
.
VLLM_USE_V1
:
el
se
:
self
.
compilation_config
.
cudagraph_num_of_warmups
=
1
self
.
compilation_config
.
cudagraph_num_of_warmups
=
1
self
.
_set_cudagraph_sizes
()
self
.
_set_cudagraph_sizes
()
...
@@ -535,14 +521,11 @@ class VllmConfig:
...
@@ -535,14 +521,11 @@ class VllmConfig:
current_platform
.
check_and_update_config
(
self
)
current_platform
.
check_and_update_config
(
self
)
# Do this after all the updates to compilation_config.mode
# Do this after all the updates to compilation_config.mode
if
(
if
self
.
compilation_config
.
mode
==
CompilationMode
.
VLLM_COMPILE
:
envs
.
VLLM_USE_V1
and
self
.
compilation_config
.
mode
==
CompilationMode
.
VLLM_COMPILE
):
self
.
compilation_config
.
set_splitting_ops_for_v1
()
self
.
compilation_config
.
set_splitting_ops_for_v1
()
# final check of cudagraph mode after all possible updates
# final check of cudagraph mode after all possible updates
if
envs
.
VLLM_USE_V1
and
current_platform
.
is_cuda_alike
():
if
current_platform
.
is_cuda_alike
():
if
(
if
(
self
.
compilation_config
.
cudagraph_mode
.
has_full_cudagraphs
()
self
.
compilation_config
.
cudagraph_mode
.
has_full_cudagraphs
()
and
self
.
model_config
is
not
None
and
self
.
model_config
is
not
None
...
@@ -587,10 +570,7 @@ class VllmConfig:
...
@@ -587,10 +570,7 @@ class VllmConfig:
if
not
self
.
instance_id
:
if
not
self
.
instance_id
:
self
.
instance_id
=
random_uuid
()[:
5
]
self
.
instance_id
=
random_uuid
()[:
5
]
if
(
if
not
self
.
scheduler_config
.
disable_hybrid_kv_cache_manager
:
envs
.
VLLM_USE_V1
and
not
self
.
scheduler_config
.
disable_hybrid_kv_cache_manager
):
# logger should only print warning message for hybrid models. As we
# logger should only print warning message for hybrid models. As we
# can't know whether the model is hybrid or not now, so we don't log
# can't know whether the model is hybrid or not now, so we don't log
# warning message here and will log it later.
# warning message here and will log it later.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment