Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c625f904
Unverified
Commit
c625f904
authored
Sep 23, 2025
by
Isotr0py
Committed by
GitHub
Sep 23, 2025
Browse files
[V0 deprecation] Remove `_set_default_args_v0` function (#25409)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
6fa78d8f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
72 deletions
+11
-72
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+11
-72
No files found.
vllm/engine/arg_utils.py
View file @
c625f904
...
...
@@ -1147,20 +1147,15 @@ class EngineArgs:
else
:
envs
.
set_vllm_use_v1
(
use_v1
)
# Set default arguments for V0 or V1 Engine.
if
use_v1
:
self
.
_set_default_args_v1
(
usage_context
,
model_config
)
# Disable chunked prefill for POWER (ppc64le)/ARM/s390x CPUs in V1
if
current_platform
.
is_cpu
(
)
and
current_platform
.
get_cpu_architecture
()
in
(
CpuArchEnum
.
POWERPC
,
CpuArchEnum
.
S390X
,
CpuArchEnum
.
ARM
):
logger
.
info
(
"Chunked prefill is not supported for ARM and POWER "
"and S390X CPUs; "
"disabling it for V1 backend."
)
self
.
enable_chunked_prefill
=
False
else
:
self
.
_set_default_args_v0
(
model_config
)
# Set default arguments for V1 Engine.
self
.
_set_default_args
(
usage_context
,
model_config
)
# Disable chunked prefill for POWER (ppc64le)/ARM/s390x CPUs in V1
if
current_platform
.
is_cpu
()
and
current_platform
.
get_cpu_architecture
(
)
in
(
CpuArchEnum
.
POWERPC
,
CpuArchEnum
.
S390X
,
CpuArchEnum
.
ARM
):
logger
.
info
(
"Chunked prefill is not supported for ARM and POWER "
"and S390X CPUs; "
"disabling it for V1 backend."
)
self
.
enable_chunked_prefill
=
False
assert
self
.
enable_chunked_prefill
is
not
None
sliding_window
:
Optional
[
int
]
=
None
...
...
@@ -1528,64 +1523,8 @@ class EngineArgs:
return
True
def
_set_default_args_v0
(
self
,
model_config
:
ModelConfig
)
->
None
:
"""Set Default Arguments for V0 Engine."""
max_model_len
=
model_config
.
max_model_len
use_long_context
=
max_model_len
>
32768
if
self
.
enable_chunked_prefill
is
None
:
# Chunked prefill not supported for Multimodal or MLA in V0.
if
model_config
.
is_multimodal_model
or
model_config
.
use_mla
:
self
.
enable_chunked_prefill
=
False
# Enable chunked prefill by default for long context (> 32K)
# models to avoid OOM errors in initial memory profiling phase.
elif
use_long_context
:
is_gpu
=
current_platform
.
is_cuda
()
use_sliding_window
=
(
model_config
.
get_sliding_window
()
is
not
None
)
use_spec_decode
=
self
.
speculative_config
is
not
None
if
(
is_gpu
and
not
use_sliding_window
and
not
use_spec_decode
and
not
self
.
enable_lora
):
self
.
enable_chunked_prefill
=
True
logger
.
warning
(
"Chunked prefill is enabled by default for models "
"with max_model_len > 32K. Chunked prefill might "
"not work with some features or models. If you "
"encounter any issues, please disable by launching "
"with --enable-chunked-prefill=False."
)
if
self
.
enable_chunked_prefill
is
None
:
self
.
enable_chunked_prefill
=
False
if
not
self
.
enable_chunked_prefill
and
use_long_context
:
logger
.
warning
(
"The model has a long context length (%s). This may cause"
"OOM during the initial memory profiling phase, or result "
"in low performance due to small KV cache size. Consider "
"setting --max-model-len to a smaller value."
,
max_model_len
)
# Disable prefix caching for multimodal models for VLLM_V0.
if
self
.
enable_prefix_caching
and
model_config
.
is_multimodal_model
:
logger
.
warning
(
"--enable-prefix-caching is not supported for multimodal "
"models in V0 and has been disabled."
)
self
.
enable_prefix_caching
=
False
if
self
.
enable_prompt_embeds
:
logger
.
warning
(
"--enable-prompt-embeds and --enable-prefix-caching "
"are not supported together in V0. Prefix caching has "
"been disabled."
)
self
.
enable_prefix_caching
=
False
# Set max_num_seqs to 256 for VLLM_V0.
if
self
.
max_num_seqs
is
None
:
self
.
max_num_seqs
=
256
def
_set_default_args_v1
(
self
,
usage_context
:
UsageContext
,
model_config
:
ModelConfig
)
->
None
:
def
_set_default_args
(
self
,
usage_context
:
UsageContext
,
model_config
:
ModelConfig
)
->
None
:
"""Set Default Arguments for V1 Engine."""
# V1 always uses chunked prefills and prefix caching
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment