Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
573531eb
Commit
573531eb
authored
Oct 15, 2025
by
zhuwenwen
Browse files
support --no-enable-chunked-prefill of v1
parent
33f37e9f
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
0 deletions
+13
-0
vllm/config/model.py
vllm/config/model.py
+4
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+9
-0
No files found.
vllm/config/model.py
View file @
573531eb
...
@@ -276,6 +276,9 @@ class ModelConfig:
...
@@ -276,6 +276,9 @@ class ModelConfig:
override_pooler_config
:
Optional
[
Union
[
dict
,
PoolerConfig
]]
=
None
override_pooler_config
:
Optional
[
Union
[
dict
,
PoolerConfig
]]
=
None
"""[DEPRECATED] Use `pooler_config` instead. This field will be removed in
"""[DEPRECATED] Use `pooler_config` instead. This field will be removed in
v0.12.0 or v1.0.0, whichever is sooner."""
v0.12.0 or v1.0.0, whichever is sooner."""
enable_chunked_prefill
:
Optional
[
bool
]
=
None
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""
# Multimodal config and init vars
# Multimodal config and init vars
multimodal_config
:
Optional
[
MultiModalConfig
]
=
None
multimodal_config
:
Optional
[
MultiModalConfig
]
=
None
...
@@ -320,6 +323,7 @@ class ModelConfig:
...
@@ -320,6 +323,7 @@ class ModelConfig:
factors
.
append
(
self
.
rope_scaling
)
factors
.
append
(
self
.
rope_scaling
)
factors
.
append
(
self
.
rope_theta
)
factors
.
append
(
self
.
rope_theta
)
factors
.
append
(
self
.
video_pruning_rate
)
factors
.
append
(
self
.
video_pruning_rate
)
factors
.
append
(
self
.
enable_chunked_prefill
)
# hf_config can control how the model looks!
# hf_config can control how the model looks!
try
:
try
:
...
...
vllm/engine/arg_utils.py
View file @
573531eb
...
@@ -1055,6 +1055,7 @@ class EngineArgs:
...
@@ -1055,6 +1055,7 @@ class EngineArgs:
logits_processors
=
self
.
logits_processors
,
logits_processors
=
self
.
logits_processors
,
video_pruning_rate
=
self
.
video_pruning_rate
,
video_pruning_rate
=
self
.
video_pruning_rate
,
io_processor_plugin
=
self
.
io_processor_plugin
,
io_processor_plugin
=
self
.
io_processor_plugin
,
enable_chunked_prefill
=
self
.
enable_chunked_prefill
,
)
)
def
validate_tensorizer_args
(
self
):
def
validate_tensorizer_args
(
self
):
...
@@ -1561,6 +1562,10 @@ class EngineArgs:
...
@@ -1561,6 +1562,10 @@ class EngineArgs:
if
model_config
.
runner_type
!=
"pooling"
:
if
model_config
.
runner_type
!=
"pooling"
:
self
.
enable_chunked_prefill
=
True
self
.
enable_chunked_prefill
=
True
if
model_config
.
enable_chunked_prefill
is
not
None
and
\
model_config
.
enable_chunked_prefill
is
False
:
self
.
enable_chunked_prefill
=
False
# TODO: When prefix caching supports prompt embeds inputs, this
# TODO: When prefix caching supports prompt embeds inputs, this
# check can be removed.
# check can be removed.
if
(
self
.
enable_prompt_embeds
if
(
self
.
enable_prompt_embeds
...
@@ -1584,6 +1589,10 @@ class EngineArgs:
...
@@ -1584,6 +1589,10 @@ class EngineArgs:
action
=
"Enabling"
if
\
action
=
"Enabling"
if
\
incremental_prefill_supported
else
"Disabling"
incremental_prefill_supported
else
"Disabling"
if
model_config
.
enable_chunked_prefill
is
not
None
and
\
model_config
.
enable_chunked_prefill
is
False
:
self
.
enable_chunked_prefill
=
False
if
self
.
enable_chunked_prefill
is
None
:
if
self
.
enable_chunked_prefill
is
None
:
self
.
enable_chunked_prefill
=
incremental_prefill_supported
self
.
enable_chunked_prefill
=
incremental_prefill_supported
logger
.
info
(
"(%s) chunked prefill by default"
,
action
)
logger
.
info
(
"(%s) chunked prefill by default"
,
action
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment