Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2c4b2c80
Commit
2c4b2c80
authored
Oct 15, 2025
by
zhuwenwen
Browse files
Support --no-enable-chunked-prefill in V1
parent
f7e9c329
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
0 deletions
+12
-0
vllm/config.py
vllm/config.py
+4
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+8
-0
No files found.
vllm/config.py
View file @
2c4b2c80
...
...
@@ -418,6 +418,9 @@ class ModelConfig:
- "transformers" will use the Transformers model implementation."""
override_attention_dtype
:
Optional
[
str
]
=
None
"""Override dtype for attention"""
enable_chunked_prefill
:
Optional
[
bool
]
=
None
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""
def
compute_hash
(
self
)
->
str
:
"""
...
...
@@ -448,6 +451,7 @@ class ModelConfig:
factors
.
append
(
self
.
rope_theta
)
# hf_config can control how the model looks!
factors
.
append
(
self
.
hf_config
.
to_json_string
())
factors
.
append
(
self
.
enable_chunked_prefill
)
str_factors
=
str
(
factors
)
assert_hashable
(
str_factors
)
return
hashlib
.
sha256
(
str
(
factors
).
encode
()).
hexdigest
()
...
...
vllm/engine/arg_utils.py
View file @
2c4b2c80
...
...
@@ -1004,6 +1004,7 @@ class EngineArgs:
enable_sleep_mode
=
self
.
enable_sleep_mode
,
model_impl
=
self
.
model_impl
,
override_attention_dtype
=
self
.
override_attention_dtype
,
enable_chunked_prefill
=
self
.
enable_chunked_prefill
,
)
def
create_load_config
(
self
)
->
LoadConfig
:
...
...
@@ -1593,6 +1594,9 @@ class EngineArgs:
# For pooling tasks the default is False
if
model_config
.
runner_type
!=
"pooling"
:
self
.
enable_chunked_prefill
=
True
if
model_config
.
enable_chunked_prefill
is
not
None
and
\
model_config
.
enable_chunked_prefill
is
False
:
self
.
enable_chunked_prefill
=
False
if
self
.
enable_prefix_caching
is
None
:
self
.
enable_prefix_caching
=
True
else
:
...
...
@@ -1607,6 +1611,10 @@ class EngineArgs:
action
=
"Enabling"
if
\
incremental_prefill_supported
else
"Disabling"
if
model_config
.
enable_chunked_prefill
is
not
None
and
\
model_config
.
enable_chunked_prefill
is
False
:
self
.
enable_chunked_prefill
=
False
if
self
.
enable_chunked_prefill
is
None
:
self
.
enable_chunked_prefill
=
incremental_prefill_supported
logger
.
info
(
"(%s) chunked prefill by default"
,
action
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment