Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ea0ccfe6
"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "c866e0079de05cf6aee5931f3b9e200e8cbcf26c"
Commit
ea0ccfe6
authored
Aug 26, 2025
by
zhuwenwen
Browse files
support --no-enable-prefix-caching
parent
7a5df8f7
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
0 deletions
+11
-0
vllm/config/__init__.py
vllm/config/__init__.py
+4
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+7
-0
No files found.
vllm/config/__init__.py
View file @
ea0ccfe6
...
@@ -470,6 +470,9 @@ class ModelConfig:
...
@@ -470,6 +470,9 @@ class ModelConfig:
logits_processors
:
Optional
[
list
[
Union
[
str
,
type
[
LogitsProcessor
]]]]
=
None
logits_processors
:
Optional
[
list
[
Union
[
str
,
type
[
LogitsProcessor
]]]]
=
None
"""One or more logits processors' fully-qualified class names or class
"""One or more logits processors' fully-qualified class names or class
definitions"""
definitions"""
enable_chunked_prefill
:
Optional
[
bool
]
=
None
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""
def
compute_hash
(
self
)
->
str
:
def
compute_hash
(
self
)
->
str
:
"""
"""
...
@@ -500,6 +503,7 @@ class ModelConfig:
...
@@ -500,6 +503,7 @@ class ModelConfig:
factors
.
append
(
self
.
rope_theta
)
factors
.
append
(
self
.
rope_theta
)
# hf_config can control how the model looks!
# hf_config can control how the model looks!
factors
.
append
(
self
.
hf_config
.
to_json_string
())
factors
.
append
(
self
.
hf_config
.
to_json_string
())
factors
.
append
(
self
.
enable_chunked_prefill
)
str_factors
=
str
(
factors
)
str_factors
=
str
(
factors
)
assert_hashable
(
str_factors
)
assert_hashable
(
str_factors
)
return
hashlib
.
sha256
(
str
(
factors
).
encode
()).
hexdigest
()
return
hashlib
.
sha256
(
str
(
factors
).
encode
()).
hexdigest
()
...
...
vllm/engine/arg_utils.py
View file @
ea0ccfe6
...
@@ -1590,6 +1590,9 @@ class EngineArgs:
...
@@ -1590,6 +1590,9 @@ class EngineArgs:
# For pooling tasks the default is False
# For pooling tasks the default is False
if
model_config
.
runner_type
!=
"pooling"
:
if
model_config
.
runner_type
!=
"pooling"
:
self
.
enable_chunked_prefill
=
True
self
.
enable_chunked_prefill
=
True
if
model_config
.
enable_chunked_prefill
is
not
None
and
\
model_config
.
enable_chunked_prefill
is
False
:
self
.
enable_chunked_prefill
=
False
if
self
.
enable_prefix_caching
is
None
:
if
self
.
enable_prefix_caching
is
None
:
self
.
enable_prefix_caching
=
True
self
.
enable_prefix_caching
=
True
else
:
else
:
...
@@ -1602,6 +1605,10 @@ class EngineArgs:
...
@@ -1602,6 +1605,10 @@ class EngineArgs:
action
=
"Enabling"
if
\
action
=
"Enabling"
if
\
incremental_prefill_supported
else
"Disabling"
incremental_prefill_supported
else
"Disabling"
if
model_config
.
enable_chunked_prefill
is
not
None
and
\
model_config
.
enable_chunked_prefill
is
False
:
self
.
enable_chunked_prefill
=
False
if
self
.
enable_chunked_prefill
is
None
:
if
self
.
enable_chunked_prefill
is
None
:
self
.
enable_chunked_prefill
=
incremental_prefill_supported
self
.
enable_chunked_prefill
=
incremental_prefill_supported
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment