Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b4e081cb
Unverified
Commit
b4e081cb
authored
Aug 01, 2025
by
Cyrus Leung
Committed by
GitHub
Aug 01, 2025
Browse files
[Bugfix] Disable multi-modal preprocessor cache for DP (#21896)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
79731a79
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
21 additions
and
2 deletions
+21
-2
vllm/config.py
vllm/config.py
+6
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+12
-0
vllm/entrypoints/cli/serve.py
vllm/entrypoints/cli/serve.py
+3
-2
No files found.
vllm/config.py
View file @
b4e081cb
...
@@ -871,6 +871,12 @@ class ModelConfig:
...
@@ -871,6 +871,12 @@ class ModelConfig:
return
None
return
None
def
set_disable_mm_preprocessor_cache
(
self
,
value
:
bool
)
->
None
:
mm_config
=
self
.
get_multimodal_config
()
self
.
disable_mm_preprocessor_cache
=
value
mm_config
.
disable_mm_preprocessor_cache
=
value
def
_get_encoder_config
(
self
):
def
_get_encoder_config
(
self
):
return
get_sentence_transformer_tokenizer_config
(
return
get_sentence_transformer_tokenizer_config
(
self
.
model
,
self
.
revision
)
self
.
model
,
self
.
revision
)
...
...
vllm/engine/arg_utils.py
View file @
b4e081cb
...
@@ -1197,6 +1197,18 @@ class EngineArgs:
...
@@ -1197,6 +1197,18 @@ class EngineArgs:
enable_multimodal_encoder_data_parallel
,
enable_multimodal_encoder_data_parallel
,
)
)
supports_mm_preprocessor_cache
=
(
self
.
data_parallel_size
==
1
or
data_parallel_external_lb
)
if
(
not
supports_mm_preprocessor_cache
and
model_config
.
is_multimodal_model
and
not
model_config
.
disable_mm_preprocessor_cache
):
logger
.
warning
(
"Multi-modal preprocessor cache is not compatible "
"with data parallelism when there does not exist a "
"one-to-one correspondance between API process and "
"EngineCore process, so the cache will be disabled."
)
model_config
.
set_disable_mm_preprocessor_cache
(
True
)
speculative_config
=
self
.
create_speculative_config
(
speculative_config
=
self
.
create_speculative_config
(
target_model_config
=
model_config
,
target_model_config
=
model_config
,
target_parallel_config
=
parallel_config
,
target_parallel_config
=
parallel_config
,
...
...
vllm/entrypoints/cli/serve.py
View file @
b4e081cb
...
@@ -167,8 +167,9 @@ def run_multi_api_server(args: argparse.Namespace):
...
@@ -167,8 +167,9 @@ def run_multi_api_server(args: argparse.Namespace):
if
model_config
.
is_multimodal_model
and
not
(
if
model_config
.
is_multimodal_model
and
not
(
orig_disable_mm_preprocessor_cache
):
orig_disable_mm_preprocessor_cache
):
logger
.
warning
(
"Multi-model preprocessor cache will be disabled "
logger
.
warning
(
"for api_server_count > 1"
)
"Multi-modal preprocessor cache is not compatible "
"with api_server_count > 1, so the cache will be disabled."
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
executor_class
=
Executor
.
get_class
(
vllm_config
)
log_stats
=
not
engine_args
.
disable_log_stats
log_stats
=
not
engine_args
.
disable_log_stats
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment