Unverified commit bda3eda8, authored by Bvicii and committed by GitHub
Browse files

[Bugfix] Disallow renderer_num_workers > 1 with mm processor cache (#38418)


Signed-off-by: Bvicii <yizhanhuang2002@gmail.com>
parent 2bf5b70a
...@@ -1131,6 +1131,28 @@ def test_needs_dp_coordination( ...@@ -1131,6 +1131,28 @@ def test_needs_dp_coordination(
assert vllm_config.needs_dp_coordinator == expected_needs_coordinator assert vllm_config.needs_dp_coordinator == expected_needs_coordinator
def test_renderer_num_workers_with_mm_cache():
    """Check how ModelConfig treats renderer_num_workers vs. the mm cache.

    Constructing a ModelConfig with more than one renderer worker while the
    multimodal processor cache is enabled is expected to raise a ValueError
    mentioning "renderer-num-workers"; a single worker, or a disabled cache
    (mm_processor_cache_gb=0), is expected to be accepted.
    """
    mm_model = "Qwen/Qwen2-VL-2B-Instruct"

    # Combinations that must be rejected: several workers with the cache on.
    # The first case relies on the default cache size (noted as 4 GB by the
    # original test author); the second sets the cache size explicitly.
    rejected_kwargs = (
        {"renderer_num_workers": 4},
        {"renderer_num_workers": 2, "mm_processor_cache_gb": 1.0},
    )
    for kwargs in rejected_kwargs:
        with pytest.raises(ValueError, match="renderer-num-workers"):
            ModelConfig(mm_model, **kwargs)

    # Combinations that must be accepted, paired with the worker count the
    # resulting config should report: several workers with the cache
    # disabled, then a single worker with the default cache.
    accepted_cases = (
        ({"renderer_num_workers": 4, "mm_processor_cache_gb": 0}, 4),
        ({"renderer_num_workers": 1}, 1),
    )
    for kwargs, expected_workers in accepted_cases:
        config = ModelConfig(mm_model, **kwargs)
        assert config.renderer_num_workers == expected_workers
def test_eagle_draft_model_config(): def test_eagle_draft_model_config():
"""Test that EagleDraft model config is correctly set.""" """Test that EagleDraft model config is correctly set."""
target_model_config = ModelConfig( target_model_config = ModelConfig(
......
...@@ -647,6 +647,19 @@ class ModelConfig: ...@@ -647,6 +647,19 @@ class ModelConfig:
self.multimodal_config = MultiModalConfig(**mm_config_kwargs) # type: ignore[arg-type] self.multimodal_config = MultiModalConfig(**mm_config_kwargs) # type: ignore[arg-type]
if (
self.renderer_num_workers > 1
and self.multimodal_config.mm_processor_cache_gb > 0
):
raise ValueError(
"Cannot use --renderer-num-workers > 1 with the "
"multimodal processor cache enabled. The cache is "
"not thread-safe and does not support concurrent "
"renderer workers. Please set "
"--renderer-num-workers 1 (the default), or "
"disable the cache with --mm-processor-cache-gb 0."
)
# Multimodal GGUF models must use original repo for mm processing # Multimodal GGUF models must use original repo for mm processing
if is_gguf(self.tokenizer) and self.is_multimodal_model: if is_gguf(self.tokenizer) and self.is_multimodal_model:
raise ValueError( raise ValueError(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment