[Bugfix] Disallow renderer_num_workers > 1 with mm processor cache (#38418)

Signed-off-by: Bvicii <yizhanhuang2002@gmail.com>

[Bugfix] Disallow renderer_num_workers > 1 with mm processor cache (#38418)
Signed-off-by: Bvicii <yizhanhuang2002@gmail.com>
bda3eda8 · Bvicii · GitHub · 2bf5b70a · bda3eda8 · bda3eda8
Unverified commit bda3eda8, authored Mar 28, 2026 by Bvicii and committed by GitHub on Mar 28, 2026.
Hide whitespace changes
Inline Side-by-side

Showing with 35 additions and 0 deletions

tests/test_config.py +22 -0

vllm/config/model.py +13 -0

No files found.
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1131,6 +1131,28 @@ def test_needs_dp_coordination(
    assert vllm_config.needs_dp_coordinator == expected_needs_coordinator
+def test_renderer_num_workers_with_mm_cache():
+    """ModelConfig must reject renderer_num_workers > 1 while the mm processor
+    cache is enabled, since neither cache type is thread-safe."""
+    mm_model = "Qwen/Qwen2-VL-2B-Instruct"
+    # Rejected: 4 workers, cache left enabled at its default size (cache_gb=4)
+    with pytest.raises(ValueError, match="renderer-num-workers"):
+        ModelConfig(mm_model, renderer_num_workers=4)
+    # Rejected: 2 workers, cache explicitly enabled via mm_processor_cache_gb=1.0
+    with pytest.raises(ValueError, match="renderer-num-workers"):
+        ModelConfig(mm_model, renderer_num_workers=2, mm_processor_cache_gb=1.0)
+    # Accepted: 4 workers once the cache is disabled (mm_processor_cache_gb=0)
+    config = ModelConfig(mm_model, renderer_num_workers=4, mm_processor_cache_gb=0)
+    assert config.renderer_num_workers == 4
+    # Accepted: a single worker with the cache left at its default (enabled)
+    config = ModelConfig(mm_model, renderer_num_workers=1)
+    assert config.renderer_num_workers == 1
 def test_eagle_draft_model_config():
    """Test that EagleDraft model config is correctly set."""
    target_model_config = ModelConfig(

--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -647,6 +647,19 @@ class ModelConfig:
            self.multimodal_config = MultiModalConfig(**mm_config_kwargs)  # type: ignore[arg-type]
+            if (
+                self.renderer_num_workers > 1
+                and self.multimodal_config.mm_processor_cache_gb > 0
+            ):
+                raise ValueError(
+                    "Cannot use --renderer-num-workers > 1 with the "
+                    "multimodal processor cache enabled. The cache is "
+                    "not thread-safe and does not support concurrent "
+                    "renderer workers. Please set "
+                    "--renderer-num-workers 1 (the default), or "
+                    "disable the cache with --mm-processor-cache-gb 0."
+                )
        # Multimodal GGUF models must use original repo for mm processing
        if is_gguf(self.tokenizer) and self.is_multimodal_model:
            raise ValueError(