[Bugfix] Fix startup hang for Granite Speech (#33699)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Bugfix] Fix startup hang for Granite Speech (#33699)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
18e7cbbb · Cyrus Leung · GitHub · f0d52517 · 18e7cbbb
Unverified commit 18e7cbbb, authored Feb 03, 2026 by Cyrus Leung; committed by GitHub on Feb 03, 2026.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 8 additions and 8 deletions

vllm/multimodal/budget.py +8 -8

No files found.
--- a/vllm/multimodal/budget.py
+++ b/vllm/multimodal/budget.py
@@ -54,17 +54,17 @@ class MultiModalBudget:
        self.max_model_len = model_config.max_model_len
        self.max_num_reqs = scheduler_config.max_num_seqs
-        cache = mm_registry.processor_only_cache_from_config(vllm_config)
+        with set_default_torch_num_threads():  # Avoid hang during startup
-        processor = mm_registry.create_processor(model_config, cache=cache)
+            cache = mm_registry.processor_only_cache_from_config(vllm_config)
+            processor = mm_registry.create_processor(model_config, cache=cache)
-        self.cache = cache
+            self.cache = cache
-        self.mm_limits = mm_limits = processor.info.allowed_mm_limits
+            self.mm_limits = mm_limits = processor.info.allowed_mm_limits
-        active_modalities = {
+            active_modalities = {
-            modality for modality, limit in mm_limits.items() if limit > 0
+                modality for modality, limit in mm_limits.items() if limit > 0
-        }
+            }
-        with set_default_torch_num_threads():  # Avoid hang during startup
            all_mm_max_toks_per_item = get_mm_max_toks_per_item(
                model_config,
                mm_registry,