[Bugfix] Exclude `language_model_only` key from MM AOT compile hash but...

[Bugfix] Exclude `language_model_only` key from MM AOT compile hash but include in model one (#34508) Signed-off-by: Roger Wang <hey@rogerw.io>

[Bugfix] Exclude `language_model_only` key from MM AOT compile hash but...
[Bugfix] Exclude `language_model_only` key from MM AOT compile hash but include in model one (#34508) Signed-off-by: Roger Wang <hey@rogerw.io>
1dae7b78 · Roger Wang · GitHub · 5885e330 · 1dae7b78 · 1dae7b78
Unverified Commit 1dae7b78 authored Feb 13, 2026 by Roger Wang Committed by GitHub Feb 13, 2026
Showing with 24 additions and 1 deletion

tests/config/test_multimodal_config.py tests/config/test_multimodal_config.py +18 -0

vllm/config/model.py vllm/config/model.py +6 -0

vllm/config/multimodal.py vllm/config/multimodal.py +0 -1

No files found.
--- a/tests/config/test_multimodal_config.py
+++ b/tests/config/test_multimodal_config.py
@@ -3,6 +3,7 @@

 import pytest

+from vllm.config.model import ModelConfig
 from vllm.config.multimodal import MultiModalConfig
 from vllm.v1.attention.backends.registry import AttentionBackendEnum

@@ -23,3 +24,20 @@ def test_mm_encoder_attn_backend_hash_updates():
        mm_encoder_attn_backend=AttentionBackendEnum.FLASH_ATTN
    ).compute_hash()
    assert base_hash != overridden_hash
+
+
+def test_language_model_only_does_not_affect_mm_hash():
+    """The multimodal config hash must ignore `language_model_only`: the flag
+    does not affect the ViT computation graph, so both settings hash alike."""
+    default_cfg = MultiModalConfig()
+    text_only_cfg = MultiModalConfig(language_model_only=True)
+    assert default_cfg.compute_hash() == text_only_cfg.compute_hash()
+
+
+def test_language_model_only_affects_model_hash():
+    """The model config hash must track `language_model_only`, because the
+    flag affects the LM computation graph."""
+    model_id = "llava-hf/llava-1.5-7b-hf"
+    default_cfg = ModelConfig(model_id)
+    text_only_cfg = ModelConfig(model_id, language_model_only=True)
+    assert default_cfg.compute_hash() != text_only_cfg.compute_hash()
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -357,6 +357,12 @@ class ModelConfig:
        from vllm.config.utils import get_hash_factors, hash_factors

        factors = get_hash_factors(self, ignored_factors)
+
+        # NOTE: For some models (e.g., Qwen3-VL), whether the MM code path is enabled
+        # affects the computation graph of the language model, therefore we add it
+        # here early.
+        if self.multimodal_config:
+            factors["language_model_only"] = self.multimodal_config.language_model_only
        return hash_factors(factors)

    def _update_nested(

--- a/vllm/config/multimodal.py
+++ b/vllm/config/multimodal.py
@@ -219,7 +219,6 @@ class MultiModalConfig:
        the final hidden states.
        """
        factors: list[Any] = [
-            self.language_model_only,
            self.mm_encoder_attn_backend.name
            if self.mm_encoder_attn_backend is not None
            else None,