Fix `tie_word_embeddings` for multimodal models in Transformers v5 (#33359)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Fix `tie_word_embeddings` for multimodal models in Transformers v5 (#33359)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
80b918f2 · Harry Mellor · GitHub · c46b0cd0 · 80b918f2
Commit 80b918f2 (unverified signature), authored Jan 30, 2026 by Harry Mellor; committed by GitHub on Jan 30, 2026
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 24 additions and 0 deletions

vllm/config/vllm.py vllm/config/vllm.py +24 -0

No files found.
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -458,6 +458,30 @@ class VllmConfig:
            hf_config.architectures = architectures
        model_config = copy.deepcopy(self.model_config)
+        if (
+            model_config.is_multimodal_model
+            and hasattr(model_config.hf_config, "tie_word_embeddings")
+            and not hasattr(hf_config.get_text_config(), "tie_word_embeddings")
+        ):
+            # In Transformers v5, tie_word_embeddings belongs to the config of the class
+            # that can see both layers to be tied. For example:
+            #
+            # SomeVLModel:
+            #   self.language_model = SomeLanguageModel()
+            #   self.vision_model = SomeVisionModel()
+            #
+            # SomeVLModelForMultimodalLM:
+            #   self.model = SomeVLModel()
+            #   self.lm_head = nn.Linear()
+            #
+            # Therefore, tie_word_embeddings is defined in SomeVLModelForMultimodalLM's
+            # config and is not present in SomeVLModel's config. In vLLM, the lm_head
+            # belongs to the language_model, so we must ensure that tie_word_embeddings
+            # is set in the language_model's config.
+            tie_word_embeddings = model_config.hf_config.tie_word_embeddings
+            hf_config.get_text_config().tie_word_embeddings = tie_word_embeddings
        model_config.hf_config = hf_config
        model_config.model_arch_config = model_config.get_model_arch_config()