"tests/vscode:/vscode.git/clone" did not exist on "d83becd503660fb876ea42beaa9f63217b857b99"
Unverified commit 80b918f2, authored by Harry Mellor and committed by GitHub
Browse files

Fix `tie_word_embeddings` for multimodal models in Transformers v5 (#33359)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent c46b0cd0
...@@ -458,6 +458,30 @@ class VllmConfig: ...@@ -458,6 +458,30 @@ class VllmConfig:
hf_config.architectures = architectures hf_config.architectures = architectures
model_config = copy.deepcopy(self.model_config) model_config = copy.deepcopy(self.model_config)
if (
model_config.is_multimodal_model
and hasattr(model_config.hf_config, "tie_word_embeddings")
and not hasattr(hf_config.get_text_config(), "tie_word_embeddings")
):
# In Transformers v5, tie_word_embeddings belongs to the config of the class
# that can see both layers to be tied. For example:
#
# SomeVLModel:
# self.language_model = SomeLanguageModel()
# self.vision_model = SomeVisionModel()
#
# SomeVLModelForMultimodalLM:
# self.model = SomeVLModel()
# self.lm_head = nn.Linear()
#
# Therefore, tie_word_embeddings is defined in SomeVLModelForMultimodalLM's
# config and is not present in SomeVLModel's config. In vLLM, the lm_head
# belongs to the language_model, so we must ensure that tie_word_embeddings
# is set in the language_model's config.
tie_word_embeddings = model_config.hf_config.tie_word_embeddings
hf_config.get_text_config().tie_word_embeddings = tie_word_embeddings
model_config.hf_config = hf_config model_config.hf_config = hf_config
model_config.model_arch_config = model_config.get_model_arch_config() model_config.model_arch_config = model_config.get_model_arch_config()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment