[BugFix] gemma loading after quantization or LoRA. (#3553)

b7050ca7 · Taemin Lee · GitHub · c188ecb0 · b7050ca7
Unverified Commit b7050ca7 authored Mar 22, 2024 by Taemin Lee Committed by GitHub Mar 21, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 0 deletions

vllm/model_executor/models/gemma.py vllm/model_executor/models/gemma.py +4 -0

No files found.
--- a/vllm/model_executor/models/gemma.py
+++ b/vllm/model_executor/models/gemma.py
@@ -340,6 +340,10 @@ class GemmaForCausalLM(nn.Module):
                weight_loader(param, loaded_weight, shard_id)
                break
            else:
+                # lm_head is unused in vLLM: it is tied to the token embedding.
+                # To prevent errors, skip loading lm_head.weight.
+                if "lm_head.weight" in name:
+                    continue
                # Skip loading extra bias for GPTQ models.
                if name.endswith(".bias") and name not in params_dict:
                    continue