Add GPTQ support for Gemma (#3200)

d3c04b6a · TechxGenus · GitHub · 4cb3b924 · d3c04b6a
Unverified commit d3c04b6a, authored Mar 07, 2024 by TechxGenus; committed by GitHub on Mar 07, 2024.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 0 deletions

vllm/model_executor/models/gemma.py +6 -0

No files found.
--- a/vllm/model_executor/models/gemma.py
+++ b/vllm/model_executor/models/gemma.py
@@ -325,11 +325,17 @@ class GemmaForCausalLM(nn.Module):
                if shard_name not in name:
                    continue
                name = name.replace(shard_name, param_name)
+                # Skip loading extra bias for GPTQ models.
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
                param = params_dict[name]
                weight_loader = param.weight_loader
                weight_loader(param, loaded_weight, shard_id)
                break
            else:
+                # Skip loading extra bias for GPTQ models.
+                if name.endswith(".bias") and name not in params_dict:
+                    continue
                # GemmaRMSNorm is different from Llama's in that it multiplies
                # (1 + weight) to the output, instead of just weight.
                if "norm.weight" in name: