Unverified Commit 6c05f669 authored by Sebastian Bodza's avatar Sebastian Bodza Committed by GitHub
Browse files

Fixing starcoder based models with 15B (#118)

parent 0cadd65f
...@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM): ...@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM):
@staticmethod
def move_embed(model: GPTBigCodeForCausalLM, device):
    """Move the model's embedding-related modules to *device*.

    Transfers the token embedding (``wte``), the position embedding
    (``wpe``), and the embedding dropout (``drop``) of
    ``model.transformer`` in place.  The ``wpe`` move was added here so
    position embeddings end up on the same device as the token
    embeddings (presumably required for GPTBigCode/StarCoder-based
    models — the fragment this comes from fixes exactly that).

    Args:
        model: A ``GPTBigCodeForCausalLM`` whose ``transformer``
            submodule owns ``wte``, ``wpe`` and ``drop``.
        device: Target device accepted by ``torch.nn.Module.to``.
    """
    model.transformer.wte = model.transformer.wte.to(device)
    model.transformer.wpe = model.transformer.wpe.to(device)
    model.transformer.drop = model.transformer.drop.to(device)
@staticmethod @staticmethod
......
...@@ -319,6 +319,9 @@ class AwqQuantizer: ...@@ -319,6 +319,9 @@ class AwqQuantizer:
clear_memory() clear_memory()
if "attention_mask" in layer_kwargs.keys():
layer_kwargs["attention_mask"] = layer_kwargs["attention_mask"].to("cuda")
return modules, layer_kwargs, inps return modules, layer_kwargs, inps
def _get_input_feat(self, layer, named_linears): def _get_input_feat(self, layer, named_linears):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment