Unverified Commit 6c05f669 authored by Sebastian Bodza's avatar Sebastian Bodza Committed by GitHub
Browse files

Fixing starcoder based models with 15B (#118)

parent 0cadd65f
...@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM): ...@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM):
@staticmethod
def move_embed(model: GPTBigCodeForCausalLM, device):
    """Move the model's embedding stack onto *device*.

    Relocates the token embedding (wte), the position embedding (wpe),
    and the embedding dropout module so that calibration inputs can be
    fed on that device without moving the full transformer.
    """
    transformer = model.transformer
    transformer.wte = transformer.wte.to(device)
    transformer.wpe = transformer.wpe.to(device)
    transformer.drop = transformer.drop.to(device)
@staticmethod @staticmethod
......
...@@ -318,6 +318,9 @@ class AwqQuantizer: ...@@ -318,6 +318,9 @@ class AwqQuantizer:
self.awq_model.move_embed(self.model, "cpu") self.awq_model.move_embed(self.model, "cpu")
clear_memory() clear_memory()
if "attention_mask" in layer_kwargs.keys():
layer_kwargs["attention_mask"] = layer_kwargs["attention_mask"].to("cuda")
return modules, layer_kwargs, inps return modules, layer_kwargs, inps
......
...@@ -108,4 +108,4 @@ def scale_gelu_fc(gelu: allowed_act_fns, fc: nn.Linear, scales: torch.Tensor): ...@@ -108,4 +108,4 @@ def scale_gelu_fc(gelu: allowed_act_fns, fc: nn.Linear, scales: torch.Tensor):
fc.weight.mul_(scales.view(1, -1).to(fc.weight.device)) fc.weight.mul_(scales.view(1, -1).to(fc.weight.device))
for p in fc.parameters(): for p in fc.parameters():
assert torch.isnan(p).sum() == 0 assert torch.isnan(p).sum() == 0
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment