Unverified Commit 6c05f669 authored by Sebastian Bodza's avatar Sebastian Bodza Committed by GitHub
Browse files

Fixing starcoder based models with 15B (#118)

parent 0cadd65f
...@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM): ...@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM):
@staticmethod
def move_embed(model: GPTBigCodeForCausalLM, device):
    """Move the model's embedding stack onto *device*.

    Relocates the token embedding (wte), the position embedding (wpe),
    and the embedding dropout module so that calibration inputs can be
    fed on that device without moving the full transformer.
    """
    transformer = model.transformer
    transformer.wte = transformer.wte.to(device)
    transformer.wpe = transformer.wpe.to(device)
    transformer.drop = transformer.drop.to(device)
@staticmethod @staticmethod
......
...@@ -318,6 +318,9 @@ class AwqQuantizer: ...@@ -318,6 +318,9 @@ class AwqQuantizer:
self.awq_model.move_embed(self.model, "cpu") self.awq_model.move_embed(self.model, "cpu")
clear_memory() clear_memory()
if "attention_mask" in layer_kwargs.keys():
layer_kwargs["attention_mask"] = layer_kwargs["attention_mask"].to("cuda")
return modules, layer_kwargs, inps return modules, layer_kwargs, inps
......
...@@ -108,4 +108,4 @@ def scale_gelu_fc(gelu: allowed_act_fns, fc: nn.Linear, scales: torch.Tensor): ...@@ -108,4 +108,4 @@ def scale_gelu_fc(gelu: allowed_act_fns, fc: nn.Linear, scales: torch.Tensor):
fc.weight.mul_(scales.view(1, -1).to(fc.weight.device)) fc.weight.mul_(scales.view(1, -1).to(fc.weight.device))
for p in fc.parameters(): for p in fc.parameters():
assert torch.isnan(p).sum() == 0 assert torch.isnan(p).sum() == 0
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment