Unverified Commit 6c05f669 authored by Sebastian Bodza's avatar Sebastian Bodza Committed by GitHub
Browse files

Fixing starcoder based models with 15B (#118)

parent 0cadd65f
...@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM): ...@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM):
@staticmethod
def move_embed(model: GPTBigCodeForCausalLM, device):
    """Move the model's embedding-related modules to *device*.

    Transfers the token embedding (``wte``), the position embedding
    (``wpe``), and the embedding dropout (``drop``) of
    ``model.transformer`` in place.  The ``wpe`` move was added here so
    position embeddings end up on the same device as the token
    embeddings (presumably required for GPTBigCode/StarCoder-based
    models — the fragment this comes from fixes exactly that).

    Args:
        model: A ``GPTBigCodeForCausalLM`` whose ``transformer``
            submodule owns ``wte``, ``wpe`` and ``drop``.
        device: Target device accepted by ``torch.nn.Module.to``.
    """
    model.transformer.wte = model.transformer.wte.to(device)
    model.transformer.wpe = model.transformer.wpe.to(device)
    model.transformer.drop = model.transformer.drop.to(device)
@staticmethod @staticmethod
......
...@@ -319,6 +319,9 @@ class AwqQuantizer: ...@@ -319,6 +319,9 @@ class AwqQuantizer:
clear_memory() clear_memory()
if "attention_mask" in layer_kwargs.keys():
layer_kwargs["attention_mask"] = layer_kwargs["attention_mask"].to("cuda")
return modules, layer_kwargs, inps return modules, layer_kwargs, inps
def _get_input_feat(self, layer, named_linears): def _get_input_feat(self, layer, named_linears):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment