Unverified Commit 6c05f669 authored by Sebastian Bodza's avatar Sebastian Bodza Committed by GitHub
Browse files

Fixing starcoder based models with 15B (#118)

parent 0cadd65f
......@@ -21,6 +21,7 @@ class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM):
@staticmethod
def move_embed(model: GPTBigCodeForCausalLM, device):
    """Relocate the model's embedding-related modules onto *device*.

    Moves the token embedding (wte), position embedding (wpe), and the
    embedding dropout layer (drop) so calibration inputs can be embedded
    on the target device before layer-by-layer quantization.
    """
    transformer = model.transformer
    # Same three submodules the original moved, just iterated by name.
    for attr in ("wte", "wpe", "drop"):
        setattr(transformer, attr, getattr(transformer, attr).to(device))
@staticmethod
......
......@@ -319,6 +319,9 @@ class AwqQuantizer:
clear_memory()
if "attention_mask" in layer_kwargs.keys():
layer_kwargs["attention_mask"] = layer_kwargs["attention_mask"].to("cuda")
return modules, layer_kwargs, inps
def _get_input_feat(self, layer, named_linears):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.