Unverified commit 386fede8 authored by Casper, committed by GitHub

Merge pull request #61 from casper-hansen/bigcode

Add GPT BigCode support (StarCoder)
parents 133dd7a7 9060b980
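
For context, a minimal end-to-end sketch of what this support enables: quantizing a StarCoder checkpoint through AutoAWQ. This is not part of the diff; the model path, `quant_config` values, and output directory are illustrative assumptions based on the library's typical interface.

```python
# Hedged usage sketch: quantize a GPT BigCode (StarCoder) checkpoint.
# Paths and quant_config values below are assumptions, not from this PR.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "bigcode/starcoder"   # assumed checkpoint name
quant_path = "starcoder-awq"       # assumed output directory
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4}

model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

model.quantize(tokenizer, quant_config=quant_config)  # AWQ calibration pass
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)
```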
@@ -3,4 +3,5 @@ from .llama import LlamaAWQForCausalLM
 from .opt import OptAWQForCausalLM
 from .falcon import FalconAWQForCausalLM
 from .bloom import BloomAWQForCausalLM
-from .gptj import GPTJAWQForCausalLM
\ No newline at end of file
+from .gptj import GPTJAWQForCausalLM
+from .gpt_bigcode import GptBigCodeAWQForCausalLM
\ No newline at end of file
@@ -11,7 +11,8 @@ AWQ_CAUSAL_LM_MODEL_MAP = {
     "RefinedWebModel": FalconAWQForCausalLM,
     "falcon": FalconAWQForCausalLM,
     "bloom": BloomAWQForCausalLM,
-    "gptj": GPTJAWQForCausalLM
+    "gptj": GPTJAWQForCausalLM,
+    "gpt_bigcode": GptBigCodeAWQForCausalLM
 }

 def check_and_get_model_type(model_dir, trust_remote_code=True):
...
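
The body of `check_and_get_model_type` is elided above. A hedged sketch of the dispatch it presumably performs, assuming the usual `transformers` `AutoConfig` flow (the function name suffix and error message here are invented):

```python
from transformers import AutoConfig

def check_and_get_model_type_sketch(model_dir, trust_remote_code=True):
    # config.json's model_type is the key into AWQ_CAUSAL_LM_MODEL_MAP;
    # StarCoder checkpoints report "gpt_bigcode".
    config = AutoConfig.from_pretrained(model_dir, trust_remote_code=trust_remote_code)
    if config.model_type not in AWQ_CAUSAL_LM_MODEL_MAP:
        raise TypeError(f"{config.model_type} isn't supported yet.")
    return config.model_type
```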
from .base import BaseAWQForCausalLM
from transformers.models.gpt_bigcode.modeling_gpt_bigcode import GPTBigCodeForCausalLM, GPTBigCodeBlock as OldGptBigCodeBlock

class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM):
    layer_type = "GPTBigCodeBlock"
    max_new_tokens_key = "n_positions"

    @staticmethod
    def get_model_layers(model: GPTBigCodeForCausalLM):
        return model.transformer.h

    @staticmethod
    def get_act_for_scaling(module: OldGptBigCodeBlock):
        return dict(
            is_scalable=True,
            scale_name="mlp.act",
            scale_layer=module.mlp.act,
            scale_shape=module.mlp.c_fc.out_features
        )

    @staticmethod
    def move_embed(model: GPTBigCodeForCausalLM, device):
        model.transformer.wte = model.transformer.wte.to(device)
        model.transformer.drop = model.transformer.drop.to(device)

    @staticmethod
    def get_layers_for_scaling(module: OldGptBigCodeBlock, input_feat, module_kwargs):
        layers = []

        # attention input
        layers.append(dict(
            prev_op=module.ln_1,
            layers=[module.attn.c_attn],
            inp=input_feat['attn.c_attn'],
            module2inspect=module.attn,
            kwargs=module_kwargs
        ))

        # linear 1
        layers.append(dict(
            prev_op=module.ln_2,
            layers=[module.mlp.c_fc],
            inp=input_feat['mlp.c_fc'],
            module2inspect=module.mlp
        ))

        # linear 2
        layers.append(dict(
            prev_op=module.mlp.act,
            layers=[module.mlp.c_proj],
            inp=input_feat['mlp.c_proj']
        ))

        return layers
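
A quick sanity check (not part of the PR) that the attribute paths the class relies on exist on a `transformers` GPT BigCode model; the tiny config sizes are arbitrary assumptions chosen for speed:

```python
# Hedged sanity check: build a tiny random GPTBigCode model and confirm
# the module paths used by GptBigCodeAWQForCausalLM.
from transformers import GPTBigCodeConfig, GPTBigCodeForCausalLM

config = GPTBigCodeConfig(n_embd=64, n_layer=2, n_head=4, n_positions=128)
model = GPTBigCodeForCausalLM(config)

blocks = model.transformer.h                        # get_model_layers
block = blocks[0]
assert type(block).__name__ == "GPTBigCodeBlock"    # matches layer_type
print(block.mlp.c_fc.out_features)                  # scale_shape for mlp.act
print(block.attn.c_attn, block.mlp.c_proj)          # linears scaled via prev_op
```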
 import torch
 import torch.nn as nn
 from typing import Tuple
 from awq.modules.act import ScaledActivation
-from transformers.activations import NewGELUActivation
 from awq.utils.module import get_op_by_name, set_op_by_name
 from transformers.models.bloom.modeling_bloom import BloomGelu
 from transformers.models.llama.modeling_llama import LlamaRMSNorm
+from transformers.activations import NewGELUActivation, PytorchGELUTanh

 allowed_norms = [nn.LayerNorm, LlamaRMSNorm]
-allowed_act_fns = [nn.GELU, BloomGelu, NewGELUActivation]
+allowed_act_fns = [nn.GELU, BloomGelu, NewGELUActivation, PytorchGELUTanh]

 @torch.no_grad()
 def apply_clip(module, clip_list: Tuple[str, torch.Tensor]):
...
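
For reference, a hedged sketch of how `allowed_act_fns` is typically consumed during scaling: an activation on the allow-list (GPT BigCode's `mlp.act` is a `PytorchGELUTanh`, hence the addition above) gets wrapped in `ScaledActivation`, while the matching scale is absorbed into the following linear (`mlp.c_proj` here). The helper name is invented and the exact `ScaledActivation` signature is an assumption, not quoted from the diff:

```python
# Illustrative sketch only; imports mirror the diffed module above.
from awq.modules.act import ScaledActivation
from awq.utils.module import get_op_by_name, set_op_by_name

def scale_activation_sketch(module, scale_name, scales, allowed_act_fns):
    """Replace an allow-listed activation with a scaled wrapper (sketch)."""
    act = get_op_by_name(module, scale_name)  # e.g. "mlp.act"
    if not any(isinstance(act, t) for t in allowed_act_fns):
        return  # activations not on the allow-list are left unscaled
    # The wrapper divides the activation's output by per-channel scales;
    # the inverse is typically multiplied into the next linear's weights.
    set_op_by_name(module, scale_name, ScaledActivation(act, scales))
```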