"git@developer.sourcefind.cn:OpenDAS/pytorch3d.git" did not exist on "8a27590c5fd6aba4d138660614c7a18832701671"
Commit 86fcf708 authored by EC2 Default User

added gptj

parent 3a8072a1
@@ -2,4 +2,5 @@ from .mpt import MptAWQForCausalLM
 from .llama import LlamaAWQForCausalLM
 from .opt import OptAWQForCausalLM
 from .falcon import FalconAWQForCausalLM
-from .bloom import BloomAWQForCausalLM
\ No newline at end of file
+from .bloom import BloomAWQForCausalLM
+from .gptj import GPTJAWQForCausalLM
\ No newline at end of file
@@ -8,7 +8,8 @@ AWQ_CAUSAL_LM_MODEL_MAP = {
     "opt": OptAWQForCausalLM,
     "RefinedWeb": FalconAWQForCausalLM,
     "RefinedWebModel": FalconAWQForCausalLM,
-    "bloom": BloomAWQForCausalLM
+    "bloom": BloomAWQForCausalLM,
+    "gptj": GPTJAWQForCausalLM
 }

 def check_and_get_model_type(model_dir, trust_remote_code=True):
...
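For context, this map is presumably keyed by config.model_type from the Hugging Face config, which is "gptj" for GPT-J checkpoints, so the new entry is what makes the dispatch below resolve to the added class. A minimal sketch of that lookup (resolve_awq_model_class is a hypothetical helper for illustration, not code from this commit):

# Illustrative only: assumes the map is keyed by config.model_type,
# which is "gptj" for GPT-J checkpoints in transformers.
from transformers import AutoConfig

def resolve_awq_model_class(model_dir, trust_remote_code=True):
    config = AutoConfig.from_pretrained(model_dir, trust_remote_code=trust_remote_code)
    if config.model_type not in AWQ_CAUSAL_LM_MODEL_MAP:
        raise TypeError(f"{config.model_type} isn't supported yet.")
    return AWQ_CAUSAL_LM_MODEL_MAP[config.model_type]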
from .base import BaseAWQForCausalLM
from transformers.models.gptj.modeling_gptj import GPTJForCausalLM, GPTJBlock


class GPTJAWQForCausalLM(BaseAWQForCausalLM):
    layer_type = "GPTJBlock"
    max_new_tokens_key = "max_position_embeddings"  # check this

    @staticmethod
    def get_model_layers(model: GPTJForCausalLM):
        # transformer blocks that AWQ will quantize
        return model.transformer.h

    @staticmethod
    def get_act_for_scaling(module: GPTJBlock):
        # the MLP activation output is scalable; its width follows fc_in
        return dict(
            is_scalable=True,
            scale_name="mlp.act",
            scale_layer=module.mlp.act,
            scale_shape=module.mlp.fc_in.out_features,
        )

    @staticmethod
    def move_embed(model: GPTJForCausalLM, device: str):
        # GPT-J only has a token embedding (positions are rotary, applied in attention)
        model.transformer.wte = model.transformer.wte.to(device)

    @staticmethod
    def get_layers_for_scaling(module: GPTJBlock, input_feat, module_kwargs):
        layers = []

        # attention input
        # GPT-J uses a parallel Attn + MLP block, so q/k/v and fc_in (linear 1)
        # all share the ln_1 output and are scaled as one group
        layers.append(dict(
            prev_op=module.ln_1,
            layers=[module.attn.q_proj, module.attn.k_proj,
                    module.attn.v_proj, module.mlp.fc_in],
            inp=input_feat['attn.q_proj'],
            module2inspect=module,
            kwargs=module_kwargs,
        ))

        # attention out
        # for some reason falcon skips this too
        layers.append(dict(
            prev_op=module.attn.v_proj,
            layers=[module.attn.out_proj],
            inp=input_feat['attn.out_proj'],
        ))

        # linear 1 (fc_in) is already covered by the attention input group above

        # linear 2
        # Falcon doesn't use this - maybe we don't need this
        layers.append(dict(
            prev_op=module.mlp.act,
            layers=[module.mlp.fc_out],
            inp=input_feat['mlp.fc_out'],
        ))

        return layers
\ No newline at end of file
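Assuming this repository follows the standard AutoAWQ entry points (AutoAWQForCausalLM.from_pretrained, quantize, save_quantized), a rough usage sketch for the newly supported model type would look like the following; the checkpoint path and quant_config values are placeholders, not part of this commit:

# Rough usage sketch -- entry points assumed from AutoAWQ; paths and
# quantization settings are placeholders, not taken from this commit.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "EleutherAI/gpt-j-6b"   # any GPT-J checkpoint
quant_path = "gpt-j-6b-awq"          # output directory
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4}

model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# dispatches to GPTJAWQForCausalLM via AWQ_CAUSAL_LM_MODEL_MAP["gptj"]
model.quantize(tokenizer, quant_config=quant_config)
model.save_quantized(quant_path)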