"git@developer.sourcefind.cn:OpenDAS/pytorch3d.git" did not exist on "8a27590c5fd6aba4d138660614c7a18832701671"
Commit 86fcf708 authored by EC2 Default User

added gptj

parent 3a8072a1
@@ -2,4 +2,5 @@ from .mpt import MptAWQForCausalLM
 from .llama import LlamaAWQForCausalLM
 from .opt import OptAWQForCausalLM
 from .falcon import FalconAWQForCausalLM
-from .bloom import BloomAWQForCausalLM
\ No newline at end of file
+from .bloom import BloomAWQForCausalLM
+from .gptj import GPTJAWQForCausalLM
\ No newline at end of file
@@ -8,7 +8,8 @@ AWQ_CAUSAL_LM_MODEL_MAP = {
     "opt": OptAWQForCausalLM,
     "RefinedWeb": FalconAWQForCausalLM,
     "RefinedWebModel": FalconAWQForCausalLM,
-    "bloom": BloomAWQForCausalLM
+    "bloom": BloomAWQForCausalLM,
+    "gptj": GPTJAWQForCausalLM
 }

 def check_and_get_model_type(model_dir, trust_remote_code=True):
...
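For context, this map is presumably keyed by config.model_type from the Hugging Face config, which is "gptj" for GPT-J checkpoints, so the new entry is what makes the dispatch below resolve to the added class. A minimal sketch of that lookup (resolve_awq_model_class is a hypothetical helper for illustration, not code from this commit):

# Illustrative only: assumes the map is keyed by config.model_type,
# which is "gptj" for GPT-J checkpoints in transformers.
from transformers import AutoConfig

def resolve_awq_model_class(model_dir, trust_remote_code=True):
    config = AutoConfig.from_pretrained(model_dir, trust_remote_code=trust_remote_code)
    if config.model_type not in AWQ_CAUSAL_LM_MODEL_MAP:
        raise TypeError(f"{config.model_type} isn't supported yet.")
    return AWQ_CAUSAL_LM_MODEL_MAP[config.model_type]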
from .base import BaseAWQForCausalLM
from transformers.models.gptj.modeling_gptj import GPTJForCausalLM, GPTJBlock


class GPTJAWQForCausalLM(BaseAWQForCausalLM):
    layer_type = "GPTJBlock"
    max_new_tokens_key = "max_position_embeddings"  # check this

    @staticmethod
    def get_model_layers(model: GPTJForCausalLM):
        # transformer blocks that AWQ will quantize
        return model.transformer.h

    @staticmethod
    def get_act_for_scaling(module: GPTJBlock):
        # the MLP activation output is scalable; its width follows fc_in
        return dict(
            is_scalable=True,
            scale_name="mlp.act",
            scale_layer=module.mlp.act,
            scale_shape=module.mlp.fc_in.out_features,
        )

    @staticmethod
    def move_embed(model: GPTJForCausalLM, device: str):
        # GPT-J only has a token embedding (positions are rotary, applied in attention)
        model.transformer.wte = model.transformer.wte.to(device)

    @staticmethod
    def get_layers_for_scaling(module: GPTJBlock, input_feat, module_kwargs):
        layers = []

        # attention input
        # GPT-J uses a parallel Attn + MLP block, so q/k/v and fc_in (linear 1)
        # all share the ln_1 output and are scaled as one group
        layers.append(dict(
            prev_op=module.ln_1,
            layers=[module.attn.q_proj, module.attn.k_proj,
                    module.attn.v_proj, module.mlp.fc_in],
            inp=input_feat['attn.q_proj'],
            module2inspect=module,
            kwargs=module_kwargs,
        ))

        # attention out
        # for some reason falcon skips this too
        layers.append(dict(
            prev_op=module.attn.v_proj,
            layers=[module.attn.out_proj],
            inp=input_feat['attn.out_proj'],
        ))

        # linear 1 (fc_in) is already covered by the attention input group above

        # linear 2
        # Falcon doesn't use this - maybe we don't need this
        layers.append(dict(
            prev_op=module.mlp.act,
            layers=[module.mlp.fc_out],
            inp=input_feat['mlp.fc_out'],
        ))

        return layers
\ No newline at end of file
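Assuming this repository follows the standard AutoAWQ entry points (AutoAWQForCausalLM.from_pretrained, quantize, save_quantized), a rough usage sketch for the newly supported model type would look like the following; the checkpoint path and quant_config values are placeholders, not part of this commit:

# Rough usage sketch -- entry points assumed from AutoAWQ; paths and
# quantization settings are placeholders, not taken from this commit.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "EleutherAI/gpt-j-6b"   # any GPT-J checkpoint
quant_path = "gpt-j-6b-awq"          # output directory
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4}

model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# dispatches to GPTJAWQForCausalLM via AWQ_CAUSAL_LM_MODEL_MAP["gptj"]
model.quantize(tokenizer, quant_config=quant_config)
model.save_quantized(quant_path)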