Fix model references

0aa4a596 · Casper Hansen · 1df0136e · 0aa4a596
Commit 0aa4a596 authored Sep 02, 2023 by Casper Hansen
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 5 additions and 6 deletions

awq/models/llama.py +5 -6

No files found.
--- a/awq/models/llama.py
+++ b/awq/models/llama.py
@@ -6,8 +6,8 @@ class LlamaAWQForCausalLM(BaseAWQForCausalLM):
    max_new_tokens_key = "max_position_embeddings"
    @staticmethod
-    def fuse_layers(awq_model: BaseAWQForCausalLM):
+    def fuse_layers(model: LlamaForCausalLM):
-        fuser = LlamaFuser(awq_model)
+        fuser = LlamaFuser(model)
        fuser.fuse_attention()
        fuser.fuse_rmsnorm()
        fuser.fuse_mlp()
@@ -75,9 +75,8 @@ from awq.modules.fused_attn import QuantLlamaAttention
 from transformers.models.llama.modeling_llama import LlamaAttention, LlamaRMSNorm, LlamaMLP
 class LlamaFuser:
-    def __init__(self, awq_model: BaseAWQForCausalLM):
+    def __init__(self, model):
-        self.awq_model = awq_model
+        self.model = model
-        self.model = awq_model.model
        self.attention_modules: List[Tuple[str, LlamaAttention]] = [
            (name, module) for name, module in self.model.named_modules()
@@ -103,7 +102,7 @@ class LlamaFuser:
                qkv_layer,
                module.o_proj,
                qkv_layer.qweight.device,
-                self.awq_model.model.config.max_new_tokens
+                self.model.config.max_new_tokens
            )
            set_module_name(self.model, name, attn)