Commit 5bd6fbc7 authored by Casper Hansen's avatar Casper Hansen

Update module name

parent adc5304b
@@ -70,7 +70,7 @@ from typing import List, Tuple, Union
 from awq.utils.utils import set_module_name
 from awq.modules.fused.mlp import QuantLlamaMLP
 from awq.modules.fused.norm import FTLlamaRMSNorm
-from awq.modules.fused.attn import QuantLlamaAttentionFused
+from awq.modules.fused.attn import QuantAttentionFused
 from awq.modules.linear import WQLinear_GEMM, WQLinear_GEMV
 from transformers.models.llama.modeling_llama import LlamaAttention, LlamaRMSNorm, LlamaMLP
@@ -97,7 +97,7 @@ class LlamaFuser:
     def fuse_attention(self):
         for name, module in self.attention_modules:
             qkv_layer: Union[WQLinear_GEMM, WQLinear_GEMV] = self._fuse_qkv(module)
-            attn = QuantLlamaAttentionFused(
+            attn = QuantAttentionFused(
                 module.hidden_size,
                 module.num_heads,
                 qkv_layer,
...
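For context, the renamed QuantAttentionFused is built from each LlamaAttention module's dimensions plus a fused QKV projection, then swapped into the model in place of the original attention module. Below is a minimal sketch of that replace-module-by-name pattern. It is an assumption-laden illustration, not the real AWQ code: replace_module, ToyModel, and FusedAttentionStub are hypothetical stand-ins (the actual set_module_name and QuantAttentionFused signatures are not fully shown in this diff, and the constructor call above is truncated).

import torch
import torch.nn as nn

def replace_module(model: nn.Module, name: str, new_module: nn.Module) -> None:
    # Hypothetical stand-in for awq.utils.utils.set_module_name:
    # replace the submodule at dotted path `name` with `new_module`.
    if "." in name:
        parent_name, child_name = name.rsplit(".", 1)
        parent = model.get_submodule(parent_name)
    else:
        parent, child_name = model, name
    setattr(parent, child_name, new_module)

class FusedAttentionStub(nn.Module):
    # Toy placeholder for a fused attention module; NOT the real QuantAttentionFused.
    def __init__(self, hidden_size: int, num_heads: int, qkv_layer: nn.Module):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.qkv = qkv_layer  # one projection producing Q, K and V together

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        q, k, v = self.qkv(x).chunk(3, dim=-1)  # split the fused projection
        return v  # attention math elided; this stub only shows the wiring

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.attn = nn.Linear(8, 8)  # plays the role of LlamaAttention

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.attn(x)

# Usage: fuse Q/K/V into a single projection, then swap the module by name,
# mirroring the loop in LlamaFuser.fuse_attention above.
model = ToyModel()
qkv = nn.Linear(8, 24)  # one matmul in place of separate q/k/v projections
replace_module(model, "attn", FusedAttentionStub(hidden_size=8, num_heads=2, qkv_layer=qkv))
print(model(torch.randn(1, 8)).shape)  # torch.Size([1, 8])

Doing the replacement by dotted name keeps the fuser decoupled from the model's class structure: it only needs the names yielded when iterating the attention modules, not references to their parents.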