Commit bcfdeb38 authored by twaka

add comments

parent ad45716f
 from .base import BaseAWQForCausalLM
-from typing import Dict
 from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXLayer, GPTNeoXForCausalLM

 class GPTNeoXAWQForCausalLM(BaseAWQForCausalLM):
@@ -34,14 +33,16 @@ class GPTNeoXAWQForCausalLM(BaseAWQForCausalLM):
             inp=input_feat['attention.query_key_value'],
         ))

-        # # attention out
-        # layers.append(dict(
-        #     prev_op=module.attention.query_key_value,
-        #     layers=[module.attention.dense],
-        #     inp=input_feat['attention.dense'],
-        # ))
-        # NOTE: assumes "use_parallel_residual": false
+        # attention out
+        # Please refer to https://github.com/mit-han-lab/llm-awq/issues/2#issuecomment-1606297469
+        """
+        layers.append(dict(
+            prev_op=module.attention.query_key_value,
+            layers=[module.attention.dense],
+            inp=input_feat['attention.dense'],
+        ))
+        """

         # linear 1
         layers.append(dict(
             prev_op=module.post_attention_layernorm,
......
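For context, each of these dict entries describes an AWQ scaling group: prev_op is the preceding operation into which the inverse scales are folded, layers lists the linear modules that share a scale, and inp holds the calibration activations captured under that module name. The change above keeps the attention-output (attention.dense) group disabled per the linked llm-awq issue, so only the query/key/value and MLP projections are scaled for GPT-NeoX. Below is a minimal, hypothetical sketch of exercising this model definition end to end with AutoAWQ; the checkpoint id, output path, and quant_config values are illustrative placeholders and are not part of this commit.

# Minimal sketch (assumptions: checkpoint id, output path, and quantization
# settings are placeholders, not taken from this commit).
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "EleutherAI/pythia-2.8b"   # any GPT-NeoX-architecture checkpoint (placeholder)
quant_path = "pythia-2.8b-awq"          # where to write the quantized weights (placeholder)
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

# Load the FP16 model; the GPTNeoXAWQForCausalLM class in this diff is picked
# based on the model_type in the checkpoint's config.
model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Run AWQ calibration and quantization using the scaling groups defined above.
model.quantize(tokenizer, quant_config=quant_config)

# Persist the quantized weights and tokenizer.
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)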