OpenDAS / AutoAWQ / Commits

Commit e2858cef
authored Aug 18, 2023 by Casper Hansen
Refactor model class. Add LLaMa support (LLaMa-2, Vicuna, etc).
parent d430694b
Showing 5 changed files with 84 additions and 29 deletions (+84 -29)
awq/models/__init__.py   +2  -1
awq/models/auto.py       +2  -1
awq/models/base.py       +9  -9
awq/models/llama.py      +56 -0
awq/models/mpt.py        +15 -18
awq/models/__init__.py

from .mpt import MptAWQForCausalLM
from .llama import LlamaAWQForCausalLM
\ No newline at end of file
awq/models/auto.py

 from transformers import AutoConfig
-from awq.models import MptAWQForCausalLM
+from awq.models import *
 from awq.models.base import BaseAWQForCausalLM

 AWQ_CAUSAL_LM_MODEL_MAP = {
     "mpt": MptAWQForCausalLM,
+    'llama': LlamaAWQForCausalLM
 }

 def check_and_get_model_type(model_dir, trust_remote_code=True):
     ...
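The body of check_and_get_model_type is collapsed in this diff. As a hedged sketch of how such a dispatcher is commonly written (the function below is illustrative only, not the repository's implementation), it reads model_type from the checkpoint's Hugging Face config and looks it up in AWQ_CAUSAL_LM_MODEL_MAP, so that "llama"-type checkpoints (LLaMa, LLaMa-2, Vicuna) resolve to LlamaAWQForCausalLM:

from transformers import AutoConfig

def check_and_get_model_type_sketch(model_dir, trust_remote_code=True):
    # Illustrative sketch: map the config's model_type onto the AWQ wrapper
    # classes registered in AWQ_CAUSAL_LM_MODEL_MAP above.
    config = AutoConfig.from_pretrained(model_dir, trust_remote_code=trust_remote_code)
    if config.model_type not in AWQ_CAUSAL_LM_MODEL_MAP:
        raise NotImplementedError(f"Model type {config.model_type} is not supported yet.")
    return config.model_type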
awq/models/base.py

@@ -270,13 +270,13 @@ class BaseAWQForCausalLM:
     @staticmethod
     def _scale_activations(self, layer):
-        act_function = self.get_act_from_layer(layer)
+        scale_dict = self.get_act_for_scaling(layer)

-        if act_function is not None and not isinstance(act_function, ScaledActivation):
+        if scale_dict['is_scalable']:
+            if not isinstance(scale_dict['scale_layer'], ScaledActivation):
                 param = next(layer.parameters())

                 # get activation scale
-                scale_dict = self.get_act_for_scaling(layer)
                 scale_like = torch.ones(scale_dict['scale_shape'], dtype=param.dtype, device=param.device)

                 # scale activation
                 ...
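The refactor changes the contract of _scale_activations: instead of asking each model class for the activation module directly (get_act_from_layer), it now asks for a scaling spec via get_act_for_scaling and simply skips models that report is_scalable=False. For context, a minimal sketch of what a ScaledActivation wrapper typically does is shown below; the repository's actual class lives elsewhere and may differ in detail.

import torch
import torch.nn as nn

class ScaledActivationSketch(nn.Module):
    """Minimal sketch (assumption, not the repository's class): wrap an
    activation and divide its output by learnable per-channel scales,
    which is what the scale_like tensor above is initialized for."""
    def __init__(self, module: nn.Module, scales: torch.Tensor):
        super().__init__()
        self.act = module
        self.scales = nn.Parameter(scales.data)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # scales has length scale_dict['scale_shape'], i.e. the FFN hidden size
        return self.act(x) / self.scales.view(1, 1, -1).to(x.device)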
awq/models/llama.py  0 → 100644 (new file)

from .base import BaseAWQForCausalLM
from transformers.models.llama.modeling_llama import LlamaDecoderLayer, LlamaForCausalLM

class LlamaAWQForCausalLM(BaseAWQForCausalLM):
    layer_type = "LlamaDecoderLayer"

    @staticmethod
    def get_model_layers(model: LlamaForCausalLM):
        return model.model.layers

    @staticmethod
    def get_act_for_scaling(module: LlamaDecoderLayer):
        return dict(
            is_scalable=False
        )

    @staticmethod
    def move_embed(model: LlamaForCausalLM, device: str):
        model.model.embed_tokens = model.model.embed_tokens.to(device)

    @staticmethod
    def get_layers_for_scaling(module: LlamaDecoderLayer, input_feat, module_kwargs):
        layers = []

        # attention input
        layers.append(dict(
            prev_op=module.input_layernorm,
            layers=[module.self_attn.q_proj,
                    module.self_attn.k_proj, module.self_attn.v_proj],
            inp=input_feat['self_attn.q_proj'],
            module2inspect=module.self_attn, kwargs=module_kwargs,
        ))

        # attention out
        # Please refer to https://github.com/mit-han-lab/llm-awq/pull/67#issue-1850622696
        if module.self_attn.v_proj.weight.shape == module.self_attn.o_proj.weight.shape:
            layers.append(dict(
                prev_op=module.self_attn.v_proj,
                layers=[module.self_attn.o_proj],
                inp=input_feat['self_attn.o_proj'],
            ))

        # fc1
        layers.append(dict(
            prev_op=module.post_attention_layernorm,
            layers=[module.mlp.gate_proj, module.mlp.up_proj],
            inp=input_feat['mlp.gate_proj'],
            module2inspect=module.mlp,
        ))

        # fc2
        layers.append(dict(
            prev_op=module.mlp.up_proj,
            layers=[module.mlp.down_proj],
            inp=input_feat['mlp.down_proj'],
        ))

        return layers
\ No newline at end of file
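Each dict returned by get_layers_for_scaling groups the linear layers that share one AWQ scale together with the op whose output feeds them (prev_op) and the captured calibration input (inp). A hedged usage sketch for the class above; the checkpoint path and the zero-filled dummy features are placeholders, not repository code:

import torch
from transformers import AutoModelForCausalLM
from awq.models.llama import LlamaAWQForCausalLM

model = AutoModelForCausalLM.from_pretrained("path/to/llama-or-vicuna-checkpoint")
layer = LlamaAWQForCausalLM.get_model_layers(model)[0]

# input_feat normally holds activations captured during calibration;
# zero tensors are enough to inspect the group structure here.
dummy_feat = {name: torch.zeros(1) for name in
              ("self_attn.q_proj", "self_attn.o_proj", "mlp.gate_proj", "mlp.down_proj")}

for group in LlamaAWQForCausalLM.get_layers_for_scaling(layer, dummy_feat, module_kwargs={}):
    scaled = [type(m).__name__ for m in group["layers"]]
    print(type(group["prev_op"]).__name__, "->", scaled)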
awq/models/mpt.py

@@ -7,6 +7,20 @@ class MptAWQForCausalLM(BaseAWQForCausalLM):
    def get_model_layers(model):
        return model.transformer.blocks

    @staticmethod
    def get_act_for_scaling(module):
        return dict(
            is_scalable=True,
            scale_name="ffn.act",
            scale_layer=module.ffn.act,
            scale_shape=module.ffn.up_proj.out_features
        )

    @staticmethod
    def move_embed(model, device):
        model.transformer.wte = model.transformer.wte.to(device)
        model.transformer.emb_drop = model.transformer.emb_drop.to(device)

    @staticmethod
    def get_layers_for_scaling(module, input_feat, module_kwargs):
        layers = []
...

@@ -43,20 +57,3 @@ class MptAWQForCausalLM(BaseAWQForCausalLM):
         ))

         return layers
\ No newline at end of file
-    @staticmethod
-    def get_act_from_layer(layer):
-        return layer.ffn.act
-
-    @staticmethod
-    def get_act_for_scaling(module):
-        return dict(
-            scale_name="ffn.act",
-            scale_layer=module.ffn.act,
-            scale_shape=module.ffn.up_proj.out_features
-        )
-
-    @staticmethod
-    def move_embed(model, device):
-        model.transformer.wte = model.transformer.wte.to(device)
-        model.transformer.emb_drop = model.transformer.emb_drop.to(device)
\ No newline at end of file
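Note the contrast with llama.py: MPT reports is_scalable=True because its FFN activation (ffn.act) gets wrapped and rescaled, while LLaMa's gated MLP folds the scaling into its projections and reports is_scalable=False. The field names below are taken from this diff; the TypedDict itself is only an illustration of the per-model contract that the refactored _scale_activations now expects, not repository code.

from typing import TypedDict
import torch.nn as nn

class ActScalingSpec(TypedDict, total=False):
    is_scalable: bool       # False -> _scale_activations is a no-op (LLaMa)
    scale_name: str         # dotted path of the activation module, e.g. "ffn.act"
    scale_layer: nn.Module  # the activation to wrap in ScaledActivation (MPT: module.ffn.act)
    scale_shape: int        # length of the per-channel scale vector, e.g. ffn.up_proj.out_features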