OpenDAS / AutoAWQ · Commit 1c5ccc79 (unverified)

Authored Sep 27, 2023 by Casper; committed via GitHub on Sep 27, 2023.

Merge pull request #79 from casper-hansen/mistral

Mistral support

Parents: c57da6b8, 1ac3f976
Showing 3 changed files with 71 additions and 2 deletions:

awq/models/__init__.py   +2 -1
awq/models/auto.py       +2 -1
awq/models/mistral.py    +67 -0
awq/models/__init__.py

@@ -5,3 +5,4 @@ from .falcon import FalconAWQForCausalLM
 from .bloom import BloomAWQForCausalLM
 from .gptj import GPTJAWQForCausalLM
-from .gpt_bigcode import GptBigCodeAWQForCausalLM
\ No newline at end of file
+from .gpt_bigcode import GptBigCodeAWQForCausalLM
+from .mistral import MistralAWQForCausalLM
\ No newline at end of file
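Because the package's __init__.py re-exports each backend class, the new model becomes importable directly from awq.models:

from awq.models import MistralAWQForCausalLM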
awq/models/auto.py

@@ -12,7 +12,8 @@ AWQ_CAUSAL_LM_MODEL_MAP = {
     "falcon": FalconAWQForCausalLM,
     "bloom": BloomAWQForCausalLM,
     "gptj": GPTJAWQForCausalLM,
-    "gpt_bigcode": GptBigCodeAWQForCausalLM
+    "gpt_bigcode": GptBigCodeAWQForCausalLM,
+    "mistral": MistralAWQForCausalLM
 }
 
 def check_and_get_model_type(model_dir, trust_remote_code=True):
 ...
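The body of check_and_get_model_type is collapsed in this diff view. Since AWQ_CAUSAL_LM_MODEL_MAP is keyed by Hugging Face model_type strings, a minimal sketch of the lookup might read as follows; this is an assumption about the elided body, not the commit's exact code:

from transformers import AutoConfig

def check_and_get_model_type(model_dir, trust_remote_code=True):
    # Read the checkpoint's config and use its model_type field
    # ("mistral", "falcon", ...) as the key into AWQ_CAUSAL_LM_MODEL_MAP.
    config = AutoConfig.from_pretrained(model_dir, trust_remote_code=trust_remote_code)
    if config.model_type not in AWQ_CAUSAL_LM_MODEL_MAP:
        raise TypeError(f"{config.model_type} isn't supported yet.")
    return config.model_type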
awq/models/mistral.py (new file, mode 100644)

import logging

from .base import BaseAWQForCausalLM

try:
    from transformers.models.mistral.modeling_mistral import MistralDecoderLayer, MistralForCausalLM
except ImportError:
    # TODO: Remove once released on PyPi
    logging.warning("You need the latest transformers 4.34.0.dev0: pip install git+https://github.com/huggingface/transformers.git")
    MistralForCausalLM = None
    MistralDecoderLayer = None


class MistralAWQForCausalLM(BaseAWQForCausalLM):
    layer_type = "MistralDecoderLayer"
    max_new_tokens_key = "max_position_embeddings"

    @staticmethod
    def get_model_layers(model: MistralForCausalLM):
        return model.model.layers

    @staticmethod
    def get_act_for_scaling(module: MistralDecoderLayer):
        return dict(is_scalable=False)

    @staticmethod
    def move_embed(model: MistralForCausalLM, device: str):
        model.model.embed_tokens = model.model.embed_tokens.to(device)

    @staticmethod
    def get_layers_for_scaling(module: MistralDecoderLayer, input_feat, module_kwargs):
        layers = []

        # attention input
        layers.append(dict(
            prev_op=module.input_layernorm,
            layers=[module.self_attn.q_proj,
                    module.self_attn.k_proj, module.self_attn.v_proj],
            inp=input_feat['self_attn.q_proj'],
            module2inspect=module.self_attn, kwargs=module_kwargs,
        ))

        # attention out
        # Mistral uses grouped-query attention, so v_proj and o_proj can have
        # different weight shapes; the pair is only scaled when they match.
        # Please refer to https://github.com/mit-han-lab/llm-awq/pull/67#issue-1850622696
        if module.self_attn.v_proj.weight.shape == module.self_attn.o_proj.weight.shape:
            layers.append(dict(
                prev_op=module.self_attn.v_proj,
                layers=[module.self_attn.o_proj],
                inp=input_feat['self_attn.o_proj'],
            ))

        # linear 1
        layers.append(dict(
            prev_op=module.post_attention_layernorm,
            layers=[module.mlp.gate_proj, module.mlp.up_proj],
            inp=input_feat['mlp.gate_proj'],
            module2inspect=module.mlp,
        ))

        # linear 2
        layers.append(dict(
            prev_op=module.mlp.up_proj,
            layers=[module.mlp.down_proj],
            inp=input_feat['mlp.down_proj'],
        ))

        return layers
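For context, a typical end-to-end use of the new backend through AutoAWQ's high-level API is sketched below. This example is not part of the commit; the checkpoint name, output path, and quant_config values are illustrative assumptions:

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "mistralai/Mistral-7B-v0.1"   # assumed checkpoint name
quant_path = "mistral-7b-awq"              # assumed output directory

# Common 4-bit AWQ settings (assumed, not taken from this commit).
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

# Load the FP16 model and its tokenizer, run AWQ quantization, and save.
model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model.quantize(tokenizer, quant_config=quant_config)
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)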