Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
AutoAWQ
Commits
d73d13b2
Commit
d73d13b2
authored
Aug 19, 2023
by
Casper Hansen
Browse files
Support Falcon 7B+40B
parent
06073073
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
64 additions
and
2 deletions
+64
-2
awq/models/__init__.py
awq/models/__init__.py
+2
-1
awq/models/auto.py
awq/models/auto.py
+3
-1
awq/models/falcon.py
awq/models/falcon.py
+59
-0
No files found.
awq/models/__init__.py
View file @
d73d13b2
from
.mpt
import
MptAWQForCausalLM
from
.mpt
import
MptAWQForCausalLM
from
.llama
import
LlamaAWQForCausalLM
from
.llama
import
LlamaAWQForCausalLM
from
.opt
import
OptAWQForCausalLM
from
.opt
import
OptAWQForCausalLM
\ No newline at end of file
from
.falcon
import
FalconAWQForCausalLM
\ No newline at end of file
awq/models/auto.py
View file @
d73d13b2
...
@@ -5,7 +5,9 @@ from awq.models.base import BaseAWQForCausalLM
...
@@ -5,7 +5,9 @@ from awq.models.base import BaseAWQForCausalLM
# Maps the `model_type` field of a checkpoint's config.json to the AWQ
# wrapper class that knows how to quantize that architecture.
# Falcon checkpoints historically shipped with model_type "RefinedWeb"
# (40B) and "RefinedWebModel" (7B), so both aliases resolve to the same
# Falcon wrapper.
AWQ_CAUSAL_LM_MODEL_MAP = {
    "mpt": MptAWQForCausalLM,
    "llama": LlamaAWQForCausalLM,
    "opt": OptAWQForCausalLM,
    "RefinedWeb": FalconAWQForCausalLM,
    "RefinedWebModel": FalconAWQForCausalLM,
}
def
check_and_get_model_type
(
model_dir
,
trust_remote_code
=
True
):
def
check_and_get_model_type
(
model_dir
,
trust_remote_code
=
True
):
...
...
awq/models/falcon.py
0 → 100644
View file @
d73d13b2
from
.base
import
BaseAWQForCausalLM
from
transformers.models.falcon.modeling_falcon
import
FalconDecoderLayer
,
FalconForCausalLM
class FalconAWQForCausalLM(BaseAWQForCausalLM):
    """AWQ quantization adapter for Falcon causal-LM checkpoints.

    Handles both decoder-block layouts: Falcon 7B (a single
    ``input_layernorm`` shared by attention and MLP) and Falcon 40B
    (separate ``ln_attn`` / ``ln_mlp`` norms).
    """

    # Class name of the repeated transformer block targeted for quantization.
    layer_type = "FalconDecoderLayer"

    @staticmethod
    def get_model_layers(model: FalconForCausalLM):
        """Return the stack of decoder blocks (``transformer.h``)."""
        return model.transformer.h

    @staticmethod
    def get_act_for_scaling(module: FalconDecoderLayer):
        """Describe the MLP activation whose output may be scaled.

        ``scale_shape`` is the width of the activation's input, i.e. the
        out_features of the preceding up-projection.
        """
        return dict(
            is_scalable=True,
            scale_name="mlp.act",
            scale_layer=module.mlp.act,
            scale_shape=module.mlp.dense_h_to_4h.out_features,
        )

    @staticmethod
    def move_embed(model: FalconForCausalLM, device):
        """Move the word-embedding table onto *device*."""
        embeddings = model.transformer.word_embeddings
        model.transformer.word_embeddings = embeddings.to(device)

    @staticmethod
    def get_layers_for_scaling(module: FalconDecoderLayer, input_feat, module_kwargs):
        """Build the scaling groups (prev_op -> target linears) for one block."""
        groups = []

        # Falcon 7B (older architecture): one shared layernorm feeds both the
        # attention QKV projection and the MLP up-projection, so they are
        # scaled together as a single group.
        # NOTE(review): 71 attention heads is used as a fingerprint of the 7B
        # config — confirm against config.json if new Falcon variants appear.
        if module.config.num_attention_heads == 71:
            # linear 1 + attention
            groups.append(dict(
                prev_op=module.input_layernorm,
                layers=[
                    module.mlp.dense_h_to_4h,
                    module.self_attention.query_key_value,
                ],
                inp=input_feat['self_attention.query_key_value'],
                module2inspect=module,
                kwargs=module_kwargs,
            ))
        # Falcon 40B (newer architecture): attention and MLP each have their
        # own layernorm, so they form two independent groups.
        else:
            # linear 1 + attention
            groups.append(dict(
                prev_op=module.ln_attn,
                layers=[module.self_attention.query_key_value],
                inp=input_feat['self_attention.query_key_value'],
                module2inspect=module,
                kwargs=module_kwargs,
            ))

            # linear 2
            groups.append(dict(
                prev_op=module.ln_mlp,
                layers=[module.mlp.dense_h_to_4h],
                inp=input_feat['mlp.dense_h_to_4h'],
                module2inspect=module,
                kwargs=module_kwargs,
            ))

        return groups
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment