OpenDAS / AutoAWQ / Commits

Commit fac1af55, authored Aug 25, 2023 by EC2 Default User
Parent: 86fcf708

updated max_new_tokens_key
Showing 1 changed file with 2 additions and 7 deletions

awq/models/gptj.py (+2, -7)
@@ -3,7 +3,7 @@ from transformers.models.gptj.modeling_gptj import GPTJForCausalLM, GPTJBlock
 class GPTJAWQForCausalLM(BaseAWQForCausalLM):
     layer_type = "GPTJBlock"
-    max_new_tokens_key = "max_position_embeddings"  # check this
+    max_new_tokens_key = "n_positions"
 
     @staticmethod
     def get_model_layers(model: GPTJForCausalLM):
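Context for the change above: GPT-J configs expose their maximum context length as n_positions rather than max_position_embeddings, so the old key would not be found on a serialized GPTJConfig. A quick check with transformers, as an illustration only (not part of the commit; assumes default GPTJConfig values):

    from transformers import GPTJConfig

    config = GPTJConfig()  # defaults used by EleutherAI/gpt-j-6B style checkpoints
    cfg = config.to_dict()

    # The serialized config carries the context length under "n_positions" ...
    print(cfg.get("n_positions"))            # 2048
    # ... and has no "max_position_embeddings" entry, so the old key would miss.
    print("max_position_embeddings" in cfg)  # False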
@@ -26,7 +26,7 @@ class GPTJAWQForCausalLM(BaseAWQForCausalLM):
     def get_layers_for_scaling(module: GPTJBlock, input_feat, module_kwargs):
         layers = []
 
-        # attention input
+        # attention input + linear 1
         layers.append(dict(
             prev_op=module.ln_1,
             layers=[module.attn.q_proj,
@@ -37,18 +37,13 @@ class GPTJAWQForCausalLM(BaseAWQForCausalLM):
         ))
 
         # attention out
-        # for some reason falcon skips this too
         layers.append(dict(
             prev_op=module.attn.v_proj,
             layers=[module.attn.out_proj],
             inp=input_feat['attn.out_proj'],
         ))
 
-        # Linear 1 is included in the attention input
-        # GPTJ uses a parallel Attn + MLP block so they share an input
-
         # linear 2
-        # Falcon doesn't use this - maybe we don't need this
         layers.append(dict(
             prev_op=module.mlp.act,
             layers=[module.mlp.fc_out],
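The comments removed in this hunk explained why there is no separate "linear 1" scaling group: GPTJBlock applies a single LayerNorm (ln_1) and runs the attention and the MLP in parallel on its output, so mlp.fc_in sees the same input as the q/k/v projections and is folded into the "attention input + linear 1" group above. A simplified sketch of that block layout, as an illustration only (the real GPTJBlock.forward in transformers takes additional arguments):

    import torch
    import torch.nn as nn

    class ParallelBlockSketch(nn.Module):
        """Minimal stand-in for GPTJBlock's parallel attention + MLP layout."""

        def __init__(self, attn: nn.Module, mlp: nn.Module, hidden_size: int):
            super().__init__()
            self.ln_1 = nn.LayerNorm(hidden_size)
            self.attn = attn
            self.mlp = mlp

        def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
            residual = hidden_states
            hidden_states = self.ln_1(hidden_states)
            attn_output = self.attn(hidden_states)   # q/k/v projections consume ln_1 output
            feed_forward = self.mlp(hidden_states)   # fc_in consumes the same ln_1 output
            return attn_output + feed_forward + residual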