Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
AutoAWQ
Commits
386fede8
Unverified
Commit
386fede8
authored
Sep 21, 2023
by
Casper
Committed by
GitHub
Sep 21, 2023
Browse files
Merge pull request #61 from casper-hansen/bigcode
Add GPT BigCode support (StarCoder)
parents
133dd7a7
9060b980
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
60 additions
and
5 deletions
+60
-5
awq/models/__init__.py
awq/models/__init__.py
+2
-1
awq/models/auto.py
awq/models/auto.py
+2
-1
awq/models/gpt_bigcode.py
awq/models/gpt_bigcode.py
+54
-0
awq/quantize/scale.py
awq/quantize/scale.py
+2
-3
No files found.
awq/models/__init__.py
View file @
386fede8
...
@@ -3,4 +3,5 @@ from .llama import LlamaAWQForCausalLM
...
@@ -3,4 +3,5 @@ from .llama import LlamaAWQForCausalLM
from
.opt
import
OptAWQForCausalLM
from
.opt
import
OptAWQForCausalLM
from
.falcon
import
FalconAWQForCausalLM
from
.falcon
import
FalconAWQForCausalLM
from
.bloom
import
BloomAWQForCausalLM
from
.bloom
import
BloomAWQForCausalLM
from
.gptj
import
GPTJAWQForCausalLM
from
.gptj
import
GPTJAWQForCausalLM
\ No newline at end of file
from
.gpt_bigcode
import
GptBigCodeAWQForCausalLM
\ No newline at end of file
awq/models/auto.py
View file @
386fede8
...
@@ -11,7 +11,8 @@ AWQ_CAUSAL_LM_MODEL_MAP = {
...
@@ -11,7 +11,8 @@ AWQ_CAUSAL_LM_MODEL_MAP = {
"RefinedWebModel"
:
FalconAWQForCausalLM
,
"RefinedWebModel"
:
FalconAWQForCausalLM
,
"falcon"
:
FalconAWQForCausalLM
,
"falcon"
:
FalconAWQForCausalLM
,
"bloom"
:
BloomAWQForCausalLM
,
"bloom"
:
BloomAWQForCausalLM
,
"gptj"
:
GPTJAWQForCausalLM
"gptj"
:
GPTJAWQForCausalLM
,
"gpt_bigcode"
:
GptBigCodeAWQForCausalLM
}
}
def
check_and_get_model_type
(
model_dir
,
trust_remote_code
=
True
):
def
check_and_get_model_type
(
model_dir
,
trust_remote_code
=
True
):
...
...
awq/models/gpt_bigcode.py
0 → 100644
View file @
386fede8
from
.base
import
BaseAWQForCausalLM
from
transformers.models.gpt_bigcode.modeling_gpt_bigcode
import
GPTBigCodeForCausalLM
,
GPTBigCodeBlock
as
OldGptBigCodeBlock
class GptBigCodeAWQForCausalLM(BaseAWQForCausalLM):
    """AWQ adapter for GPT BigCode (StarCoder-family) causal language models.

    Supplies the model-specific hooks the AWQ base class needs: where the
    transformer blocks live, which activation gets an extra scale, how to
    move the embeddings, and which linear groups to scale together.
    """

    # Class name of one transformer block, used to locate quantizable layers.
    layer_type = "GPTBigCodeBlock"
    # Config attribute that holds the model's maximum sequence length.
    max_new_tokens_key = "n_positions"

    @staticmethod
    def get_model_layers(model: GPTBigCodeForCausalLM):
        """Return the stack of transformer blocks of *model*."""
        return model.transformer.h

    @staticmethod
    def get_act_for_scaling(module: OldGptBigCodeBlock):
        """Describe the MLP activation of *module* that receives a scale.

        The scale vector is sized by the output width of the preceding
        ``c_fc`` projection.
        """
        return {
            "is_scalable": True,
            "scale_name": "mlp.act",
            "scale_layer": module.mlp.act,
            "scale_shape": module.mlp.c_fc.out_features,
        }

    @staticmethod
    def move_embed(model: GPTBigCodeForCausalLM, device):
        """Move the token embedding and embedding dropout onto *device*."""
        transformer = model.transformer
        transformer.wte = transformer.wte.to(device)
        transformer.drop = transformer.drop.to(device)

    @staticmethod
    def get_layers_for_scaling(module: OldGptBigCodeBlock, input_feat, module_kwargs):
        """Return the scaling groups for one transformer block.

        Each group pairs the op preceding a set of linear layers with those
        layers and a captured sample input (keyed by layer path in
        *input_feat*), so AWQ can search for a joint scale.
        """
        # attention input
        attention_group = {
            "prev_op": module.ln_1,
            "layers": [module.attn.c_attn],
            "inp": input_feat['attn.c_attn'],
            "module2inspect": module.attn,
            "kwargs": module_kwargs,
        }

        # linear 1
        mlp_in_group = {
            "prev_op": module.ln_2,
            "layers": [module.mlp.c_fc],
            "inp": input_feat['mlp.c_fc'],
            "module2inspect": module.mlp,
        }

        # linear 2
        mlp_out_group = {
            "prev_op": module.mlp.act,
            "layers": [module.mlp.c_proj],
            "inp": input_feat['mlp.c_proj'],
        }

        return [attention_group, mlp_in_group, mlp_out_group]
awq/quantize/scale.py
View file @
386fede8
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
typing
import
Tuple
from
typing
import
Tuple
from
awq.modules.act
import
ScaledActivation
from
awq.modules.act
import
ScaledActivation
from
transformers.activations
import
NewGELUActivation
from
awq.utils.module
import
get_op_by_name
,
set_op_by_name
from
awq.utils.module
import
get_op_by_name
,
set_op_by_name
from
transformers.models.bloom.modeling_bloom
import
BloomGelu
from
transformers.models.bloom.modeling_bloom
import
BloomGelu
from
transformers.models.llama.modeling_llama
import
LlamaRMSNorm
from
transformers.models.llama.modeling_llama
import
LlamaRMSNorm
from
transformers.activations
import
NewGELUActivation
,
PytorchGELUTanh
allowed_norms
=
[
nn
.
LayerNorm
,
LlamaRMSNorm
]
allowed_norms
=
[
nn
.
LayerNorm
,
LlamaRMSNorm
]
allowed_act_fns
=
[
nn
.
GELU
,
BloomGelu
,
NewGELUActivation
]
allowed_act_fns
=
[
nn
.
GELU
,
BloomGelu
,
NewGELUActivation
,
PytorchGELUTanh
]
@
torch
.
no_grad
()
@
torch
.
no_grad
()
def
apply_clip
(
module
,
clip_list
:
Tuple
[
str
,
torch
.
Tensor
]):
def
apply_clip
(
module
,
clip_list
:
Tuple
[
str
,
torch
.
Tensor
]):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment