OpenDAS / AutoAWQ · Commit b5db7fcd (unverified)
Authored Apr 06, 2024 by Casper; committed via GitHub on Apr 06, 2024

Implement `apply_clip` argument to `quantize()` (#427)

Parent: c780d650
Showing 2 changed files with 17 additions and 7 deletions (+17, -7)
awq/models/base.py (+7, -0)
awq/quantize/quantizer.py (+10, -7)
awq/models/base.py

@@ -136,6 +136,12 @@ class BaseAWQForCausalLM(nn.Module):
                 "This argument avoids real quantization by only applying the scales without quantizing down to FP16."
             ),
         ] = False,
+        apply_clip: Annotated[
+            bool,
+            Doc(
+                "Whether to apply clipping to the model during quantization. Some models may perform better with this set to False."
+            ),
+        ] = True,
     ):
         """
         The main quantization function that you can use to quantize your model.

@@ -173,6 +179,7 @@ class BaseAWQForCausalLM(nn.Module):
             duo_scaling,
             modules_to_not_convert=self.quant_config.modules_to_not_convert,
             export_compatible=export_compatible,
+            apply_clip=apply_clip,
         )
         self.quantizer.quantize()
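For reference, here is a minimal usage sketch of the new argument through the public quantize() API. The entry point (AutoAWQForCausalLM), model path, output path, and quant_config values are illustrative assumptions; only the apply_clip keyword and its default of True come from this commit.

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "mistralai/Mistral-7B-v0.1"  # illustrative model
quant_path = "mistral-7b-awq"             # illustrative output directory
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# apply_clip defaults to True; some models may perform better with clipping disabled.
model.quantize(tokenizer, quant_config=quant_config, apply_clip=False)

model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)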
awq/quantize/quantizer.py

@@ -40,6 +40,7 @@ class AwqQuantizer:
         duo_scaling,
         modules_to_not_convert=None,
         export_compatible=False,
+        apply_clip=True,
     ) -> None:
         self.awq_model = awq_model
         self.model = model

@@ -53,6 +54,7 @@ class AwqQuantizer:
         self.text_column = text_column
         self.duo_scaling = duo_scaling
         self.export_compatible = export_compatible
+        self.apply_clip = apply_clip
         self.modules_to_not_convert = (
             modules_to_not_convert if modules_to_not_convert is not None else []
         )

@@ -161,13 +163,14 @@ class AwqQuantizer:
             )

             # [STEP 3]: Compute and apply clipping list
-            clip_list = self._search_best_clip(
-                self.modules[i], named_linears, input_feat
-            )
-            apply_clip(self.modules[i], clip_list)
-            clip_list = append_str_prefix(
-                clip_list, get_op_name(self.model, self.modules[i]) + "."
-            )
+            if self.apply_clip:
+                clip_list = self._search_best_clip(
+                    self.modules[i], named_linears, input_feat
+                )
+                apply_clip(self.modules[i], clip_list)
+                clip_list = append_str_prefix(
+                    clip_list, get_op_name(self.model, self.modules[i]) + "."
+                )

             # [STEP 4]: Quantize weights
             if not self.export_compatible:
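With apply_clip=False, [STEP 3] above is now skipped entirely: no clip values are searched or applied, and the weights reach [STEP 4] with their original range. Conceptually, the clipping step clamps each linear layer's weights to a searched per-channel maximum before quantization; the sketch below is a rough illustration of that idea, not the AutoAWQ implementation (clamp_weights is a hypothetical helper).

import torch

def clamp_weights(weight: torch.Tensor, max_val: torch.Tensor) -> torch.Tensor:
    # Clamp weights into [-max_val, max_val]; max_val broadcasts per output channel/group.
    # Skipping this (apply_clip=False) keeps the original weight range before quantization.
    return torch.clamp(weight, -max_val, max_val)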