Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
AutoAWQ
Commits
b5db7fcd
Unverified
Commit
b5db7fcd
Authored Apr 06, 2024 by Casper
Committed by GitHub on Apr 06, 2024
Browse files
Implement `apply_clip` argument to `quantize()` (#427)
parent
c780d650
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 17 additions and 7 deletions
+17
-7
awq/models/base.py
awq/models/base.py
+7
-0
awq/quantize/quantizer.py
awq/quantize/quantizer.py
+10
-7
No files found.
awq/models/base.py
View file @
b5db7fcd
...
@@ -136,6 +136,12 @@ class BaseAWQForCausalLM(nn.Module):
...
@@ -136,6 +136,12 @@ class BaseAWQForCausalLM(nn.Module):
"This argument avoids real quantization by only applying the scales without quantizing down to FP16."
"This argument avoids real quantization by only applying the scales without quantizing down to FP16."
),
),
]
=
False
,
]
=
False
,
apply_clip: Annotated[bool, Doc("Whether to apply clipping to the model during quantization. Some models may perform better with this set to False.")] = True,
):
):
"""
"""
The main quantization function that you can use to quantize your model.
The main quantization function that you can use to quantize your model.
...
@@ -173,6 +179,7 @@ class BaseAWQForCausalLM(nn.Module):
...
@@ -173,6 +179,7 @@ class BaseAWQForCausalLM(nn.Module):
duo_scaling
,
duo_scaling
,
modules_to_not_convert
=
self
.
quant_config
.
modules_to_not_convert
,
modules_to_not_convert
=
self
.
quant_config
.
modules_to_not_convert
,
export_compatible
=
export_compatible
,
export_compatible
=
export_compatible
,
apply_clip=apply_clip,
)
)
self
.
quantizer
.
quantize
()
self
.
quantizer
.
quantize
()
...
...
awq/quantize/quantizer.py
View file @
b5db7fcd
...
@@ -40,6 +40,7 @@ class AwqQuantizer:
...
@@ -40,6 +40,7 @@ class AwqQuantizer:
duo_scaling
,
duo_scaling
,
modules_to_not_convert
=
None
,
modules_to_not_convert
=
None
,
export_compatible
=
False
,
export_compatible
=
False
,
apply_clip=True,
)
->
None
:
)
->
None
:
self
.
awq_model
=
awq_model
self
.
awq_model
=
awq_model
self
.
model
=
model
self
.
model
=
model
...
@@ -53,6 +54,7 @@ class AwqQuantizer:
...
@@ -53,6 +54,7 @@ class AwqQuantizer:
self
.
text_column
=
text_column
self
.
text_column
=
text_column
self
.
duo_scaling
=
duo_scaling
self
.
duo_scaling
=
duo_scaling
self
.
export_compatible
=
export_compatible
self
.
export_compatible
=
export_compatible
self.apply_clip = apply_clip
self
.
modules_to_not_convert
=
(
self
.
modules_to_not_convert
=
(
modules_to_not_convert
if
modules_to_not_convert
is
not
None
else
[]
modules_to_not_convert
if
modules_to_not_convert
is
not
None
else
[]
)
)
...
@@ -161,6 +163,7 @@ class AwqQuantizer:
...
@@ -161,6 +163,7 @@ class AwqQuantizer:
)
)
# [STEP 3]: Compute and apply clipping list
# [STEP 3]: Compute and apply clipping list
if self.apply_clip:
clip_list
=
self
.
_search_best_clip
(
clip_list
=
self
.
_search_best_clip
(
self
.
modules
[
i
],
named_linears
,
input_feat
self
.
modules
[
i
],
named_linears
,
input_feat
)
)
...
...
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.