OpenDAS / AutoAWQ · Commit b5db7fcd (unverified)
Authored Apr 06, 2024 by Casper; committed via GitHub on Apr 06, 2024

Implement `apply_clip` argument to `quantize()` (#427)

Parent: c780d650
Showing 2 changed files with 17 additions and 7 deletions (+17, -7)
awq/models/base.py (+7, -0)
awq/quantize/quantizer.py (+10, -7)
awq/models/base.py

@@ -136,6 +136,12 @@ class BaseAWQForCausalLM(nn.Module):
                 "This argument avoids real quantization by only applying the scales without quantizing down to FP16."
             ),
         ] = False,
+        apply_clip: Annotated[
+            bool,
+            Doc(
+                "Whether to apply clipping to the model during quantization. Some models may perform better with this set to False."
+            ),
+        ] = True,
     ):
         """
         The main quantization function that you can use to quantize your model.

@@ -173,6 +179,7 @@ class BaseAWQForCausalLM(nn.Module):
             duo_scaling,
             modules_to_not_convert=self.quant_config.modules_to_not_convert,
             export_compatible=export_compatible,
+            apply_clip=apply_clip,
         )
         self.quantizer.quantize()
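For reference, here is a minimal usage sketch of the new argument through the public quantize() API. The entry point (AutoAWQForCausalLM), model path, output path, and quant_config values are illustrative assumptions; only the apply_clip keyword and its default of True come from this commit.

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "mistralai/Mistral-7B-v0.1"  # illustrative model
quant_path = "mistral-7b-awq"             # illustrative output directory
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# apply_clip defaults to True; some models may perform better with clipping disabled.
model.quantize(tokenizer, quant_config=quant_config, apply_clip=False)

model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)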
awq/quantize/quantizer.py

@@ -40,6 +40,7 @@ class AwqQuantizer:
         duo_scaling,
         modules_to_not_convert=None,
         export_compatible=False,
+        apply_clip=True,
     ) -> None:
         self.awq_model = awq_model
         self.model = model

@@ -53,6 +54,7 @@ class AwqQuantizer:
         self.text_column = text_column
         self.duo_scaling = duo_scaling
         self.export_compatible = export_compatible
+        self.apply_clip = apply_clip
         self.modules_to_not_convert = (
             modules_to_not_convert if modules_to_not_convert is not None else []
         )

@@ -161,13 +163,14 @@ class AwqQuantizer:
             )

             # [STEP 3]: Compute and apply clipping list
-            clip_list = self._search_best_clip(
-                self.modules[i], named_linears, input_feat
-            )
-            apply_clip(self.modules[i], clip_list)
-            clip_list = append_str_prefix(
-                clip_list, get_op_name(self.model, self.modules[i]) + "."
-            )
+            if self.apply_clip:
+                clip_list = self._search_best_clip(
+                    self.modules[i], named_linears, input_feat
+                )
+                apply_clip(self.modules[i], clip_list)
+                clip_list = append_str_prefix(
+                    clip_list, get_op_name(self.model, self.modules[i]) + "."
+                )

             # [STEP 4]: Quantize weights
             if not self.export_compatible:
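With apply_clip=False, [STEP 3] above is now skipped entirely: no clip values are searched or applied, and the weights reach [STEP 4] with their original range. Conceptually, the clipping step clamps each linear layer's weights to a searched per-channel maximum before quantization; the sketch below is a rough illustration of that idea, not the AutoAWQ implementation (clamp_weights is a hypothetical helper).

import torch

def clamp_weights(weight: torch.Tensor, max_val: torch.Tensor) -> torch.Tensor:
    # Clamp weights into [-max_val, max_val]; max_val broadcasts per output channel/group.
    # Skipping this (apply_clip=False) keeps the original weight range before quantization.
    return torch.clamp(weight, -max_val, max_val)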