OpenDAS / AutoAWQ · Commits · f1a2e56e

Unverified commit f1a2e56e, authored Oct 29, 2023 by Casper, committed by GitHub on Oct 29, 2023

Pseudo dequantize function (#127)
parent b9ed6641

Showing 1 changed file with 13 additions and 0 deletions:

awq/quantize/quantizer.py (+13, −0)
@@ -54,6 +54,19 @@ class AwqQuantizer:
         else:
             return w
 
+    def pseudo_dequantize_tensor(self, w: nn.Linear, scales: torch.Tensor, zeros: torch.Tensor):
+        # get repeated count
+        repeat_count = w.weight.data.shape[-1] // zeros.shape[-1]
+
+        # get zeros and scales in correct shape
+        zeros = zeros.repeat(1, repeat_count).reshape(w.weight.data.shape)
+        scales = scales.repeat(1, repeat_count).reshape(w.weight.data.shape)
+
+        # dequantize
+        w = (w.weight.data - zeros) * scales
+
+        return w
+
     def quantize(self):
         for i in tqdm(range(len(self.modules)), desc="AWQ"):
             # [STEP 1]: Get layer, extract linear modules, extract input features
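The added method expands per-group scales and zero points to the full weight shape, then applies `(w - zeros) * scales`. A minimal self-contained sketch of that shape logic, using synthetic tensors (the sizes, group count, and variable names below are illustrative assumptions, not taken from the commit):

```python
import torch
import torch.nn as nn

# Hypothetical sizes: 4 output rows, 8 input columns, 2 quantization groups.
out_features, in_features, n_groups = 4, 8, 2
w = nn.Linear(in_features, out_features, bias=False)

# One scale and one zero point per (row, group).
scales = torch.rand(out_features, n_groups)
zeros = torch.rand(out_features, n_groups)

# Same expansion as pseudo_dequantize_tensor: tile the per-group
# parameters until they match the full weight shape.
repeat_count = w.weight.data.shape[-1] // zeros.shape[-1]  # 8 // 2 = 4
zeros_full = zeros.repeat(1, repeat_count).reshape(w.weight.data.shape)
scales_full = scales.repeat(1, repeat_count).reshape(w.weight.data.shape)

# Pseudo-dequantize: shift by the zero point, then rescale.
dequant = (w.weight.data - zeros_full) * scales_full
print(dequant.shape)  # torch.Size([4, 8])
```

Note that `repeat(1, n).reshape(...)` tiles the group parameters rather than repeating each element in place (as `repeat_interleave` would); the sketch reproduces the commit's expansion verbatim rather than judging which layout the quantizer's packing expects.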