Commit 9848b6a4 authored by Casper Hansen
Browse files

Correct comments

parent c9d01ac3
...@@ -176,14 +176,13 @@ class AwqQuantizer: ...@@ -176,14 +176,13 @@ class AwqQuantizer:
# create new scales # create new scales
ratio = ratio / n_grid ratio = ratio / n_grid
# s^-1 # NOTE: s^-1 * x is fused here, according to paper
scales = (x_max.pow(ratio) / w_max.pow(1-ratio)).clamp(min=1e-4) scales = (x_max.pow(ratio) / w_max.pow(1-ratio)).clamp(min=1e-4)
scales = scales / (scales.max() * scales.min()).sqrt() scales = scales / (scales.max() * scales.min()).sqrt()
scales_view = scales.view(1, -1).to(device) scales_view = scales.view(1, -1).to(device)
# NOTE: s^-1 * x is fused here, according to paper # Q(W * s)
for fc in linears2scale: for fc in linears2scale:
# Q(W * s)
fc.weight.mul_(scales_view) fc.weight.mul_(scales_view)
fc.weight.data = self.pseudo_quantize_tensor(fc.weight.data) / scales_view fc.weight.data = self.pseudo_quantize_tensor(fc.weight.data) / scales_view
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment