Unverified commit bcaa8a36 authored by Casper, committed by GitHub

v0.2.0 (#330)


Co-authored-by: jinz2014 <7799920+jinz2014@users.noreply.github.com>
Co-authored-by: Jin Z <5zj@cousteau.ftpn.ornl.gov>
parent c69d3b65
import torch.nn as nn


def get_named_linears(module):
    return {name: m for name, m in module.named_modules() if isinstance(m, nn.Linear)}


def get_op_by_name(module, op_name):
    # get the op by its name relative to the module
    for name, m in module.named_modules():
@@ -12,10 +14,10 @@ def get_op_by_name(module, op_name):
def set_op_by_name(layer, name, new_module):
    levels = name.split(".")
    if len(levels) > 1:
        mod_ = layer
        for l_idx in range(len(levels) - 1):
            if levels[l_idx].isdigit():
                mod_ = mod_[int(levels[l_idx])]
            else:
@@ -43,6 +45,7 @@ def append_str_prefix(x, prefix):
    else:
        return x


def exclude_layers_to_not_quantize(linear_layers, modules_to_not_convert):
    if modules_to_not_convert is None:
        return linear_layers
@@ -51,4 +54,4 @@ def exclude_layers_to_not_quantize(linear_layers, modules_to_not_convert):
    for name, linear_layer in linear_layers.items():
        if not any(key in name for key in modules_to_not_convert):
            filtered_layers[name] = linear_layer
    return filtered_layers
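For context, a minimal sketch of how these helpers fit together when preparing a model for quantization: collect the `nn.Linear` submodules, drop the ones named in `modules_to_not_convert`, and swap a replacement in by its dotted name. The `TinyBlock` module and the bias-free `nn.Linear` stand-in below are illustrative only and not part of the repository.

```python
import torch.nn as nn

# Toy module, used only to exercise the helpers shown in the diff above.
class TinyBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(8, 8) for _ in range(2)])
        self.lm_head = nn.Linear(8, 4)

def get_named_linears(module):
    # Map dotted submodule name -> nn.Linear, as in the helper above.
    return {n: m for n, m in module.named_modules() if isinstance(m, nn.Linear)}

def exclude_layers_to_not_quantize(linear_layers, modules_to_not_convert):
    if modules_to_not_convert is None:
        return linear_layers
    return {n: m for n, m in linear_layers.items()
            if not any(key in n for key in modules_to_not_convert)}

def set_op_by_name(layer, name, new_module):
    # Walk the dotted path, indexing into ModuleList entries by digit.
    levels = name.split(".")
    mod_ = layer
    for lvl in levels[:-1]:
        mod_ = mod_[int(lvl)] if lvl.isdigit() else getattr(mod_, lvl)
    setattr(mod_, levels[-1], new_module)

block = TinyBlock()
linears = exclude_layers_to_not_quantize(get_named_linears(block), ["lm_head"])
for name in linears:
    # Stand-in for swapping in a quantized replacement layer.
    set_op_by_name(block, name, nn.Linear(8, 8, bias=False))
print(sorted(linears))  # ['layers.0', 'layers.1'] -- lm_head is left untouched
```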
@@ -79,6 +79,7 @@ def unpack_reorder_pack(qweight, qzeros, bits):
    return qweight, qzeros


def dequantize_gemm(qweight, qzeros, scales, bits, group_size):
    # Unpack the qweight and qzeros tensors
    iweight, izeros = unpack_awq(qweight, qzeros, bits)
@@ -94,4 +95,4 @@ def dequantize_gemm(qweight, qzeros, scales, bits, group_size):
    izeros = izeros.repeat_interleave(group_size, dim=0)
    iweight = (iweight - izeros) * scales
    return iweight
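A small, self-contained sketch of the group-wise dequantization step at the end of `dequantize_gemm`: each group of `group_size` rows shares one zero point and one scale, so `repeat_interleave(group_size, dim=0)` expands them to per-row values before `(iweight - izeros) * scales` is applied. The tensor shapes and dtypes below are assumed for illustration, not taken from the repository.

```python
import torch

group_size = 4                       # rows per quantization group (illustrative)
in_features, out_features = 8, 3

# Unpacked integer weights plus per-group integer zeros / fp16 scales,
# roughly what unpack_awq would hand back (shapes assumed for this sketch).
iweight = torch.randint(0, 16, (in_features, out_features))
izeros = torch.randint(0, 16, (in_features // group_size, out_features))
scales = torch.rand(in_features // group_size, out_features, dtype=torch.float16)

# Expand the per-group statistics to one entry per row, then dequantize.
izeros_full = izeros.repeat_interleave(group_size, dim=0)   # (8, 3)
scales_full = scales.repeat_interleave(group_size, dim=0)   # (8, 3)
fweight = (iweight - izeros_full) * scales_full
print(fweight.shape, fweight.dtype)  # torch.Size([8, 3]) torch.float16
```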
@@ -23,6 +23,7 @@ def auto_parallel(args):
    else:
        cuda_visible_devices = list(range(8))
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
        [str(dev) for dev in cuda_visible_devices[:n_gpu]]
    )
    logging.debug("CUDA_VISIBLE_DEVICES: ", os.environ["CUDA_VISIBLE_DEVICES"])
    return cuda_visible_devices
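The visible-device string built here is just a comma-separated list of GPU indices truncated to the requested count. A minimal sketch of the same construction outside the `auto_parallel` helper, with an illustrative `n_gpu` value:

```python
import os

n_gpu = 2                              # illustrative value
cuda_visible_devices = list(range(8))  # fallback when no device list is provided
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
    [str(dev) for dev in cuda_visible_devices[:n_gpu]]
)
print(os.environ["CUDA_VISIBLE_DEVICES"])  # "0,1"
```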
@@ -115,7 +115,7 @@ def dequantize(imatrix, scales, zeros, group_size):
    ) * scales.repeat_interleave(group_size, dim=0)
    fmatrix = fmatrix.to(torch.float16)
    return fmatrix
# Auto and Base model classes in AutoAWQ
View the documentation for the main AutoAWQ model classes below.
::: awq.models.auto.AutoAWQForCausalLM
::: awq.models.base.BaseAWQForCausalLM
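For orientation, a short usage sketch of these classes, assuming the quantize/load workflow from the project's README and examples; the model path, output directory, and `quant_config` values below are placeholders, not prescribed by this page.

```python
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "facebook/opt-125m"   # placeholder model
quant_path = "opt-125m-awq"        # placeholder output directory
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

# Quantize a float checkpoint and save the AWQ weights...
model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model.quantize(tokenizer, quant_config=quant_config)
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)

# ...then load the quantized checkpoint for inference.
model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
```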