Unverified commit bcaa8a36, authored by Casper and committed by GitHub
Browse files

v0.2.0 (#330)


Co-authored-by: jinz2014 <7799920+jinz2014@users.noreply.github.com>
Co-authored-by: Jin Z <5zj@cousteau.ftpn.ornl.gov>
parent c69d3b65
This diff is collapsed.
import torch.nn as nn
def get_named_linears(module):
    """Collect every ``nn.Linear`` submodule of *module*.

    Walks the full module tree via ``named_modules`` and returns a dict
    mapping each linear layer's qualified name (e.g. ``"encoder.0.fc"``)
    to the layer object itself.
    """
    linears = {}
    for name, submodule in module.named_modules():
        if isinstance(submodule, nn.Linear):
            linears[name] = submodule
    return linears
def get_op_by_name(module, op_name):
# get the op by its name relative to the module
for name, m in module.named_modules():
......@@ -12,10 +14,10 @@ def get_op_by_name(module, op_name):
def set_op_by_name(layer, name, new_module):
levels = name.split('.')
levels = name.split(".")
if len(levels) > 1:
mod_ = layer
for l_idx in range(len(levels)-1):
for l_idx in range(len(levels) - 1):
if levels[l_idx].isdigit():
mod_ = mod_[int(levels[l_idx])]
else:
......@@ -43,6 +45,7 @@ def append_str_prefix(x, prefix):
else:
return x
def exclude_layers_to_not_quantize(linear_layers, modules_to_not_convert):
if modules_to_not_convert is None:
return linear_layers
......@@ -51,4 +54,4 @@ def exclude_layers_to_not_quantize(linear_layers, modules_to_not_convert):
for name, linear_layer in linear_layers.items():
if not any(key in name for key in modules_to_not_convert):
filtered_layers[name] = linear_layer
return filtered_layers
\ No newline at end of file
return filtered_layers
......@@ -79,6 +79,7 @@ def unpack_reorder_pack(qweight, qzeros, bits):
return qweight, qzeros
def dequantize_gemm(qweight, qzeros, scales, bits, group_size):
# Unpack the qweight and qzeros tensors
iweight, izeros = unpack_awq(qweight, qzeros, bits)
......@@ -94,4 +95,4 @@ def dequantize_gemm(qweight, qzeros, scales, bits, group_size):
izeros = izeros.repeat_interleave(group_size, dim=0)
iweight = (iweight - izeros) * scales
return iweight
\ No newline at end of file
return iweight
......@@ -23,6 +23,7 @@ def auto_parallel(args):
else:
cuda_visible_devices = list(range(8))
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
[str(dev) for dev in cuda_visible_devices[:n_gpu]])
[str(dev) for dev in cuda_visible_devices[:n_gpu]]
)
logging.debug("CUDA_VISIBLE_DEVICES: ", os.environ["CUDA_VISIBLE_DEVICES"])
return cuda_visible_devices
......@@ -115,7 +115,7 @@ def dequantize(imatrix, scales, zeros, group_size):
) * scales.repeat_interleave(group_size, dim=0)
fmatrix = fmatrix.to(torch.float16)
return fmatrix
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Auto and Base model classes in AutoAWQ
View the documentation of the main classes of AutoAWQ models below.
::: awq.models.auto.AutoAWQForCausalLM
::: awq.models.base.BaseAWQForCausalLM
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment