Commit e197a733 authored by Casper Hansen
Browse files

Fix variables and no contiguous memory for GEMV

parent 331ff953
...@@ -77,13 +77,13 @@ class BaseAWQForCausalLM(nn.Module): ...@@ -77,13 +77,13 @@ class BaseAWQForCausalLM(nn.Module):
module.weight.data, scales, zeros = pseudo_quantize_tensor( module.weight.data, scales, zeros = pseudo_quantize_tensor(
module.weight.data, module.weight.data,
get_scale_zp=True, get_scale_zp=True,
**self.quant_config w_bit=self.quant_config["w_bit"],
q_group_size=self.quant_config["q_group_size"]
) )
scales = scales.t().contiguous()
zeros = zeros.t().contiguous()
if self.quant_config["version"] == 'GEMM': if self.quant_config["version"] == 'GEMM':
scales = scales.t().contiguous()
zeros = zeros.t().contiguous()
q_linear_module = WQLinear_GEMM q_linear_module = WQLinear_GEMM
elif self.quant_config["version"] == 'GEMV': elif self.quant_config["version"] == 'GEMV':
q_linear_module = WQLinear_GEMV q_linear_module = WQLinear_GEMV
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment