Commit 41d0696e authored by gaoqiong's avatar gaoqiong
Browse files

增加lm_nn的量化控制,控制为tn

parent 8c61b2ed
......@@ -360,6 +360,7 @@ class GPTNeoXForCausalLM(nn.Module, SupportsPP):
loaded_params.add(name)
#当为triton支持推理的时候不能进行处理
if self.quant_method == "compressed_tensors":
os.environ['LM_NN'] = '0'
lay_key_words = [
"attention.query_key_value.weight",
"attention.dense.weight",
......
......@@ -550,6 +550,7 @@ class LlamaModel(nn.Module):
#当为triton支持推理的时候不能进行处理
if self.quant_method == "compressed_tensors":
os.environ['LM_NN'] = '0'
lay_key_words = [
"self_attn.qkv_proj.weight",
"self_attn.o_proj.weight",
......
......@@ -1174,6 +1174,7 @@ class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):
qweight.data=torch.cat((qweight.data,qweight_pad),dim=1).contiguous()
if self.quant_method == "compressed_tensors":
os.environ['LM_NN'] = '0'
lay_key_words = [
"attn.c_attn.weight",
"attn.c_proj.weight",
......
......@@ -527,6 +527,7 @@ class Qwen2Model(nn.Module):
qweight.data=torch.cat((qweight.data,qweight_pad),dim=1).contiguous()
if self.quant_method == "compressed_tensors":
os.environ['LM_NN'] = '0'
lay_key_words = [
"self_attn.qkv_proj.weight",
"self_attn.o_proj.weight",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment