Update lm_head TN layout for AWQ

f0e7d72d · zhuwenwen · fce0353c · f0e7d72d · f0e7d72d · f0e7d72d
Commit f0e7d72d authored Feb 19, 2025 by zhuwenwen
5 changed files
--- a/vllm/model_executor/models/baichuan.py
+++ b/vllm/model_executor/models/baichuan.py
@@ -519,6 +519,7 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
                    weight.data=weight.data.reshape(ori_shape[1], -1)

        if self.quant_method == "awq":
+            os.environ['LM_NN'] = '0'
            lay_key_words = [
                "self_attn.W_pack.qweight",
                "self_attn.o_proj.qweight",

--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -507,6 +507,7 @@ class LlamaModel(nn.Module):
                    weight.data=weight.data.reshape(ori_shape[1], -1)
     
        if self.quant_method == "awq":
+            os.environ['LM_NN'] = '0'
            lay_key_words = [
                "self_attn.qkv_proj.qweight",
                "self_attn.o_proj.qweight",

--- a/vllm/model_executor/models/qwen.py
+++ b/vllm/model_executor/models/qwen.py
@@ -1132,6 +1132,7 @@ class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):
                    weight.data=weight.data.reshape(ori_shape[1],-1)
                    
        if self.quant_method == "awq":
+            os.environ['LM_NN'] = '0'
            lay_key_words = [
                "attn.c_attn.qweight",
                "attn.c_proj.qweight",

--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -485,6 +485,7 @@ class Qwen2Model(nn.Module):
                    weight.data=weight.data.reshape(ori_shape[1],-1)
                    
        if self.quant_method == "awq":
+            os.environ['LM_NN'] = '0'
            lay_key_words = [
                "self_attn.qkv_proj.qweight",
                "self_attn.o_proj.qweight",