Update lm_head layout of ChatGLM

77ae0f0d · zhuwenwen · 2ff1c360 · 77ae0f0d
Commit 77ae0f0d authored Nov 28, 2024 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 1 deletion

vllm/model_executor/models/chatglm.py vllm/model_executor/models/chatglm.py +6 -1

No files found.
--- a/vllm/model_executor/models/chatglm.py
+++ b/vllm/model_executor/models/chatglm.py
@@ -697,7 +697,6 @@ class ChatGLMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
                "self_attention.dense.weight",
                "mlp.dense_h_to_4h.weight",
                "mlp.dense_4h_to_h.weight",
-                "lm_head.weight"
            ]
            combined_words = "|".join(lay_key_words)
@@ -708,6 +707,12 @@ class ChatGLMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
            qkv_bias_words = "|".join(lay_qkv_bias_words)
            for layername, weight in params_dict.items():
+                if "lm_head.weight" in layername and weight.shape[1] == 4096:
+                    lay_key_words.append("lm_head.weight")
+                    combined_words = "|".join(lay_key_words)
+                    os.environ['LM_NN'] = '1'  
+                else:
+                    os.environ['LM_NN'] = '0'
                if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
                    weight.data = pad_weight(weight.data, 32)