Update lm_head weight to support Llama 3.2

4e0b233d · zhuwenwen · aba40fda · 4e0b233d
Commit 4e0b233d authored Oct 23, 2024 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

vllm/model_executor/model_loader/utils.py vllm/model_executor/model_loader/utils.py +2 -2

No files found.
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -30,7 +30,7 @@ def get_model_architecture(
                os.environ['LLAMA_NN'] = '0'
             else:
                os.environ['LLAMA_NN'] = '1'
-        if architectures == ['BloomForCausalLM']:
+        if architectures == ['BloomForCausalLM'] or architectures == ['LlamaForCausalLM']:
            os.environ['LM_TN'] = '1'
        else:
            os.environ['LM_TN'] = '0'
@@ -50,7 +50,7 @@ def get_model_architecture(
                os.environ['AWQ_PAD'] = '0'
    else:
        os.environ['LLAMA_NN'] = '0'
-        os.environ['LM_TN'] = '0'
+        os.environ['LM_TN'] = '1'
        os.environ['GEMM_PAD'] = '0'
        os.environ['FA_PAD'] = '0'
        os.environ['AWQ_PAD'] = '0'