Commit 6634a0e0 authored by zhuwenwen

support llama model tn/nn

parent a10e9cee
@@ -24,7 +24,8 @@ def _set_default_torch_dtype(dtype: torch.dtype):
 def _get_model_architecture(model_config: ModelConfig) -> Type[nn.Module]:
     architectures = getattr(model_config.hf_config, "architectures", [])
     if architectures == ['LlamaForCausalLM']:
-        os.environ['LLAMA_NN'] = '1'
+        if os.getenv('LLAMA_NN') != '0':
+            os.environ['LLAMA_NN'] = '1'
     # Special handling for quantized Mixtral.
     # FIXME(woosuk): This is a temporary hack.
     if (model_config.quantization is not None
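For context, a minimal sketch of what the guarded assignment changes (the helper name below is hypothetical; only the `LLAMA_NN` check comes from the diff): previously `LLAMA_NN` was unconditionally forced to `'1'` for `LlamaForCausalLM`, whereas now an explicit `LLAMA_NN=0` exported by the user is respected.

```python
import os

# Sketch of the guarded default from this commit: LLAMA_NN is set to '1'
# for Llama models unless the user explicitly exported LLAMA_NN=0.
# The function name is hypothetical; the check mirrors the diff.
def apply_llama_nn_default() -> None:
    if os.getenv('LLAMA_NN') != '0':
        os.environ['LLAMA_NN'] = '1'

os.environ['LLAMA_NN'] = '0'           # user opts out of the NN path
apply_llama_nn_default()
assert os.environ['LLAMA_NN'] == '0'   # the opt-out now survives

os.environ.pop('LLAMA_NN', None)       # no user preference set
apply_llama_nn_default()
assert os.environ['LLAMA_NN'] == '1'   # default NN path still enabled
```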