norm/vllm, commit c9156538
Authored May 30, 2024 by zhuwenwen
Parent: 6634a0e0

support glm and baichuan nn
1 changed file with 2 additions and 2 deletions:

vllm/model_executor/model_loader.py (+2, -2)

The hunk widens the architecture check in _get_model_architecture so that the LLAMA_NN environment-variable gate, previously applied only to LlamaForCausalLM, also covers ChatGLMModel and BaichuanForCausalLM.
vllm/model_executor/model_loader.py:

@@ -23,8 +23,8 @@ def _set_default_torch_dtype(dtype: torch.dtype):

 def _get_model_architecture(model_config: ModelConfig) -> Type[nn.Module]:
     architectures = getattr(model_config.hf_config, "architectures", [])
-    if architectures == ['LlamaForCausalLM']:
+    if architectures == ['LlamaForCausalLM'] or architectures == ['ChatGLMModel'] or architectures == ['BaichuanForCausalLM']:
         if os.getenv('LLAMA_NN') != '0':
             os.environ['LLAMA_NN'] = '1'
     # Special handling for quantized Mixtral.
     # FIXME(woosuk): This is a temporary hack.
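For reference, the widened gate could also be expressed as a membership test instead of chained equality checks. The following is a minimal sketch, not vllm code: the helper name _maybe_enable_llama_nn and the _NN_ARCHITECTURES set are hypothetical, and only the LLAMA_NN semantics (default to '1' unless the user explicitly exported '0') come from the hunk above.

    import os
    from typing import List

    # Hypothetical refactor of the commit's condition; the set and helper
    # name are illustrative, not part of vllm.
    _NN_ARCHITECTURES = {'LlamaForCausalLM', 'ChatGLMModel', 'BaichuanForCausalLM'}

    def _maybe_enable_llama_nn(architectures: List[str]) -> None:
        # The hunk compares whole lists, so it only matches single-element
        # architecture lists; the length check preserves that behavior.
        if len(architectures) == 1 and architectures[0] in _NN_ARCHITECTURES:
            # Default LLAMA_NN on unless it was explicitly set to '0'.
            if os.getenv('LLAMA_NN') != '0':
                os.environ['LLAMA_NN'] = '1'

With that shape, supporting a further architecture is a one-element change to the set rather than another `or` clause, while keeping the original single-element-list semantics explicit.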