Merge branch 'v0.7.2-dev_wm' into 'v0.7.2-dev'

[fix] 修复ds3量化代码引入的报错。 See merge request dcutoolkit/deeplearing/vllm!75

Merge branch 'v0.7.2-dev_wm' into 'v0.7.2-dev'
[fix] 修复ds3量化代码引入的报错。 See merge request dcutoolkit/deeplearing/vllm!75
6b7651af · zhuwenwen · 4fb33500 · bcce8974 · 6b7651af · 6b7651af
Commit 6b7651af authored Feb 26, 2025 by zhuwenwen
Showing with 2 additions and 2 deletions

vllm/model_executor/model_loader/utils.py (+1 -1)

vllm/model_executor/models/deepseek_v2.py (+1 -1)

No files found.
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -84,7 +84,7 @@ def get_model_architecture(
                                'Qwen2MoeForCausalLM', 'ChatGLMModel', 'ChatGLMForConditionalGeneration', 
                                'BaichuanForCausalLM', 'BloomForCausalLM', 'MedusaModel', 'MixtralForCausalLM', 
                                'MLPSpeculatorPreTrainedModel', 'FalconForCausalLM', 'DeepseekV2ForCausalLM', 
-                                'DeepseekV3ForCausalLM', 'DeepSeekMTP']  
+                                'DeepseekV3ForCausalLM', 'DeepSeekMTPModel']  
    if any(arch in architectures for arch in support_nn_architectures): 
        if os.getenv('LLAMA_NN') != '0': 
             if (architectures == ['QWenLMHeadModel'] or architectures == ['ChatGLMModel'] ) and visions != []:

--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@@ -870,7 +870,7 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP):
                    
                    weight.data=weight.data.reshape(ori_shape[1],-1)

-        if self.config.quantization_config["quant_method"] == "awq" and not envs.VLLM_USE_TRITON_AWQ:
+        if hasattr(self.config, "quantization_config") and self.config.quantization_config["quant_method"] == "awq" and not envs.VLLM_USE_TRITON_AWQ:
            lay_key_words = [
                "self_attn.q_a_proj.qweight",
                "self_attn.q_b_proj.qweight",