Commit 6b7651af authored by zhuwenwen
Browse files

Merge branch 'v0.7.2-dev_wm' into 'v0.7.2-dev'

[fix]修复ds3量化代码引入的报错 ([fix] Fix the error introduced by the ds3 quantization code)

See merge request dcutoolkit/deeplearing/vllm!75
parents 4fb33500 bcce8974
......@@ -84,7 +84,7 @@ def get_model_architecture(
'Qwen2MoeForCausalLM', 'ChatGLMModel', 'ChatGLMForConditionalGeneration',
'BaichuanForCausalLM', 'BloomForCausalLM', 'MedusaModel', 'MixtralForCausalLM',
'MLPSpeculatorPreTrainedModel', 'FalconForCausalLM', 'DeepseekV2ForCausalLM',
'DeepseekV3ForCausalLM', 'DeepSeekMTP']
'DeepseekV3ForCausalLM', 'DeepSeekMTPModel']
if any(arch in architectures for arch in support_nn_architectures):
if os.getenv('LLAMA_NN') != '0':
if (architectures == ['QWenLMHeadModel'] or architectures == ['ChatGLMModel'] ) and visions != []:
......
......@@ -870,7 +870,7 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP):
weight.data=weight.data.reshape(ori_shape[1],-1)
if self.config.quantization_config["quant_method"] == "awq" and not envs.VLLM_USE_TRITON_AWQ:
if hasattr(self.config, "quantization_config") and self.config.quantization_config["quant_method"] == "awq" and not envs.VLLM_USE_TRITON_AWQ:
lay_key_words = [
"self_attn.q_a_proj.qweight",
"self_attn.q_b_proj.qweight",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment