修复awq模型的VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD设置位置

ab66909d · yangql · 475dcaa0 · ab66909d
Commit ab66909d authored Jan 15, 2026 by yangql
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 5 deletions

vllm/model_executor/model_loader/utils.py vllm/model_executor/model_loader/utils.py +6 -5

No files found.
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -232,6 +232,11 @@ def get_model_architecture(
                                'ChatGLMModel', 'Glm4ForCausalLM', 'ChatGLMForConditionalGeneration', 'BaichuanForCausalLM', 'BloomForCausalLM', 'TeleChat2ForCausalLM', 'MixtralForCausalLM', 'FalconForCausalLM',
                                'MedusaModel', 'MLPSpeculatorPreTrainedModel', 'DeepseekV2ForCausalLM', 'DeepseekV3ForCausalLM', 'DeepSeekMTPModel']  
    if any(arch in architectures for arch in support_nn_architectures): 
+        # 针对 dtype 为 fp16 的量化模型(awq / awq_marlin / moe_wna16):若未显式设置,则默认关闭 "VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD"
+        if model_config.quantization in {"awq", "awq_marlin", "moe_wna16"}:
+            if not envs.is_set("VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD"):
+                os.environ['VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD'] = '0'      
+        
        if not envs.VLLM_USE_NN:
            if os.getenv('LLAMA_NN') != '0': 
                if (architectures == ['QWenLMHeadModel'] or architectures == ['ChatGLMModel'] ) and visions != []:
@@ -288,10 +293,6 @@ def get_model_architecture(
                os.environ['FA_PAD'] = '0'
        else:
            if architectures in [['DeepseekV3ForCausalLM'], ['DeepSeekMTPModel']]:          
-                #针对使用dtype为fp16的情况的量化默认关闭"VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD"
-                if model_config.quantization in {"awq", "awq_marlin", "moe_wna16"}:
-                    if not envs.is_set("VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD"):
-                        os.environ['VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD'] = '0'                
                if not envs.is_set("VLLM_USE_LIGHTOP"):
                    os.environ['VLLM_USE_LIGHTOP'] = '1'
                if not envs.is_set("VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD"):