update qwen3_moe.py

9b0aa006 · zhuwenwen · 3f9af065 · 9b0aa006 · 9b0aa006
Commit 9b0aa006 authored May 16, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 4 deletions

README.md README.md +2 -0

vllm/model_executor/models/qwen3_moe.py vllm/model_executor/models/qwen3_moe.py +1 -4

No files found.
--- a/README.md
+++ b/README.md
@@ -33,6 +33,8 @@ vLLM是一个快速且易于使用的LLM推理和服务库,使用PagedAttention
 | Qwen2MoeForCausalLM                 | Qwen2-57B-A14B,Qwen2-57B-A14B-Instruct        | Yes | No  | -   |
 | LlavaForConditionalGeneration       | LLaMA,LLaMA-2,LLaMA-3                         | Yes | No  | -   |
 | Qwen2VLForConditionalGeneration     | Qwen2-VL                                      | Yes | No  | Yes |
+| Qwen2_5_VLForConditionalGeneration  | Qwen2.5-VL                                    | Yes | No  | Yes |
+| Gemma3ForConditionalGeneration      | Gemma 3                                       | Yes | -   | -   |
 | MiniCPMV                            | MiniCPM-V                                     | Yes | No  | -   |
 | Phi3VForCausalLM                    | Phi-3.5-vision                                | Yes | No  | -   |
 | BertModel                           | bge-large-zh-v1.5                             | Yes | No  | -   |

--- a/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm/model_executor/models/qwen3_moe.py
@@ -414,9 +414,6 @@ class Qwen3MoeModel(nn.Module):
        params_dict = dict(self.named_parameters())
        loaded_params: Set[str] = set()
        for name, loaded_weight in weights:
-            if self.use_llama_nn:
-                current_count = loaded_weight.current_count 
-                total_count = loaded_weight.total_count
            for (param_name, weight_name, shard_id) in stacked_params_mapping:
                # Skip non-stacked layers and experts (experts handled below).
                if weight_name not in name:
@@ -493,7 +490,7 @@ class Qwen3MoeModel(nn.Module):
                    weight_loader(param, loaded_weight)
            loaded_params.add(name)
        
-        if self.use_llama_nn and self.quant_method is None and current_count==total_count:
+        if self.use_llama_nn and self.quant_method is None:
            lay_key_words = [
                "gate_up_proj.weight",
                "down_proj.weight",