Commit 9b0aa006 authored by zhuwenwen
Browse files

update qwen3_moe.py

parent 3f9af065
......@@ -33,6 +33,8 @@ vLLM是一个快速且易于使用的LLM推理和服务库,使用PageAttention
| Qwen2MoeForCausalLM | Qwen2-57B-A14B,Qwen2-57B-A14B-Instruct | Yes | No | - |
| LlavaForConditionalGeneration | LLaMA,LLaMA-2,LLaMA-3 | Yes | No | - |
| Qwen2VLForConditionalGeneration | Qwen2-VL | Yes | No | Yes |
| Qwen2_5_VLForConditionalGeneration | Qwen2.5-VL | Yes | No | Yes |
| Gemma3ForConditionalGeneration | Gemma 3 | Yes | - | - |
| MiniCPMV | MiniCPM-V | Yes | No | - |
| Phi3VForCausalLM | Phi-3.5-vision | Yes | No | - |
| BertModel | bge-large-zh-v1.5 | Yes | No | - |
......
......@@ -414,9 +414,6 @@ class Qwen3MoeModel(nn.Module):
params_dict = dict(self.named_parameters())
loaded_params: Set[str] = set()
for name, loaded_weight in weights:
if self.use_llama_nn:
current_count = loaded_weight.current_count
total_count = loaded_weight.total_count
for (param_name, weight_name, shard_id) in stacked_params_mapping:
# Skip non-stacked layers and experts (experts handled below).
if weight_name not in name:
......@@ -493,7 +490,7 @@ class Qwen3MoeModel(nn.Module):
weight_loader(param, loaded_weight)
loaded_params.add(name)
if self.use_llama_nn and self.quant_method is None and current_count==total_count:
if self.use_llama_nn and self.quant_method is None:
lay_key_words = [
"gate_up_proj.weight",
"down_proj.weight",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment