Unverified commit be874c02, authored by Roger Wang, committed by GitHub
Browse files

[Bugfix] Fix Qwen3-VL-MoE weight loading for EP (#25300)


Signed-off-by: Roger Wang <hey@rogerw.io>
parent 9607d5eb
...@@ -122,9 +122,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel): ...@@ -122,9 +122,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
def load_fused_expert_weights(self, name: str, params_dict: dict, def load_fused_expert_weights(self, name: str, params_dict: dict,
loaded_weight: torch.Tensor, shard_id: str, loaded_weight: torch.Tensor, shard_id: str,
num_experts: int): num_experts: int) -> bool:
param = params_dict[name] param = params_dict[name]
weight_loader = typing.cast(Callable[..., bool], param.weight_loader) weight_loader = typing.cast(Callable[..., bool], param.weight_loader)
loaded_local_expert = False
for expert_id in range(num_experts): for expert_id in range(num_experts):
curr_expert_weight = loaded_weight[expert_id] curr_expert_weight = loaded_weight[expert_id]
success = weight_loader(param, success = weight_loader(param,
...@@ -133,9 +134,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel): ...@@ -133,9 +134,10 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
shard_id, shard_id,
expert_id, expert_id,
return_success=True) return_success=True)
if not success: if success:
return False loaded_local_expert = True
return True
return loaded_local_expert
def load_weights(self, weights: Iterable[tuple[str, def load_weights(self, weights: Iterable[tuple[str,
torch.Tensor]]) -> set[str]: torch.Tensor]]) -> set[str]:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment