"vscode:/vscode.git/clone" did not exist on "a44138d18de2b039d62a700417aeef34aefc1633"
Unverified commit 14d1075f, authored by Yi Zhang, committed by GitHub
Browse files

fix qwen3moe eplb prefill bug (#6617)

parent 006ead9d
...@@ -136,7 +136,7 @@ class ExpertLocationMetadata: ...@@ -136,7 +136,7 @@ class ExpertLocationMetadata:
num_physical_experts = common["num_physical_experts"] num_physical_experts = common["num_physical_experts"]
phase = server_args.disaggregation_mode phase = server_args.disaggregation_mode
if phase == "null": if phase == "null" or model_config_for_expert_location.num_groups is None:
phase = "decode" phase = "decode"
physical_to_logical_map, logical_to_all_physical_map, expert_count = ( physical_to_logical_map, logical_to_all_physical_map, expert_count = (
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
import logging import logging
from datetime import timedelta
from typing import Dict, List, Tuple from typing import Dict, List, Tuple
import torch import torch
...@@ -340,7 +341,7 @@ def update_expert_weights_single_layer( ...@@ -340,7 +341,7 @@ def update_expert_weights_single_layer(
reqs = torch.distributed.batch_isend_irecv(p2p_ops) reqs = torch.distributed.batch_isend_irecv(p2p_ops)
try: try:
for req in reqs: for req in reqs:
req.wait(timeout=30) req.wait(timeout=timedelta(seconds=30))
except RuntimeError: except RuntimeError:
logger.error( logger.error(
f"Context: {rank=} {old_physical_to_logical_map=} {new_physical_to_logical_map=} {num_local_physical_experts=} {num_gpu_per_node=}" f"Context: {rank=} {old_physical_to_logical_map=} {new_physical_to_logical_map=} {num_local_physical_experts=} {num_gpu_per_node=}"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment