Unverified Commit 14d1075f authored by Yi Zhang, committed by GitHub

fix qwen3moe eplb prefill bug (#6617)

parent 006ead9d
@@ -136,7 +136,7 @@ class ExpertLocationMetadata:
         num_physical_experts = common["num_physical_experts"]
         phase = server_args.disaggregation_mode
-        if phase == "null":
+        if phase == "null" or model_config_for_expert_location.num_groups is None:
             phase = "decode"
         physical_to_logical_map, logical_to_all_physical_map, expert_count = (
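The hunk above widens the fallback that maps the disaggregation phase to "decode". Below is a minimal sketch of that phase-selection logic, not the actual sglang module; the config class and helper function are hypothetical stand-ins, assuming (per the commit title) that a model such as Qwen3-MoE reports no expert-group structure, i.e. num_groups is None:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelConfigForExpertLocation:
    # Hypothetical stand-in for the real config object; only the field
    # touched by the hunk above is modeled here.
    num_groups: Optional[int] = None


def resolve_phase(disaggregation_mode: str, config: ModelConfigForExpertLocation) -> str:
    """Mirror the phase fallback introduced by the hunk above."""
    phase = disaggregation_mode
    # Before the fix only phase == "null" fell back to "decode"; per the
    # commit title, a model without expert groups (Qwen3-MoE) hit the
    # grouped-expert path during prefill and failed.
    if phase == "null" or config.num_groups is None:
        phase = "decode"
    return phase


# "prefill" now also resolves to "decode" when num_groups is None.
assert resolve_phase("prefill", ModelConfigForExpertLocation(num_groups=None)) == "decode"
```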
@@ -12,6 +12,7 @@
 # limitations under the License.
 # ==============================================================================
 import logging
+from datetime import timedelta
 from typing import Dict, List, Tuple
 import torch
@@ -340,7 +341,7 @@ def update_expert_weights_single_layer(
     reqs = torch.distributed.batch_isend_irecv(p2p_ops)
     try:
         for req in reqs:
-            req.wait(timeout=30)
+            req.wait(timeout=timedelta(seconds=30))
     except RuntimeError:
         logger.error(
             f"Context: {rank=} {old_physical_to_logical_map=} {new_physical_to_logical_map=} {num_local_physical_experts=} {num_gpu_per_node=}"
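The second change is needed because the timeout argument of torch.distributed's Work.wait() is a datetime.timedelta, not a plain number of seconds. A minimal sketch of the pattern, assuming a process group has already been initialized and that `peer` is a valid rank (both are illustrative placeholders, not part of the patched code):

```python
from datetime import timedelta

import torch
import torch.distributed as dist


def exchange_expert_weights(
    buffer_send: torch.Tensor, buffer_recv: torch.Tensor, peer: int
) -> None:
    # Queue a matched send/recv pair and launch them as one batch.
    p2p_ops = [
        dist.P2POp(dist.isend, buffer_send, peer),
        dist.P2POp(dist.irecv, buffer_recv, peer),
    ]
    reqs = dist.batch_isend_irecv(p2p_ops)
    for req in reqs:
        # A bare int (req.wait(timeout=30)) is rejected by the binding;
        # the timeout must be wrapped in a timedelta as in the fix above.
        req.wait(timeout=timedelta(seconds=30))
```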