Unverified Commit e68a2b5b authored by Zilin Zhu's avatar Zilin Zhu Committed by GitHub
Browse files

[RL] use cpu group to prepare_mlp_sync_batch_raw when the server is offloaded (#10152)

parent 31b9f19e
......@@ -320,6 +320,7 @@ def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner):
speculative_num_draft_tokens=None,
require_mlp_tp_gather=require_mlp_tp_gather(model_runner.server_args),
disable_overlap_schedule=model_runner.server_args.disable_overlap_schedule,
offload_tags=set(),
)
......
......@@ -2339,6 +2339,7 @@ class Scheduler(
speculative_num_draft_tokens=self.server_args.speculative_num_draft_tokens,
require_mlp_tp_gather=require_mlp_tp_gather(self.server_args),
disable_overlap_schedule=self.server_args.disable_overlap_schedule,
offload_tags=self.offload_tags,
)
@staticmethod
......@@ -2353,6 +2354,7 @@ class Scheduler(
speculative_num_draft_tokens,
require_mlp_tp_gather: bool,
disable_overlap_schedule: bool,
offload_tags: set[str],
):
# Check if other DP workers have running batches
if local_batch is None:
......@@ -2383,7 +2385,7 @@ class Scheduler(
)
tbo_preparer = TboDPAttentionPreparer()
if disable_overlap_schedule:
if len(offload_tags) == 0 and disable_overlap_schedule:
group = tp_group.device_group
device = tp_group.device
else:
......
Markdown is supported
0% or drag & drop files here.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.