Commit ae7f9123 authored by maxiao1

Set TBO-related variables & fix the TBO copy

parent 5533c538
...
@@ -541,7 +541,7 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase):
         num_max_dispatch_tokens_per_rank: the actual batch size in the decoding engine should be less than 256
         https://github.com/deepseek-ai/DeepEP?tab=readme-ov-file#example-use-in-inference-decoding
         """
-        self.return_recv_hook = False
+        self.return_recv_hook = return_recv_hook
         self.device_module = torch.get_device_module()
         self.quant_config = {}
...
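With this change, `return_recv_hook` is taken from the constructor argument instead of being hard-coded to `False`, so callers can opt into DeepEP's hook-based receive path. A minimal sketch of that pattern, adapted from the DeepEP README example linked above; it assumes `buffer` is an already-initialized `deep_ep.Buffer`, and `other_microbatch_compute` is a hypothetical placeholder:

    # Issue the low-latency dispatch but defer the actual receive.
    recv_hidden_states, recv_expert_count, handle, event, hook = \
        buffer.low_latency_dispatch(hidden_states, topk_idx,
                                    num_max_dispatch_tokens_per_rank, num_experts,
                                    async_finish=False, return_recv_hook=True)
    # With return_recv_hook=True the kernel only issues the RDMA requests;
    # independent work (e.g. the other TBO micro-batch) can run here and
    # overlap with the in-flight communication.
    other_microbatch_compute()  # hypothetical placeholder
    hook()  # block until the dispatched tokens have actually been received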
...
@@ -758,7 +758,7 @@ class TboForwardBatchPreparer:
         # TODO we may make padding on both sub-batches to make it slightly more balanced
         value_a = min(tbo_split_token_index, num_token_non_padded)
         value_b = max(0, num_token_non_padded - tbo_split_token_index)
-        return torch.tensor([value_a, value_b], dtype=torch.int32).to(
+        return torch.tensor([value_a, value_b], dtype=torch.int32).pin_memory().to(
             device=get_global_server_args().device, non_blocking=True
         )
...
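The second hunk inserts `pin_memory()` before the host-to-device copy. This matters because `non_blocking=True` only yields a truly asynchronous transfer when the source tensor lives in pinned (page-locked) host memory; from ordinary pageable memory, CUDA silently falls back to a synchronous copy. A standalone illustration of the difference, assuming a CUDA device is available (not part of the patch):

    import torch

    values = torch.tensor([3, 5], dtype=torch.int32)  # pageable host memory
    pinned = values.pin_memory()                      # page-locked copy of the same data

    # From pageable memory, non_blocking=True degrades to a blocking copy;
    # from pinned memory, the copy can overlap with other GPU work.
    on_gpu = pinned.to(device="cuda", non_blocking=True)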