Forbid DeepEP racing condition when too many tokens (#9567)

adf73175 · fzyzcjy · GitHub · 13705dae · adf73175
Unverified Commit adf73175 authored Sep 05, 2025 by fzyzcjy Committed by GitHub Sep 05, 2025
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 0 deletions

python/sglang/srt/layers/moe/token_dispatcher/deepep.py python/sglang/srt/layers/moe/token_dispatcher/deepep.py +3 -0

No files found.
--- a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
@@ -272,6 +272,9 @@ class _DeepEPDispatcherImplBase:
        self.num_max_dispatch_tokens_per_rank = get_int_env_var(
            "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128
        )
+        # DeepEP's internode_ll dispatch uses FINISHED_SUM_TAG = 1024, and its logic
+        # requires the number of tokens sent from one rank to another to be less than that value.
+        assert self.num_max_dispatch_tokens_per_rank <= 1024
        self.handle = None