Unverified Commit adf73175 authored by fzyzcjy's avatar fzyzcjy Committed by GitHub
Browse files

Forbid DeepEP race condition when too many tokens (#9567)

parent 13705dae
...@@ -272,6 +272,9 @@ class _DeepEPDispatcherImplBase: ...@@ -272,6 +272,9 @@ class _DeepEPDispatcherImplBase:
self.num_max_dispatch_tokens_per_rank = get_int_env_var( self.num_max_dispatch_tokens_per_rank = get_int_env_var(
"SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128 "SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK", 128
) )
# DeepEP's internode_ll dispatch uses FINISHED_SUM_TAG=1024, and its logic requires
# the number of tokens sent from one rank to another rank to be less than that value
assert self.num_max_dispatch_tokens_per_rank <= 1024
self.handle = None self.handle = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment