Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d09135fb
Unverified
Commit
d09135fb
authored
Jan 29, 2026
by
Ilya Markov
Committed by
GitHub
Jan 29, 2026
Browse files
[BugFix] Async Eplb fix potential race condition (#32881)
Signed-off-by: ilmarkov <markovilya197@gmail.com>
parent
8688c3d4
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 18 additions and 0 deletions
+18
-0
vllm/distributed/eplb/async_worker.py
vllm/distributed/eplb/async_worker.py
+6
-0
vllm/distributed/eplb/eplb_state.py
vllm/distributed/eplb/eplb_state.py
+12
-0
No files found.
vllm/distributed/eplb/async_worker.py
View file @
d09135fb
...
...
@@ -86,6 +86,12 @@ async def transfer_run_periodically(
if
model_state
.
layer_to_transfer
>=
current_num_layers
:
break
# Wait for the main thread to finish consuming the buffer
# before overwriting it
if
model_state
.
buffer_consumed_event
is
not
None
:
cuda_stream
.
wait_event
(
model_state
.
buffer_consumed_event
)
model_state
.
buffer_consumed_event
=
None
(
model_state
.
is_unchanged
,
model_state
.
is_received_locally
,
...
...
vllm/distributed/eplb/eplb_state.py
View file @
d09135fb
...
...
@@ -151,6 +151,11 @@ class EplbModelState:
CUDA event recorded when the async worker finishes filling the buffer.
The main thread waits on this before consuming the buffer.
"""
buffer_consumed_event
:
torch
.
cuda
.
Event
|
None
"""
CUDA event recorded after the main thread finishes consuming the buffer.
The async worker waits on this before writing to the buffer again.
"""
ep_buffer_ready
:
int
"""
The flag indicates whether the expert buffer is ready for transfer.
...
...
@@ -502,6 +507,7 @@ class EplbState:
expert_buffer
=
expert_buffer
,
buffer_lock
=
threading
.
Lock
(),
buffer_ready_event
=
None
,
buffer_consumed_event
=
None
,
ep_buffer_ready
=
0
,
layer_to_transfer
=
0
,
rebalanced
=
False
,
...
...
@@ -1012,6 +1018,12 @@ class EplbState:
new_indices
=
new_indices
,
ep_rank
=
ep_group
.
rank
(),
)
# Record event after consuming buffer to signal async thread
# that it's safe to overwrite the buffer
consumed_event
=
torch
.
cuda
.
Event
()
consumed_event
.
record
()
model_state
.
buffer_consumed_event
=
consumed_event
transferred_layer
=
model_state
.
layer_to_transfer
self
.
_update_layer_mapping_from_new
(
model_state
,
transferred_layer
)
# After the main thread consumes, advance layer_to_transfer
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.