Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
14acf429
Unverified
Commit
14acf429
authored
Mar 24, 2026
by
Ilya Markov
Committed by
GitHub
Mar 24, 2026
Browse files
[EPLB] Remove main waits in case of slow EPLB (#36271)
Signed-off-by:
ilmarkov
<
markovilya197@gmail.com
>
parent
ce57fd55
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
14 deletions
+3
-14
vllm/distributed/eplb/async_worker.py
vllm/distributed/eplb/async_worker.py
+3
-3
vllm/distributed/eplb/eplb_state.py
vllm/distributed/eplb/eplb_state.py
+0
-11
No files found.
vllm/distributed/eplb/async_worker.py
View file @
14acf429
...
@@ -160,9 +160,9 @@ async def transfer_run_periodically(
...
@@ -160,9 +160,9 @@ async def transfer_run_periodically(
is_profile
=
is_profile
,
is_profile
=
is_profile
,
cuda_stream
=
cuda_stream
,
cuda_stream
=
cuda_stream
,
)
)
event
=
torch
.
cuda
.
Event
(
blocking
=
False
)
# block the async thread until the transfer to
cuda_stream
.
record_event
(
event
)
# the intermediate buffer is complete.
model_state
.
buffer_ready_event
=
event
cuda_stream
.
synchronize
()
model_state
.
ep_buffer_ready
=
1
model_state
.
ep_buffer_ready
=
1
finally
:
finally
:
model_state
.
buffer_lock
.
release
()
model_state
.
buffer_lock
.
release
()
...
...
vllm/distributed/eplb/eplb_state.py
View file @
14acf429
...
@@ -176,11 +176,6 @@ class EplbModelState:
...
@@ -176,11 +176,6 @@ class EplbModelState:
"""
"""
The lock to protect the expert buffer.
The lock to protect the expert buffer.
"""
"""
buffer_ready_event
:
torch
.
cuda
.
Event
|
None
"""
CUDA event recorded when the async worker finishes filling the buffer.
The main thread waits on this before consuming the buffer.
"""
buffer_consumed_event
:
torch
.
cuda
.
Event
|
None
buffer_consumed_event
:
torch
.
cuda
.
Event
|
None
"""
"""
CUDA event recorded after the main thread finishes consuming the buffer.
CUDA event recorded after the main thread finishes consuming the buffer.
...
@@ -480,7 +475,6 @@ class EplbState:
...
@@ -480,7 +475,6 @@ class EplbState:
model
=
model
,
model
=
model
,
expert_buffer
=
expert_buffer
,
expert_buffer
=
expert_buffer
,
buffer_lock
=
threading
.
Lock
(),
buffer_lock
=
threading
.
Lock
(),
buffer_ready_event
=
None
,
buffer_consumed_event
=
None
,
buffer_consumed_event
=
None
,
window_ready_event
=
None
,
window_ready_event
=
None
,
ep_buffer_ready
=
0
,
ep_buffer_ready
=
0
,
...
@@ -919,11 +913,6 @@ class EplbState:
...
@@ -919,11 +913,6 @@ class EplbState:
)
)
try
:
try
:
assert
model_state
.
new_physical_to_logical_map
is
not
None
assert
model_state
.
new_physical_to_logical_map
is
not
None
device_index
=
model_state
.
cuda_device_index
or
self
.
cuda_device_index
if
model_state
.
buffer_ready_event
is
not
None
and
device_index
is
not
None
:
stream
=
torch
.
cuda
.
current_stream
(
device
=
device_index
)
stream
.
wait_event
(
model_state
.
buffer_ready_event
)
model_state
.
buffer_ready_event
=
None
expert_weights
=
model_state
.
model
.
expert_weights
[
expert_weights
=
model_state
.
model
.
expert_weights
[
model_state
.
layer_to_transfer
model_state
.
layer_to_transfer
]
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment