Unverified commit 14acf429, authored by Ilya Markov, committed by GitHub
Browse files

[EPLB] Remove main waits in case of slow EPLB (#36271)


Signed-off-by: ilmarkov <markovilya197@gmail.com>
parent ce57fd55
...@@ -160,9 +160,9 @@ async def transfer_run_periodically( ...@@ -160,9 +160,9 @@ async def transfer_run_periodically(
is_profile=is_profile, is_profile=is_profile,
cuda_stream=cuda_stream, cuda_stream=cuda_stream,
) )
event = torch.cuda.Event(blocking=False) # block the async thread until the transfer to
cuda_stream.record_event(event) # the intermediate buffer is complete.
model_state.buffer_ready_event = event cuda_stream.synchronize()
model_state.ep_buffer_ready = 1 model_state.ep_buffer_ready = 1
finally: finally:
model_state.buffer_lock.release() model_state.buffer_lock.release()
......
...@@ -176,11 +176,6 @@ class EplbModelState: ...@@ -176,11 +176,6 @@ class EplbModelState:
""" """
The lock to protect the expert buffer. The lock to protect the expert buffer.
""" """
buffer_ready_event: torch.cuda.Event | None
"""
CUDA event recorded when the async worker finishes filling the buffer.
The main thread waits on this before consuming the buffer.
"""
buffer_consumed_event: torch.cuda.Event | None buffer_consumed_event: torch.cuda.Event | None
""" """
CUDA event recorded after the main thread finishes consuming the buffer. CUDA event recorded after the main thread finishes consuming the buffer.
...@@ -480,7 +475,6 @@ class EplbState: ...@@ -480,7 +475,6 @@ class EplbState:
model=model, model=model,
expert_buffer=expert_buffer, expert_buffer=expert_buffer,
buffer_lock=threading.Lock(), buffer_lock=threading.Lock(),
buffer_ready_event=None,
buffer_consumed_event=None, buffer_consumed_event=None,
window_ready_event=None, window_ready_event=None,
ep_buffer_ready=0, ep_buffer_ready=0,
...@@ -919,11 +913,6 @@ class EplbState: ...@@ -919,11 +913,6 @@ class EplbState:
) )
try: try:
assert model_state.new_physical_to_logical_map is not None assert model_state.new_physical_to_logical_map is not None
device_index = model_state.cuda_device_index or self.cuda_device_index
if model_state.buffer_ready_event is not None and device_index is not None:
stream = torch.cuda.current_stream(device=device_index)
stream.wait_event(model_state.buffer_ready_event)
model_state.buffer_ready_event = None
expert_weights = model_state.model.expert_weights[ expert_weights = model_state.model.expert_weights[
model_state.layer_to_transfer model_state.layer_to_transfer
] ]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment