Unverified commit 6accb21f, authored by Aaron Hao, committed by GitHub
Browse files

[bug] Fix deadlock with pause resume and collective_rpc (#37024)


Signed-off-by: hao-aaron <ahao@anyscale.com>
parent 053f3b63
......@@ -1632,7 +1632,11 @@ class DPEngineCoreProc(EngineCoreProc):
if self.has_coordinator and request_wave != self.current_wave:
if request_wave > self.current_wave:
self.current_wave = request_wave
elif not self.engines_running:
elif (
not self.engines_running
and self.scheduler.pause_state == PauseState.UNPAUSED
):
self.engines_running = True
# Request received for an already-completed wave, notify
# front-end that we need to start the next one.
self.output_queue.put_nowait(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment