Unverified commit 7e1f10d5, authored by Joe Runde, committed by GitHub
Browse files

[Core][Bugfix] allow graceful worker termination (#32965)


Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
parent a28b94e6
...@@ -391,14 +391,17 @@ class MultiprocExecutor(Executor): ...@@ -391,14 +391,17 @@ class MultiprocExecutor(Executor):
time.sleep(0.1) time.sleep(0.1)
return False return False
active_procs = lambda: [proc for proc in worker_procs if proc.is_alive()]
# Give processes time to clean themselves up properly first
if wait_for_termination(active_procs(), 4):
return
# Send SIGTERM if still running # Send SIGTERM if still running
active_procs = [proc for proc in worker_procs if proc.is_alive()] for p in active_procs():
for p in active_procs:
p.terminate() p.terminate()
if not wait_for_termination(active_procs, 4): if not wait_for_termination(active_procs(), 4):
# Send SIGKILL if still running # Send SIGKILL if still running
active_procs = [p for p in active_procs if p.is_alive()] for p in active_procs():
for p in active_procs:
p.kill() p.kill()
def shutdown(self): def shutdown(self):
...@@ -701,6 +704,9 @@ class WorkerProc: ...@@ -701,6 +704,9 @@ class WorkerProc:
nonlocal shutdown_requested nonlocal shutdown_requested
if not shutdown_requested: if not shutdown_requested:
shutdown_requested = True shutdown_requested = True
logger.debug(
"WorkerProc handling signal %d, raising SystemExit", signum
)
raise SystemExit() raise SystemExit()
# Either SIGTERM or SIGINT will terminate the worker # Either SIGTERM or SIGINT will terminate the worker
...@@ -774,6 +780,13 @@ class WorkerProc: ...@@ -774,6 +780,13 @@ class WorkerProc:
# SystemExit() to avoid zmq exceptions in __del__. # SystemExit() to avoid zmq exceptions in __del__.
shutdown_requested = True shutdown_requested = True
except SystemExit as e:
# SystemExit is raised by the signal handler on SIGTERM or SIGINT (SIGKILL
# cannot be caught), which usually indicates that the graceful shutdown
# process did not succeed
logger.warning("WorkerProc was terminated")
# SystemExit must never be ignored
raise e
finally: finally:
if ready_writer is not None: if ready_writer is not None:
ready_writer.close() ready_writer.close()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment