Unverified Commit 73687a66 authored by Yuge Zhang's avatar Yuge Zhang Committed by GitHub
Browse files

Refactor integration test (step 1) - wait for kill (#4892)

parent d38359e2
......@@ -4,9 +4,10 @@
from subprocess import call, check_output
import sys
import os
import time
import signal
import psutil
from .common_utils import print_error
from .common_utils import print_error, print_warning
def check_output_command(file_path, head=None, tail=None):
......@@ -31,14 +32,25 @@ def check_output_command(file_path, head=None, tail=None):
exit(1)
def kill_command(pid):
"""kill command"""
def kill_command(pid, timeout=60):
    """Ask the process ``pid`` to terminate, then wait for it to go away.

    On Windows a ``CTRL_BREAK_EVENT`` is sent through ``psutil``; a missing
    process only produces a warning. On other platforms the external
    ``kill`` command is invoked for the pid.

    Afterwards the call blocks in ``_wait_till_process_killed`` until the
    process is gone or ``timeout`` seconds have passed. If the process is
    still there after the timeout, a warning is printed; no exception is
    raised for that case.

    Parameters
    ----------
    pid : int
        Id of the process to terminate.
    timeout : int
        Maximum number of seconds to wait for the process to exit
        (60 by default).
    """
    # TODO: The input argument should better be Popen rather than pid.
    if sys.platform == 'win32':
        try:
            process = psutil.Process(pid=pid)
            process.send_signal(signal.CTRL_BREAK_EVENT)
        except psutil.NoSuchProcess:
            print_warning(f'Tried to kill process (pid = {pid}), but the process does not exist.')
    else:
        cmds = ['kill', str(pid)]
        call(cmds)

    # Sending the signal is asynchronous, so give the process time to clean up.
    if not _wait_till_process_killed(pid, timeout):
        print_warning(
            f'One subprocess (pid = {pid}) still exists after {timeout} seconds since the killing signal was sent. '
            'Perhaps the shutdown of this process has hung for some reason. You might have to kill it by yourself.'
        )
def install_package_command(package_name):
......@@ -65,6 +77,71 @@ def install_requirements_command(requirements_path):
return call(_get_pip_install() + ["-r", requirements_path], shell=False)
def _wait_till_process_killed(pid, timeout):
    """Poll until the process ``pid`` has exited or ``timeout`` seconds pass.

    The process is polled about once per second through
    ``_check_pid_running``. One extra poll is made after the last sleep, so
    a process that exits during the final second (or a ``timeout`` of 0) is
    still reported correctly.

    The first ``KeyboardInterrupt`` received while waiting only prints a
    warning and the wait resumes with the seconds already counted; a second
    one is re-raised to the caller.

    Returns
    -------
    bool
        True if the process was observed to be gone, False if it was still
        running when the timeout expired.
    """
    keyboard_interrupted = False
    time_count = 0

    # Usually, a process is killed very quickly.
    # This little nap will save 1 second.
    time.sleep(0.01)

    while True:
        try:
            # Implementation of waiting
            while time_count < timeout:
                if not _check_pid_running(pid):
                    return True
                time.sleep(1)
                time_count += 1
            # Final poll: the process may have exited during the last sleep.
            return not _check_pid_running(pid)
        except KeyboardInterrupt:
            # Warn at the first keyboard interrupt and do nothing
            # Stop at the second
            if keyboard_interrupted:
                print_warning('Wait of process killing cancelled.')
                # I think throwing an exception is more reasonable.
                # Another option is to return false here, which is also acceptable.
                raise
            print_warning(
                f'Waiting for the cleanup of a process (pid = {pid}). '
                'We suggest you waiting for it to complete. '
                'Press Ctrl-C again if you intend to interrupt the cleanup.'
            )
            keyboard_interrupted = True
def _check_pid_running(pid):
    """Return True if the process ``pid`` still appears to be running.

    On Windows the check is a zero-timeout ``psutil`` wait. Elsewhere the
    pid is first polled as a child with ``os.waitpid(..., WNOHANG)``; if it
    is not a child of this process, signal 0 is sent with ``os.kill`` to
    probe for existence.
    """
    # Check whether process still running.
    # FIXME: the correct impl should be using ``proc.poll()``
    # Using pid here is unsafe.
    # We should make Popen object directly accessible.
    if sys.platform == 'win32':
        # NOTE: Tests show that the behavior of psutil is unreliable, and varies from runs to runs.
        # Also, Windows didn't explicitly handle child / non-child process.
        # This might be a potential problem.
        try:
            psutil.Process(pid).wait(timeout=0)
            return False
        except psutil.TimeoutExpired:
            return True
        except psutil.NoSuchProcess:
            return False
    else:
        try:
            # With WNOHANG, a returned pid of 0 means the child has not exited yet.
            indicator, _ = os.waitpid(pid, os.WNOHANG)
            return indicator == 0
        except ChildProcessError:
            # One of the reasons we reach here is: pid may be not a child process.
            # In that case, we can use the famous kill 0 to poll the process.
            try:
                os.kill(pid, 0)
                return True
            except PermissionError:
                # EPERM: the process exists, we are just not allowed to signal it.
                return True
            except OSError:
                # ESRCH (no such process) or any other failure: treat as not running.
                return False
def _get_pip_install():
python = "python" if sys.platform == "win32" else "python3"
ret = [python, "-m", "pip", "install"]
......
import argparse
import multiprocessing
import os
import subprocess
import signal
import sys
import signal
import time
import pytest
from nni.tools.nnictl.command_utils import kill_command, _check_pid_running
# On Windows these tests sometimes fail with the "Terminate batch job (Y/N)?" prompt,
# so the skip marker below is applied when running on win32.
pytestmark = pytest.mark.skipif(sys.platform == 'win32', reason='Windows has confirmation upon process killing.')
def process_normal():
    """Do nothing but sleep for 360 seconds; used as a process target that installs no signal handlers."""
    idle_seconds = 360
    time.sleep(idle_seconds)
def process_kill_slow(kill_time=2):
    """Run as a process that needs ``kill_time`` seconds to shut down.

    Handlers for SIGINT and SIGTERM are installed that sleep for
    ``kill_time`` seconds and then exit with status 0. After that the
    function sleeps for 360 seconds.
    """

    def _exit_slowly(signum, frame):
        # Simulate a lengthy cleanup before exiting normally.
        print('debug proceess kill: signal received')
        time.sleep(kill_time)
        print('debug proceess kill: signal processed')
        sys.exit(0)

    for stop_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(stop_signal, _exit_slowly)

    print('debug process kill: sleep')
    time.sleep(360)
def process_patiently_kill():
    """Launch this file in ``kill_very_slow`` mode, then kill it with the default timeout.

    The one-second pause gives the child time to start before
    ``kill_command`` is called on it.
    """
    command = [sys.executable, __file__, '--mode', 'kill_very_slow']
    child = subprocess.Popen(command)
    time.sleep(1)
    kill_command(child.pid)  # wait long enough
def test_kill_process():
    """A process without signal handlers is killed in under two seconds."""
    victim = multiprocessing.Process(target=process_normal)
    victim.start()
    time.sleep(0.5)

    began = time.time()
    kill_command(victim.pid)
    elapsed = time.time() - began

    assert not _check_pid_running(victim.pid)
    assert elapsed < 2
def test_kill_process_slow_no_patience():
    """``kill_command`` gives up after its timeout while the child is still shutting down.

    The child runs in ``kill_slow`` mode and is killed with a one-second
    timeout. On linux the call must return after roughly one second with
    the child still alive. The child is then given up to 20 more seconds
    to exit; if it never does, it is force-killed and the test fails
    instead of passing silently.
    """
    process = subprocess.Popen([sys.executable, __file__, '--mode', 'kill_slow'])
    time.sleep(1)  # wait 1 second for the process to launch and register hooks

    start_time = time.time()
    kill_command(process.pid, timeout=1)  # didn't wait long enough
    end_time = time.time()

    if sys.platform == 'linux':  # FIXME: on non-linux, seems that the time of termination can't be controlled
        assert 0.5 < end_time - start_time < 2
        assert process.poll() is None
        assert _check_pid_running(process.pid)
    else:
        assert end_time - start_time < 2

    # Wait more seconds and it will exit eventually
    for _ in range(20):
        time.sleep(1)
        if not _check_pid_running(process.pid):
            break
    else:
        # The child never exited: do not leak it, and make the failure visible.
        process.kill()
        process.wait()
        raise AssertionError(
            f'Process (pid = {process.pid}) was still running 20 seconds after the kill signal.'
        )
def test_kill_process_slow_patiently():
    """With a three-second timeout, ``kill_command`` returns in under five seconds for a ``kill_slow`` child."""
    child = subprocess.Popen([sys.executable, __file__, '--mode', 'kill_slow'])
    time.sleep(1)  # wait 1 second for the process to launch and register hooks

    began = time.time()
    kill_command(child.pid, timeout=3)  # wait long enough
    elapsed = time.time() - began

    assert elapsed < 5
    if sys.platform == 'linux':
        # The lower bound is only checked on linux
        # (original note: "I don't know why windows is super fast").
        assert elapsed > 1
@pytest.mark.skipif(sys.platform != 'linux', reason='Signal issues on non-linux.')
def test_kill_process_interrupted():
    """The first Ctrl-C during the kill wait is ignored; the second one aborts it.

    A helper process runs ``process_patiently_kill``, which spawns a
    very slow child and waits on it inside ``kill_command``. Two SIGINTs
    are sent to the helper half a second apart.
    """
    # Launch a subprocess that launches and kills another subprocess
    killer = multiprocessing.Process(target=process_patiently_kill)
    killer.start()
    time.sleep(3)

    # First interrupt: it doesn't work, the helper keeps waiting.
    os.kill(killer.pid, signal.SIGINT)
    assert killer.is_alive()  # Sometimes this is false on darwin.
    time.sleep(0.5)

    # Ctrl+C again.
    os.kill(killer.pid, signal.SIGINT)
    time.sleep(0.5)
    assert not killer.is_alive()

    if sys.platform == 'linux':
        # exit code could be different on non-linux platforms
        assert killer.exitcode != 0
def start_new_process_group(cmd):
    """Start ``cmd`` with ``subprocess.Popen`` in a new process group and return the ``Popen`` object.

    Otherwise cmd will be killed after this process is killed.
    This mocks the behavior of nni experiment launch.
    """
    if sys.platform != 'win32':
        # POSIX: the child calls setpgrp() before exec.
        return subprocess.Popen(cmd, preexec_fn=os.setpgrp)
    return subprocess.Popen(cmd, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
def main():
    """Parse ``--mode`` and run the matching slow-kill scenario.

    ``kill_slow`` runs ``process_kill_slow`` with its default delay,
    ``kill_very_slow`` runs it with a 15-second delay, and no mode does
    nothing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['kill_slow', 'kill_very_slow'])
    args = parser.parse_args()

    # Positional arguments passed to process_kill_slow for each mode.
    slow_kill_args = {
        'kill_slow': (),
        'kill_very_slow': (15,),
    }
    if args.mode in slow_kill_args:
        process_kill_slow(*slow_kill_args[args.mode])
    # No mode given: nothing to do (debuggings here).
# Script entry point: the tests in this file relaunch it with ``--mode`` so
# that it acts as the slow-to-die child process.
if __name__ == '__main__':
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment