"model/vscode:/vscode.git/clone" did not exist on "f0ad49ea17d587cce7f4b2c6a6ccb3139ec083c8"
Unverified commit f0d1f62f, authored by fishyds, committed by GitHub
Browse files

Fix trial keeper exiting with the wrong exit code (#152)

* Fix trial keeper bug: use the actual exit code to exit rather than 1
parent d6bfe2a9
...@@ -45,7 +45,6 @@ def main_loop(args): ...@@ -45,7 +45,6 @@ def main_loop(args):
# Notice: We don't appoint env, which means subprocess wil inherit current environment and that is expected behavior # Notice: We don't appoint env, which means subprocess wil inherit current environment and that is expected behavior
process = Popen(args.trial_command, shell = True, stdout = stdout_file, stderr = stderr_file) process = Popen(args.trial_command, shell = True, stdout = stdout_file, stderr = stderr_file)
print('Subprocess pid is {}'.format(process.pid)) print('Subprocess pid is {}'.format(process.pid))
print('Current cwd is {}'.format(os.getcwd()))
while True: while True:
retCode = process.poll() retCode = process.poll()
## Read experiment metrics, to avoid missing metrics ## Read experiment metrics, to avoid missing metrics
...@@ -55,15 +54,15 @@ def main_loop(args): ...@@ -55,15 +54,15 @@ def main_loop(args):
print('subprocess terminated. Exit code is {}. Quit'.format(retCode)) print('subprocess terminated. Exit code is {}. Quit'.format(retCode))
#copy local directory to hdfs #copy local directory to hdfs
nni_local_output_dir = os.environ['NNI_OUTPUT_DIR'] nni_local_output_dir = os.environ['NNI_OUTPUT_DIR']
hdfs_client = HdfsClient(hosts='{0}:{1}'.format(args.pai_hdfs_host, '50070'), user_name=args.pai_user_name) hdfs_client = HdfsClient(hosts='{0}:{1}'.format(args.pai_hdfs_host, '50070'), user_name=args.pai_user_name, timeout=5)
print(nni_local_output_dir, args.pai_hdfs_output_dir)
try: try:
if copyDirectoryToHdfs(nni_local_output_dir, args.pai_hdfs_output_dir, hdfs_client): if copyDirectoryToHdfs(nni_local_output_dir, args.pai_hdfs_output_dir, hdfs_client):
print('copy directory success!') print('copy directory from {0} to {1} success!'.format(nni_local_output_dir, args.pai_hdfs_output_dir))
else: else:
print('copy directory failed!') print('copy directory from {0} to {1} failed!'.format(nni_local_output_dir, args.pai_hdfs_output_dir))
except Exception as exception: except Exception as exception:
print(exception) print('HDFS copy directory got exception')
raise exception
## Exit as the retCode of subprocess(trial) ## Exit as the retCode of subprocess(trial)
exit(retCode) exit(retCode)
...@@ -91,7 +90,10 @@ if __name__ == '__main__': ...@@ -91,7 +90,10 @@ if __name__ == '__main__':
try: try:
main_loop(args) main_loop(args)
except: except SystemExit as se:
print('Exiting by user request') print('NNI trial keeper exit with code {}'.format(se.code))
sys.exit(se.code)
except Exception as e:
print('Exit trial keeper with code 1 because Exception: {} is catched'.format(str(e)))
sys.exit(1) sys.exit(1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment