Unverified Commit bf2b9290 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Support monitor mode when creating or resuming a new experiment (#1933)

parent 4ed78edd
...@@ -49,6 +49,7 @@ nnictl support commands: ...@@ -49,6 +49,7 @@ nnictl support commands:
|--config, -c| True| |YAML configure file of the experiment| |--config, -c| True| |YAML configure file of the experiment|
|--port, -p|False| |the port of restful server| |--port, -p|False| |the port of restful server|
|--debug, -d|False||set debug mode| |--debug, -d|False||set debug mode|
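|--watch, -w|False||set watch mode: nnictl stays in the foreground and keeps refreshing the experiment status; the experiment is stopped when it reaches DONE, ERROR or STOPPED, or when Ctrl+C is pressed|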
* Examples * Examples
...@@ -97,6 +98,7 @@ Debug mode will disable version check function in Trialkeeper. ...@@ -97,6 +98,7 @@ Debug mode will disable version check function in Trialkeeper.
|id| True| |The id of the experiment you want to resume| |id| True| |The id of the experiment you want to resume|
|--port, -p| False| |Rest port of the experiment you want to resume| |--port, -p| False| |Rest port of the experiment you want to resume|
|--debug, -d|False||set debug mode| |--debug, -d|False||set debug mode|
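|--watch, -w|False||set watch mode: nnictl stays in the foreground and keeps refreshing the experiment status; the experiment is stopped when it reaches DONE, ERROR or STOPPED, or when Ctrl+C is pressed|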
* Example * Example
......
# list of commands/arguments # list of commands/arguments
__nnictl_cmds="create resume view update stop trial experiment platform import export webui config log package tensorboard top" __nnictl_cmds="create resume view update stop trial experiment platform import export webui config log package tensorboard top"
__nnictl_create_cmds="--config --port --debug" __nnictl_create_cmds="--config --port --debug --watch"
__nnictl_resume_cmds="--port --debug" __nnictl_resume_cmds="--port --debug --watch"
__nnictl_view_cmds="--port" __nnictl_view_cmds="--port"
__nnictl_update_cmds="searchspace concurrency duration trialnum" __nnictl_update_cmds="searchspace concurrency duration trialnum"
__nnictl_update_searchspace_cmds="--filename" __nnictl_update_searchspace_cmds="--filename"
......
...@@ -20,7 +20,7 @@ from .common_utils import get_yml_content, get_json_content, print_error, print_ ...@@ -20,7 +20,7 @@ from .common_utils import get_yml_content, get_json_content, print_error, print_
detect_port, get_user, get_python_dir detect_port, get_user, get_python_dir
from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS
from .command_utils import check_output_command, kill_command from .command_utils import check_output_command, kill_command
from .nnictl_utils import update_experiment from .nnictl_utils import update_experiment, set_monitor
def get_log_path(config_file_name): def get_log_path(config_file_name):
'''generate stdout and stderr log path''' '''generate stdout and stderr log path'''
...@@ -493,6 +493,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen ...@@ -493,6 +493,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
experiment_config['experimentName']) experiment_config['experimentName'])
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list))) print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list)))
if args.watch:
set_monitor(True, 3, args.port, rest_process.pid)
def create_experiment(args): def create_experiment(args):
'''start a new experiment''' '''start a new experiment'''
...@@ -506,8 +508,8 @@ def create_experiment(args): ...@@ -506,8 +508,8 @@ def create_experiment(args):
validate_all_content(experiment_config, config_path) validate_all_content(experiment_config, config_path)
nni_config.set_config('experimentConfig', experiment_config) nni_config.set_config('experimentConfig', experiment_config)
launch_experiment(args, experiment_config, 'new', config_file_name)
nni_config.set_config('restServerPort', args.port) nni_config.set_config('restServerPort', args.port)
launch_experiment(args, experiment_config, 'new', config_file_name)
def manage_stopped_experiment(args, mode): def manage_stopped_experiment(args, mode):
'''view a stopped experiment''' '''view a stopped experiment'''
......
...@@ -51,6 +51,7 @@ def parse_args(): ...@@ -51,6 +51,7 @@ def parse_args():
parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file') parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode') parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
parser_start.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
parser_start.set_defaults(func=create_experiment) parser_start.set_defaults(func=create_experiment)
# parse resume command # parse resume command
...@@ -58,6 +59,7 @@ def parse_args(): ...@@ -58,6 +59,7 @@ def parse_args():
parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume') parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode') parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
parser_resume.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
parser_resume.set_defaults(func=resume_experiment) parser_resume.set_defaults(func=resume_experiment)
# parse view command # parse view command
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import csv import csv
import os import os
import sys
import json import json
import time import time
import re import re
...@@ -623,23 +624,44 @@ def show_experiment_info(): ...@@ -623,23 +624,44 @@ def show_experiment_info():
content[index].get('endTime'), content[index].get('status'))) content[index].get('endTime'), content[index].get('status')))
print(TRIAL_MONITOR_TAIL) print(TRIAL_MONITOR_TAIL)
def set_monitor(auto_exit, time_interval, port=None, pid=None):
    '''set the experiment monitor engine

    Loops forever: clears the terminal, calls update_experiment() and
    show_experiment_info(), then sleeps `time_interval` seconds. The function
    never returns normally; every way out goes through sys.exit().

    auto_exit: when True, the experiment status is read with
        get_experiment_status(port) on every round and, once it is DONE,
        ERROR or STOPPED, kill_command(pid) is called and the process exits
        with status 0. Ctrl+C also calls kill_command(pid) in this mode.
        When False, Ctrl+C only leaves the monitor.
    time_interval: seconds to sleep between two refreshes.
    port: value handed to get_experiment_status(); only read if auto_exit.
    pid: value handed to kill_command(); only read if auto_exit.
    '''
    # The terminal-clear command does not change while looping, pick it once.
    clear_command = 'cls' if sys.platform == 'win32' else 'clear'
    finished_status = ('DONE', 'ERROR', 'STOPPED')
    while True:
        try:
            os.system(clear_command)
            update_experiment()
            show_experiment_info()
            if auto_exit:
                status = get_experiment_status(port)
                if status in finished_status:
                    print_normal('Experiment status is {0}.'.format(status))
                    print_normal('Stopping experiment...')
                    kill_command(pid)
                    print_normal('Stop experiment success.')
                    # sys.exit() instead of the site-provided exit() helper,
                    # which is not guaranteed to exist outside the interactive
                    # shell. SystemExit derives from BaseException, so the
                    # `except Exception` clause below does not intercept it.
                    sys.exit(0)
            time.sleep(time_interval)
        except KeyboardInterrupt:
            if auto_exit:
                print_normal('Stopping experiment...')
                kill_command(pid)
                print_normal('Stop experiment success.')
            else:
                print_normal('Exiting...')
            sys.exit(0)
        except Exception as exception:
            print_error(exception)
            sys.exit(1)
def monitor_experiment(args):
    '''monitor the experiment

    args.time is the refresh interval in seconds. If it is not positive an
    error is printed and the process exits with status 1; otherwise the
    monitor loop is started and does not return.
    '''
    if args.time <= 0:
        print_error('please input a positive integer as time interval, the unit is second.')
        # sys.exit() rather than the site-provided exit() helper, which is
        # not guaranteed to be available in every interpreter setup.
        sys.exit(1)
    # auto_exit=False: Ctrl+C leaves the monitor without calling kill_command().
    set_monitor(False, args.time)
def export_trials_data(args): def export_trials_data(args):
'''export experiment metadata to csv '''export experiment metadata to csv
''' '''
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment