Unverified Commit ebbadfeb authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Refactor nnictl to support listing stopped experiments. (#256)

Refactor nnictl to support listing stopped experiments. 
parent 32478a1f
......@@ -49,7 +49,8 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --experiment, -e| False| |ID of the experiment you want to resume|
| id| False| |The id of the experiment you want to resume|
| --port, -p| False| |Rest port of the experiment you want to resume|
......@@ -87,8 +88,8 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| id| False| |ID of the experiment you want to set|
| --filename, -f| True| |the file storing your new search space|
| --id, -i| False| |ID of the experiment you want to set|
* __nnictl update concurrency__
* Description
......@@ -103,8 +104,8 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| id| False| |ID of the experiment you want to set|
| --value, -v| True| |the number of allowed concurrent trials|
| --id, -i| False| |ID of the experiment you want to set|
* __nnictl update duration__
* Description
......@@ -119,8 +120,8 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --value, -v| True| |the experiment duration will be NUMBER seconds. SUFFIX may be 's' for seconds (the default), 'm' for minutes, 'h' for hours or 'd' for days.|
| --id, -i| False| |ID of the experiment you want to set|
| id| False| |ID of the experiment you want to set|
| --value, -v| True| |the experiment duration will be NUMBER seconds. SUFFIX may be 's' for seconds (the default), 'm' for minutes, 'h' for hours or 'd' for days.|
* __nnictl trial__
......@@ -137,7 +138,7 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --id, -i| False| |ID of the experiment you want to set|
| id| False| |ID of the experiment you want to set|
* __nnictl trial kill__
* Description
......@@ -151,9 +152,8 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| id| False| |ID of the experiment you want to set|
| --trialid, -t| True| |ID of the trial you want to kill.|
| --id, -i| False| |ID of the experiment you want to set|
......@@ -171,7 +171,7 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --id, -i| False| |ID of the experiment you want to set|
| id| False| |ID of the experiment you want to set|
* __nnictl experiment status__
......@@ -186,17 +186,23 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --id, -i| False| |ID of the experiment you want to set|
| id| False| |ID of the experiment you want to set|
* __nnictl experiment list__
* Description
Show the id and start time of all running experiments.
Show the information of all the (running) experiments.
* Usage
nnictl experiment list
Options:
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| all| False| False|Show all experiments, including stopped ones.|
* __nnictl config show__
......@@ -223,10 +229,11 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| id| False| |ID of the experiment you want to set|
| --head, -H| False| |show head lines of stdout|
| --tail, -T| False| |show tail lines of stdout|
| --path| False| |show the path of stdout file|
| --id, -i| False| |ID of the experiment you want to set|
* __nnictl log stderr__
* Description
......@@ -241,10 +248,11 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| id| False| |ID of the experiment you want to set|
| --head, -H| False| |show head lines of stderr|
| --tail, -T| False| |show tail lines of stderr|
| --path| False| |show the path of stderr file|
| --id, -i| False| |ID of the experiment you want to set|
* __nnictl log trial__
* Description
......@@ -259,7 +267,7 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --id, -I| False| |the id of trial|
| id| False| |the id of trial|
### Manage webui
......@@ -276,4 +284,4 @@ nnictl webui
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --id, -i| False| |ID of the experiment you want to set|
\ No newline at end of file
| id| False| |ID of the experiment you want to set|
\ No newline at end of file
......@@ -26,8 +26,8 @@ from .constants import NNICTL_HOME_DIR
class Config:
'''a util class to load and save config'''
def __init__(self, port):
config_path = os.path.join(NNICTL_HOME_DIR, str(port))
def __init__(self, file_path):
config_path = os.path.join(NNICTL_HOME_DIR, str(file_path))
os.makedirs(config_path, exist_ok=True)
self.config_file = os.path.join(config_path, '.config')
self.config = self.read_file()
......@@ -73,11 +73,24 @@ class Experiments:
self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment')
self.experiments = self.read_file()
def add_experiment(self, id, port, time):
def add_experiment(self, id, port, time, file_name):
'''set {key:value} paris to self.experiment'''
self.experiments[id] = [port, time]
self.experiments[id] = {}
self.experiments[id]['port'] = port
self.experiments[id]['startTime'] = time
self.experiments[id]['endTime'] = 'N/A'
self.experiments[id]['status'] = 'running'
self.experiments[id]['fileName'] = file_name
self.write_file()
def update_experiment(self, id, key, value):
    '''Set one field of a stored experiment record and persist the change.

    Returns True when the experiment id is known and the record was
    written, False (without touching the file) when the id is unknown.
    '''
    is_known = id in self.experiments
    if is_known:
        self.experiments[id][key] = value
        self.write_file()
    return is_known
def remove_experiment(self, id):
'''remove an experiment by id'''
if id in self.experiments:
......
......@@ -54,11 +54,13 @@ LOG_HEADER = '------------------------------------------------------------------
EXPERIMENT_START_FAILED_INFO = 'There is an experiment running in the port %d, please stop it first or set another port!\n' \
'You could use \'nnictl stop --port [PORT]\' command to stop an experiment!\nOr you could use \'nnictl create --config [CONFIG_PATH] --port [PORT]\' to set port!\n'
EXPERIMENT_ID_INFO = '-----------------------------------------------------------------------\n' \
EXPERIMENT_INFORMATION_FORMAT = '-----------------------------------------------------------------------\n' \
' Experiment information\n' \
'%s\n' \
'-----------------------------------------------------------------------\n'
EXPERIMENT_DETAIL_FORMAT = 'Id: %s Status: %s StartTime: %s EndTime: %s \n'
PACKAGE_REQUIREMENTS = {
'SMAC': 'smac_tuner'
}
......
......@@ -34,17 +34,12 @@ from .common_utils import get_yml_content, get_json_content, print_error, print_
from .constants import *
from .webui_utils import *
import time
import random
import string
def start_rest_server(port, platform, mode, experiment_id=None):
def start_rest_server(port, platform, mode, config_file_name, experiment_id=None):
'''Run nni manager process'''
print_normal('Checking environment...')
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
if rest_port and running:
print_error(EXPERIMENT_START_FAILED_INFO % port)
exit(1)
nni_config = Config(config_file_name)
if detect_port(port):
print_error('Port %s is used by another process, please reset the port!' % port)
exit(1)
......@@ -54,8 +49,8 @@ def start_rest_server(port, platform, mode, experiment_id=None):
cmds = [manager, '--port', str(port), '--mode', platform, '--start_mode', mode]
if mode == 'resume':
cmds += ['--experiment_id', experiment_id]
stdout_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stdout')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
stdout_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stdout')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr')
stdout_file = open(stdout_full_path, 'a+')
stderr_file = open(stderr_full_path, 'a+')
time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
......@@ -66,7 +61,7 @@ def start_rest_server(port, platform, mode, experiment_id=None):
process = Popen(cmds, stdout=stdout_file, stderr=stderr_file)
return process, str(time_now)
def set_trial_config(experiment_config, port):
def set_trial_config(experiment_config, port, config_file_name):
'''set trial configuration'''
request_data = dict()
value_dict = dict()
......@@ -89,16 +84,16 @@ def set_trial_config(experiment_config, port):
return True
else:
print('Error message is {}'.format(response.text))
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':')))
return False
def set_local_config(experiment_config, port):
def set_local_config(experiment_config, port, config_file_name):
'''set local configuration'''
return set_trial_config(experiment_config, port)
return set_trial_config(experiment_config, port, config_file_name)
def set_remote_config(experiment_config, port):
def set_remote_config(experiment_config, port, config_file_name):
'''Call setClusterMetadata to pass trial'''
#set machine_list
request_data = dict()
......@@ -108,15 +103,15 @@ def set_remote_config(experiment_config, port):
if not response or not check_response(response):
if response is not None:
err_message = response.text
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message
#set trial_config
return set_trial_config(experiment_config, port), err_message
return set_trial_config(experiment_config, port, config_file_name), err_message
def set_pai_config(experiment_config, port):
def set_pai_config(experiment_config, port, config_file_name):
'''set pai configuration'''
pai_config_data = dict()
pai_config_data['pai_config'] = experiment_config['paiConfig']
......@@ -125,15 +120,15 @@ def set_pai_config(experiment_config, port):
if not response or not response.status_code == 200:
if response is not None:
err_message = response.text
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message
#set trial_config
return set_trial_config(experiment_config, port), err_message
return set_trial_config(experiment_config, port, config_file_name), err_message
def set_experiment(experiment_config, mode, port):
def set_experiment(experiment_config, mode, port, config_file_name):
'''Call startExperiment (rest POST /experiment) with yaml file content'''
request_data = dict()
request_data['authorName'] = experiment_config['authorName']
......@@ -191,17 +186,17 @@ def set_experiment(experiment_config, mode, port):
if check_response(response):
return response
else:
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':')))
print_error('Setting experiment error, error message is {}'.format(response.text))
return None
def launch_experiment(args, experiment_config, mode, experiment_id=None):
def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):
'''follow steps to start rest server and start experiment'''
nni_config = Config(args.port)
nni_config = Config(config_file_name)
# start rest server
rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, experiment_id)
rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id)
nni_config.set_config('restServerPid', rest_process.pid)
# Deal with annotation
if experiment_config.get('useAnnotation'):
......@@ -236,7 +231,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None):
# set remote config
if experiment_config['trainingServicePlatform'] == 'remote':
print_normal('Setting remote config...')
config_result, err_msg = set_remote_config(experiment_config, args.port)
config_result, err_msg = set_remote_config(experiment_config, args.port, config_file_name)
if config_result:
print_normal('Successfully set remote config!')
else:
......@@ -251,7 +246,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None):
# set local config
if experiment_config['trainingServicePlatform'] == 'local':
print_normal('Setting local config...')
if set_local_config(experiment_config, args.port):
if set_local_config(experiment_config, args.port, config_file_name):
print_normal('Successfully set local config!')
else:
print_error('Failed!')
......@@ -265,7 +260,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None):
#set pai config
if experiment_config['trainingServicePlatform'] == 'pai':
print_normal('Setting pai config...')
config_result, err_msg = set_pai_config(experiment_config, args.port)
config_result, err_msg = set_pai_config(experiment_config, args.port, config_file_name)
if config_result:
print_normal('Successfully set pai config!')
else:
......@@ -280,7 +275,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None):
# start a new experiment
print_normal('Starting experiment...')
response = set_experiment(experiment_config, mode, args.port)
response = set_experiment(experiment_config, mode, args.port, config_file_name)
if response:
if experiment_id is None:
experiment_id = json.loads(response.text).get('experiment_id')
......@@ -293,24 +288,61 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None):
except Exception:
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(1)
web_ui_url_list = get_web_ui_urls(args.port)
web_ui_url_list = get_web_ui_urls(args.port, config_file_name)
#save experiment information
experiment_config = Experiments()
experiment_config.add_experiment(experiment_id, args.port, start_time)
experiment_config.add_experiment(experiment_id, args.port, start_time, config_file_name)
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list)))
def cmp_time(time1, time2):
    '''Compare two timestamps written as '%Y-%m-%d %H:%M:%S'.

    Returns a positive int when time1 is later than time2, a negative int
    when it is earlier, and 0 when they are equal or when either argument
    cannot be parsed in that format.
    '''
    time_format = '%Y-%m-%d %H:%M:%S'
    try:
        # strptime yields a struct_time, which int() cannot convert;
        # mktime turns it into epoch seconds that can be subtracted.
        seconds1 = time.mktime(time.strptime(time1, time_format))
        seconds2 = time.mktime(time.strptime(time2, time_format))
    except (TypeError, ValueError, OverflowError):
        # Unparseable input is treated as "no difference", as before.
        return 0
    return int(seconds1 - seconds2)
def resume_experiment(args):
'''resume an experiment'''
nni_config = Config(args.port)
experiment_config = Experiments()
experiment_dict = experiment_config.get_all_experiments()
experiment_id = None
experiment_endTime = None
#find the latest stopped experiment
if not args.id:
for key in experiment_dict.keys():
if experiment_dict[key]['status'] == 'stopped':
if experiment_id is None:
experiment_id = key
experiment_endTime = experiment_dict[key]['endTime']
else:
if cmp_time(experiment_dict[key]['endTime'], experiment_endTime) > 0:
experiment_id = key
experiment_endTime = experiment_dict[key]['endTime']
if experiment_id is None:
print_error('There is no experiment stopped!')
exit(1)
else:
if experiment_dict.get(args.id) is None:
print_error('Id %s not exist!' % args.id)
exit(1)
if experiment_dict[args.id]['status'] == 'running':
print_error('Experiment %s is running!' % args.id)
exit(1)
experiment_id = args.id
print_normal('Resuming experiment %s...' % experiment_id)
nni_config = Config(experiment_dict[experiment_id]['fileName'])
experiment_config = nni_config.get_config('experimentConfig')
experiment_id = nni_config.get_config('experimentId')
launch_experiment(args, experiment_config, 'resume', experiment_id)
launch_experiment(args, experiment_config, 'resume', experiment_dict[experiment_id]['fileName'], experiment_id)
def create_experiment(args):
'''start a new experiment'''
nni_config = Config(args.port)
config_file_name = ''.join(random.sample(string.ascii_letters + string.digits, 8))
nni_config = Config(config_file_name)
config_path = os.path.abspath(args.config)
if not os.path.exists(config_path):
print_error('Please set correct config path!')
......@@ -319,5 +351,5 @@ def create_experiment(args):
validate_all_content(experiment_config, config_path)
nni_config.set_config('experimentConfig', experiment_config)
launch_experiment(args, experiment_config, 'new')
launch_experiment(args, experiment_config, 'new', config_file_name)
nni_config.set_config('restServerPort', args.port)
......@@ -45,8 +45,7 @@ def parse_args():
# parse resume command
parser_resume = subparsers.add_parser('resume', help='resume a new experiment')
parser_resume.add_argument('--experiment', '-e', dest='id', help='ID of the experiment you want to resume')
parser_resume.add_argument('--manager', '-m', default='nnimanager', dest='manager')
parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_resume.set_defaults(func=resume_experiment)
......@@ -55,15 +54,15 @@ def parse_args():
#add subparsers for parser_updater
parser_updater_subparsers = parser_updater.add_subparsers()
parser_updater_searchspace = parser_updater_subparsers.add_parser('searchspace', help='update searchspace')
parser_updater_searchspace.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_updater_searchspace.add_argument('id', nargs='?', help='the id of experiment')
parser_updater_searchspace.add_argument('--filename', '-f', required=True)
parser_updater_searchspace.set_defaults(func=update_searchspace)
parser_updater_concurrency = parser_updater_subparsers.add_parser('concurrency', help='update concurrency')
parser_updater_concurrency.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_updater_concurrency.add_argument('id', nargs='?', help='the id of experiment')
parser_updater_concurrency.add_argument('--value', '-v', required=True)
parser_updater_concurrency.set_defaults(func=update_concurrency)
parser_updater_duration = parser_updater_subparsers.add_parser('duration', help='update duration')
parser_updater_duration.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_updater_duration.add_argument('id', nargs='?', help='the id of experiment')
parser_updater_duration.add_argument('--value', '-v', required=True)
parser_updater_duration.set_defaults(func=update_duration)
parser_updater_trialnum = parser_updater_subparsers.add_parser('trialnum', help='update maxtrialnum')
......@@ -81,10 +80,10 @@ def parse_args():
#add subparsers for parser_trial
parser_trial_subparsers = parser_trial.add_subparsers()
parser_trial_ls = parser_trial_subparsers.add_parser('ls', help='list trial jobs')
parser_trial_ls.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_trial_ls.add_argument('id', nargs='?', help='the id of experiment')
parser_trial_ls.set_defaults(func=trial_ls)
parser_trial_kill = parser_trial_subparsers.add_parser('kill', help='kill trial jobs')
parser_trial_kill.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_trial_kill.add_argument('id', nargs='?', help='the id of experiment')
parser_trial_kill.add_argument('--trialid', '-t', required=True, dest='trialid', help='the id of trial to be killed')
parser_trial_kill.set_defaults(func=trial_kill)
......@@ -93,13 +92,14 @@ def parse_args():
#add subparsers for parser_experiment
parser_experiment_subparsers = parser_experiment.add_subparsers()
parser_experiment_show = parser_experiment_subparsers.add_parser('show', help='show the information of experiment')
parser_experiment_show.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_experiment_show.add_argument('id', nargs='?', help='the id of experiment')
parser_experiment_show.set_defaults(func=list_experiment)
parser_experiment_status = parser_experiment_subparsers.add_parser('status', help='show the status of experiment')
parser_experiment_status.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_experiment_status.add_argument('id', nargs='?', help='the id of experiment')
parser_experiment_status.set_defaults(func=experiment_status)
parser_experiment_list = parser_experiment_subparsers.add_parser('list', help='list all of running experiment ids')
parser_experiment_list.set_defaults(func=experiment_id)
parser_experiment_list.add_argument('all', nargs='?', help='list all of experiments')
parser_experiment_list.set_defaults(func=experiment_list)
#TODO:finish webui function
#parse board command
......@@ -107,14 +107,14 @@ def parse_args():
#add subparsers for parser_board
parser_webui_subparsers = parser_webui.add_subparsers()
parser_webui_url = parser_webui_subparsers.add_parser('url', help='show the url of web ui')
parser_webui_url.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_webui_url.add_argument('id', nargs='?', help='the id of experiment')
parser_webui_url.set_defaults(func=webui_url)
#parse config command
parser_config = subparsers.add_parser('config', help='get config information')
parser_config_subparsers = parser_config.add_subparsers()
parser_config_show = parser_config_subparsers.add_parser('show', help='show the information of config')
parser_config_show.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_config_show.add_argument('id', nargs='?', help='the id of experiment')
parser_config_show.set_defaults(func=get_config)
#parse log command
......@@ -122,19 +122,19 @@ def parse_args():
# add subparsers for parser_log
parser_log_subparsers = parser_log.add_subparsers()
parser_log_stdout = parser_log_subparsers.add_parser('stdout', help='get stdout information')
parser_log_stdout.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_log_stdout.add_argument('id', nargs='?', help='the id of experiment')
parser_log_stdout.add_argument('--tail', '-T', dest='tail', type=int, help='get tail -100 content of stdout')
parser_log_stdout.add_argument('--head', '-H', dest='head', type=int, help='get head -100 content of stdout')
parser_log_stdout.add_argument('--path', action='store_true', default=False, help='get the path of stdout file')
parser_log_stdout.set_defaults(func=log_stdout)
parser_log_stderr = parser_log_subparsers.add_parser('stderr', help='get stderr information')
parser_log_stderr.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_log_stderr.add_argument('id', nargs='?', help='the id of experiment')
parser_log_stderr.add_argument('--tail', '-T', dest='tail', type=int, help='get tail -100 content of stderr')
parser_log_stderr.add_argument('--head', '-H', dest='head', type=int, help='get head -100 content of stderr')
parser_log_stderr.add_argument('--path', action='store_true', default=False, help='get the path of stderr file')
parser_log_stderr.set_defaults(func=log_stderr)
parser_log_trial = parser_log_subparsers.add_parser('trial', help='get trial log path')
parser_log_trial.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_log_trial.add_argument('id', nargs='?', help='the id of experiment')
parser_log_trial.add_argument('--trialid', '-T', dest='trialid', help='find trial log path by id')
parser_log_trial.set_defaults(func=log_trial)
......@@ -144,7 +144,7 @@ def parse_args():
parser_package_subparsers = parser_package.add_subparsers()
parser_package_install = parser_package_subparsers.add_parser('install', help='install packages')
parser_package_install.add_argument('--name', '-n', dest='name', help='package name to be installed')
parser_package_install.set_defaults(func=package_install)
parser_package_install.set_defaults(func=package_install)
parser_package_show = parser_package_subparsers.add_parser('show', help='show the information of packages')
parser_package_show.set_defaults(func=package_show)
......
......@@ -22,96 +22,87 @@ import os
import psutil
import json
import datetime
import time
from subprocess import call, check_output
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
from .config_utils import Config, Experiments
from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url
from .constants import NNICTL_HOME_DIR, EXPERIMENT_ID_INFO
from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT
import time
from .common_utils import print_normal, print_error, detect_process
from .common_utils import print_normal, print_error, print_warning, detect_process
def get_experiment_port(args):
'''get the port of an experiment'''
def check_experiment_id(args):
'''check if the id is valid
'''
experiment_config = Experiments()
experiment_dict = experiment_config.get_all_experiments()
#1.If there is an id specified, return the corresponding port
#2.If there is no id specified, and there is an experiment running, return it as default port, or return Error
#3.If the id matches an experiment, nnictl will return the id.
#4.If the id ends with *, nnictl will match all ids matchs the regular
#5.If the id does not exist but match the prefix of an experiment id, nnictl will return the matched id
#6.If the id does not exist but match multiple prefix of the experiment ids, nnictl will give id information
#7.Users could use 'nnictl stop all' to stop all experiments
if not experiment_dict:
print_normal('Experiment is not running...')
return None
if not args.id and len(experiment_dict.keys()) > 1:
print_error('There are multiple experiments running, please set the experiment id...')
experiment_information = ""
for key in experiment_dict.keys():
experiment_information += ('Id: ' + key + ' StartTime: ' + experiment_dict[key][1] + '\n')
print(EXPERIMENT_ID_INFO % experiment_information)
return None
print_normal('There is no experiment running...')
exit(1)
if not args.id:
return list(experiment_dict.values())[0][0]
running_experiment_list = []
for key in experiment_dict.keys():
if experiment_dict[key]['status'] == 'running':
running_experiment_list.append(key)
if len(running_experiment_list) > 1:
print_error('There are multiple experiments running, please set the experiment id...')
experiment_information = ""
for key in running_experiment_list:
experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \
experiment_dict[key]['startTime'], experiment_dict[key]['endTime']))
print(EXPERIMENT_INFORMATION_FORMAT % experiment_information)
exit(1)
elif not running_experiment_list:
print_error('There is no experiment running!')
exit(1)
else:
return running_experiment_list[0]
if experiment_dict.get(args.id):
return experiment_dict[args.id][0]
return args.id
else:
print_error('Id not correct!')
return None
def convert_time_stamp_to_date(content):
'''Convert time stamp to date time format'''
start_time_stamp = content.get('startTime')
end_time_stamp = content.get('endTime')
if start_time_stamp:
start_time = datetime.datetime.utcfromtimestamp(start_time_stamp // 1000).strftime("%Y/%m/%d %H:%M:%S")
content['startTime'] = str(start_time)
if end_time_stamp:
end_time = datetime.datetime.utcfromtimestamp(end_time_stamp // 1000).strftime("%Y/%m/%d %H:%M:%S")
content['endTime'] = str(end_time)
return content
def check_rest(args):
'''check if restful server is running'''
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
if not running:
print_normal('Restful server is running...')
else:
print_normal('Restful server is not running...')
print_error('Id not correct!')
exit(1)
def parse_ids(args):
'''Parse the arguments for nnictl stop'''
'''Parse the arguments for nnictl stop
1.If there is an id specified, return the corresponding id
2.If there is no id specified, and there is an experiment running, return the id, or return Error
3.If the id matches an experiment, nnictl will return the id.
4.If the id ends with *, nnictl will match all ids matchs the regular
5.If the id does not exist but match the prefix of an experiment id, nnictl will return the matched id
6.If the id does not exist but match multiple prefix of the experiment ids, nnictl will give id information
'''
experiment_config = Experiments()
experiment_dict = experiment_config.get_all_experiments()
if not experiment_dict:
print_normal('Experiment is not running...')
return None
experiment_id_list = list(experiment_dict.keys())
result_list = []
running_experiment_list = []
for key in experiment_dict.keys():
if experiment_dict[key]['status'] == 'running':
running_experiment_list.append(key)
if not args.id:
if len(experiment_id_list) > 1:
if len(running_experiment_list) > 1:
print_error('There are multiple experiments running, please set the experiment id...')
experiment_information = ""
for key in experiment_dict.keys():
experiment_information += ('Id: ' + key + ' StartTime: ' + experiment_dict[key][1] + '\n')
print(EXPERIMENT_ID_INFO % experiment_information)
return None
result_list = experiment_id_list
for key in running_experiment_list:
experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \
experiment_dict[key]['startTime'], experiment_dict[key]['endTime']))
print(EXPERIMENT_INFORMATION_FORMAT % experiment_information)
exit(1)
else:
result_list = running_experiment_list
elif args.id == 'all':
result_list = experiment_id_list
result_list = running_experiment_list
elif args.id.endswith('*'):
for id in experiment_id_list:
for id in running_experiment_list:
if id.startswith(args.id[:-1]):
result_list.append(id)
elif args.id in experiment_id_list:
elif args.id in running_experiment_list:
result_list.append(args.id)
else:
for id in experiment_id_list:
for id in running_experiment_list:
if id.startswith(args.id):
result_list.append(id)
if len(result_list) > 1:
......@@ -121,6 +112,42 @@ def parse_ids(args):
print_error('There are no experiments matched, please check experiment id...')
return result_list
def get_config_filename(args):
    '''Return the 'fileName' stored for the experiment selected by args.'''
    # Resolve the id first, then load the experiment records.
    resolved_id = check_experiment_id(args)
    all_experiments = Experiments().get_all_experiments()
    record = all_experiments[resolved_id]
    return record['fileName']
def get_experiment_port(args):
    '''Return the 'port' stored for the experiment selected by args.'''
    # Resolve the id first, then load the experiment records.
    resolved_id = check_experiment_id(args)
    all_experiments = Experiments().get_all_experiments()
    record = all_experiments[resolved_id]
    return record['port']
def convert_time_stamp_to_date(content):
    '''Rewrite 'startTime' and 'endTime' in content as date strings.

    Each truthy value is integer-divided by 1000, interpreted as a UTC
    timestamp and replaced by its "%Y/%m/%d %H:%M:%S" form. Missing or
    falsy values are left as they are. The same mapping is returned.
    '''
    for field in ('startTime', 'endTime'):
        stamp = content.get(field)
        if not stamp:
            continue
        moment = datetime.datetime.utcfromtimestamp(stamp // 1000)
        content[field] = str(moment.strftime("%Y/%m/%d %H:%M:%S"))
    return content
def check_rest(args):
    '''Print whether the restful server of the selected experiment is running.

    Reads 'restServerPort' from the experiment's stored config, probes it
    with check_rest_server_quick and prints the matching message.
    '''
    nni_config = Config(get_config_filename(args))
    rest_port = nni_config.get_config('restServerPort')
    running, _ = check_rest_server_quick(rest_port)
    # The condition used to be negated, so the two messages were swapped.
    if running:
        print_normal('Restful server is running...')
    else:
        print_normal('Restful server is not running...')
def stop_experiment(args):
'''Stop the experiment which is running'''
experiment_id_list = parse_ids(args)
......@@ -128,15 +155,13 @@ def stop_experiment(args):
experiment_config = Experiments()
experiment_dict = experiment_config.get_all_experiments()
for experiment_id in experiment_id_list:
port = experiment_dict.get(experiment_id)[0]
if port is None:
return None
print_normal('Stoping experiment %s' % experiment_id)
nni_config = Config(port)
nni_config = Config(experiment_dict[experiment_id]['fileName'])
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
print_normal('Experiment is not running...')
experiment_config.update_experiment(experiment_id, 'status', 'stopped')
return
running, _ = check_rest_server_quick(rest_port)
stop_rest_result = True
......@@ -153,15 +178,13 @@ def stop_experiment(args):
call(cmds)
if stop_rest_result:
print_normal('Stop experiment success!')
experiment_config = Experiments()
experiment_config.remove_experiment(experiment_id)
experiment_config.update_experiment(experiment_id, 'status', 'stopped')
time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
experiment_config.update_experiment(experiment_id, 'endTime', str(time_now))
def trial_ls(args):
'''List trial'''
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
nni_config = Config(get_config_filename(args))
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -182,10 +205,7 @@ def trial_ls(args):
def trial_kill(args):
'''List trial'''
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
nni_config = Config(get_config_filename(args))
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -203,10 +223,7 @@ def trial_kill(args):
def list_experiment(args):
'''Get experiment information'''
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
nni_config = Config(get_config_filename(args))
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -225,10 +242,7 @@ def list_experiment(args):
def experiment_status(args):
'''Show the status of experiment'''
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
nni_config = Config(get_config_filename(args))
rest_port = nni_config.get_config('restServerPort')
result, response = check_rest_server_quick(rest_port)
if not result:
......@@ -246,13 +260,11 @@ def get_log_content(file_name, cmds):
def log_internal(args, filetype):
'''internal function to call get_log_content'''
port = get_experiment_port(args)
if port is None:
return None
file_name = get_config_filename(args)
if filetype == 'stdout':
file_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stdout')
file_full_path = os.path.join(NNICTL_HOME_DIR, file_name, 'stdout')
else:
file_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
file_full_path = os.path.join(NNICTL_HOME_DIR, file_name, 'stderr')
if args.head:
get_log_content(file_full_path, ['head', '-' + str(args.head), file_full_path])
elif args.tail:
......@@ -273,10 +285,7 @@ def log_stderr(args):
def log_trial(args):
''''get trial log path'''
trial_id_path_dict = {}
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
nni_config = Config(get_config_filename(args))
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -304,28 +313,33 @@ def log_trial(args):
def get_config(args):
    '''Print every config entry stored for the experiment selected by args.

    The config store is opened by the experiment's config file name
    (get_config_filename); the superseded port-based lookup is dropped so the
    Config object is constructed only once.
    '''
    nni_config = Config(get_config_filename(args))
    print(nni_config.get_all_config())
def webui_url(args):
    '''Print the Web UI URLs stored for the experiment selected by args.

    The 'webuiUrl' config entry is joined with single spaces and printed via
    print_normal. The config store is opened by config file name only; the
    superseded port-based lookup is dropped.
    '''
    nni_config = Config(get_config_filename(args))
    urls = nni_config.get_config('webuiUrl')
    print_normal('{0} {1}'.format('Web UI url:', ' '.join(urls)))
def experiment_list(args):
    '''Print the id, status, start time and end time of recorded experiments.

    When args.all equals 'all', every recorded experiment is listed;
    otherwise only experiments whose 'status' is 'running' are listed, and a
    warning is printed if none of them is running. If no experiment is
    recorded at all, a message is printed and the process exits with status 1.

    Experiment records are read as dicts ('status', 'startTime', 'endTime');
    the stale tuple-style access (experiment_dict[key][1]) and the
    EXPERIMENT_ID_INFO output of the previous implementation are removed.
    '''
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    if not experiment_dict:
        print('There is no experiment running...')
        exit(1)
    if args.all and args.all == 'all':
        experiment_id_list = list(experiment_dict)
    else:
        experiment_id_list = [key for key in experiment_dict
                              if experiment_dict[key]['status'] == 'running']
        if not experiment_id_list:
            print_warning('There is no experiment running...\nYou can use \'nnictl experiment list all\' to list all stopped experiments!')
    # Build the whole table body in one pass instead of repeated string +=.
    experiment_information = ''.join(
        EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'],
                                    experiment_dict[key]['startTime'],
                                    experiment_dict[key]['endTime'])
        for key in experiment_id_list)
    print(EXPERIMENT_INFORMATION_FORMAT % experiment_information)
......@@ -25,7 +25,7 @@ from .rest_utils import rest_put, rest_get, check_rest_server_quick, check_respo
from .url_utils import experiment_url
from .config_utils import Config
from .common_utils import get_json_content
from .nnictl_utils import get_experiment_port
from .nnictl_utils import check_experiment_id, get_experiment_port, get_config_filename
def validate_digit(value, start, end):
'''validate if a digit is valid'''
......@@ -57,7 +57,7 @@ def get_query_type(key):
def update_experiment_profile(args, key, value):
'''call restful server to update experiment profile'''
nni_config = Config(args.port)
nni_config = Config(get_config_filename(args))
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
if running:
......@@ -102,9 +102,7 @@ def update_duration(args):
def update_trialnum(args):
    '''Update the 'maxTrialNum' field of the experiment profile.

    args.value is first checked with validate_digit against the range
    1..999999999, then sent through update_experiment_profile; a success or
    failure line is printed depending on its result. The experiment is
    located inside update_experiment_profile, so the former args.port
    assignment and its duplicated success/failure branch are removed.
    '''
    validate_digit(args.value, 1, 999999999)
    if update_experiment_profile(args, 'maxTrialNum', int(args.value)):
        print('INFO: update %s success!' % 'trialnum')
    else:
        print('ERROR: update %s failed!' % 'trialnum')
......@@ -22,12 +22,12 @@ import psutil
from socket import AddressFamily
from .config_utils import Config
def get_web_ui_urls(port, CONFIG_FILE_NAME):
    '''Build the Web UI URLs for port and store them in the experiment config.

    One 'http://<address>:<port>' URL is produced for every AF_INET address
    reported by psutil.net_if_addrs(). The list is saved under the 'webuiUrl'
    key of the config opened with CONFIG_FILE_NAME and is also returned.
    '''
    # Interface names are not needed, so iterate over the address lists only.
    webui_url_list = [
        'http://{}:{}'.format(addr.address, port)
        for info in psutil.net_if_addrs().values()
        for addr in info
        if AddressFamily.AF_INET == addr.family
    ]
    nni_config = Config(CONFIG_FILE_NAME)
    nni_config.set_config('webuiUrl', webui_url_list)
    return webui_url_list
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment