Unverified Commit 16dcfebb authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Refactor nnictl to support list multiple experiment (#207)

1.fix some bugs
2.support nnictl stop id, and add some regulars
parent 3833d986
dist: xenial
sudo: required
language: python
python:
- "3.6"
before_install:
- wget https://nodejs.org/dist/v10.12.0/node-v10.12.0-linux-x64.tar.xz
- tar xf node-v10.12.0-linux-x64.tar.xz
- sudo mv node-v10.12.0-linux-x64 /usr/local/node
- export PATH=/usr/local/node/bin:$PATH
install:
- make
- make easy-install
- export PATH=$HOME/.nni/bin:$PATH
before_script:
- cd test/naive
script:
- python3 run.py
......@@ -4,7 +4,7 @@
[![Issues](https://img.shields.io/github/issues-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen)
[![Bugs](https://img.shields.io/github/issues/Microsoft/nni/bug.svg)](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3Abug)
[![Pull Requests](https://img.shields.io/github/issues-pr-raw/Microsoft/nni.svg)](https://github.com/Microsoft/nni/pulls?q=is%3Apr+is%3Aopen)
[![Version](https://img.shields.io/github/tag/Microsoft/nni.svg)]()
[![Version](https://img.shields.io/github/release/Microsoft/nni.svg)](https://github.com/Microsoft/nni/releases)
NNI (Neural Network Intelligence) is a toolkit to help users run automated machine learning experiments.
The tool dispatches and runs trial jobs generated by tuning algorithms to search for the best neural architecture and/or hyper-parameters in different environments (e.g. local machine, remote servers and cloud).
......
......@@ -2,6 +2,8 @@ Dockerfile
===
## 1.Description
This is the Dockerfile of the NNI project. It includes most kinds of deep learning frameworks and the NNI source code, so you can run your NNI experiment in this Docker container directly.
Dockerfile.build.base builds the base Docker image; after building it, users get a Docker image with Ubuntu and the NNI environment.
Dockerfile builds a customized Docker image; users can use this file to build their own customized Docker image.
## 2.Including Libraries
```
......
......@@ -82,4 +82,4 @@ if __name__ == '__main__':
traceback.print_exc()
raise error
subprocess.run(['nnictl', 'stop', '--port', '51188'])
subprocess.run(['nnictl', 'stop'])
......@@ -21,6 +21,7 @@
import json
import yaml
import psutil
import socket
from .constants import ERROR_INFO, NORMAL_INFO, WARNING_INFO, COLOR_RED_FORMAT, COLOR_YELLOW_FORMAT
def get_yml_content(file_path):
......@@ -60,3 +61,13 @@ def detect_process(pid):
return process.is_running()
except:
return False
def detect_port(port):
    '''Detect if the port is used.

    Tries to open a TCP connection to 127.0.0.1 on the given port.

    port: the port number, as an int or anything int() accepts.

    Returns True when the connection succeeds (something is listening on
    the port), False when the connection fails or the port is not a valid
    port number.
    '''
    try:
        port_number = int(port)
    except (TypeError, ValueError):
        return False
    # The context manager closes the probe socket on every path, so a
    # failed connect no longer leaks a file descriptor.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as socket_test:
        try:
            socket_test.connect(('127.0.0.1', port_number))
            socket_test.shutdown(socket.SHUT_RDWR)
        except (OSError, OverflowError):
            # OSError: nothing accepted the connection;
            # OverflowError: port outside the 0-65535 range.
            return False
    return True
......@@ -22,12 +22,12 @@
import os
import json
import shutil
from .constants import HOME_DIR
from .constants import NNICTL_HOME_DIR
class Config:
'''a util class to load and save config'''
def __init__(self, port):
config_path = os.path.join(HOME_DIR, str(port))
config_path = os.path.join(NNICTL_HOME_DIR, str(port))
os.makedirs(config_path, exist_ok=True)
self.config_file = os.path.join(config_path, '.config')
self.config = self.read_file()
......@@ -46,12 +46,6 @@ class Config:
'''get a value according to key'''
return self.config.get(key)
def copy_metadata_to_new_path(self, path):
'''copy metadata to a new path'''
if not os.path.exists(path):
os.mkdir(path)
shutil.copy(self.config_file, path)
def write_file(self):
'''save config to local file'''
if self.config:
......@@ -71,3 +65,44 @@ class Config:
except ValueError:
return {}
return {}
class Experiments:
    '''Maintain experiment list.

    The list is a dict mapping an experiment id to [port, start time]; it is
    persisted as JSON in the '.experiment' file under NNICTL_HOME_DIR.
    '''

    def __init__(self):
        os.makedirs(NNICTL_HOME_DIR, exist_ok=True)
        self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment')
        self.experiments = self.read_file()

    def add_experiment(self, id, port, time):
        '''Store the {id: [port, time]} pair in self.experiments and save it.'''
        self.experiments[id] = [port, time]
        self.write_file()

    def remove_experiment(self, id):
        '''Remove an experiment by id (unknown ids are ignored) and save.'''
        self.experiments.pop(id, None)
        self.write_file()

    def get_all_experiments(self):
        '''Return the dict of all experiments.'''
        return self.experiments

    def write_file(self):
        '''Save the experiment dict to the local file.

        An I/O failure is reported on stdout instead of being raised.
        '''
        try:
            with open(self.experiment_file, 'w') as file:
                json.dump(self.experiments, file)
        except IOError as error:
            print('Error:', error)

    def read_file(self):
        '''Load the experiment dict from the local file.

        Returns an empty dict when the file is missing, cannot be read, is
        not valid JSON, or does not contain a JSON object.
        '''
        try:
            with open(self.experiment_file, 'r') as file:
                content = json.load(file)
        except (IOError, ValueError):
            return {}
        # Every other method treats the content as a dict, so anything else
        # (e.g. a list or null left by a corrupted write) is discarded.
        return content if isinstance(content, dict) else {}
\ No newline at end of file
......@@ -20,7 +20,7 @@
import os
HOME_DIR = os.path.join(os.environ['HOME'], '.local', 'nni', 'nnictl')
NNICTL_HOME_DIR = os.path.join(os.environ['HOME'], '.local', 'nni', 'nnictl')
ERROR_INFO = 'ERROR: %s'
......@@ -28,12 +28,12 @@ NORMAL_INFO = 'INFO: %s'
WARNING_INFO = 'WARNING: %s'
DEFAULT_REST_PORT = 51188
DEFAULT_REST_PORT = 8080
EXPERIMENT_SUCCESS_INFO = '\033[1;32;32mSuccessfully started experiment!\n\033[0m' \
'-----------------------------------------------------------------------\n' \
'The experiment id is %s\n'\
'The restful server post is %s\n' \
'The Web UI urls are: %s\n' \
'-----------------------------------------------------------------------\n\n' \
'You can use these commands to get more information about the experiment\n' \
'-----------------------------------------------------------------------\n' \
......@@ -42,11 +42,23 @@ EXPERIMENT_SUCCESS_INFO = '\033[1;32;32mSuccessfully started experiment!\n\033[0
'2. nnictl trial ls list all of trial jobs\n' \
'3. nnictl log stderr show stderr log content\n' \
'4. nnictl log stdout show stdout log content\n' \
'5. nnictl stop stop a experiment\n' \
'5. nnictl stop stop an experiment\n' \
'6. nnictl trial kill kill a trial job by id\n' \
'7. nnictl --help get help information about nnictl\n' \
'-----------------------------------------------------------------------\n' \
LOG_HEADER = '-----------------------------------------------------------------------\n' \
' Experiment start time %s\n' \
'-----------------------------------------------------------------------\n'
EXPERIMENT_START_FAILED_INFO = 'There is an experiment running in the port %d, please stop it first or set another port!\n' \
'You could use \'nnictl stop --port [PORT]\' command to stop an experiment!\nOr you could use \'nnictl create --config [CONFIG_PATH] --port [PORT]\' to set port!\n'
EXPERIMENT_ID_INFO = '-----------------------------------------------------------------------\n' \
' Experiment information\n' \
'%s\n' \
'-----------------------------------------------------------------------\n'
PACKAGE_REQUIREMENTS = {
'SMAC': 'smac_tuner'
}
......
......@@ -29,9 +29,11 @@ from nni_annotation import *
from .launcher_utils import validate_all_content
from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick, check_response
from .url_utils import cluster_metadata_url, experiment_url
from .config_utils import Config
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process
from .config_utils import Config, Experiments
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process, detect_port
from .constants import *
from .webui_utils import *
import time
def start_rest_server(port, platform, mode, experiment_id=None):
'''Run nni manager process'''
......@@ -40,21 +42,29 @@ def start_rest_server(port, platform, mode, experiment_id=None):
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
if rest_port and running:
print_error('There is an experiment running, please stop it first...')
print_normal('You can use \'nnictl stop\' command to stop an experiment!')
exit(0)
print_error(EXPERIMENT_START_FAILED_INFO % port)
exit(1)
if detect_port(port):
print_error('Port %s is used by another process, please reset the port!' % port)
exit(1)
print_normal('Starting restful server...')
manager = os.environ.get('NNI_MANAGER', 'nnimanager')
cmds = [manager, '--port', str(port), '--mode', platform, '--start_mode', mode]
if mode == 'resume':
cmds += ['--experiment_id', experiment_id]
stdout_full_path = os.path.join(HOME_DIR, str(port), 'stdout')
stderr_full_path = os.path.join(HOME_DIR, str(port), 'stderr')
stdout_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stdout')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
stdout_file = open(stdout_full_path, 'a+')
stderr_file = open(stderr_full_path, 'a+')
time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
#add time information in the header of log files
log_header = LOG_HEADER % str(time_now)
stdout_file.write(log_header)
stderr_file.write(log_header)
process = Popen(cmds, stdout=stdout_file, stderr=stderr_file)
return process
return process, str(time_now)
def set_trial_config(experiment_config, port):
'''set trial configuration'''
......@@ -79,7 +89,7 @@ def set_trial_config(experiment_config, port):
return True
else:
print('Error message is {}'.format(response.text))
stderr_full_path = os.path.join(HOME_DIR, str(port), 'stderr')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':')))
return False
......@@ -98,7 +108,7 @@ def set_remote_config(experiment_config, port):
if not response or not check_response(response):
if response is not None:
err_message = response.text
stderr_full_path = os.path.join(HOME_DIR, str(port), 'stderr')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message
......@@ -115,7 +125,8 @@ def set_pai_config(experiment_config, port):
if not response or not response.status_code == 200:
if response is not None:
err_message = response.text
with open(STDERR_FULL_PATH, 'a+') as fout:
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message
......@@ -180,7 +191,7 @@ def set_experiment(experiment_config, mode, port):
if check_response(response):
return response
else:
stderr_full_path = os.path.join(HOME_DIR, str(port), 'stderr')
stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':')))
print_error('Setting experiment error, error message is {}'.format(response.text))
......@@ -189,14 +200,8 @@ def set_experiment(experiment_config, mode, port):
def launch_experiment(args, experiment_config, mode, experiment_id=None):
'''follow steps to start rest server and start experiment'''
nni_config = Config(args.port)
#Check if there is an experiment running
origin_rest_pid = nni_config.get_config('restServerPid')
if origin_rest_pid and detect_process(origin_rest_pid):
print_error('There is an experiment running, please stop it first...')
print_normal('You can use \'nnictl stop\' command to stop an experiment!')
exit(1)
# start rest server
rest_process = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, experiment_id)
rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, experiment_id)
nni_config.set_config('restServerPid', rest_process.pid)
# Deal with annotation
if experiment_config.get('useAnnotation'):
......@@ -233,7 +238,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None):
print_normal('Setting remote config...')
config_result, err_msg = set_remote_config(experiment_config, args.port)
if config_result:
print_normal('Success!')
print_normal('Successfully set remote config!')
else:
print_error('Failed! Error is: {}'.format(err_msg))
try:
......@@ -288,7 +293,13 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None):
except Exception:
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(1)
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, args.port))
web_ui_url_list = get_web_ui_urls(args.port)
#save experiment information
experiment_config = Experiments()
experiment_config.add_experiment(experiment_id, args.port, start_time)
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list)))
def resume_experiment(args):
'''resume an experiment'''
......
......@@ -55,21 +55,21 @@ def parse_args():
#add subparsers for parser_updater
parser_updater_subparsers = parser_updater.add_subparsers()
parser_updater_searchspace = parser_updater_subparsers.add_parser('searchspace', help='update searchspace')
parser_updater_searchspace.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_updater_searchspace.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_updater_searchspace.add_argument('--filename', '-f', required=True)
parser_updater_searchspace.set_defaults(func=update_searchspace)
parser_updater_concurrency = parser_updater_subparsers.add_parser('concurrency', help='update concurrency')
parser_updater_concurrency.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_updater_concurrency.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_updater_concurrency.add_argument('--value', '-v', required=True)
parser_updater_concurrency.set_defaults(func=update_concurrency)
parser_updater_duration = parser_updater_subparsers.add_parser('duration', help='update duration')
parser_updater_duration.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_updater_duration.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_updater_duration.add_argument('--value', '-v', required=True)
parser_updater_duration.set_defaults(func=update_duration)
#parse stop command
parser_stop = subparsers.add_parser('stop', help='stop the experiment')
parser_stop.add_argument('--port', '-p', required=True, dest='port', help='the port of restful server')
parser_stop.add_argument('id', nargs='?', help='the id of experiment')
parser_stop.set_defaults(func=stop_experiment)
#parse trial command
......@@ -77,10 +77,10 @@ def parse_args():
#add subparsers for parser_trial
parser_trial_subparsers = parser_trial.add_subparsers()
parser_trial_ls = parser_trial_subparsers.add_parser('ls', help='list trial jobs')
parser_trial_ls.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_trial_ls.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_trial_ls.set_defaults(func=trial_ls)
parser_trial_kill = parser_trial_subparsers.add_parser('kill', help='kill trial jobs')
parser_trial_kill.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_trial_kill.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_trial_kill.add_argument('--trialid', '-t', required=True, dest='trialid', help='the id of trial to be killed')
parser_trial_kill.set_defaults(func=trial_kill)
......@@ -89,17 +89,28 @@ def parse_args():
#add subparsers for parser_experiment
parser_experiment_subparsers = parser_experiment.add_subparsers()
parser_experiment_show = parser_experiment_subparsers.add_parser('show', help='show the information of experiment')
parser_experiment_show.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_experiment_show.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_experiment_show.set_defaults(func=list_experiment)
parser_experiment_status = parser_experiment_subparsers.add_parser('status', help='show the status of experiment')
parser_experiment_status.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_experiment_status.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_experiment_status.set_defaults(func=experiment_status)
parser_experiment_list = parser_experiment_subparsers.add_parser('list', help='list all of running experiment ids')
parser_experiment_list.set_defaults(func=experiment_id)
#TODO:finish webui function
#parse board command
parser_webui = subparsers.add_parser('webui', help='get web ui information')
#add subparsers for parser_board
parser_webui_subparsers = parser_webui.add_subparsers()
parser_webui_url = parser_webui_subparsers.add_parser('url', help='show the url of web ui')
parser_webui_url.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_webui_url.set_defaults(func=webui_url)
#parse config command
parser_config = subparsers.add_parser('config', help='get config information')
parser_config_subparsers = parser_config.add_subparsers()
parser_config_show = parser_config_subparsers.add_parser('show', help='show the information of config')
parser_config_show.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_config_show.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_config_show.set_defaults(func=get_config)
#parse log command
......@@ -107,23 +118,22 @@ def parse_args():
# add subparsers for parser_log
parser_log_subparsers = parser_log.add_subparsers()
parser_log_stdout = parser_log_subparsers.add_parser('stdout', help='get stdout information')
parser_log_stdout.add_argument('--port', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_log_stdout.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_log_stdout.add_argument('--tail', '-T', dest='tail', type=int, help='get tail -100 content of stdout')
parser_log_stdout.add_argument('--head', '-H', dest='head', type=int, help='get head -100 content of stdout')
parser_log_stdout.add_argument('--path', action='store_true', default=False, help='get the path of stdout file')
parser_log_stdout.set_defaults(func=log_stdout)
parser_log_stderr = parser_log_subparsers.add_parser('stderr', help='get stderr information')
parser_log_stderr.add_argument('--port', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_log_stderr.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_log_stderr.add_argument('--tail', '-T', dest='tail', type=int, help='get tail -100 content of stderr')
parser_log_stderr.add_argument('--head', '-H', dest='head', type=int, help='get head -100 content of stderr')
parser_log_stderr.add_argument('--path', action='store_true', default=False, help='get the path of stderr file')
parser_log_stderr.set_defaults(func=log_stderr)
parser_log_trial = parser_log_subparsers.add_parser('trial', help='get trial log path')
parser_log_trial.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_log_trial.add_argument('--id', '-I', dest='id', help='find trial log path by id')
parser_log_trial.add_argument('--id', '-i', dest='id', help='the id of experiment')
parser_log_trial.add_argument('--trialid', '-T', dest='trialid', help='find trial log path by id')
parser_log_trial.set_defaults(func=log_trial)
#parse package command
parser_package = subparsers.add_parser('package', help='control nni tuner and assessor packages')
# add subparsers for parser_package
......@@ -134,7 +144,6 @@ def parse_args():
parser_package_show = parser_package_subparsers.add_parser('show', help='show the information of packages')
parser_package_show.set_defaults(func=package_show)
args = parser.parse_args()
args.func(args)
......
......@@ -24,12 +24,41 @@ import json
import datetime
from subprocess import call, check_output
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
from .config_utils import Config
from .config_utils import Config, Experiments
from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url
from .constants import HOME_DIR
from .constants import NNICTL_HOME_DIR, EXPERIMENT_ID_INFO
import time
from .common_utils import print_normal, print_error, detect_process
def get_experiment_port(args):
    '''Get the port of an experiment.

    args: parsed command line arguments; args.id is the experiment id
    (may be None or empty).

    Selection rules:
    1. If no experiment is recorded, report it and return None.
    2. If no id is given and exactly one experiment is recorded, return
       the port of that experiment.
    3. If no id is given and several experiments are recorded, print the
       id and start time of each of them and return None.
    4. If an id is given, return the port recorded for that exact id, or
       report an error and return None when the id is unknown.
    '''
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    if not experiment_dict:
        print_normal('Experiment is not running...')
        return None
    if not args.id:
        if len(experiment_dict) > 1:
            print_error('There are multiple experiments running, please set the experiment id...')
            experiment_information = ''.join(
                'Id: ' + key + ' StartTime: ' + value[1] + '\n'
                for key, value in experiment_dict.items())
            print(EXPERIMENT_ID_INFO % experiment_information)
            return None
        return list(experiment_dict.values())[0][0]
    if experiment_dict.get(args.id):
        # Each entry is [port, start time]; callers expect only the port.
        return experiment_dict[args.id][0]
    print_error('Id not correct!')
    return None
def convert_time_stamp_to_date(content):
'''Convert time stamp to date time format'''
start_time_stamp = content.get('startTime')
......@@ -44,7 +73,10 @@ def convert_time_stamp_to_date(content):
def check_rest(args):
'''check if restful server is running'''
nni_config = Config(args.port)
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
if not running:
......@@ -52,33 +84,84 @@ def check_rest(args):
else:
print_normal('Restful server is not running...')
def parse_ids(args):
    '''Resolve the id argument of 'nnictl stop' to a list of experiment ids.

    Resolution of args.id against the recorded experiments:
    - no id: the single recorded experiment; with several recorded, their
      ids and start times are printed and None is returned
    - 'all': every recorded experiment
    - an id ending with '*': every id starting with the text before the '*'
    - an exact id: that id
    - anything else: treated as a prefix; more than one match is reported
      as ambiguous and None is returned

    Returns None when nothing is recorded or in the error cases above;
    otherwise the list of matched ids, which is empty (after an error
    message) when nothing matched.
    '''
    registry = Experiments().get_all_experiments()
    if not registry:
        print_normal('Experiment is not running...')
        return None
    known_ids = list(registry.keys())
    requested = args.id
    if not requested:
        if len(known_ids) > 1:
            print_error('There are multiple experiments running, please set the experiment id...')
            summary = ''.join(
                'Id: ' + key + ' StartTime: ' + registry[key][1] + '\n'
                for key in registry)
            print(EXPERIMENT_ID_INFO % summary)
            return None
        matched = known_ids
    elif requested == 'all':
        matched = known_ids
    elif requested.endswith('*'):
        prefix = requested[:-1]
        matched = [known for known in known_ids if known.startswith(prefix)]
    elif requested in known_ids:
        matched = [requested]
    else:
        matched = [known for known in known_ids if known.startswith(requested)]
        if len(matched) > 1:
            print_error(requested + ' is ambiguous, please choose ' + ' '.join(matched) )
            return None
    if not matched:
        print_error('There are no experiments matched, please check experiment id...')
    return matched
def stop_experiment(args):
    '''Stop the experiment(s) selected by args.id.

    The ids to stop are resolved by parse_ids(args). For each of them the
    rest server is asked to stop the experiment, the child processes of
    the rest server process are killed, and the experiment is removed from
    the experiment list.
    '''
    experiment_id_list = parse_ids(args)
    if not experiment_id_list:
        return
    experiment_dict = Experiments().get_all_experiments()
    for experiment_id in experiment_id_list:
        experiment_entry = experiment_dict.get(experiment_id)
        if not experiment_entry:
            # The entry vanished between parse_ids and this lookup;
            # there is nothing left to stop for this id.
            continue
        port = experiment_entry[0]
        print_normal('Stoping experiment %s' % experiment_id)
        nni_config = Config(port)
        rest_port = nni_config.get_config('restServerPort')
        rest_pid = nni_config.get_config('restServerPid')
        if not detect_process(rest_pid):
            print_normal('Experiment is not running...')
            # Drop the stale entry and keep going, so one dead experiment
            # neither blocks stopping the others nor stays listed forever.
            Experiments().remove_experiment(experiment_id)
            continue
        running, _ = check_rest_server_quick(rest_port)
        stop_rest_result = True
        if running:
            response = rest_delete(experiment_url(rest_port), 20)
            if not response or not check_response(response):
                print_error('Stop experiment failed!')
                stop_rest_result = False
        #sleep to wait rest handler done
        time.sleep(3)
        if rest_pid:
            cmds = ['pkill', '-P', str(rest_pid)]
            call(cmds)
        if stop_rest_result:
            print_normal('Stop experiment success!')
        # Re-read the list before removing so entries added meanwhile by
        # another nnictl process are not overwritten.
        Experiments().remove_experiment(experiment_id)
def trial_ls(args):
'''List trial'''
nni_config = Config(args.port)
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -99,7 +182,10 @@ def trial_ls(args):
def trial_kill(args):
'''List trial'''
nni_config = Config(args.port)
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -117,7 +203,10 @@ def trial_kill(args):
def list_experiment(args):
'''Get experiment information'''
nni_config = Config(args.port)
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -136,7 +225,10 @@ def list_experiment(args):
def experiment_status(args):
'''Show the status of experiment'''
nni_config = Config(args.port)
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
result, response = check_rest_server_quick(rest_port)
if not result:
......@@ -154,10 +246,13 @@ def get_log_content(file_name, cmds):
def log_internal(args, filetype):
'''internal function to call get_log_content'''
port = get_experiment_port(args)
if port is None:
return None
if filetype == 'stdout':
file_full_path = os.path.join(HOME_DIR, args.port, 'stdout')
file_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stdout')
else:
file_full_path = os.path.join(HOME_DIR, args.port, 'stderr')
file_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr')
if args.head:
get_log_content(file_full_path, ['head', '-' + str(args.head), file_full_path])
elif args.tail:
......@@ -178,7 +273,10 @@ def log_stderr(args):
def log_trial(args):
''''get trial log path'''
trial_id_path_dict = {}
nni_config = Config(args.port)
port = get_experiment_port(args)
if port is None:
return None
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -206,5 +304,28 @@ def log_trial(args):
def get_config(args):
    '''Print the stored nnictl config of the experiment selected by args.id.

    Returns None in every case; nothing is printed here when
    get_experiment_port cannot resolve a port.
    '''
    port = get_experiment_port(args)
    if port is None:
        return None
    # Only the resolved port is used: building Config from args.port first
    # was overwritten right away and only created a config directory.
    nni_config = Config(port)
    print(nni_config.get_all_config())
def webui_url(args):
    '''Show the url of web ui for the experiment selected by args.id.

    Returns None in every case; nothing is printed here when
    get_experiment_port cannot resolve a port.
    '''
    port = get_experiment_port(args)
    if port is None:
        return None
    nni_config = Config(port)
    # get_config returns None when 'webuiUrl' was never stored; fall back
    # to an empty list so join() does not raise TypeError.
    url_list = nni_config.get_config('webuiUrl') or []
    print_normal('{0} {1}'.format('Web UI url:', ' '.join(url_list)))
def experiment_id(args):
    '''Print the id and start time of every recorded experiment.'''
    registry = Experiments().get_all_experiments()
    if registry:
        rows = ['Id: ' + key + ' StartTime: ' + registry[key][1] + '\n' for key in registry]
        print(EXPERIMENT_ID_INFO % ''.join(rows))
    else:
        print('There is no experiment running...')
\ No newline at end of file
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import psutil
from socket import AddressFamily
from .config_utils import Config
def get_web_ui_urls(port):
    '''Build the web UI urls for the given port.

    One 'http://<address>:<port>' url is built for every AF_INET address
    reported by psutil.net_if_addrs(). The list is passed to
    Config(port).set_config under the 'webuiUrl' key and then returned.
    '''
    webui_url_list = [
        'http://{}:{}'.format(address.address, port)
        for addresses in psutil.net_if_addrs().values()
        for address in addresses
        if address.family == AddressFamily.AF_INET
    ]
    Config(port).set_config('webuiUrl', webui_url_list)
    return webui_url_list
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment