Unverified Commit 0da67d7b authored by goooxu's avatar goooxu Committed by GitHub
Browse files

Support multiple experiments (#194)

* fix nnictl bug

* fix nnictl create bug

* add experiment status logic

* add more information for nnictl

* fix Evolution Tuner bug

* refactor code

* fix code in updater.py

* fix nnictl --help

* fix classArgs bug

* update check response.status_code logic

* show trial log path

* update document

* fix install.sh

* set default value for maxTrialNum and maxExecDuration

* fix nnictl

* fix config path hint

* support multiPhase

* fix bash-completion

* refactor bash-completion

* add sklearn-regression

* add search_space

* fix bug

* fix install.sh

* refactor code

* remove unused code

* support multi experiments

* fix issues

* Support multiple experiments of nnictl (#183)

* fix nnictl bug

* fix nnictl create bug

* add experiment status logic

* add more information for nnictl

* fix Evolution Tuner bug

* refactor code

* fix code in updater.py

* fix nnictl --help

* fix classArgs bug

* update check response.status_code logic

* show trial log path

* update document

* fix install.sh

* set default value for maxTrialNum and maxExecDuration

* fix nnictl

* fix config path hint

* support multiPhase

* fix bash-completion

* refactor bash-completion

* add sklearn-regression

* add search_space

* fix bug

* fix install.sh

* refactor code

* remove unused code

* support multi experiments

* fix issues

* Let nni manager web server handle static content

* set nnictl stop require the port

* Support multiple experiments of nnictl (#183)

* fix nnictl bug

* fix nnictl create bug

* add experiment status logic

* add more information for nnictl

* fix Evolution Tuner bug

* refactor code

* fix code in updater.py

* fix nnictl --help

* fix classArgs bug

* update check response.status_code logic

* show trial log path

* update document

* fix install.sh

* set default value for maxTrialNum and maxExecDuration

* fix nnictl

* fix config path hint

* support multiPhase

* fix bash-completion

* refactor bash-completion

* add sklearn-regression

* add search_space

* fix bug

* fix install.sh

* refactor code

* remove unused code

* support multi experiments

* fix issues

* Let nni manager web server handle static content

* Dev multiple experiments (#189)

* fix nnictl bug

* fix nnictl create bug

* add experiment status logic

* add more information for nnictl

* fix Evolution Tuner bug

* refactor code

* fix code in updater.py

* fix nnictl --help

* fix classArgs bug

* update check response.status_code logic

* show trial log path

* update document

* fix install.sh

* set default value for maxTrialNum and maxExecDuration

* fix nnictl

* fix config path hint

* support multiPhase

* fix bash-completion

* refactor bash-completion

* add sklearn-regression

* add search_space

* fix bug

* fix install.sh

* refactor code

* remove unused code

* support multi experiments

* fix issues

* set nnictl stop require the port

* Update documents for supporting multiple experiments

* create a constant variable for 51188

* Fixed issue that WebUI can not refresh page

* Upgrade Node.js and Yarn to latest version
parent c0d13c76
......@@ -4,11 +4,10 @@ language: python
python:
- "3.6"
before_install:
- wget https://nodejs.org/dist/v10.10.0/node-v10.10.0-linux-x64.tar.xz
- tar xf node-v10.10.0-linux-x64.tar.xz
- sudo mv node-v10.10.0-linux-x64 /usr/local/node
- wget https://nodejs.org/dist/v10.12.0/node-v10.12.0-linux-x64.tar.xz
- tar xf node-v10.12.0-linux-x64.tar.xz
- sudo mv node-v10.12.0-linux-x64 /usr/local/node
- export PATH=/usr/local/node/bin:$PATH
- sudo sh -c 'PATH=/usr/local/node/bin:$PATH yarn global add serve'
install:
- make
- make easy-install
......
......@@ -28,19 +28,14 @@ else # is normal user
endif
## Dependency information
NODE_VERSION ?= v10.10.0
NODE_VERSION ?= v10.12.0
NODE_TARBALL ?= node-$(NODE_VERSION)-linux-x64.tar.xz
NODE_PATH ?= $(INSTALL_PREFIX)/nni/node
YARN_VERSION ?= v1.9.4
YARN_VERSION ?= v1.10.1
YARN_TARBALL ?= yarn-$(YARN_VERSION).tar.gz
YARN_PATH ?= /tmp/nni-yarn
SERVE_VERSION ?= 10.0.1
SERVE_TARBALL ?= serve-$(SERVE_VERSION).tgz
SERVE_PATH ?= $(INSTALL_PREFIX)/nni/serve
## Check if dependencies have been installed globally
ifeq (, $(shell command -v node 2>/dev/null))
$(info $(_INFO) Node.js not found $(_END))
......@@ -57,21 +52,15 @@ ifeq (, $(shell command -v yarnpkg 2>/dev/null))
$(info $(_INFO) Yarn not found $(_END))
_MISS_DEPS := 1 # yarn not found
endif
ifeq (, $(shell command -v serve 2>/dev/null))
$(info $(_INFO) Serve not found $(_END))
_MISS_DEPS := 1 # serve not found
endif
ifdef _MISS_DEPS
$(info $(_INFO) Missing dependencies, use local toolchain $(_END))
NODE := $(NODE_PATH)/bin/node
YARN := PATH=$${PATH}:$(NODE_PATH)/bin $(YARN_PATH)/bin/yarn
SERVE := $(SERVE_PATH)/serve
else
$(info $(_INFO) All dependencies found, use global toolchain $(_END))
NODE := node
YARN := yarnpkg
SERVE := serve
endif
......@@ -85,7 +74,7 @@ build:
#$(_INFO) Building NNI Manager $(_END)
cd src/nni_manager && $(YARN) && $(YARN) build
#$(_INFO) Building Web UI $(_END)
#$(_INFO) Building WebUI $(_END)
cd src/webui && $(YARN) && $(YARN) build
#$(_INFO) Building Python SDK $(_END)
......@@ -169,19 +158,13 @@ $(YARN_TARBALL):
#$(_INFO) Downloading Yarn $(_END)
wget https://github.com/yarnpkg/yarn/releases/download/$(YARN_VERSION)/$(YARN_TARBALL)
$(SERVE_TARBALL):
#$(_INFO) Downloading serve $(_END)
wget https://registry.npmjs.org/serve/-/$(SERVE_TARBALL)
.PHONY: intall-dependencies
install-dependencies: $(NODE_TARBALL) $(YARN_TARBALL) $(SERVE_TARBALL)
install-dependencies: $(NODE_TARBALL) $(YARN_TARBALL)
#$(_INFO) Cleaning $(_END)
rm -rf $(NODE_PATH)
rm -rf $(YARN_PATH)
rm -rf $(SERVE_PATH)
mkdir -p $(NODE_PATH)
mkdir -p $(YARN_PATH)
mkdir -p $(SERVE_PATH)
#$(_INFO) Extracting Node.js $(_END)
tar -xf $(NODE_TARBALL)
......@@ -191,15 +174,6 @@ install-dependencies: $(NODE_TARBALL) $(YARN_TARBALL) $(SERVE_TARBALL)
tar -xf $(YARN_TARBALL)
mv -fT yarn-$(YARN_VERSION) $(YARN_PATH)
#$(_INFO) Installing serve $(_END)
PATH=$${PATH}:$(NODE_PATH)/bin npm install --prefix $(SERVE_PATH) $(SERVE_TARBALL)
#$(_INFO) Creating serve executable script $(_END)
echo '#!/bin/sh' > $(SERVE_PATH)/serve
echo '$(NODE) $(SERVE_PATH)/node_modules/serve/bin/serve.js $$@' >> $(SERVE_PATH)/serve
chmod +x $(SERVE_PATH)/serve
.PHONY: install-python-modules
install-python-modules:
#$(_INFO) Installing Python SDK $(_END)
......@@ -217,8 +191,8 @@ install-node-modules:
cp -rT src/nni_manager/dist $(INSTALL_PREFIX)/nni/nni_manager
cp -rT src/nni_manager/node_modules $(INSTALL_PREFIX)/nni/nni_manager/node_modules
#$(_INFO) Installing Web UI $(_END)
cp -rT src/webui/build $(INSTALL_PREFIX)/nni/webui
#$(_INFO) Installing WebUI $(_END)
cp -rT src/webui/build $(INSTALL_PREFIX)/nni/nni_manager/static
.PHONY: install-dev-modules
......@@ -235,8 +209,8 @@ install-dev-modules:
ln -sf ${PWD}/src/nni_manager/dist $(INSTALL_PREFIX)/nni/nni_manager
ln -sf ${PWD}/src/nni_manager/node_modules $(INSTALL_PREFIX)/nni/nni_manager/node_modules
#$(_INFO) Installing Web UI $(_END)
ln -sf ${PWD}/src/webui/build $(INSTALL_PREFIX)/nni/webui
#$(_INFO) Installing WebUI $(_END)
ln -sf ${PWD}/src/webui/build $(INSTALL_PREFIX)/nni/nni_manager/static
.PHONY: install-scripts
......@@ -250,8 +224,6 @@ install-scripts:
echo '#!/bin/sh' > $(BIN_PATH)/nnictl
echo 'NNI_MANAGER=$(BIN_PATH)/nnimanager \' >> $(BIN_PATH)/nnictl
echo 'NNI_SERVE=$(SERVE) \' >> $(BIN_PATH)/nnictl
echo 'WEB_UI_FOLDER=$(INSTALL_PREFIX)/nni/webui \' >> $(BIN_PATH)/nnictl
echo 'python3 -m nnicmd.nnictl $$@' >> $(BIN_PATH)/nnictl
chmod +x $(BIN_PATH)/nnictl
......@@ -300,7 +272,7 @@ ifdef _ROOT
$(error You should not develop NNI as root)
endif
ifdef _MISS_DEPS
# $(error Please install Node.js, Yarn, and Serve to develop NNI)
# $(error Please install Node.js and Yarn to develop NNI)
endif
#$(_INFO) Pass! $(_END)
......
......@@ -15,14 +15,14 @@ The targets will be detailed later.
NNI requires at least Node.js, Yarn, and PIP to build, while TypeScript is also recommended.
NNI requires Node.js, serve, and all dependency libraries to run.
NNI requires Node.js and all dependency libraries to run.
Required Node.js libraries (including TypeScript) can be installed by Yarn, and required Python libraries can be installed by setuptools or PIP.
For NNI *users*, `make install-dependencies` can be used to install Node.js, Yarn, and serve.
This will install Node.js and serve to NNI's installation directory, and install Yarn to `/tmp/nni-yarn`.
For NNI *users*, `make install-dependencies` can be used to install Node.js and Yarn.
This will install Node.js to NNI's installation directory, and install Yarn to `/tmp/nni-yarn`.
This target requires wget to work.
For NNI *developers*, it is recommended to install Node.js, Yarn, and serve manually.
For NNI *developers*, it is recommended to install Node.js and Yarn manually.
See their official sites for installation guide.
## Building NNI
......@@ -42,7 +42,7 @@ When install as non-priviledged user and virtualenv is not detected, `--user` fl
In addition, `nnictl` offers a bash completion scripts, which will be installed to `/usr/share/bash-completion/completions` or `~/.bash_completion.d`.
In some configurations, NNI will also install Node.js and the serve module to `/usr/share/nni`.
In some configurations, NNI will also install Node.js to `/usr/share/nni`.
All directories mentioned above are configurable. See next section for details.
......@@ -60,10 +60,8 @@ Available variables are listed below:
| `BASH_COMP_SCRIPT` | Path of bash completion script | `~/.bash_completion.d/nnictl` | `/usr/share/bash-completion/completions/nnictl` |
| `PIP_MODE` | Arguments for `python3 setup.py install` | `--user` if `VIRTUAL_ENV` not set | (empty) |
| `NODE_PATH` | Path to install Node.js runtime | `$INSTALL_PREFIX/nni/node` | `$INSTALL_PREFIX/nni/node` |
| `SERVE_PATH` | Path to install serve package | `$INSTALL_PREFIX/nni/serve` | `$INSTALL_PREFIX/nni/serve` |
| `YARN_PATH` | Path to install Yarn | `/tmp/nni-yarn` | `/tmp/nni-yarn` |
| `NODE` | Node.js command | see source file | see source file |
| `SERVE` | serve command | see source file | see source file |
| `YARN` | Yarn command | see source file | see source file |
Note that these variables will influence installation destination as well as generated `nnictl` and `nnimanager` scripts.
......
......@@ -42,9 +42,9 @@ Try it out:
nnictl create --config ~/nni/examples/trials/mnist/config.yml
```
In the command output, find out the **Web UI url** and open it in your browser. You can analyze your experiment through WebUI, or browse trials' tensorboard.
In the command output, find out the **WebUI url** and open it in your browser. You can analyze your experiment through WebUI, or browse trials' tensorboard.
To learn more about how this example was constructed and how to analyze the experiment results in NNI Web UI, please refer to [How to write a trial run on NNI (MNIST as an example)?](docs/WriteYourTrial.md)
To learn more about how this example was constructed and how to analyze the experiment results in NNI WebUI, please refer to [How to write a trial run on NNI (MNIST as an example)?](docs/WriteYourTrial.md)
## **Please refer to [Get Started Tutorial](docs/GetStarted.md) for more detailed information.**
## More tutorials
......
......@@ -43,7 +43,7 @@ For example, run the command
```
nnictl create --config ~/nni/examples/trials/mnist/config.yml
```
And open web ui to check if everything is OK
And open WebUI to check if everything is OK
**6. Redeploy**
......
......@@ -11,7 +11,6 @@ nnictl stop
nnictl update
nnictl resume
nnictl trial
nnictl webui
nnictl experiment
nnictl config
nnictl log
......@@ -34,8 +33,6 @@ nnictl log
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --config, -c| True| |yaml configure file of the experiment|
| --webuiport, -w| False| 8080|assign a port for webui|
* __nnictl resume__
......@@ -140,44 +137,6 @@ nnictl log
### Manage WebUI
* __nnictl webui start__
* Description
Start web ui function for nni, and will get a url list, you can open any of the url to see nni web page.
* Usage
nnictl webui start [OPTIONS]
Options:
| Name, shorthand | Required|Default | Description |
| ------ | ------ | ------ |------ |
| --port, -p| False| 8080|assign a port for webui|
* __nnictl webui stop__
* Description
Stop web ui function, and release url occupied. If you want to start again, use 'nnictl start webui' command
* Usage
nnictl webui stop
* __nnictl webui url__
* Description
Show the urls of web ui.
* Usage
nnictl webui url
### Manage experiment information
* __nnictl experiment show__
......
......@@ -64,7 +64,7 @@ You can see the pai jobs created by NNI in your OpenPAI cluster's web portal, li
Notice: In pai mode, NNIManager will start a rest server and listen on `51189` port, to receive metrics from trial job running in PAI container. So you should `enable 51189` TCP port in your firewall rule to allow incoming traffic.
Once a trial job is completed, you can go to NNI WebUI's overview page (like http://localhost:8080/oview) to check trial's information.
Once a trial job is completed, you can go to NNI WebUI's overview page (like http://localhost:51188/oview) to check trial's information.
Expand a trial information in trial list view, click the logPath link like:
![](./nni_webui_joblist.jpg)
......
......@@ -28,7 +28,7 @@ Initial release of Neural Network Intelligence (NNI).
* Provide Python API for user to wrap trial code as NNI deployable codes
* Experiments
* Provide a command line toolkit 'nnictl' for experiments management
* Provide a web UI for viewing experiments details and managing experiments
* Provide a WebUI for viewing experiments details and managing experiments
* Continuous Integration
* Support CI by providing out-of-box integration with [travis-ci](https://github.com/travis-ci) on ubuntu
* Others
......
......@@ -20,11 +20,11 @@ NNICTL need to set experiment config before start an experiment, experiment conf
### 2.5 Check experiment config
NNICTL will ensure the request to set config is successfully executed.
### 2.6 Start Web UI
NNICTL will start a Web UI process to show Web UI information; the default port of Web UI is 8080.
### 2.6 Start WebUI
NNICTL will start a WebUI to show information; the default port of WebUI is 51188.
### 2.7 Check Web UI
If Web UI is not successfully started, nnictl will print a warning and continue to start the experiment.
### 2.7 Check WebUI
If WebUI is not successfully started, nnictl will print a warning and continue to start the experiment.
### 2.8 Start Experiment
This is the most important step of starting an nni experiment; nnictl will call the restful server process to set up an experiment.
......
......@@ -67,12 +67,14 @@ function usage(): void {
console.info('usage: node main.js --port <port> --mode <local/remote/pai> --start_mode <new/resume> --experiment_id <id>');
}
let port: number = NNIRestServer.DEFAULT_PORT;
const strPort: string = parseArg(['--port', '-p']);
if (strPort && strPort.length > 0) {
port = parseInt(strPort, 10);
if (!strPort || strPort.length === 0) {
usage();
process.exit(1);
}
const port: number = parseInt(strPort, 10);
const mode: string = parseArg(['--mode', '-m']);
if (!['local', 'remote', 'pai'].includes(mode)) {
usage();
......
......@@ -19,7 +19,9 @@
'use strict';
import * as express from 'express';
import * as bodyParser from 'body-parser';
import * as path from 'path';
import * as component from '../common/component';
import { RestServer } from '../common/restServer'
import { createRestHandler } from './restHandler';
......@@ -27,14 +29,11 @@ import { createRestHandler } from './restHandler';
/**
* NNI Main rest server, provides rest API to support
* # nnictl CLI tool
* # NNI Web UI
* # NNI WebUI
*
*/
@component.Singleton
export class NNIRestServer extends RestServer{
/** NNI main rest service default port */
public static readonly DEFAULT_PORT: number = 51188;
export class NNIRestServer extends RestServer {
private readonly API_ROOT_URL: string = '/api/v1/nni';
/**
......@@ -42,14 +41,17 @@ export class NNIRestServer extends RestServer{
*/
constructor() {
super();
this.port = NNIRestServer.DEFAULT_PORT;
}
/**
* NNIRestServer's own router registration
*/
protected registerRestHandler(): void {
this.app.use(express.static('static'));
this.app.use(bodyParser.json());
this.app.use(this.API_ROOT_URL, createRestHandler(this));
this.app.get('*', (req: express.Request, res: express.Response) => {
res.sendFile(path.resolve('static/index.html'));
});
}
}
export const MANAGER_IP = `${window.location.protocol}//${window.location.hostname}:51188/api/v1/nni`;
export const MANAGER_IP = `/api/v1/nni`;
export const trialJobStatus = [
'UNKNOWN',
'WAITING',
......
#!/bin/sh
WEB_UI_FOLDER=${PWD}/../../src/webui python3 -m nnicmd.nnictl $@
python3 -m nnicmd.nnictl $@
# list of commands/arguments
__nnictl_cmds="create resume update stop trial webui experiment config rest log"
__nnictl_cmds="create resume update stop trial experiment config rest log"
__nnictl_create_cmds="--config --webuiport"
__nnictl_resume_cmds="--experiment --manager --webuiport"
__nnictl_update_cmds="searchspace concurrency duration"
......@@ -17,7 +17,6 @@ __nnictl_log_cmds="stdout stderr"
__nnictl_log_stdout_cmds="--tail --head --path"
__nnictl_log_stderr_cmds="--tail --head --path"
# list of arguments that accept a file name
__nnictl_file_args=" --config -c --filename -f "
......
......@@ -22,13 +22,14 @@
import os
import json
import shutil
from .constants import METADATA_DIR, METADATA_FULL_PATH
from .constants import HOME_DIR
class Config:
'''a util class to load and save config'''
def __init__(self):
os.makedirs(METADATA_DIR, exist_ok=True)
self.config_file = METADATA_FULL_PATH
def __init__(self, port):
config_path = os.path.join(HOME_DIR, str(port))
os.makedirs(config_path, exist_ok=True)
self.config_file = os.path.join(config_path, '.config')
self.config = self.read_file()
def get_all_config(self):
......
......@@ -20,19 +20,7 @@
import os
REST_PORT = 51188
HOME_DIR = os.path.join(os.environ['HOME'], 'nni')
METADATA_DIR = os.path.join(HOME_DIR, 'nnictl')
METADATA_FULL_PATH = os.path.join(METADATA_DIR, 'metadata')
LOG_DIR = os.path.join(HOME_DIR, 'nnictl', 'log')
STDOUT_FULL_PATH = os.path.join(LOG_DIR, 'stdout')
STDERR_FULL_PATH = os.path.join(LOG_DIR, 'stderr')
HOME_DIR = os.path.join(os.environ['HOME'], '.local', 'nni', 'nnictl')
ERROR_INFO = 'ERROR: %s'
......@@ -40,11 +28,12 @@ NORMAL_INFO = 'INFO: %s'
WARNING_INFO = 'WARNING: %s'
DEFAULT_REST_PORT = 51188
EXPERIMENT_SUCCESS_INFO = '\033[1;32;32mSuccessfully started experiment!\n\033[0m' \
'-----------------------------------------------------------------------\n' \
'The experiment id is %s\n'\
'The restful server post is %s\n' \
'The Web UI urls are: %s\n' \
'-----------------------------------------------------------------------\n\n' \
'You can use these commands to get more information about the experiment\n' \
'-----------------------------------------------------------------------\n' \
......@@ -55,8 +44,7 @@ EXPERIMENT_SUCCESS_INFO = '\033[1;32;32mSuccessfully started experiment!\n\033[0
'4. nnictl log stdout show stdout log content\n' \
'5. nnictl stop stop a experiment\n' \
'6. nnictl trial kill kill a trial job by id\n' \
'7. nnictl webui url get the url of web ui\n' \
'8. nnictl --help get help information about nnictl\n' \
'7. nnictl --help get help information about nnictl\n' \
'-----------------------------------------------------------------------\n' \
PACKAGE_REQUIREMENTS = {
......
......@@ -32,12 +32,11 @@ from .url_utils import cluster_metadata_url, experiment_url
from .config_utils import Config
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process
from .constants import *
from .webui_utils import start_web_ui, check_web_ui
def start_rest_server(port, platform, mode, experiment_id=None):
'''Run nni manager process'''
print_normal('Checking environment...')
nni_config = Config()
nni_config = Config(port)
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
if rest_port and running:
......@@ -50,10 +49,10 @@ def start_rest_server(port, platform, mode, experiment_id=None):
cmds = [manager, '--port', str(port), '--mode', platform, '--start_mode', mode]
if mode == 'resume':
cmds += ['--experiment_id', experiment_id]
if not os.path.exists(LOG_DIR):
os.makedirs(LOG_DIR)
stdout_file = open(STDOUT_FULL_PATH, 'a+')
stderr_file = open(STDERR_FULL_PATH, 'a+')
stdout_full_path = os.path.join(HOME_DIR, str(port), 'stdout')
stderr_full_path = os.path.join(HOME_DIR, str(port), 'stderr')
stdout_file = open(stdout_full_path, 'a+')
stderr_file = open(stderr_full_path, 'a+')
process = Popen(cmds, stdout=stdout_file, stderr=stderr_file)
return process
......@@ -80,7 +79,8 @@ def set_trial_config(experiment_config, port):
return True
else:
print('Error message is {}'.format(response.text))
with open(STDERR_FULL_PATH, 'a+') as fout:
stderr_full_path = os.path.join(HOME_DIR, str(port), 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':')))
return False
......@@ -98,7 +98,8 @@ def set_remote_config(experiment_config, port):
if not response or not check_response(response):
if response is not None:
err_message = response.text
with open(STDERR_FULL_PATH, 'a+') as fout:
stderr_full_path = os.path.join(HOME_DIR, str(port), 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message
......@@ -179,22 +180,23 @@ def set_experiment(experiment_config, mode, port):
if check_response(response):
return response
else:
with open(STDERR_FULL_PATH, 'a+') as fout:
stderr_full_path = os.path.join(HOME_DIR, str(port), 'stderr')
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':')))
print_error('Setting experiment error, error message is {}'.format(response.text))
return None
def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=None):
def launch_experiment(args, experiment_config, mode, experiment_id=None):
'''follow steps to start rest server and start experiment'''
nni_config = Config()
nni_config = Config(args.port)
#Check if there is an experiment running
origin_rest_pid = nni_config.get_config('restServerPid')
if origin_rest_pid and detect_process(origin_rest_pid):
print_error('There is an experiment running, please stop it first...')
print_normal('You can use \'nnictl stop\' command to stop an experiment!')
exit(0)
exit(1)
# start rest server
rest_process = start_rest_server(REST_PORT, experiment_config['trainingServicePlatform'], mode, experiment_id)
rest_process = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, experiment_id)
nni_config.set_config('restServerPid', rest_process.pid)
# Deal with annotation
if experiment_config.get('useAnnotation'):
......@@ -214,7 +216,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
experiment_config['searchSpace'] = json.dumps('')
# check rest server
running, _ = check_rest_server(REST_PORT)
running, _ = check_rest_server(args.port)
if running:
print_normal('Successfully started Restful server!')
else:
......@@ -224,12 +226,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
call(cmds)
except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(0)
exit(1)
# set remote config
if experiment_config['trainingServicePlatform'] == 'remote':
print_normal('Setting remote config...')
config_result, err_msg = set_remote_config(experiment_config, REST_PORT)
config_result, err_msg = set_remote_config(experiment_config, args.port)
if config_result:
print_normal('Success!')
else:
......@@ -239,12 +241,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
call(cmds)
except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(0)
exit(1)
# set local config
if experiment_config['trainingServicePlatform'] == 'local':
print_normal('Setting local config...')
if set_local_config(experiment_config, REST_PORT):
if set_local_config(experiment_config, args.port):
print_normal('Successfully set local config!')
else:
print_error('Failed!')
......@@ -253,12 +255,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
call(cmds)
except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(0)
exit(1)
#set pai config
if experiment_config['trainingServicePlatform'] == 'pai':
print_normal('Setting pai config...')
config_result, err_msg = set_pai_config(experiment_config, REST_PORT)
config_result, err_msg = set_pai_config(experiment_config, args.port)
if config_result:
print_normal('Successfully set pai config!')
else:
......@@ -269,22 +271,11 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
call(cmds)
except Exception:
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(0)
#start webui
if check_web_ui():
print_warning('{0} {1}'.format(' '.join(nni_config.get_config('webuiUrl')),'is being used, please stop it first!'))
print_normal('You can use \'nnictl webui stop\' to stop old Web UI process...')
else:
print_normal('Starting Web UI...')
webui_process = start_web_ui(webuiport)
if webui_process:
nni_config.set_config('webuiPid', webui_process.pid)
print_normal('Successfully started Web UI!')
exit(1)
# start a new experiment
print_normal('Starting experiment...')
response = set_experiment(experiment_config, mode, REST_PORT)
response = set_experiment(experiment_config, mode, args.port)
if response:
if experiment_id is None:
experiment_id = json.loads(response.text).get('experiment_id')
......@@ -294,27 +285,28 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
try:
cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds)
cmds = ['pkill', '-P', str(webui_process.pid)]
call(cmds)
except Exception:
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(0)
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, REST_PORT, ' '.join(nni_config.get_config('webuiUrl'))))
exit(1)
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, args.port))
def resume_experiment(args):
'''resume an experiment'''
nni_config = Config()
nni_config = Config(args.port)
experiment_config = nni_config.get_config('experimentConfig')
experiment_id = nni_config.get_config('experimentId')
launch_experiment(args, experiment_config, 'resume', args.webuiport, experiment_id)
launch_experiment(args, experiment_config, 'resume', experiment_id)
def create_experiment(args):
'''start a new experiment'''
nni_config = Config()
nni_config = Config(args.port)
config_path = os.path.abspath(args.config)
if not os.path.exists(config_path):
print_error('Please set correct config path!')
exit(1)
experiment_config = get_yml_content(config_path)
validate_all_content(experiment_config, config_path)
nni_config.set_config('experimentConfig', experiment_config)
launch_experiment(args, experiment_config, 'new', args.webuiport)
nni_config.set_config('restServerPort', REST_PORT)
launch_experiment(args, experiment_config, 'new')
nni_config.set_config('restServerPort', args.port)
......@@ -24,6 +24,7 @@ from .launcher import create_experiment, resume_experiment
from .updater import update_searchspace, update_concurrency, update_duration
from .nnictl_utils import *
from .package_management import *
from .constants import *
def nni_help_info(*args):
print('please run "nnictl {positional argument} --help" to see nnictl guidance')
......@@ -39,14 +40,14 @@ def parse_args():
# parse start command
parser_start = subparsers.add_parser('create', help='create a new experiment')
parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_start.add_argument('--webuiport', '-w', default=8080, dest='webuiport')
parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_start.set_defaults(func=create_experiment)
# parse resume command
parser_resume = subparsers.add_parser('resume', help='resume a new experiment')
parser_resume.add_argument('--experiment', '-e', dest='id', help='ID of the experiment you want to resume')
parser_resume.add_argument('--manager', '-m', default='nnimanager', dest='manager')
parser_resume.add_argument('--webuiport', '-w', default=8080, dest='webuiport')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_resume.set_defaults(func=resume_experiment)
# parse update command
......@@ -54,17 +55,21 @@ def parse_args():
#add subparsers for parser_updater
parser_updater_subparsers = parser_updater.add_subparsers()
parser_updater_searchspace = parser_updater_subparsers.add_parser('searchspace', help='update searchspace')
parser_updater_searchspace.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_updater_searchspace.add_argument('--filename', '-f', required=True)
parser_updater_searchspace.set_defaults(func=update_searchspace)
parser_updater_searchspace = parser_updater_subparsers.add_parser('concurrency', help='update concurrency')
parser_updater_searchspace.add_argument('--value', '-v', required=True)
parser_updater_searchspace.set_defaults(func=update_concurrency)
parser_updater_searchspace = parser_updater_subparsers.add_parser('duration', help='update duration')
parser_updater_searchspace.add_argument('--value', '-v', required=True)
parser_updater_searchspace.set_defaults(func=update_duration)
parser_updater_concurrency = parser_updater_subparsers.add_parser('concurrency', help='update concurrency')
parser_updater_concurrency.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_updater_concurrency.add_argument('--value', '-v', required=True)
parser_updater_concurrency.set_defaults(func=update_concurrency)
parser_updater_duration = parser_updater_subparsers.add_parser('duration', help='update duration')
parser_updater_duration.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_updater_duration.add_argument('--value', '-v', required=True)
parser_updater_duration.set_defaults(func=update_duration)
#parse stop command
parser_stop = subparsers.add_parser('stop', help='stop the experiment')
parser_stop.add_argument('--port', '-p', required=True, dest='port', help='the port of restful server')
parser_stop.set_defaults(func=stop_experiment)
#parse trial command
......@@ -72,37 +77,29 @@ def parse_args():
#add subparsers for parser_trial
parser_trial_subparsers = parser_trial.add_subparsers()
parser_trial_ls = parser_trial_subparsers.add_parser('ls', help='list trial jobs')
parser_trial_ls.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_trial_ls.set_defaults(func=trial_ls)
parser_trial_kill = parser_trial_subparsers.add_parser('kill', help='kill trial jobs')
parser_trial_kill.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_trial_kill.add_argument('--trialid', '-t', required=True, dest='trialid', help='the id of trial to be killed')
parser_trial_kill.set_defaults(func=trial_kill)
#TODO:finish webui function
#parse board command
parser_webui = subparsers.add_parser('webui', help='get web ui information')
#add subparsers for parser_board
parser_webui_subparsers = parser_webui.add_subparsers()
parser_webui_start = parser_webui_subparsers.add_parser('start', help='start web ui')
parser_webui_start.add_argument('--port', '-p', dest='port', default=8080, help='the port of web ui')
parser_webui_start.set_defaults(func=start_webui)
parser_webui_stop = parser_webui_subparsers.add_parser('stop', help='stop web ui')
parser_webui_stop.set_defaults(func=stop_webui)
parser_webui_url = parser_webui_subparsers.add_parser('url', help='show the url of web ui')
parser_webui_url.set_defaults(func=webui_url)
#parse experiment command
parser_experiment = subparsers.add_parser('experiment', help='get experiment information')
#add subparsers for parser_experiment
parser_experiment_subparsers = parser_experiment.add_subparsers()
parser_experiment_show = parser_experiment_subparsers.add_parser('show', help='show the information of experiment')
parser_experiment_show.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_experiment_show.set_defaults(func=list_experiment)
parser_experiment_status = parser_experiment_subparsers.add_parser('status', help='show the status of experiment')
parser_experiment_status.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_experiment_status.set_defaults(func=experiment_status)
#parse config command
parser_config = subparsers.add_parser('config', help='get config information')
parser_config_subparsers = parser_config.add_subparsers()
parser_config_show = parser_config_subparsers.add_parser('show', help='show the information of config')
parser_config_show.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_config_show.set_defaults(func=get_config)
#parse log command
......@@ -110,16 +107,19 @@ def parse_args():
# add subparsers for parser_log
parser_log_subparsers = parser_log.add_subparsers()
parser_log_stdout = parser_log_subparsers.add_parser('stdout', help='get stdout information')
parser_log_stdout.add_argument('--port', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_log_stdout.add_argument('--tail', '-T', dest='tail', type=int, help='get tail -100 content of stdout')
parser_log_stdout.add_argument('--head', '-H', dest='head', type=int, help='get head -100 content of stdout')
parser_log_stdout.add_argument('--path', '-p', action='store_true', default=False, help='get the path of stdout file')
parser_log_stdout.add_argument('--path', action='store_true', default=False, help='get the path of stdout file')
parser_log_stdout.set_defaults(func=log_stdout)
parser_log_stderr = parser_log_subparsers.add_parser('stderr', help='get stderr information')
parser_log_stderr.add_argument('--port', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_log_stderr.add_argument('--tail', '-T', dest='tail', type=int, help='get tail -100 content of stderr')
parser_log_stderr.add_argument('--head', '-H', dest='head', type=int, help='get head -100 content of stderr')
parser_log_stderr.add_argument('--path', '-p', action='store_true', default=False, help='get the path of stderr file')
parser_log_stderr.add_argument('--path', action='store_true', default=False, help='get the path of stderr file')
parser_log_stderr.set_defaults(func=log_stderr)
parser_log_trial = parser_log_subparsers.add_parser('trial', help='get trial log path')
parser_log_trial.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_log_trial.add_argument('--id', '-I', dest='id', help='find trial log path by id')
parser_log_trial.set_defaults(func=log_trial)
......
......@@ -26,10 +26,9 @@ from subprocess import call, check_output
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
from .config_utils import Config
from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url
from .constants import STDERR_FULL_PATH, STDOUT_FULL_PATH
from .constants import HOME_DIR
import time
from .common_utils import print_normal, print_error, detect_process
from .webui_utils import stop_web_ui, check_web_ui, start_web_ui
def convert_time_stamp_to_date(content):
'''Convert time stamp to date time format'''
......@@ -45,7 +44,7 @@ def convert_time_stamp_to_date(content):
def check_rest(args):
'''check if restful server is running'''
nni_config = Config()
nni_config = Config(args.port)
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
if not running:
......@@ -56,12 +55,11 @@ def check_rest(args):
def stop_experiment(args):
'''Stop the experiment which is running'''
print_normal('Stoping experiment...')
nni_config = Config()
nni_config = Config(args.port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
print_normal('Experiment is not running...')
stop_web_ui()
return
running, _ = check_rest_server_quick(rest_port)
stop_rest_result = True
......@@ -75,13 +73,12 @@ def stop_experiment(args):
rest_pid = nni_config.get_config('restServerPid')
cmds = ['pkill', '-P', str(rest_pid)]
call(cmds)
stop_web_ui()
if stop_rest_result:
print_normal('Stop experiment success!')
def trial_ls(args):
'''List trial'''
nni_config = Config()
nni_config = Config(args.port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -102,7 +99,7 @@ def trial_ls(args):
def trial_kill(args):
'''List trial'''
nni_config = Config()
nni_config = Config(args.port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -120,7 +117,7 @@ def trial_kill(args):
def list_experiment(args):
'''Get experiment information'''
nni_config = Config()
nni_config = Config(args.port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -139,7 +136,7 @@ def list_experiment(args):
def experiment_status(args):
'''Show the status of experiment'''
nni_config = Config()
nni_config = Config(args.port)
rest_port = nni_config.get_config('restServerPort')
result, response = check_rest_server_quick(rest_port)
if not result:
......@@ -158,9 +155,9 @@ def get_log_content(file_name, cmds):
def log_internal(args, filetype):
'''internal function to call get_log_content'''
if filetype == 'stdout':
file_full_path = STDOUT_FULL_PATH
file_full_path = os.path.join(HOME_DIR, args.port, 'stdout')
else:
file_full_path = STDERR_FULL_PATH
file_full_path = os.path.join(HOME_DIR, args.port, 'stderr')
if args.head:
get_log_content(file_full_path, ['head', '-' + str(args.head), file_full_path])
elif args.tail:
......@@ -181,7 +178,7 @@ def log_stderr(args):
def log_trial(args):
''''get trial log path'''
trial_id_path_dict = {}
nni_config = Config()
nni_config = Config(args.port)
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if not detect_process(rest_pid):
......@@ -196,52 +193,18 @@ def log_trial(args):
trial_id_path_dict[trial['id']] = trial['logPath']
else:
print_error('Restful server is not running...')
exit(0)
exit(1)
if args.id:
if trial_id_path_dict.get(args.id):
print('id:' + args.id + ' path:' + trial_id_path_dict[args.id])
else:
print_error('trial id is not valid!')
exit(0)
exit(1)
else:
for key in trial_id_path_dict.keys():
print('id:' + key + ' path:' + trial_id_path_dict[key])
def get_config(args):
    '''Print the result of ``get_all_config()`` for the Config selected by ``args.port``.

    args: parsed command-line namespace; only ``args.port`` is read.
    '''
    # Construct the Config exactly once, keyed by the port. A port-less
    # ``Config()`` built just before this was overwritten immediately, so it
    # contributed nothing to the printed result.
    nni_config = Config(args.port)
    print(nni_config.get_all_config())
def start_webui(args):
    '''Start the web UI on ``args.port`` and record its pid in the config.

    Returns early (after printing an error) when the process recorded under
    'restServerPid' is not detected, or when ``check_web_ui()`` reports that a
    web UI is already up.
    '''
    print_normal('Checking webui...')
    config = Config()
    if not detect_process(config.get_config('restServerPid')):
        print_error('Experiment is not running...')
        return
    if check_web_ui():
        # A web UI is already there: report its url(s) and tell the user how to stop it.
        print_error('{0} {1}'.format(' '.join(config.get_config('webuiUrl')), 'is being used, please stop it first!'))
        print_normal('You can use \'nnictl webui stop\' to stop old web ui process...')
        return
    print_normal('Starting webui...')
    process = start_web_ui(args.port)
    # A fresh Config object is created after the launch, before the pid is stored
    # and the url is read back.
    config = Config()
    config.set_config('webuiPid', process.pid)
    print_normal('Starting webui success!')
    print_normal('{0} {1}'.format('Web UI url:', ' '.join(config.get_config('webuiUrl'))))
def stop_webui(args):
    '''Stop the web UI and print whether ``stop_web_ui()`` reported success.

    args: parsed command-line namespace; not read by this function.
    '''
    print_normal('Stopping Web UI...')
    if not stop_web_ui():
        print_error('Web UI stop failed...')
        return
    print_normal('Web UI stopped success!')
def webui_url(args):
    '''Print the 'webuiUrl' config entry, its items joined by single spaces.

    args: parsed command-line namespace; not read by this function.
    '''
    urls = Config().get_config('webuiUrl')
    message = '{0} {1}'.format('Web UI url:', ' '.join(urls))
    print_normal(message)
......@@ -52,9 +52,9 @@ def get_query_type(key):
if key == 'searchSpace':
return '?update_type=SEARCH_SPACE'
def update_experiment_profile(key, value):
def update_experiment_profile(args, key, value):
'''call restful server to update experiment profile'''
nni_config = Config()
nni_config = Config(args.port)
rest_port = nni_config.get_config('restServerPort')
running, _ = check_rest_server_quick(rest_port)
if running:
......@@ -72,21 +72,21 @@ def update_experiment_profile(key, value):
def update_searchspace(args):
    '''Load a search space from ``args.filename`` and push it to the experiment profile.

    args: parsed command-line namespace; ``args.filename`` is read here and the
    whole namespace is forwarded to ``update_experiment_profile``.

    Prints an INFO line when the update call returns a truthy value, an ERROR
    line otherwise.
    '''
    # NOTE(review): the parser wiring visible for the 'searchspace' subcommand
    # registers '--value' and dispatches to update_duration; confirm that
    # args.filename is actually supplied on the path that reaches this function.
    validate_file(args.filename)
    content = load_search_space(args.filename)
    # update_experiment_profile takes (args, key, value); the older two-argument
    # call form no longer matches that signature and is not used.
    if update_experiment_profile(args, 'searchSpace', content):
        print('INFO: update %s success!' % 'searchSpace')
    else:
        print('ERROR: update %s failed!' % 'searchSpace')
def update_concurrency(args):
    '''Update 'trialConcurrency' in the experiment profile to ``int(args.value)``.

    args: parsed command-line namespace; ``args.value`` is checked by
    ``validate_digit`` with bounds 1 and 1000, and the whole namespace is
    forwarded to ``update_experiment_profile``.

    Prints an INFO line when the update call returns a truthy value, an ERROR
    line otherwise.
    '''
    validate_digit(args.value, 1, 1000)
    # update_experiment_profile takes (args, key, value); the older two-argument
    # call form no longer matches that signature and is not used.
    if update_experiment_profile(args, 'trialConcurrency', int(args.value)):
        print('INFO: update %s success!' % 'concurrency')
    else:
        print('ERROR: update %s failed!' % 'concurrency')
def update_duration(args):
    '''Update 'maxExecDuration' in the experiment profile to ``int(args.value)``.

    args: parsed command-line namespace; ``args.value`` is checked by
    ``validate_digit`` with bounds 1 and 999999999, and the whole namespace is
    forwarded to ``update_experiment_profile``.

    Prints an INFO line when the update call returns a truthy value, an ERROR
    line otherwise.
    '''
    validate_digit(args.value, 1, 999999999)
    # update_experiment_profile takes (args, key, value); the older two-argument
    # call form no longer matches that signature and is not used.
    if update_experiment_profile(args, 'maxExecDuration', int(args.value)):
        print('INFO: update %s success!' % 'duration')
    else:
        print('ERROR: update %s failed!' % 'duration')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment