Unverified Commit 2a28a578 authored by fishyds's avatar fishyds Committed by GitHub
Browse files

Merge branch V0.2 to Master (#143)

* webui logpath and document (#135)

* Add webui document and logpath as a href

* fix tslint

* fix comments by Chengmin

* Pai training service bug fix and enhancement (#136)

* Add NNI installation scripts

* Update pai script, update NNI_out_dir

* Update NNI dir in nni sdk local.py

* Create .nni folder in nni sdk local.py

* Add check before creating .nni folder

* Fix typo for PAI_INSTALL_NNI_SHELL_FORMAT

* Improve annotation (#138)

* Improve annotation

* Minor bugfix

* Selectively install through pip (#139)

Selectively install through pip 
* update setup.py

* fix paiTrainingService bugs (#137)

* fix nnictl bug

* add hdfs host validation

* fix bugs

* fix dockerfile

* fix install.sh

* update install.sh

* fix dockerfile

* Set timeout for HDFSUtility exists function

* remove unused TODO

* fix sdk

* add optional for outputDir and dataDir

* refactor dockerfile.base

* Remove unused import in hdfsclientUtility

* Add documentation for NNI PAI mode experiment (#141)

* Add documentation for NNI PAI mode

* Fix typo based on PR comments

* Exit with subprocess return code of trial keeper

* Remove additional exit code

* Fix typo based on PR comments

* update doc for smac tuner (#140)

* Revert "Selectively install through pip (#139)" due to potential pip install issue (#142)

* Revert "Selectively install through pip (#139)"

This reverts commit 1d174836.

* Add exit code of subprocess for trial_keeper

* Update README, add link to PAImode doc
parent 36b583b7
......@@ -8,7 +8,8 @@ trainingServicePlatform: local
#choice: true, false
useAnnotation: true
tuner:
#choice: TPE, Random, Anneal, Evolution
#choice: TPE, Random, Anneal, Evolution,
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
......@@ -16,4 +17,4 @@ tuner:
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
\ No newline at end of file
gpuNum: 0
......@@ -9,7 +9,8 @@ searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution
#choice: TPE, Random, Anneal, Evolution,
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
......@@ -17,4 +18,4 @@ tuner:
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
\ No newline at end of file
gpuNum: 0
......@@ -9,7 +9,8 @@ searchSpacePath: ~/nni/examples/trials/mnist/search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution
#choice: TPE, Random, Anneal, Evolution,
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
......@@ -23,4 +24,4 @@ assessor:
trial:
command: python3 mnist.py
codeDir: ~/nni/examples/trials/mnist
gpuNum: 0
\ No newline at end of file
gpuNum: 0
......@@ -9,7 +9,8 @@ searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution
#choice: TPE, Random, Anneal, Evolution,
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
......
#!/bin/bash
make build
make install-dependencies
make build
make dev-install
make install-examples
make update-bash-config
source ~/.bashrc
......@@ -131,7 +131,10 @@ export namespace HDFSClientUtility {
const deferred : Deferred<boolean> = new Deferred<boolean>();
hdfsClient.exists(hdfsPath, (exist : boolean ) => {
deferred.resolve(exist);
})
});
// Set timeout and reject the promise once reach timeout (5 seconds)
setTimeout(() => deferred.reject(`Check HDFS path ${hdfsPath} exists timeout`), 5000);
return deferred.promise;
}
......
......@@ -47,12 +47,21 @@ export class PAITrialJobDetail implements TrialJobDetail {
}
}
/**
 * Shell script that installs the NNI Python package inside a PAI trial
 * container only when `import nni` fails.
 *
 * The script is meant to be executed as a standalone script (not sourced),
 * so the "already installed" branch must leave via `exit 0`: a top-level
 * `return` is unspecified by POSIX outside a function or dot script, and
 * bash reports it as an error with a non-zero status, which would abort any
 * `&&` command chain that runs this script before starting the trial.
 */
export const PAI_INSTALL_NNI_SHELL_FORMAT: string =
`#!/bin/bash
if python3 -c 'import nni' > /dev/null 2>&1; then
# nni module is already installed, skip
exit 0
else
# Install nni
pip3 install -v --user git+https://github.com/Microsoft/nni.git@v0.2
fi`;
export const PAI_TRIAL_COMMAND_FORMAT: string =
`pip3 install -v --user git+https://github.com/Microsoft/nni.git@master
&& export NNI_PLATFORM=pai NNI_SYS_DIR={0} NNI_OUTPUT_DIR={0} NNI_TRIAL_JOB_ID={1} NNI_EXP_ID={2}
&& cd $NNI_SYS_DIR && mkdir .nni
&& python3 -m trial_tool.trial_keeper --trial_command '{3}' --nnimanager_ip '{4}' --pai_hdfs_output_dir '{5}'
--pai_hdfs_host '{6}' --pai_user_name {7}`;
`export NNI_PLATFORM=pai NNI_SYS_DIR={0} NNI_OUTPUT_DIR={1} NNI_TRIAL_JOB_ID={2} NNI_EXP_ID={3}
&& cd $NNI_SYS_DIR && sh install_nni.sh
&& python3 -m trial_tool.trial_keeper --trial_command '{4}' --nnimanager_ip '{5}' --pai_hdfs_output_dir '{6}'
--pai_hdfs_host '{7}' --pai_user_name {8}`;
/**
 * Template for an HDFS root URI on port 9000.
 * `{0}` is a positional placeholder; presumably it is replaced with the HDFS
 * host through `String.Format` — confirm at the call site.
 */
export const PAI_OUTPUT_DIR_FORMAT: string = 'hdfs://{0}:9000/';
......
......@@ -39,7 +39,7 @@ import {
} from '../../common/trainingService';
import { delay, getExperimentRootDir, getIPV4Address, uniqueString } from '../../common/utils';
import { PAIJobRestServer } from './paiJobRestServer'
import { PAITrialJobDetail, PAI_TRIAL_COMMAND_FORMAT, PAI_OUTPUT_DIR_FORMAT, PAI_LOG_PATH_FORMAT } from './paiData';
import { PAITrialJobDetail, PAI_INSTALL_NNI_SHELL_FORMAT, PAI_TRIAL_COMMAND_FORMAT, PAI_OUTPUT_DIR_FORMAT, PAI_LOG_PATH_FORMAT } from './paiData';
import { PAIJobInfoCollector } from './paiJobInfoCollector';
import { String } from 'typescript-string-operations';
import { NNIPAITrialConfig, PAIClusterConfig, PAIJobConfig, PAITaskRole } from './paiConfig';
......@@ -64,6 +64,8 @@ class PAITrainingService implements TrainingService {
private experimentId! : string;
private readonly paiJobCollector : PAIJobInfoCollector;
private readonly hdfsDirPattern: string;
private hdfsBaseDir: string | undefined;
private hdfsOutputHost: string | undefined;
constructor() {
this.log = getLogger();
......@@ -131,6 +133,14 @@ class PAITrainingService implements TrainingService {
if (!this.paiToken) {
throw new Error('PAI token is not initialized');
}
if(!this.hdfsBaseDir){
throw new Error('hdfsBaseDir is not initialized');
}
if(!this.hdfsOutputHost){
throw new Error('hdfsOutputHost is not initialized');
}
this.log.info(`submitTrialJob: form: ${JSON.stringify(form)}`);
......@@ -142,6 +152,10 @@ class PAITrainingService implements TrainingService {
//create tmp trial working folder locally.
await cpp.exec(`mkdir -p ${path.dirname(trialLocalTempFolder)}`);
await cpp.exec(`cp -r ${this.paiTrialConfig.codeDir} ${trialLocalTempFolder}`);
const runScriptContent : string = PAI_INSTALL_NNI_SHELL_FORMAT;
// Write NNI installation file to local tmp files
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), runScriptContent, { encoding: 'utf8' });
// Write file content ( parameter.cfg ) to local tmp folders
const trialForm : TrialJobApplicationForm = (<TrialJobApplicationForm>form)
......@@ -152,26 +166,11 @@ class PAITrainingService implements TrainingService {
// Step 1. Prepare PAI job configuration
const paiJobName : string = `nni_exp_${this.experimentId}_trial_${trialJobId}`;
const hdfsCodeDir : string = path.join(this.expRootDir, trialJobId);
const hdfsDirContent = this.paiTrialConfig.outputDir.match(this.hdfsDirPattern);
if(hdfsDirContent === null) {
throw new Error('Trial outputDir format Error');
}
const groups = hdfsDirContent.groups;
if(groups === undefined) {
throw new Error('Trial outputDir format Error');
}
const hdfsHost = groups['host'];
let hdfsBaseDirectory = groups['baseDir'];
if(hdfsBaseDirectory === undefined) {
hdfsBaseDirectory = "/";
}
const hdfsOutputDir : string = path.join(hdfsBaseDirectory, this.experimentId, trialJobId);
const hdfsOutputDir : string = path.join(this.hdfsBaseDir, this.experimentId, trialJobId);
const hdfsLogPath : string = String.Format(
PAI_LOG_PATH_FORMAT,
hdfsHost,
this.hdfsOutputHost,
hdfsOutputDir);
const trialJobDetail: PAITrialJobDetail = new PAITrialJobDetail(
......@@ -188,12 +187,13 @@ class PAITrainingService implements TrainingService {
PAI_TRIAL_COMMAND_FORMAT,
// PAI will copy job's codeDir into /root directory
`/root/${trialJobId}`,
`/root/${trialJobId}/nnioutput`,
trialJobId,
this.experimentId,
this.paiTrialConfig.command,
getIPV4Address(),
hdfsOutputDir,
hdfsHost,
this.hdfsOutputHost,
this.paiClusterConfig.userName
).replace(/\r\n|\n|\r/gm, '');
......@@ -304,7 +304,7 @@ class PAITrainingService implements TrainingService {
return deferred.promise;
}
public setClusterMetadata(key: string, value: string): Promise<void> {
public async setClusterMetadata(key: string, value: string): Promise<void> {
const deferred : Deferred<void> = new Deferred<void>();
switch (key) {
......@@ -331,13 +331,12 @@ class PAITrainingService implements TrainingService {
request(authentication_req, (error: Error, response: request.Response, body: any) => {
if (error) {
//TODO: should we make the return type of setClusterMetadata Promise<string>?
this.log.error(`Get PAI token failed: ${error.message}`);
deferred.reject();
deferred.reject(new Error(`Get PAI token failed: ${error.message}`));
} else {
if(response.statusCode !== 200){
this.log.error(`Get PAI token failed: get PAI Rest return code ${response.statusCode}`);
deferred.reject();
deferred.reject(new Error(`Get PAI token failed, please check paiConfig username or password`));
}
this.paiToken = body.token;
......@@ -348,7 +347,7 @@ class PAITrainingService implements TrainingService {
case TrialConfigMetadataKey.TRIAL_CONFIG:
if (!this.paiClusterConfig){
this.log.error('pai cluster config is not initialized');
deferred.reject();
deferred.reject(new Error('pai cluster config is not initialized'));
break;
}
this.paiTrialConfig = <NNIPAITrialConfig>JSON.parse(value);
......@@ -359,6 +358,38 @@ class PAITrainingService implements TrainingService {
this.paiClusterConfig.host
).replace(/\r\n|\n|\r/gm, '');
}
const hdfsDirContent = this.paiTrialConfig.outputDir.match(this.hdfsDirPattern);
if(hdfsDirContent === null) {
throw new Error('Trial outputDir format Error');
}
const groups = hdfsDirContent.groups;
if(groups === undefined) {
throw new Error('Trial outputDir format Error');
}
this.hdfsOutputHost = groups['host'];
this.hdfsBaseDir = groups['baseDir'];
if(this.hdfsBaseDir === undefined) {
this.hdfsBaseDir = "/";
}
const hdfsClient = WebHDFS.createClient({
user: this.paiClusterConfig.userName,
port: 50070,
host: this.hdfsOutputHost
});
try {
const exist : boolean = await HDFSClientUtility.pathExists("/", hdfsClient);
if(!exist) {
deferred.reject(new Error(`Please check hdfsOutputDir host!`));
}
} catch(error) {
deferred.reject(new Error(`HDFS encounters problem, error is ${error}. Please check hdfsOutputDir host!`));
}
deferred.resolve();
break;
default:
......
This diff is collapsed.
# How to use Tuner that NNI supports?
For now, NNI could support tuner algorithms as following:
For now, NNI supports the following tuner algorithms. Note that the NNI installation only installs a subset of these algorithms; the others must be installed through `nnictl package install` before you use them. For example, the installation command for SMAC is `nnictl package install --name=SMAC`.
- TPE
- Random Search
- Anneal
- Naive Evolution
- SMAC
- SMAC (to install through `nnictl`)
- ENAS (ongoing)
- Batch (ongoing)
......@@ -75,4 +75,4 @@ There are two filed you need to set:
[2]: http://www.jmlr.org/papers/volume13/bergstra12a/bergstra12a.pdf
[3]: https://arxiv.org/pdf/1703.01041.pdf
[4]: https://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf
[5]: https://github.com/automl/SMAC3
\ No newline at end of file
[5]: https://github.com/automl/SMAC3
......@@ -24,16 +24,20 @@ import os
from ..common import init_logger
_dir = os.environ['NNI_SYS_DIR']
_metric_file = open(os.path.join(_dir, '.nni', 'metrics'), 'wb')
_log_file_path = os.path.join(_dir, 'trial.log')
_sysdir = os.environ['NNI_SYS_DIR']
if not os.path.exists(os.path.join(_sysdir, '.nni')):
os.makedirs(os.path.join(_sysdir, '.nni'))
_metric_file = open(os.path.join(_sysdir, '.nni', 'metrics'), 'wb')
_outputdir = os.environ['NNI_OUTPUT_DIR']
if not os.path.exists(_outputdir):
os.makedirs(_outputdir)
_log_file_path = os.path.join(_outputdir, 'trial.log')
init_logger(_log_file_path)
def get_parameters():
params_file = open(os.path.join(_dir, 'parameter.cfg'), 'r')
params_file = open(os.path.join(_sysdir, 'parameter.cfg'), 'r')
return json.load(params_file)
def send_metric(string):
......
......@@ -124,6 +124,6 @@ else:
del frame # see official doc
module = inspect.getmodulename(filename)
if name is None:
name = '#{:d}'.format(lineno)
name = '__line{:d}'.format(lineno)
key = '{}/{}/{}'.format(module, name, func)
return trial.get_parameter(key)
......@@ -33,9 +33,9 @@ class SmartParamTestCase(TestCase):
def setUp(self):
params = {
'test_smartparam/choice1/choice': 2,
'test_smartparam/#{:d}/uniform'.format(lineno1): '5',
'test_smartparam/__line{:d}/uniform'.format(lineno1): '5',
'test_smartparam/func/function_choice': 1,
'test_smartparam/#{:d}/function_choice'.format(lineno2): 0
'test_smartparam/__line{:d}/function_choice'.format(lineno2): 0
}
nni.trial._params = { 'parameter_id': 'test_trial', 'parameters': params }
......
# webui
# WebUI
NNI is a research platform for metalearning. It provides easy-to-use interface so that you could perform neural architecture search, hyperparameter optimization and optimizer design for your own problems and models.
The Web UI allows users to monitor the status of the NNI system through a graphical interface.
## Deployment
### To start the webui
> $ yarn
> $ yarn start
## Usage
### View summary page
## View summary page
Click the tab "Overview".
* See the experiment parameters.
* See the trials with good performance.
* See the search_space JSON.
### View job accuracy
## View job accuracy
Click the tab "Optimization Progress" to see the point graph of all trials. Hover over a point to see its specific accuracy.
### View hyper parameter
## View hyper parameter
Click the tab "Hyper Parameter" to see the parallel graph.
* You can select the percentage to cut down some lines.
* You can select the percentage to see top trials.
* Choose two axes to swap their positions.
### View trial status
## View trial status
Click the tab "Trial Status" to see the status of all the trials. Specifically:
* Running trial: running trial's duration in the bar graph.
* Trial detail: trial's id, trial's duration, start time, end time, status and accuracy.
* Trial duration: trial's duration in the bar graph.
* Trial detail: trial's id, trial's duration, start time, end time, status, accuracy and search space file.
* Kill: you can kill a job whose status is running.
* Tensor: you can view a job in the TensorFlow graph; it links to the TensorBoard page.
### Control
## Control
Click the tab "Control" to add a new trial or update the search_space file and some experiment parameters.
### View Tensorboard Graph
Click the tab "Tensorboard" to see a job in the tensorflow graph.
\ No newline at end of file
## Feedback
[Known Issues](https://github.com/Microsoft/nni/issues).
\ No newline at end of file
......@@ -3,7 +3,7 @@
"version": "0.1.0",
"private": true,
"dependencies": {
"antd": "^3.8.1",
"antd": "^3.9.3",
"axios": "^0.18.0",
"babel-polyfill": "^6.26.0",
"echarts": "^4.1.0",
......
......@@ -5,6 +5,7 @@ import { MANAGER_IP, overviewItem } from '../const';
const Option = Select.Option;
import JSONTree from 'react-json-tree';
require('../style/sessionpro.css');
require('../style/logPath.css');
interface TableObj {
key: number;
......@@ -14,12 +15,13 @@ interface TableObj {
end: string;
status: string;
acc?: number;
description: object;
description: Parameters;
}
/**
 * Detail payload shown when a trial row of the overview table is expanded.
 */
interface Parameters {
    /** Hyper-parameter values of the trial. */
    parameters: object;

    /** Location of the trial's log, when the trial job reports one. */
    logPath?: string;

    /**
     * Marks `logPath` as a hyperlink. The page sets it to `true` when the
     * path starts with "http" and otherwise leaves it undefined; the renderer
     * only checks whether it is defined.
     */
    isLink?: boolean;
}
interface Experiment {
......@@ -76,7 +78,9 @@ class Sessionpro extends React.Component<{}, SessionState> {
end: '',
status: '',
acc: 0,
description: {}
description: {
parameters: {}
}
}],
selNum: overviewItem,
option: {},
......@@ -162,6 +166,10 @@ class Sessionpro extends React.Component<{}, SessionState> {
desJobDetail.parameters = JSON.parse(tableData[item].hyperParameters).parameters;
if (tableData[item].logPath !== undefined) {
desJobDetail.logPath = tableData[item].logPath;
const isSessionLink = /^http/gi.test(tableData[item].logPath);
if (isSessionLink) {
desJobDetail.isLink = true;
}
}
topTableData.push({
key: topTableData.length,
......@@ -261,14 +269,35 @@ class Sessionpro extends React.Component<{}, SessionState> {
}];
const openRow = (record: TableObj) => {
const openRowDataSource = {
parameters: record.description.parameters
};
let isLogLink: boolean = false;
const logPathRow = record.description.logPath;
if (record.description.isLink !== undefined) {
isLogLink = true;
}
return (
<pre id="description" className="jsontree">
<JSONTree
hideRoot={true}
shouldExpandNode={() => true} // default expandNode
getItemString={() => (<span />)} // remove the {} items
data={record.description}
data={openRowDataSource}
/>
{
isLogLink
?
<div className="logpath">
<span className="logName">logPath: </span>
<a className="logContent logHref" href={logPathRow} target="_blank">{logPathRow}</a>
</div>
:
<div className="logpath">
<span className="logName">logPath: </span>
<span className="logContent">{logPathRow}</span>
</div>
}
</pre>
);
};
......
......@@ -11,13 +11,13 @@ class SlideBar extends React.Component<{}, {}> {
<ul className="nav">
<li>
<IndexLink to={'/oview'} activeClassName="high">
<Icon className="icon" type="dot-chart" />Overview
<Icon className="icon" type="experiment" theme="outlined" />Overview
<Icon className="floicon" type="right" />
</IndexLink>
</li>
<li>
<Link to={'/all'} activeClassName="high">
<Icon className="icon" type="message" />Optimization Progress
<Icon className="icon" type="dot-chart" />Optimization Progress
<Icon className="floicon" type="right" />
</Link>
</li>
......@@ -39,6 +39,11 @@ class SlideBar extends React.Component<{}, {}> {
<Icon className="floicon" type="right" />
</Link>
</li>
<li>
<a href="https://github.com/Microsoft/nni/issues" target="_blank">
<Icon className="icon" type="smile" theme="outlined" />Feedback
</a>
</li>
</ul>
</div>
);
......
......@@ -12,6 +12,7 @@ require('echarts/lib/chart/scatter');
require('echarts/lib/component/tooltip');
require('echarts/lib/component/title');
require('../style/trialStatus.css');
require('../style/logPath.css');
echarts.registerTheme('my_theme', {
color: '#3c8dbc'
});
......@@ -19,6 +20,7 @@ echarts.registerTheme('my_theme', {
/**
 * Detail payload shown when a row of the trial status table is expanded.
 */
interface DescObj {
    /**
     * Hyper-parameter values of the trial. Typed as the primitive-excluding
     * `object` rather than the boxed `Object` wrapper type, which would also
     * accept strings, numbers and booleans.
     */
    parameters: object;

    /** Location of the trial's log, when the trial job reports one. */
    logPath?: string;

    /**
     * Marks `logPath` as a hyperlink. The page sets it to `true` when the
     * path starts with "http" and otherwise leaves it undefined; the renderer
     * only checks whether it is defined.
     */
    isLink?: boolean;
}
interface TableObj {
......@@ -238,6 +240,10 @@ class TrialStatus extends React.Component<{}, TabState> {
}
if (trialJobs[item].logPath !== undefined) {
desc.logPath = trialJobs[item].logPath;
const isHyperLink = /^http/gi.test(trialJobs[item].logPath);
if (isHyperLink) {
desc.isLink = true;
}
}
if (trialJobs[item].finalMetricData !== undefined) {
acc = parseFloat(trialJobs[item].finalMetricData.data);
......@@ -472,14 +478,35 @@ class TrialStatus extends React.Component<{}, TabState> {
];
const openRow = (record: TableObj) => {
const parametersRow = {
parameters: record.description.parameters
};
let isLogLink: boolean = false;
const logPathRow = record.description.logPath;
if (record.description.isLink !== undefined) {
isLogLink = true;
}
return (
<pre className="hyperpar">
<JSONTree
hideRoot={true}
shouldExpandNode={() => true} // default expandNode
getItemString={() => (<span />)} // remove the {} items
data={record.description}
data={parametersRow}
/>
{
isLogLink
?
<div className="logpath">
<span className="logName">logPath: </span>
<a className="logContent logHref" href={logPathRow} target="_blank">{logPathRow}</a>
</div>
:
<div className="logpath">
<span className="logName">logPath: </span>
<span className="logContent">{logPathRow}</span>
</div>
}
<Button
type="primary"
className="tableButton"
......
/* Row that shows a trial's log path beneath the expanded parameter tree. */
.logpath {
    margin-left: 10px;
    margin-bottom: 10px;
}

/* The "logPath:" label. */
.logName {
    color: #268BD2;
}

/* The log path value, rendered either as plain text or as a link. */
.logContent {
    color: #333;
}

/* Hover feedback when the log path is rendered as a link. */
.logHref:hover {
    text-decoration: underline;
    color: blue;
}
......@@ -23,6 +23,7 @@
/* Leading icon of each entry in the side navigation list. */
.nav .icon {
    font-size: 18px;
    padding-right: 13px;
    padding-left: 13px;
}
.nav .floicon {
font-size: 16px;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment