Commit 8314d6ee authored by Deshui Yu's avatar Deshui Yu Committed by fishyds
Browse files

Merge from dogfood branch to master

parent 98530fd2
#!/bin/bash #!/bin/bash
INSTALL_PREFIX=${HOME}/.local
mkdir -p ${INSTALL_PREFIX}
wget -4 -nc https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz --header "Referer: nodejs.org" wget -4 -nc https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz --header "Referer: nodejs.org"
tar -xf 'node-v10.9.0-linux-x64.tar.xz' tar -xf 'node-v10.9.0-linux-x64.tar.xz'
sudo cp -rf node-v10.9.0-linux-x64/* /usr/local/node/ cp -rT node-v10.9.0-linux-x64 ${INSTALL_PREFIX}/node
rm -rf node-v10.9.0-linux-x64* rm -rf node-v10.9.0-linux-x64*
wget -4 -nc https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz wget -4 -nc https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz
tar -xf 'yarn-v1.9.4.tar.gz' tar -xf 'yarn-v1.9.4.tar.gz'
sudo cp -rf yarn-v1.9.4/* /usr/local/yarn/ cp -rT yarn-v1.9.4 ${INSTALL_PREFIX}/yarn
rm -rf yarn-v1.9.4* rm -rf yarn-v1.9.4*
export PATH=/usr/local/node/bin:/usr/local/yarn/bin:$PATH NODE_BIN=${INSTALL_PREFIX}/node/bin
YARN_BIN=${INSTALL_PREFIX}/yarn/bin
export PATH=${INSTALL_PREFIX}/node/bin:${INSTALL_PREFIX}/yarn/bin:$PATH
echo $PATH|grep -q ${NODE_BIN} || echo "export PATH=${NODE_BIN}:\${PATH}" >> ${HOME}/.bashrc
echo $PATH|grep -q ${YARN_BIN} || echo "export PATH=${YARN_BIN}:\${PATH}" >> ${HOME}/.bashrc
source ${HOME}/.bashrc
make make
sudo make install make install
\ No newline at end of file
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import os
from setuptools import setup, find_packages
from setuptools.command.install import install
from subprocess import Popen
def read(fname):
    '''Return the text content of `fname`, resolved relative to this file's directory.

    The file is decoded as UTF-8 so the result does not depend on the locale of
    the machine running the installation, and the handle is closed before
    returning instead of being left for the garbage collector.
    '''
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path, encoding='utf-8') as handle:
        return handle.read()
class CustomInstallCommand(install):
    '''Customized `install` command used when the package is installed.

    After the regular install step it runs `make pip-install` and then makes
    sure the `node` and `yarn` bin directories under ~/.local are added to
    PATH through ~/.bashrc.
    '''

    def makeInstall(self):
        '''Execute `make pip-install`; abort with exit status -1 if it fails.'''
        process = Popen(['make', 'pip-install'])
        if process.wait() != 0:
            print('Error: Make Install Failed')
            # SystemExit is what exit() raises; raising it directly does not
            # rely on the `site` module having injected the exit() helper.
            raise SystemExit(-1)

    def writeEnvironmentVariables(self, variable_name):
        '''Append `export PATH=~/.local/<variable_name>/bin:$PATH` to ~/.bashrc.

        Nothing is written when that directory is already an entry of the
        current PATH. Exits with status -1 if ~/.bashrc cannot be written.
        '''
        home = os.path.expanduser('~')
        bin_path = os.path.join(home, '.local', variable_name, 'bin')
        # PATH may be unset; treat that the same as an empty search path.
        paths = os.environ.get('PATH', '').split(':')
        if bin_path in paths:
            return
        bashrc_path = os.path.join(home, '.bashrc')
        try:
            # Append directly rather than spawning `echo ... >> file` through a
            # shell: same line is written, but paths containing spaces or shell
            # metacharacters can no longer be mangled or interpreted.
            with open(bashrc_path, 'a') as bashrc:
                bashrc.write('export PATH=' + bin_path + ':$PATH\n')
        except OSError:
            print('Error: Write Environment Variables Failed')
            raise SystemExit(-1)

    def run(self):
        '''Run the standard install, then the make step, then the PATH updates.'''
        install.run(self)
        self.makeInstall()
        self.writeEnvironmentVariables('node')
        self.writeEnvironmentVariables('yarn')
# Package definition: metadata, the Python packages to ship, runtime
# dependencies, the customized install command and the console entry point.
setup(
name = 'NNI',
version = '0.0.1',
author = 'Microsoft NNI Team',
author_email = 'nni@microsoft.com',
description = 'Neural Network Intelligence project',
# Long description is the text of docs/NNICTLDOC.md, loaded through read().
long_description = read('docs/NNICTLDOC.md'),
license = 'MIT',
url = 'https://msrasrg.visualstudio.com/NeuralNetworkIntelligence',
# Packages are discovered under src/sdk/pynni (excluding 'tests') and under tools.
packages = find_packages('src/sdk/pynni', exclude=['tests']) + find_packages('tools'),
# Maps each top-level package name to its directory in the source tree.
package_dir = {
'annotation': 'tools/annotation',
'nni': 'src/sdk/pynni/nni',
'nnicmd': 'tools/nnicmd'
},
python_requires = '>=3.5',
install_requires = [
'astor',
'json_tricks',
'numpy',
'psutil',
'pymc3',
'pyyaml',
'requests',
'scipy'
],
# NOTE(review): hyperopt is only referenced here and is not listed in
# install_requires above -- confirm whether it is actually installed by pip.
dependency_links = [
'git+https://github.com/hyperopt/hyperopt.git',
],
# Replace the stock `install` command with CustomInstallCommand.
cmdclass={
'install': CustomInstallCommand
},
# Installs an `nnictl` command that calls nnicmd.nnictl:parse_args.
entry_points={
'console_scripts': ['nnictl = nnicmd.nnictl:parse_args']
}
)
...@@ -32,16 +32,22 @@ interface ExperimentParams { ...@@ -32,16 +32,22 @@ interface ExperimentParams {
maxTrialNum: number; maxTrialNum: number;
searchSpace: string; searchSpace: string;
tuner: { tuner: {
tunerCommand: string; className: string;
tunerCwd: string; builtinTunerName?: string;
tunerCheckpointDirectory: string; codeDir?: string;
tunerGpuNum?: number; classArgs?: any;
classFileName?: string;
checkpointDir: string;
gpuNum?: number;
}; };
assessor?: { assessor?: {
assessorCommand: string; className: string;
assessorCwd: string; builtinAssessorName?: string;
assessorCheckpointDirectory: string; codeDir?: string;
assessorGpuNum?: number; classArgs?: any;
classFileName?: string;
checkpointDir: string;
gpuNum?: number;
}; };
clusterMetaData?: { clusterMetaData?: {
key: string; key: string;
......
...@@ -105,6 +105,8 @@ abstract class TrainingService { ...@@ -105,6 +105,8 @@ abstract class TrainingService {
public abstract addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void; public abstract addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
public abstract removeTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void; public abstract removeTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
public abstract submitTrialJob(form: JobApplicationForm): Promise<TrialJobDetail>; public abstract submitTrialJob(form: JobApplicationForm): Promise<TrialJobDetail>;
public abstract updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail>;
public abstract get isMultiPhaseJobSupported(): boolean;
public abstract cancelTrialJob(trialJobId: string): Promise<void>; public abstract cancelTrialJob(trialJobId: string): Promise<void>;
public abstract setClusterMetadata(key: string, value: string): Promise<void>; public abstract setClusterMetadata(key: string, value: string): Promise<void>;
public abstract getClusterMetadata(key: string): Promise<string>; public abstract getClusterMetadata(key: string): Promise<string>;
......
...@@ -28,7 +28,7 @@ import { Container } from 'typescript-ioc'; ...@@ -28,7 +28,7 @@ import { Container } from 'typescript-ioc';
import * as util from 'util'; import * as util from 'util';
import { Database, DataStore } from './datastore'; import { Database, DataStore } from './datastore';
import { ExperimentStartupInfo, setExperimentStartupInfo, getExperimentId } from './experimentStartupInfo'; import { ExperimentStartupInfo, getExperimentId, setExperimentStartupInfo } from './experimentStartupInfo';
import { Manager } from './manager'; import { Manager } from './manager';
import { TrainingService } from './trainingService'; import { TrainingService } from './trainingService';
...@@ -127,6 +127,63 @@ function parseArg(names: string[]): string { ...@@ -127,6 +127,63 @@ function parseArg(names: string[]): string {
return ''; return '';
} }
/**
 * Generate the command line that starts the dispatcher (advisor) process, which
 * runs the tuner and, optionally, the assessor.
 *
 * Builtin tuner example:
 *   {
 *       className: 'EvolutionTuner',
 *       classArgs: { optimize_mode: 'maximize', population_size: 3 }
 *   }
 *
 * Customized tuner example:
 *   {
 *       codeDir: '/tmp/mytuner',
 *       classFileName: 'best_tuner.py',
 *       className: 'BestTuner',
 *       classArgs: { optimize_mode: 'maximize', population_size: 3 }
 *   }
 *
 * @param tuner tuner description; `className` is always emitted, the other
 *              fields only when present and non-empty
 * @param assessor same shape as `tuner`; ignored when it is undefined/null or
 *                 has no `className`
 * @returns the `python3 -m nni ...` command string
 */
function getMsgDispatcherCommand(tuner: any, assessor: any): string {
    let command: string = `python3 -m nni${formatDispatcherOptions('tuner', tuner)}`;
    if (assessor !== undefined && assessor !== null && assessor.className !== undefined) {
        command += formatDispatcherOptions('assessor', assessor);
    }

    return command;
}

/**
 * Build the ` --<role>_*` options for one component (tuner or assessor).
 * Options are emitted in a fixed order: class name, args, directory, class file name.
 */
function formatDispatcherOptions(role: string, spec: any): string {
    let options: string = ` --${role}_class_name ${spec.className}`;
    if (spec.classArgs !== undefined) {
        // Stringify twice: the inner call serializes the args, the outer one wraps
        // the JSON text in double quotes and escapes the quotes inside it.
        options += ` --${role}_args ${JSON.stringify(JSON.stringify(spec.classArgs))}`;
    }
    // Any non-empty string counts, so one-character values such as '.' are kept;
    // null or non-string values are skipped instead of throwing on `.length`.
    if (typeof spec.codeDir === 'string' && spec.codeDir.length > 0) {
        options += ` --${role}_directory ${spec.codeDir}`;
    }
    if (typeof spec.classFileName === 'string' && spec.classFileName.length > 0) {
        options += ` --${role}_class_filename ${spec.classFileName}`;
    }

    return options;
}
/** /**
* Initialize a pseudo experiment environment for unit test. * Initialize a pseudo experiment environment for unit test.
* Must be paired with `cleanupUnitTest()`. * Must be paired with `cleanupUnitTest()`.
...@@ -161,5 +218,5 @@ function cleanupUnitTest(): void { ...@@ -161,5 +218,5 @@ function cleanupUnitTest(): void {
Container.restore(ExperimentStartupInfo); Container.restore(ExperimentStartupInfo);
} }
export { getLogDir, getExperimentRootDir, getDefaultDatabaseDir, mkDirP, delay, prepareUnitTest, export { getMsgDispatcherCommand, getLogDir, getExperimentRootDir, getDefaultDatabaseDir, mkDirP, delay, prepareUnitTest,
parseArg, cleanupUnitTest, uniqueString }; parseArg, cleanupUnitTest, uniqueString };
...@@ -135,16 +135,8 @@ class IpcInterface { ...@@ -135,16 +135,8 @@ class IpcInterface {
* Create IPC proxy for tuner process * Create IPC proxy for tuner process
* @param process_ the tuner process * @param process_ the tuner process
*/ */
function createTunerInterface(process: ChildProcess): IpcInterface { function createDispatcherInterface(process: ChildProcess): IpcInterface {
return new IpcInterface(process, CommandType.TUNER_COMMANDS); return new IpcInterface(process, new Set([...CommandType.TUNER_COMMANDS, ...CommandType.ASSESSOR_COMMANDS]));
} }
/** export { IpcInterface, createDispatcherInterface };
* Create IPC proxy for assessor process
* @param process_ the assessor process
*/
function createAssessorInterface(process: ChildProcess): IpcInterface {
return new IpcInterface(process, CommandType.ASSESSOR_COMMANDS);
}
export { IpcInterface, createTunerInterface, createAssessorInterface };
...@@ -185,6 +185,9 @@ class NNIDataStore implements DataStore { ...@@ -185,6 +185,9 @@ class NNIDataStore implements DataStore {
// assume data is stored by time ASC order // assume data is stored by time ASC order
for (const record of trialJobEvents) { for (const record of trialJobEvents) {
let jobInfo: TrialJobInfo | undefined; let jobInfo: TrialJobInfo | undefined;
if (record.trialJobId === undefined || record.trialJobId.length < 1) {
continue;
}
if (map.has(record.trialJobId)) { if (map.has(record.trialJobId)) {
jobInfo = map.get(record.trialJobId); jobInfo = map.get(record.trialJobId);
} else { } else {
......
...@@ -34,12 +34,12 @@ import { ...@@ -34,12 +34,12 @@ import {
import { import {
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../common/trainingService'; } from '../common/trainingService';
import { delay , getLogDir} from '../common/utils'; import { delay , getLogDir, getMsgDispatcherCommand} from '../common/utils';
import { import {
ADD_CUSTOMIZED_TRIAL_JOB, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, REPORT_METRIC_DATA, ADD_CUSTOMIZED_TRIAL_JOB, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, REPORT_METRIC_DATA,
REQUEST_TRIAL_JOBS, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE REQUEST_TRIAL_JOBS, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE
} from './commands'; } from './commands';
import { createAssessorInterface, createTunerInterface, IpcInterface } from './ipcInterface'; import { createDispatcherInterface, IpcInterface } from './ipcInterface';
import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs'; import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
/** /**
...@@ -47,8 +47,7 @@ import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs'; ...@@ -47,8 +47,7 @@ import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
*/ */
class NNIManager implements Manager { class NNIManager implements Manager {
private trainingService: TrainingService; private trainingService: TrainingService;
private tuner: IpcInterface | undefined; private dispatcher: IpcInterface | undefined;
private assessor: IpcInterface | undefined;
private trialJobsMaintainer: TrialJobs | undefined; private trialJobsMaintainer: TrialJobs | undefined;
private currSubmittedTrialNum: number; // need to be recovered private currSubmittedTrialNum: number; // need to be recovered
private trialConcurrencyReduction: number; private trialConcurrencyReduction: number;
...@@ -56,9 +55,7 @@ class NNIManager implements Manager { ...@@ -56,9 +55,7 @@ class NNIManager implements Manager {
private log: Logger; private log: Logger;
private dataStore: DataStore; private dataStore: DataStore;
private experimentProfile: ExperimentProfile; private experimentProfile: ExperimentProfile;
// TO DO: could use struct here private dispatcherPid: number;
private tunerPid: number;
private assessorPid: number;
constructor() { constructor() {
this.currSubmittedTrialNum = 0; this.currSubmittedTrialNum = 0;
...@@ -67,8 +64,7 @@ class NNIManager implements Manager { ...@@ -67,8 +64,7 @@ class NNIManager implements Manager {
const experimentId: string = getExperimentId(); const experimentId: string = getExperimentId();
this.trainingService = component.get(TrainingService); this.trainingService = component.get(TrainingService);
assert(this.trainingService); assert(this.trainingService);
this.tunerPid = 0; this.dispatcherPid = 0;
this.assessorPid = 0;
this.log = getLogger(); this.log = getLogger();
this.dataStore = component.get(DataStore); this.dataStore = component.get(DataStore);
...@@ -84,9 +80,9 @@ class NNIManager implements Manager { ...@@ -84,9 +80,9 @@ class NNIManager implements Manager {
maxTrialNum: 0, // maxTrialNum includes all the submitted trial jobs maxTrialNum: 0, // maxTrialNum includes all the submitted trial jobs
searchSpace: '', searchSpace: '',
tuner: { tuner: {
tunerCommand: '', className: '',
tunerCwd: '', classArgs: {},
tunerCheckpointDirectory: '' checkpointDir: ''
} }
} }
}; };
...@@ -134,21 +130,15 @@ class NNIManager implements Manager { ...@@ -134,21 +130,15 @@ class NNIManager implements Manager {
this.experimentProfile.params = expParams; this.experimentProfile.params = expParams;
await this.storeExperimentProfile(); await this.storeExperimentProfile();
this.log.debug('Setup tuner...'); this.log.debug('Setup tuner...');
const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor);
console.log(`dispatcher command: ${dispatcherCommand}`);
this.setupTuner( this.setupTuner(
expParams.tuner.tunerCommand, //expParams.tuner.tunerCommand,
expParams.tuner.tunerCwd, dispatcherCommand,
undefined,
'start', 'start',
expParams.tuner.tunerCheckpointDirectory); expParams.tuner.checkpointDir);
if (expParams.assessor !== undefined) {
this.log.debug('Setup assessor...');
this.setupAssessor(
expParams.assessor.assessorCommand,
expParams.assessor.assessorCwd,
'start',
expParams.assessor.assessorCheckpointDirectory
);
}
this.experimentProfile.startTime = new Date(); this.experimentProfile.startTime = new Date();
await this.storeExperimentProfile(); await this.storeExperimentProfile();
...@@ -164,20 +154,13 @@ class NNIManager implements Manager { ...@@ -164,20 +154,13 @@ class NNIManager implements Manager {
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId); this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
const expParams: ExperimentParams = this.experimentProfile.params; const expParams: ExperimentParams = this.experimentProfile.params;
const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor);
console.log(`dispatcher command: ${dispatcherCommand}`);
this.setupTuner( this.setupTuner(
expParams.tuner.tunerCommand, dispatcherCommand,
expParams.tuner.tunerCwd, undefined,
'resume', 'resume',
expParams.tuner.tunerCheckpointDirectory); expParams.tuner.checkpointDir);
if (expParams.assessor !== undefined) {
this.setupAssessor(
expParams.assessor.assessorCommand,
expParams.assessor.assessorCwd,
'resume',
expParams.assessor.assessorCheckpointDirectory
);
}
const allTrialJobs: TrialJobInfo[] = await this.dataStore.listTrialJobs(); const allTrialJobs: TrialJobInfo[] = await this.dataStore.listTrialJobs();
...@@ -204,7 +187,7 @@ class NNIManager implements Manager { ...@@ -204,7 +187,7 @@ class NNIManager implements Manager {
// TO DO: move timeout value to constants file // TO DO: move timeout value to constants file
const delay1: Promise<{}> = new Promise((resolve: Function, reject: Function): void => { const delay1: Promise<{}> = new Promise((resolve: Function, reject: Function): void => {
timeoutId = setTimeout( timeoutId = setTimeout(
() => { reject(new Error('TrainingService setClusterMetadata timeout.')); }, () => { reject(new Error('TrainingService setClusterMetadata timeout. Please check your config file.')); },
10000); 10000);
}); });
await Promise.race([delay1, this.trainingService.setClusterMetadata(key, value)]).finally(() => { await Promise.race([delay1, this.trainingService.setClusterMetadata(key, value)]).finally(() => {
...@@ -248,8 +231,8 @@ class NNIManager implements Manager { ...@@ -248,8 +231,8 @@ class NNIManager implements Manager {
return this.dataStore.listTrialJobs(status); return this.dataStore.listTrialJobs(status);
} }
private setupTuner(command: string, cwd: string, mode: 'start' | 'resume', dataDirectory: string): void { private setupTuner(command: string, cwd: string | undefined, mode: 'start' | 'resume', dataDirectory: string): void {
if (this.tuner !== undefined) { if (this.dispatcher !== undefined) {
return; return;
} }
const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe']; const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe'];
...@@ -270,36 +253,8 @@ class NNIManager implements Manager { ...@@ -270,36 +253,8 @@ class NNIManager implements Manager {
}, },
shell: true shell: true
}); });
this.tunerPid = tunerProc.pid; this.dispatcherPid = tunerProc.pid;
this.tuner = createTunerInterface(tunerProc); this.dispatcher = createDispatcherInterface(tunerProc);
return;
}
private setupAssessor(command: string, cwd: string, mode: 'start' | 'resume', dataDirectory: string): void {
if (this.assessor !== undefined) {
return;
}
const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe'];
let newCwd: string;
if (cwd === undefined || cwd === '') {
newCwd = getLogDir();
} else {
newCwd = cwd;
}
// TO DO: add CUDA_VISIBLE_DEVICES
const assessorProc: ChildProcess = spawn(command, [], {
stdio,
cwd: newCwd,
env: {
NNI_MODE: mode,
NNI_CHECKPOINT_DIRECTORY: dataDirectory,
NNI_LOG_DIRECTORY: getLogDir()
},
shell: true
});
this.assessorPid = assessorProc.pid;
this.assessor = createAssessorInterface(assessorProc);
return; return;
} }
...@@ -307,10 +262,10 @@ class NNIManager implements Manager { ...@@ -307,10 +262,10 @@ class NNIManager implements Manager {
private updateTrialConcurrency(trialConcurrency: number): void { private updateTrialConcurrency(trialConcurrency: number): void {
// TO DO: this method can only be called after startExperiment/resumeExperiment // TO DO: this method can only be called after startExperiment/resumeExperiment
if (trialConcurrency > this.experimentProfile.params.trialConcurrency) { if (trialConcurrency > this.experimentProfile.params.trialConcurrency) {
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has to be initialized'); throw new Error('Error: tuner has to be initialized');
} }
this.tuner.sendCommand( this.dispatcher.sendCommand(
REQUEST_TRIAL_JOBS, REQUEST_TRIAL_JOBS,
String(trialConcurrency - this.experimentProfile.params.trialConcurrency) String(trialConcurrency - this.experimentProfile.params.trialConcurrency)
); );
...@@ -333,45 +288,31 @@ class NNIManager implements Manager { ...@@ -333,45 +288,31 @@ class NNIManager implements Manager {
} }
private updateSearchSpace(searchSpace: string): void { private updateSearchSpace(searchSpace: string): void {
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
this.tuner.sendCommand(UPDATE_SEARCH_SPACE, searchSpace); this.dispatcher.sendCommand(UPDATE_SEARCH_SPACE, searchSpace);
this.experimentProfile.params.searchSpace = searchSpace; this.experimentProfile.params.searchSpace = searchSpace;
return; return;
} }
private async experimentDoneCleanUp(): Promise<void> { private async experimentDoneCleanUp(): Promise<void> {
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
this.tuner.sendCommand(TERMINATE); this.dispatcher.sendCommand(TERMINATE);
if (this.assessor !== undefined) {
this.assessor.sendCommand(TERMINATE);
}
let tunerAlive: boolean = true; let tunerAlive: boolean = true;
let assessorAlive: boolean = true;
// gracefully terminate tuner and assessor here, wait at most 30 seconds. // gracefully terminate tuner and assessor here, wait at most 30 seconds.
for (let i: number = 0; i < 30; i++) { for (let i: number = 0; i < 30; i++) {
if (!tunerAlive && !assessorAlive) { break; } if (!tunerAlive) { break; }
try { try {
await cpp.exec(`kill -0 ${this.tunerPid}`); await cpp.exec(`kill -0 ${this.dispatcherPid}`);
} catch (error) { tunerAlive = false; } } catch (error) { tunerAlive = false; }
if (this.assessor !== undefined) {
try {
await cpp.exec(`kill -0 ${this.assessorPid}`);
} catch (error) { assessorAlive = false; }
} else {
assessorAlive = false;
}
await delay(1000); await delay(1000);
} }
try { try {
await cpp.exec(`kill ${this.tunerPid}`); await cpp.exec(`kill ${this.dispatcherPid}`);
if (this.assessorPid !== undefined) {
await cpp.exec(`kill ${this.assessorPid}`);
}
} catch (error) { } catch (error) {
// this.tunerPid does not exist, do nothing here // this.tunerPid does not exist, do nothing here
} }
...@@ -408,25 +349,18 @@ class NNIManager implements Manager { ...@@ -408,25 +349,18 @@ class NNIManager implements Manager {
return this.dataStore.storeExperimentProfile(this.experimentProfile); return this.dataStore.storeExperimentProfile(this.experimentProfile);
} }
// tslint:disable-next-line:max-func-body-length
private runInternal(): Promise<void> { private runInternal(): Promise<void> {
// TO DO: cannot run this method more than once in one NNIManager instance // TO DO: cannot run this method more than once in one NNIManager instance
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
this.trainingService.addTrialJobMetricListener(async (metric: TrialJobMetric) => { this.trainingService.addTrialJobMetricListener(async (metric: TrialJobMetric) => {
await this.dataStore.storeMetricData(metric.id, metric.data); await this.dataStore.storeMetricData(metric.id, metric.data);
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
this.tuner.sendCommand(REPORT_METRIC_DATA, metric.data); this.dispatcher.sendCommand(REPORT_METRIC_DATA, metric.data);
if (this.assessor !== undefined) {
try {
this.assessor.sendCommand(REPORT_METRIC_DATA, metric.data);
} catch (error) {
this.log.critical(`ASSESSOR ERROR: ${error.message}`);
this.log.critical(`ASSESSOR ERROR: ${error.stack}`);
}
}
}); });
this.trialJobsMaintainer = new TrialJobs( this.trialJobsMaintainer = new TrialJobs(
...@@ -439,7 +373,7 @@ class NNIManager implements Manager { ...@@ -439,7 +373,7 @@ class NNIManager implements Manager {
} else { } else {
this.log.debug(`Job event: ${event}`); this.log.debug(`Job event: ${event}`);
} }
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
switch (event) { switch (event) {
...@@ -453,15 +387,13 @@ class NNIManager implements Manager { ...@@ -453,15 +387,13 @@ class NNIManager implements Manager {
if (this.currSubmittedTrialNum < this.experimentProfile.params.maxTrialNum) { if (this.currSubmittedTrialNum < this.experimentProfile.params.maxTrialNum) {
if (this.customizedTrials.length > 0) { if (this.customizedTrials.length > 0) {
const hyperParams: string | undefined = this.customizedTrials.shift(); const hyperParams: string | undefined = this.customizedTrials.shift();
this.tuner.sendCommand(ADD_CUSTOMIZED_TRIAL_JOB, hyperParams); this.dispatcher.sendCommand(ADD_CUSTOMIZED_TRIAL_JOB, hyperParams);
} else { } else {
this.tuner.sendCommand(REQUEST_TRIAL_JOBS, '1'); this.dispatcher.sendCommand(REQUEST_TRIAL_JOBS, '1');
}
} }
} }
if (this.assessor !== undefined) {
this.assessor.sendCommand(TRIAL_END, JSON.stringify({trial_job_id: trialJobDetail.id, event: event}));
} }
this.dispatcher.sendCommand(TRIAL_END, JSON.stringify({trial_job_id: trialJobDetail.id, event: event}));
await this.dataStore.storeTrialJobEvent(event, trialJobDetail.id, undefined, trialJobDetail.url); await this.dataStore.storeTrialJobEvent(event, trialJobDetail.id, undefined, trialJobDetail.url);
break; break;
case 'RUNNING': case 'RUNNING':
...@@ -478,15 +410,14 @@ class NNIManager implements Manager { ...@@ -478,15 +410,14 @@ class NNIManager implements Manager {
}); });
// TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner // TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner
// TO DO: we should send INITIALIZE command to assessor if user's tuner needs to run init method in tuner this.log.debug(`Send tuner command: update search space: ${this.experimentProfile.params.searchSpace}`);
this.log.debug(`Send tuner command: update search space: ${this.experimentProfile.params.searchSpace}`) this.dispatcher.sendCommand(UPDATE_SEARCH_SPACE, this.experimentProfile.params.searchSpace);
this.tuner.sendCommand(UPDATE_SEARCH_SPACE, this.experimentProfile.params.searchSpace);
if (this.trialConcurrencyReduction !== 0) { if (this.trialConcurrencyReduction !== 0) {
return Promise.reject(new Error('Error: cannot modify trialConcurrency before startExperiment')); return Promise.reject(new Error('Error: cannot modify trialConcurrency before startExperiment'));
} }
this.log.debug(`Send tuner command: ${this.experimentProfile.params.trialConcurrency}`) this.log.debug(`Send tuner command: ${this.experimentProfile.params.trialConcurrency}`)
this.tuner.sendCommand(REQUEST_TRIAL_JOBS, String(this.experimentProfile.params.trialConcurrency)); this.dispatcher.sendCommand(REQUEST_TRIAL_JOBS, String(this.experimentProfile.params.trialConcurrency));
this.tuner.onCommand(async (commandType: string, content: string) => { this.dispatcher.onCommand(async (commandType: string, content: string) => {
this.log.info(`Command from tuner: ${commandType}, ${content}`); this.log.info(`Command from tuner: ${commandType}, ${content}`);
if (this.trialJobsMaintainer === undefined) { if (this.trialJobsMaintainer === undefined) {
throw new Error('Error: trialJobsMaintainer not initialized'); throw new Error('Error: trialJobsMaintainer not initialized');
...@@ -501,8 +432,7 @@ class NNIManager implements Manager { ...@@ -501,8 +432,7 @@ class NNIManager implements Manager {
}; };
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm); const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm);
this.trialJobsMaintainer.setTrialJob(trialJobDetail.id, Object.assign({}, trialJobDetail)); this.trialJobsMaintainer.setTrialJob(trialJobDetail.id, Object.assign({}, trialJobDetail));
// TO DO: to uncomment assert(trialJobDetail.status === 'WAITING');
//assert(trialJobDetail.status === 'WAITING');
await this.dataStore.storeTrialJobEvent(trialJobDetail.status, trialJobDetail.id, content, trialJobDetail.url); await this.dataStore.storeTrialJobEvent(trialJobDetail.status, trialJobDetail.id, content, trialJobDetail.url);
if (this.currSubmittedTrialNum === this.experimentProfile.params.maxTrialNum) { if (this.currSubmittedTrialNum === this.experimentProfile.params.maxTrialNum) {
this.trialJobsMaintainer.setNoMoreTrials(); this.trialJobsMaintainer.setNoMoreTrials();
...@@ -512,19 +442,13 @@ class NNIManager implements Manager { ...@@ -512,19 +442,13 @@ class NNIManager implements Manager {
case NO_MORE_TRIAL_JOBS: case NO_MORE_TRIAL_JOBS:
this.trialJobsMaintainer.setNoMoreTrials(); this.trialJobsMaintainer.setNoMoreTrials();
break; break;
default: case KILL_TRIAL_JOB:
throw new Error('Error: unsupported command type from tuner');
}
});
if (this.assessor !== undefined) {
this.assessor.onCommand(async (commandType: string, content: string) => {
if (commandType === KILL_TRIAL_JOB) {
await this.trainingService.cancelTrialJob(JSON.parse(content)); await this.trainingService.cancelTrialJob(JSON.parse(content));
} else { break;
throw new Error('Error: unsupported command type from assessor'); default:
throw new Error(`Error: unsupported command type: [${commandType}]`);
} }
}); });
}
return this.trialJobsMaintainer.run(); return this.trialJobsMaintainer.run();
} }
......
...@@ -69,10 +69,9 @@ describe('Unit test for dataStore', () => { ...@@ -69,10 +69,9 @@ describe('Unit test for dataStore', () => {
} }
}`, }`,
tuner: { tuner: {
tunerCommand: 'python3 tunner.py', className: 'testTuner',
tunerCwd: '/tmp', checkpointDir: '/tmp/cp',
tunerCheckpointDirectory: '/tmp/cp', gpuNum: 0
tunerGpuNum: 0
} }
}, },
id: 'exp123', id: 'exp123',
......
...@@ -21,5 +21,3 @@ from nni.assessor import Assessor, AssessResult ...@@ -21,5 +21,3 @@ from nni.assessor import Assessor, AssessResult
class DummyAssessor(Assessor): class DummyAssessor(Assessor):
def assess_trial(self, trial_job_id, trial_history): def assess_trial(self, trial_job_id, trial_history):
return AssessResult.Good return AssessResult.Good
DummyAssessor().run()
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from nni.tuner import Tuner
class DummyTuner(Tuner):
    """Stub tuner for unit tests.

    Hands out fixed placeholder parameters and discards every piece of
    feedback it is given; no method reads or writes instance state.
    """

    def generate_parameters(self, parameter_id):
        """Return one fixed placeholder string, whatever ``parameter_id`` is."""
        # NOTE(review): spelled 'parm' here but 'param' in the list below;
        # confirm whether any test depends on this exact value before changing it.
        return 'unit-test-parm'

    def generate_multiple_parameters(self, parameter_id_list):
        """Return a fixed two-element list, whatever ids were requested."""
        return ['unit-test-param1', 'unit-test-param2']

    def receive_trial_result(self, parameter_id, parameters, reward):
        """Accept a trial result and ignore it (returns ``None``)."""

    def receive_customized_trial_result(self, parameter_id, parameters, reward):
        """Accept a customized trial result and ignore it (returns ``None``)."""

    def update_search_space(self, search_space):
        """Accept a new search space and ignore it (returns ``None``)."""
...@@ -24,7 +24,7 @@ import { ChildProcess, spawn } from 'child_process'; ...@@ -24,7 +24,7 @@ import { ChildProcess, spawn } from 'child_process';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import { cleanupUnitTest, prepareUnitTest } from '../../common/utils'; import { cleanupUnitTest, prepareUnitTest } from '../../common/utils';
import * as CommandType from '../commands'; import * as CommandType from '../commands';
import { createAssessorInterface, createTunerInterface, IpcInterface } from '../ipcInterface'; import { createDispatcherInterface, IpcInterface } from '../ipcInterface';
let sentCommands: {[key: string]: string}[] = []; let sentCommands: {[key: string]: string}[] = [];
const receivedCommands: {[key: string]: string}[] = []; const receivedCommands: {[key: string]: string}[] = [];
...@@ -52,27 +52,27 @@ function runProcess(): Promise<Error | null> { ...@@ -52,27 +52,27 @@ function runProcess(): Promise<Error | null> {
}); });
// create IPC interface // create IPC interface
const assessor: IpcInterface = createAssessorInterface(proc); const dispatcher: IpcInterface = createDispatcherInterface(proc);
assessor.onCommand((commandType: string, content: string): void => { dispatcher.onCommand((commandType: string, content: string): void => {
receivedCommands.push({ commandType, content }); receivedCommands.push({ commandType, content });
}); });
// Command #1: ok // Command #1: ok
assessor.sendCommand('IN'); dispatcher.sendCommand('IN');
// Command #2: ok // Command #2: ok
assessor.sendCommand('ME', '123'); dispatcher.sendCommand('ME', '123');
// Command #3: too long // Command #3: too long
try { try {
assessor.sendCommand('ME', 'x'.repeat(1_000_000)); dispatcher.sendCommand('ME', 'x'.repeat(1_000_000));
} catch (error) { } catch (error) {
commandTooLong = error; commandTooLong = error;
} }
// Command #4: not assessor command // Command #4: FE is not tuner/assessor command, test the exception type of send non-valid command
try { try {
assessor.sendCommand('GE', '1'); dispatcher.sendCommand('FE', '1');
} catch (error) { } catch (error) {
rejectCommandType = error; rejectCommandType = error;
} }
......
...@@ -22,18 +22,34 @@ ...@@ -22,18 +22,34 @@
import * as assert from 'assert'; import * as assert from 'assert';
import { ChildProcess, spawn } from 'child_process'; import { ChildProcess, spawn } from 'child_process';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import { cleanupUnitTest, prepareUnitTest } from '../../common/utils'; import { cleanupUnitTest, prepareUnitTest, getMsgDispatcherCommand } from '../../common/utils';
import * as CommandType from '../commands'; import * as CommandType from '../commands';
import { createAssessorInterface, IpcInterface } from '../ipcInterface'; import { createDispatcherInterface, IpcInterface } from '../ipcInterface';
let assessor: IpcInterface | undefined; let dispatcher: IpcInterface | undefined;
let procExit: boolean = false; let procExit: boolean = false;
let procError: boolean = false; let procError: boolean = false;
function startProcess(): void { function startProcess(): void {
// create fake assessor process // create fake assessor process
const stdio: {}[] = ['ignore', 'pipe', process.stderr, 'pipe', 'pipe']; const stdio: {}[] = ['ignore', 'pipe', process.stderr, 'pipe', 'pipe'];
const proc: ChildProcess = spawn('python3 dummy_assessor.py', [], { stdio, cwd: 'core/test', shell: true });
const dispatcherCmd : string = getMsgDispatcherCommand(
// Mock tuner config
{
className: 'DummyTuner',
codeDir: './',
classFileName: 'dummy_tuner.py'
},
// Mock assessor config
{
className: 'DummyAssessor',
codeDir: './',
classFileName: 'dummy_assessor.py'
}
);
const proc: ChildProcess = spawn(dispatcherCmd, [], { stdio, cwd: 'core/test', shell: true });
proc.on('error', (error: Error): void => { proc.on('error', (error: Error): void => {
procExit = true; procExit = true;
...@@ -45,8 +61,8 @@ function startProcess(): void { ...@@ -45,8 +61,8 @@ function startProcess(): void {
}); });
// create IPC interface // create IPC interface
assessor = createAssessorInterface(proc); dispatcher = createDispatcherInterface(proc);
(<IpcInterface>assessor).onCommand((commandType: string, content: string): void => { (<IpcInterface>dispatcher).onCommand((commandType: string, content: string): void => {
console.log(commandType, content); // tslint:disable-line:no-console console.log(commandType, content); // tslint:disable-line:no-console
}); });
} }
...@@ -62,9 +78,9 @@ describe('core/ipcInterface.terminate', (): void => { ...@@ -62,9 +78,9 @@ describe('core/ipcInterface.terminate', (): void => {
}); });
it('normal', () => { it('normal', () => {
(<IpcInterface>assessor).sendCommand( (<IpcInterface>dispatcher).sendCommand(
CommandType.REPORT_METRIC_DATA, CommandType.REPORT_METRIC_DATA,
'{"trial_job_id":"A","type":"periodical","value":1}'); '{"trial_job_id":"A","type":"PERIODICAL","value":1,"sequence":123}');
const deferred: Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
setTimeout( setTimeout(
...@@ -79,7 +95,7 @@ describe('core/ipcInterface.terminate', (): void => { ...@@ -79,7 +95,7 @@ describe('core/ipcInterface.terminate', (): void => {
}); });
it('terminate', () => { it('terminate', () => {
(<IpcInterface>assessor).sendCommand(CommandType.TERMINATE); (<IpcInterface>dispatcher).sendCommand(CommandType.TERMINATE);
const deferred: Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
setTimeout( setTimeout(
...@@ -88,7 +104,7 @@ describe('core/ipcInterface.terminate', (): void => { ...@@ -88,7 +104,7 @@ describe('core/ipcInterface.terminate', (): void => {
assert.ok(!procError); assert.ok(!procError);
deferred.resolve(); deferred.resolve();
}, },
1000); 2000);
return deferred.promise; return deferred.promise;
}); });
......
...@@ -30,7 +30,6 @@ const testTrainingServiceProvider: Provider = { ...@@ -30,7 +30,6 @@ const testTrainingServiceProvider: Provider = {
}; };
class MockedTrainingService extends TrainingService { class MockedTrainingService extends TrainingService {
public mockedMetaDataValue: string = "default"; public mockedMetaDataValue: string = "default";
public jobDetail1: TrialJobDetail = { public jobDetail1: TrialJobDetail = {
id: '1234', id: '1234',
...@@ -93,6 +92,14 @@ class MockedTrainingService extends TrainingService { ...@@ -93,6 +92,14 @@ class MockedTrainingService extends TrainingService {
return deferred.promise; return deferred.promise;
} }
/**
 * Mocked multi-phase update: not implemented, so every call fails.
 *
 * The method is not `async`, so the error is thrown synchronously at the
 * call site instead of arriving as a rejected promise.
 *
 * @param trialJobId trial job id (unused)
 * @param form trial job application form (unused)
 * @returns never returns normally, despite the declared promise type
 * @throws MethodNotImplementedError on every call
 */
public updateTrialJob(trialJobId: string, form: TrialJobApplicationForm): Promise<TrialJobDetail> {
throw new MethodNotImplementedError();
}
/**
 * Whether this mocked training service supports multi-phase jobs.
 *
 * @returns always `false`
 */
public get isMultiPhaseJobSupported(): boolean {
return false;
}
public cancelTrialJob(trialJobId: string): Promise<void> { public cancelTrialJob(trialJobId: string): Promise<void> {
const deferred = new Deferred<void>(); const deferred = new Deferred<void>();
if(trialJobId === '1234' || trialJobId === '3456'){ if(trialJobId === '1234' || trialJobId === '3456'){
...@@ -125,7 +132,7 @@ class MockedTrainingService extends TrainingService { ...@@ -125,7 +132,7 @@ class MockedTrainingService extends TrainingService {
} }
public cleanUp(): Promise<void> { public cleanUp(): Promise<void> {
throw new MethodNotImplementedError(); return Promise.resolve();
} }
} }
......
...@@ -56,16 +56,17 @@ describe('Unit test for nnimanager', function () { ...@@ -56,16 +56,17 @@ describe('Unit test for nnimanager', function () {
maxTrialNum: 2, maxTrialNum: 2,
searchSpace: '{"x":1}', searchSpace: '{"x":1}',
tuner: { tuner: {
tunerCommand: 'python3 hyperopt.py', className: 'EvolutionTuner',
tunerCwd: 'core/test', classArgs: {
tunerCheckpointDirectory: '', optimize_mode: 'maximize'
tunerGpuNum: 1 },
checkpointDir: '',
gpuNum: 1
}, },
assessor: { assessor: {
assessorCommand: 'python3 dummy_assessor.py', className: 'MedianstopAssessor',
assessorCwd: 'core/test', checkpointDir: '',
assessorCheckpointDirectory: '', gpuNum: 1
assessorGpuNum: 1
} }
} }
......
...@@ -38,10 +38,9 @@ const expParams1: ExperimentParams = { ...@@ -38,10 +38,9 @@ const expParams1: ExperimentParams = {
maxTrialNum: 5, maxTrialNum: 5,
searchSpace: 'SS', searchSpace: 'SS',
tuner: { tuner: {
tunerCommand: './tuner.sh', className: 'testTuner',
tunerCwd: '.', checkpointDir: '/tmp',
tunerCheckpointDirectory: '/tmp', gpuNum: 0
tunerGpuNum: 0
} }
}; };
...@@ -53,14 +52,12 @@ const expParams2: ExperimentParams = { ...@@ -53,14 +52,12 @@ const expParams2: ExperimentParams = {
maxTrialNum: 5, maxTrialNum: 5,
searchSpace: '', searchSpace: '',
tuner: { tuner: {
tunerCommand: 'python tuner.py', className: 'testTuner',
tunerCwd: '/tmp', checkpointDir: '/tmp'
tunerCheckpointDirectory: '/tmp'
}, },
assessor: { assessor: {
assessorCommand: 'python assessor.py', className: 'testAssessor',
assessorCwd: '/tmp', checkpointDir: '/tmp'
assessorCheckpointDirectory: '/tmp'
} }
}; };
......
...@@ -37,7 +37,7 @@ export const testManagerProvider: Provider = { ...@@ -37,7 +37,7 @@ export const testManagerProvider: Provider = {
}; };
export class MockedNNIManager extends Manager { export class MockedNNIManager extends Manager {
public updateExperimentProfile(experimentProfile: ExperimentProfile, updateType: ProfileUpdateType ): Promise<void> { public updateExperimentProfile(experimentProfile: ExperimentProfile, updateType: ProfileUpdateType): Promise<void> {
return Promise.resolve(); return Promise.resolve();
} }
public getTrialJobStatistics(): Promise<TrialJobStatistics[]> { public getTrialJobStatistics(): Promise<TrialJobStatistics[]> {
...@@ -103,23 +103,15 @@ export class MockedNNIManager extends Manager { ...@@ -103,23 +103,15 @@ export class MockedNNIManager extends Manager {
return deferred.promise; return deferred.promise;
} }
public getTrialJob(trialJobId: string): Promise<TrialJobDetail> { public getTrialJob(trialJobId: string): Promise<TrialJobInfo> {
const deferred: Deferred<TrialJobDetail> = new Deferred<TrialJobDetail>(); const deferred: Deferred<TrialJobInfo> = new Deferred<TrialJobInfo>();
const jobDetail: TrialJobDetail = { const jobInfo: TrialJobInfo = {
id: '1234', id: '1234',
status: 'SUCCEEDED', status: 'SUCCEEDED',
submitTime: new Date(),
startTime: new Date(), startTime: new Date(),
endTime: new Date(), endTime: new Date()
tags: ['test'],
// tslint:disable-next-line:no-http-string
url: 'http://test',
workingDirectory: '/tmp/mocked',
form: {
jobType: 'TRIAL'
}
}; };
deferred.resolve(jobDetail); deferred.resolve(jobInfo);
return deferred.promise; return deferred.promise;
} }
...@@ -139,9 +131,8 @@ export class MockedNNIManager extends Manager { ...@@ -139,9 +131,8 @@ export class MockedNNIManager extends Manager {
maxTrialNum: 3, maxTrialNum: 3,
searchSpace: '{lr: 0.01}', searchSpace: '{lr: 0.01}',
tuner: { tuner: {
tunerCommand: 'python3 tuner.py', className: 'testTuner',
tunerCwd: '/tmp/tunner', checkpointDir: ''
tunerCheckpointDirectory: ''
} }
}, },
id: '2345', id: '2345',
......
...@@ -116,7 +116,7 @@ describe('Unit test for rest server', () => { ...@@ -116,7 +116,7 @@ describe('Unit test for rest server', () => {
} }
const req: request.Options = { const req: request.Options = {
uri: `${ROOT_URL}/experiment`, uri: `${ROOT_URL}/experiment?update_type=TRIAL_CONCURRENCY`,
method: 'PUT', method: 'PUT',
json: true, json: true,
body: profile body: profile
...@@ -141,7 +141,7 @@ describe('Unit test for rest server', () => { ...@@ -141,7 +141,7 @@ describe('Unit test for rest server', () => {
body: { body: {
exception_test_key: 'test' exception_test_key: 'test'
} }
} };
request(req, (err: Error, res: request.Response) => { request(req, (err: Error, res: request.Response) => {
if (err) { if (err) {
assert.fail(err.message); assert.fail(err.message);
...@@ -158,7 +158,7 @@ describe('Unit test for rest server', () => { ...@@ -158,7 +158,7 @@ describe('Unit test for rest server', () => {
method: 'PUT', method: 'PUT',
json: true, json: true,
body: { body: {
MACHINE_LIST: [{ machine_list: [{
ip: '10.10.10.101', ip: '10.10.10.101',
port: 22, port: 22,
username: 'test', username: 'test',
...@@ -170,37 +170,12 @@ describe('Unit test for rest server', () => { ...@@ -170,37 +170,12 @@ describe('Unit test for rest server', () => {
passwd: '1234' passwd: '1234'
}] }]
} }
}
request(req, (err: Error, res: request.Response) => {
if (err) {
assert.fail(err.message);
} else {
expect(res.statusCode).to.equal(200);
}
done();
});
});
it('Test POST experiment', (done: Mocha.Done) => {
const req: request.Options = {
uri: `${ROOT_URL}/experiment`,
method: 'POST',
json: true,
body: {
author: 'test',
trial: {
entrypoint: 'python',
args: 'mnist.py'
}
}
}; };
// tslint:disable-next-line:no-any request(req, (err: Error, res: request.Response) => {
request(req, (err: Error, res: request.Response, body: any) => {
if (err) { if (err) {
assert.fail(err.message); assert.fail(err.message);
} else { } else {
expect(res.statusCode).to.equal(200); expect(res.statusCode).to.equal(200);
expect(body.experiment_id).to.equal('id-1234');
} }
done(); done();
}); });
......
...@@ -25,7 +25,7 @@ import { EventEmitter } from 'events'; ...@@ -25,7 +25,7 @@ import { EventEmitter } from 'events';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
import * as ts from 'tail-stream'; import * as ts from 'tail-stream';
import { NNIError, NNIErrorNames } from '../../common/errors'; import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { TrialConfig } from '../common/trialConfig'; import { TrialConfig } from '../common/trialConfig';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey'; import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
...@@ -205,6 +205,22 @@ class LocalTrainingService implements TrainingService { ...@@ -205,6 +205,22 @@ class LocalTrainingService implements TrainingService {
} }
} }
/**
 * Update trial job for multi-phase.
 *
 * Not implemented here: every call fails. The method is not `async`, so the
 * error is thrown synchronously at the call site instead of arriving as a
 * rejected promise.
 *
 * @param trialJobId trial job id (unused)
 * @param form job application form (unused)
 * @returns never returns normally, despite the declared promise type
 * @throws MethodNotImplementedError on every call
 */
public updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail> {
throw new MethodNotImplementedError();
}
/**
 * Whether multi-phase jobs are supported by the current training service.
 *
 * @returns always `false`
 */
public get isMultiPhaseJobSupported(): boolean {
return false;
}
public async cancelTrialJob(trialJobId: string): Promise<void> { public async cancelTrialJob(trialJobId: string): Promise<void> {
this.log.info(`cancelTrialJob: ${trialJobId}`); this.log.info(`cancelTrialJob: ${trialJobId}`);
const trialJob: LocalTrialJobDetail | undefined = this.jobMap.get(trialJobId); const trialJob: LocalTrialJobDetail | undefined = this.jobMap.get(trialJobId);
...@@ -309,7 +325,7 @@ class LocalTrainingService implements TrainingService { ...@@ -309,7 +325,7 @@ class LocalTrainingService implements TrainingService {
runScriptLines.push(`export ${variable.key}=${variable.value}`); runScriptLines.push(`export ${variable.key}=${variable.value}`);
} }
runScriptLines.push( runScriptLines.push(
`eval ${this.localTrailConfig.command} 2>${path.join(trialJobDetail.workingDirectory, '.nni', 'stderr')}`, `eval ${this.localTrailConfig.command} 2>${path.join(trialJobDetail.workingDirectory, 'stderr')}`,
`echo $? \`date +%s%3N\` >${path.join(trialJobDetail.workingDirectory, '.nni', 'state')}`); `echo $? \`date +%s%3N\` >${path.join(trialJobDetail.workingDirectory, '.nni', 'state')}`);
await cpp.exec(`mkdir -p ${trialJobDetail.workingDirectory}`); await cpp.exec(`mkdir -p ${trialJobDetail.workingDirectory}`);
......
...@@ -82,7 +82,12 @@ export class MetricsCollector { ...@@ -82,7 +82,12 @@ export class MetricsCollector {
private getTrialJobIdsGroupByRmMeta(status: TrialJobStatus[]): Map<RemoteMachineMeta, string[]> { private getTrialJobIdsGroupByRmMeta(status: TrialJobStatus[]): Map<RemoteMachineMeta, string[]> {
const map: Map<RemoteMachineMeta, string[]> = new Map<RemoteMachineMeta, string[]>(); const map: Map<RemoteMachineMeta, string[]> = new Map<RemoteMachineMeta, string[]>();
this.trialJobsMap.forEach((trialJob, id) => { this.trialJobsMap.forEach((trialJob, id) => {
if (status.includes(trialJob.status)) { let reservedTrialJobIds : string[] = [];
if(trialJob.rmMeta !== undefined
&& trialJob.rmMeta.gpuReservation !== undefined) {
reservedTrialJobIds = Array.from(trialJob.rmMeta.gpuReservation.values());
}
if (reservedTrialJobIds.includes(id) || status.includes(trialJob.status)) {
if (map.has(trialJob.rmMeta)) { if (map.has(trialJob.rmMeta)) {
const ids = map.get(trialJob.rmMeta); const ids = map.get(trialJob.rmMeta);
if (ids !== undefined && !ids.includes(id)) { if (ids !== undefined && !ids.includes(id)) {
...@@ -93,7 +98,7 @@ export class MetricsCollector { ...@@ -93,7 +98,7 @@ export class MetricsCollector {
// If the remote machine has jobs reserve GPU, also put that jobs into list to get metrics data // If the remote machine has jobs reserve GPU, also put that jobs into list to get metrics data
if(trialJob.rmMeta.gpuReservation !== undefined) { if(trialJob.rmMeta.gpuReservation !== undefined) {
const concatJobIds : string[] = initJobIds.concat(Array.from(trialJob.rmMeta.gpuReservation.values())); const concatJobIds : string[] = initJobIds.concat(reservedTrialJobIds);
initJobIds = concatJobIds.filter((item, pos) => concatJobIds.indexOf(item) === pos); initJobIds = concatJobIds.filter((item, pos) => concatJobIds.indexOf(item) === pos);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment