"src/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "829842385da4c02c416ec0ff7999510fdb43700d"
Unverified Commit e9f137f0 authored by QuanluZhang's avatar QuanluZhang Committed by GitHub
Browse files

merge from master (#2019)

parent f7cf3ea5
authorName: default
experimentName: example_mnist-keras
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: BatchTuner
trial:
command: python3 mnist-keras.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
...@@ -23,10 +23,13 @@ trial: ...@@ -23,10 +23,13 @@ trial:
memoryMB: 8196 memoryMB: 8196
#The docker image to run nni job on pai #The docker image to run nni job on pai
image: msranni/nni:latest image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig: paiConfig:
#The username to login pai #The username to login pai
userName: username userName: username
#The password to login pai #The token to login pai
passWord: password token: token
#The host of restful server of pai #The host of restful server of pai
host: 10.10.10.10 host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist-keras
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist-keras.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
...@@ -23,10 +23,13 @@ trial: ...@@ -23,10 +23,13 @@ trial:
memoryMB: 8196 memoryMB: 8196
#The docker image to run nni job on pai #The docker image to run nni job on pai
image: msranni/nni:latest image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig: paiConfig:
#The username to login pai #The username to login pai
userName: username userName: username
#The password to login pai #The token to login pai
passWord: password token: token
#The host of restful server of pai #The host of restful server of pai
host: 10.10.10.10 host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist_pytorch
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
...@@ -23,10 +23,13 @@ trial: ...@@ -23,10 +23,13 @@ trial:
memoryMB: 8196 memoryMB: 8196
#The docker image to run nni job on pai #The docker image to run nni job on pai
image: msranni/nni:latest image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig: paiConfig:
#The username to login pai #The username to login pai
userName: username userName: username
#The password to login pai #The token to login pai
passWord: password token: token
#The host of restful server of pai #The host of restful server of pai
host: 10.10.10.10 host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: Unknown
experimentName: enas_macro
trialConcurrency: 20
maxExecDuration: 2400h
maxTrialNum: 20000
#choice: local, remote
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: true
multiPhase: false
versionCheck: false
nniManagerIp: 0.0.0.0
tuner:
builtinTunerName: PPOTuner
classArgs:
optimize_mode: maximize
trials_per_update: 60
epochs_per_update: 20
minibatch_size: 6
trial:
command: sh ./macro_cifar10_pai.sh
codeDir: ./
gpuNum: 1
cpuNum: 1
memoryMB: 8196
image: msranni/nni:latest
virtualCluster: nni
paiYarnConfig:
userName: your_account
passWord: your_passwd
host: 0.0.0.0
...@@ -25,7 +25,10 @@ trial: ...@@ -25,7 +25,10 @@ trial:
memoryMB: 8196 memoryMB: 8196
image: msranni/nni:latest image: msranni/nni:latest
virtualCluster: nni virtualCluster: nni
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig: paiConfig:
userName: your_account userName: your_account
passWord: your_pwd token: your_token
host: 0.0.0.0 host: 0.0.0.0
...@@ -30,10 +30,13 @@ trial: ...@@ -30,10 +30,13 @@ trial:
memoryMB: 8196 memoryMB: 8196
#The docker image to run nni job on pai #The docker image to run nni job on pai
image: msranni/nni:latest image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig: paiConfig:
#The username to login pai #The username to login pai
userName: username userName: username
#The password to login pai #The token to login pai
passWord: password token: token
#The host of restful server of pai #The host of restful server of pai
host: 10.10.10.10 host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_FashionMNIST-network-morphism
trialConcurrency: 1
maxExecDuration: 24h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: NetworkMorphism
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
# for now, this tuner only supports cv domain
task: cv
#input image width
input_width: 28
#input image channel
input_channel: 1
#number of classes
n_output_node: 10
trial:
command: python3 FashionMNIST_keras.py
codeDir: .
gpuNum: 1
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
...@@ -30,10 +30,13 @@ trial: ...@@ -30,10 +30,13 @@ trial:
memoryMB: 8196 memoryMB: 8196
#The docker image to run nni job on pai #The docker image to run nni job on pai
image: msranni/nni:latest image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig: paiConfig:
#The username to login pai #The username to login pai
userName: username userName: username
#The password to login pai #The token to login pai
passWord: password token: token
#The host of restful server of pai #The host of restful server of pai
host: 10.10.10.10 host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_cifar10-network-morphism
trialConcurrency: 1
maxExecDuration: 24h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: NetworkMorphism
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
# for now, this tuner only supports cv domain
task: cv
#input image width
input_width: 32
#input image channel
input_channel: 3
#number of classes
n_output_node: 10
trial:
command: python3 cifar10_keras.py
codeDir: .
gpuNum: 1
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
...@@ -23,10 +23,13 @@ trial: ...@@ -23,10 +23,13 @@ trial:
memoryMB: 8196 memoryMB: 8196
#The docker image to run nni job on pai #The docker image to run nni job on pai
image: msranni/nni:latest image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig: paiConfig:
#The username to login pai #The username to login pai
userName: username userName: username
#The password to login pai #The token to login pai
passWord: password token: token
#The host of restful server of pai #The host of restful server of pai
host: 10.10.10.10 host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_sklearn
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 100
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner,MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 main.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
...@@ -23,10 +23,13 @@ trial: ...@@ -23,10 +23,13 @@ trial:
memoryMB: 8196 memoryMB: 8196
#The docker image to run nni job on pai #The docker image to run nni job on pai
image: msranni/nni:latest image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig: paiConfig:
#The username to login pai #The username to login pai
userName: username userName: username
#The password to login pai #The token to login pai
passWord: password token: token
#The host of restful server of pai #The host of restful server of pai
host: 10.10.10.10 host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_sklearn
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 100
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 main.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
...@@ -4,13 +4,11 @@ ...@@ -4,13 +4,11 @@
'use strict'; 'use strict';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path';
import { Writable } from 'stream'; import { Writable } from 'stream';
import { WritableStreamBuffer } from 'stream-buffers'; import { WritableStreamBuffer } from 'stream-buffers';
import { format } from 'util'; import { format } from 'util';
import * as component from '../common/component'; import * as component from '../common/component';
import { getExperimentStartupInfo, isReadonly } from './experimentStartupInfo'; import { getExperimentStartupInfo, isReadonly } from './experimentStartupInfo';
import { getLogDir } from './utils';
const FATAL: number = 1; const FATAL: number = 1;
const ERROR: number = 2; const ERROR: number = 2;
...@@ -55,23 +53,21 @@ class BufferSerialEmitter { ...@@ -55,23 +53,21 @@ class BufferSerialEmitter {
@component.Singleton @component.Singleton
class Logger { class Logger {
private DEFAULT_LOGFILE: string = path.join(getLogDir(), 'nnimanager.log');
private level: number = INFO; private level: number = INFO;
private bufferSerialEmitter: BufferSerialEmitter; private bufferSerialEmitter?: BufferSerialEmitter;
private writable: Writable; private writable?: Writable;
private readonly: boolean = false; private readonly: boolean = false;
constructor(fileName?: string) { constructor(fileName?: string) {
let logFile: string | undefined = fileName; const logFile: string | undefined = fileName;
if (logFile === undefined) { if (logFile) {
logFile = this.DEFAULT_LOGFILE; this.writable = fs.createWriteStream(logFile, {
flags: 'a+',
encoding: 'utf8',
autoClose: true
});
this.bufferSerialEmitter = new BufferSerialEmitter(this.writable);
} }
this.writable = fs.createWriteStream(logFile, {
flags: 'a+',
encoding: 'utf8',
autoClose: true
});
this.bufferSerialEmitter = new BufferSerialEmitter(this.writable);
const logLevelName: string = getExperimentStartupInfo() const logLevelName: string = getExperimentStartupInfo()
.getLogLevel(); .getLogLevel();
...@@ -84,7 +80,9 @@ class Logger { ...@@ -84,7 +80,9 @@ class Logger {
} }
public close(): void { public close(): void {
this.writable.destroy(); if (this.writable) {
this.writable.destroy();
}
} }
public trace(...param: any[]): void { public trace(...param: any[]): void {
...@@ -128,12 +126,15 @@ class Logger { ...@@ -128,12 +126,15 @@ class Logger {
*/ */
private log(level: string, param: any[]): void { private log(level: string, param: any[]): void {
if (!this.readonly) { if (!this.readonly) {
const buffer: WritableStreamBuffer = new WritableStreamBuffer(); const logContent = `[${(new Date()).toLocaleString()}] ${level} ${format(param)}\n`;
buffer.write(`[${(new Date()).toLocaleString()}] ${level} `); if (this.writable && this.bufferSerialEmitter) {
buffer.write(format(param)); const buffer: WritableStreamBuffer = new WritableStreamBuffer();
buffer.write('\n'); buffer.write(logContent);
buffer.end(); buffer.end();
this.bufferSerialEmitter.feed(buffer.getContents()); this.bufferSerialEmitter.feed(buffer.getContents());
} else {
console.log(logContent);
}
} }
} }
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import { Container, Scope } from 'typescript-ioc'; import { Container, Scope } from 'typescript-ioc';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path';
import * as component from './common/component'; import * as component from './common/component';
import { Database, DataStore } from './common/datastore'; import { Database, DataStore } from './common/datastore';
import { setExperimentStartupInfo } from './common/experimentStartupInfo'; import { setExperimentStartupInfo } from './common/experimentStartupInfo';
...@@ -34,7 +35,7 @@ function initStartupInfo( ...@@ -34,7 +35,7 @@ function initStartupInfo(
setExperimentStartupInfo(createNew, expId, basePort, logDirectory, experimentLogLevel, readonly); setExperimentStartupInfo(createNew, expId, basePort, logDirectory, experimentLogLevel, readonly);
} }
async function initContainer(platformMode: string, logFileName?: string): Promise<void> { async function initContainer(foreground: boolean, platformMode: string, logFileName?: string): Promise<void> {
if (platformMode === 'local') { if (platformMode === 'local') {
Container.bind(TrainingService) Container.bind(TrainingService)
.to(LocalTrainingService) .to(LocalTrainingService)
...@@ -71,6 +72,12 @@ async function initContainer(platformMode: string, logFileName?: string): Promis ...@@ -71,6 +72,12 @@ async function initContainer(platformMode: string, logFileName?: string): Promis
Container.bind(DataStore) Container.bind(DataStore)
.to(NNIDataStore) .to(NNIDataStore)
.scope(Scope.Singleton); .scope(Scope.Singleton);
const DEFAULT_LOGFILE: string = path.join(getLogDir(), 'nnimanager.log');
if (foreground) {
logFileName = undefined;
} else if (logFileName === undefined) {
logFileName = DEFAULT_LOGFILE;
}
Container.bind(Logger).provider({ Container.bind(Logger).provider({
get: (): Logger => new Logger(logFileName) get: (): Logger => new Logger(logFileName)
}); });
...@@ -81,7 +88,7 @@ async function initContainer(platformMode: string, logFileName?: string): Promis ...@@ -81,7 +88,7 @@ async function initContainer(platformMode: string, logFileName?: string): Promis
function usage(): void { function usage(): void {
console.info('usage: node main.js --port <port> --mode \ console.info('usage: node main.js --port <port> --mode \
<local/remote/pai/kubeflow/frameworkcontroller/paiYarn> --start_mode <new/resume> --experiment_id <id>'); <local/remote/pai/kubeflow/frameworkcontroller/paiYarn> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
} }
const strPort: string = parseArg(['--port', '-p']); const strPort: string = parseArg(['--port', '-p']);
...@@ -90,6 +97,14 @@ if (!strPort || strPort.length === 0) { ...@@ -90,6 +97,14 @@ if (!strPort || strPort.length === 0) {
process.exit(1); process.exit(1);
} }
const foregroundArg: string = parseArg(['--foreground', '-f']);
if (!('true' || 'false').includes(foregroundArg.toLowerCase())) {
console.log(`FATAL: foreground property should only be true or false`);
usage();
process.exit(1);
}
const foreground: boolean = foregroundArg.toLowerCase() === 'true' ? true : false;
const port: number = parseInt(strPort, 10); const port: number = parseInt(strPort, 10);
const mode: string = parseArg(['--mode', '-m']); const mode: string = parseArg(['--mode', '-m']);
...@@ -138,7 +153,7 @@ initStartupInfo(startMode, experimentId, port, logDir, logLevel, readonly); ...@@ -138,7 +153,7 @@ initStartupInfo(startMode, experimentId, port, logDir, logLevel, readonly);
mkDirP(getLogDir()) mkDirP(getLogDir())
.then(async () => { .then(async () => {
try { try {
await initContainer(mode); await initContainer(foreground, mode);
const restServer: NNIRestServer = component.get(NNIRestServer); const restServer: NNIRestServer = component.get(NNIRestServer);
await restServer.start(); await restServer.start();
const log: Logger = getLogger(); const log: Logger = getLogger();
...@@ -162,6 +177,15 @@ function getStopSignal(): any { ...@@ -162,6 +177,15 @@ function getStopSignal(): any {
} }
} }
function getCtrlCSignal(): any {
return 'SIGINT';
}
process.on(getCtrlCSignal(), async () => {
const log: Logger = getLogger();
log.info(`Get SIGINT signal!`);
});
process.on(getStopSignal(), async () => { process.on(getStopSignal(), async () => {
const log: Logger = getLogger(); const log: Logger = getLogger();
let hasError: boolean = false; let hasError: boolean = false;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
"azure-storage": "^2.10.2", "azure-storage": "^2.10.2",
"chai-as-promised": "^7.1.1", "chai-as-promised": "^7.1.1",
"child-process-promise": "^2.2.1", "child-process-promise": "^2.2.1",
"deepmerge": "^4.2.2",
"express": "^4.16.3", "express": "^4.16.3",
"express-joi-validator": "^2.0.0", "express-joi-validator": "^2.0.0",
"js-base64": "^2.4.9", "js-base64": "^2.4.9",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment