Unverified Commit e9f137f0 authored by QuanluZhang's avatar QuanluZhang Committed by GitHub
Browse files

merge from master (#2019)

parent f7cf3ea5
authorName: default
experimentName: example_mnist-keras
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: BatchTuner
trial:
command: python3 mnist-keras.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist-keras
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist-keras.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist_pytorch
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: Unknown
experimentName: enas_macro
trialConcurrency: 20
maxExecDuration: 2400h
maxTrialNum: 20000
#choice: local, remote
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: true
multiPhase: false
versionCheck: false
nniManagerIp: 0.0.0.0
tuner:
builtinTunerName: PPOTuner
classArgs:
optimize_mode: maximize
trials_per_update: 60
epochs_per_update: 20
minibatch_size: 6
trial:
command: sh ./macro_cifar10_pai.sh
codeDir: ./
gpuNum: 1
cpuNum: 1
memoryMB: 8196
image: msranni/nni:latest
virtualCluster: nni
paiYarnConfig:
userName: your_account
passWord: your_passwd
host: 0.0.0.0
......@@ -25,7 +25,10 @@ trial:
memoryMB: 8196
image: msranni/nni:latest
virtualCluster: nni
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
userName: your_account
passWord: your_pwd
token: your_token
host: 0.0.0.0
......@@ -30,10 +30,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_FashionMNIST-network-morphism
trialConcurrency: 1
maxExecDuration: 24h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: NetworkMorphism
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
# for now, this tuner only supports cv domain
task: cv
#input image width
input_width: 28
#input image channel
input_channel: 1
#number of classes
n_output_node: 10
trial:
command: python3 FashionMNIST_keras.py
codeDir: .
gpuNum: 1
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -30,10 +30,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_cifar10-network-morphism
trialConcurrency: 1
maxExecDuration: 24h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: NetworkMorphism
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
# for now, this tuner only supports cv domain
task: cv
#input image width
input_width: 32
#input image channel
input_channel: 3
#number of classes
n_output_node: 10
trial:
command: python3 cifar10_keras.py
codeDir: .
gpuNum: 1
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_sklearn
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 100
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner,MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 main.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_sklearn
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 100
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 main.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -4,13 +4,11 @@
'use strict';
import * as fs from 'fs';
import * as path from 'path';
import { Writable } from 'stream';
import { WritableStreamBuffer } from 'stream-buffers';
import { format } from 'util';
import * as component from '../common/component';
import { getExperimentStartupInfo, isReadonly } from './experimentStartupInfo';
import { getLogDir } from './utils';
const FATAL: number = 1;
const ERROR: number = 2;
......@@ -55,23 +53,21 @@ class BufferSerialEmitter {
@component.Singleton
class Logger {
private DEFAULT_LOGFILE: string = path.join(getLogDir(), 'nnimanager.log');
private level: number = INFO;
private bufferSerialEmitter: BufferSerialEmitter;
private writable: Writable;
private bufferSerialEmitter?: BufferSerialEmitter;
private writable?: Writable;
private readonly: boolean = false;
constructor(fileName?: string) {
let logFile: string | undefined = fileName;
if (logFile === undefined) {
logFile = this.DEFAULT_LOGFILE;
}
const logFile: string | undefined = fileName;
if (logFile) {
this.writable = fs.createWriteStream(logFile, {
flags: 'a+',
encoding: 'utf8',
autoClose: true
});
this.bufferSerialEmitter = new BufferSerialEmitter(this.writable);
}
const logLevelName: string = getExperimentStartupInfo()
.getLogLevel();
......@@ -84,8 +80,10 @@ class Logger {
}
public close(): void {
if (this.writable) {
this.writable.destroy();
}
}
public trace(...param: any[]): void {
if (this.level >= TRACE) {
......@@ -128,12 +126,15 @@ class Logger {
*/
private log(level: string, param: any[]): void {
if (!this.readonly) {
const logContent = `[${(new Date()).toLocaleString()}] ${level} ${format(param)}\n`;
if (this.writable && this.bufferSerialEmitter) {
const buffer: WritableStreamBuffer = new WritableStreamBuffer();
buffer.write(`[${(new Date()).toLocaleString()}] ${level} `);
buffer.write(format(param));
buffer.write('\n');
buffer.write(logContent);
buffer.end();
this.bufferSerialEmitter.feed(buffer.getContents());
} else {
console.log(logContent);
}
}
}
}
......
......@@ -6,6 +6,7 @@
import { Container, Scope } from 'typescript-ioc';
import * as fs from 'fs';
import * as path from 'path';
import * as component from './common/component';
import { Database, DataStore } from './common/datastore';
import { setExperimentStartupInfo } from './common/experimentStartupInfo';
......@@ -34,7 +35,7 @@ function initStartupInfo(
setExperimentStartupInfo(createNew, expId, basePort, logDirectory, experimentLogLevel, readonly);
}
async function initContainer(platformMode: string, logFileName?: string): Promise<void> {
async function initContainer(foreground: boolean, platformMode: string, logFileName?: string): Promise<void> {
if (platformMode === 'local') {
Container.bind(TrainingService)
.to(LocalTrainingService)
......@@ -71,6 +72,12 @@ async function initContainer(platformMode: string, logFileName?: string): Promis
Container.bind(DataStore)
.to(NNIDataStore)
.scope(Scope.Singleton);
const DEFAULT_LOGFILE: string = path.join(getLogDir(), 'nnimanager.log');
if (foreground) {
logFileName = undefined;
} else if (logFileName === undefined) {
logFileName = DEFAULT_LOGFILE;
}
Container.bind(Logger).provider({
get: (): Logger => new Logger(logFileName)
});
......@@ -81,7 +88,7 @@ async function initContainer(platformMode: string, logFileName?: string): Promis
function usage(): void {
console.info('usage: node main.js --port <port> --mode \
<local/remote/pai/kubeflow/frameworkcontroller/paiYarn> --start_mode <new/resume> --experiment_id <id>');
<local/remote/pai/kubeflow/frameworkcontroller/paiYarn> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
}
const strPort: string = parseArg(['--port', '-p']);
......@@ -90,6 +97,14 @@ if (!strPort || strPort.length === 0) {
process.exit(1);
}
const foregroundArg: string = parseArg(['--foreground', '-f']);
if (!('true' || 'false').includes(foregroundArg.toLowerCase())) {
console.log(`FATAL: foreground property should only be true or false`);
usage();
process.exit(1);
}
const foreground: boolean = foregroundArg.toLowerCase() === 'true' ? true : false;
const port: number = parseInt(strPort, 10);
const mode: string = parseArg(['--mode', '-m']);
......@@ -138,7 +153,7 @@ initStartupInfo(startMode, experimentId, port, logDir, logLevel, readonly);
mkDirP(getLogDir())
.then(async () => {
try {
await initContainer(mode);
await initContainer(foreground, mode);
const restServer: NNIRestServer = component.get(NNIRestServer);
await restServer.start();
const log: Logger = getLogger();
......@@ -162,6 +177,15 @@ function getStopSignal(): any {
}
}
function getCtrlCSignal(): any {
return 'SIGINT';
}
process.on(getCtrlCSignal(), async () => {
const log: Logger = getLogger();
log.info(`Get SIGINT signal!`);
});
process.on(getStopSignal(), async () => {
const log: Logger = getLogger();
let hasError: boolean = false;
......
......@@ -13,6 +13,7 @@
"azure-storage": "^2.10.2",
"chai-as-promised": "^7.1.1",
"child-process-promise": "^2.2.1",
"deepmerge": "^4.2.2",
"express": "^4.16.3",
"express-joi-validator": "^2.0.0",
"js-base64": "^2.4.9",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment