Unverified Commit 6b52fb12 authored by liuzhe-lz's avatar liuzhe-lz Committed by GitHub
Browse files

Fix 3rd-party training service bug (#3726)


Co-authored-by: default avatarliuzhe <zhe.liu@microsoft.com>
parent d9dd29f3
...@@ -39,8 +39,8 @@ def register(args): ...@@ -39,8 +39,8 @@ def register(args):
try: try:
service_config = { service_config = {
'node_module_path': info.node_module_path, 'nodeModulePath': str(info.node_module_path),
'node_class_name': info.node_class_name, 'nodeClassName': info.node_class_name,
} }
json.dumps(service_config) json.dumps(service_config)
except Exception: except Exception:
......
...@@ -6,36 +6,43 @@ ...@@ -6,36 +6,43 @@
import * as assert from 'assert'; import * as assert from 'assert';
import * as os from 'os'; import * as os from 'os';
import * as path from 'path'; import * as path from 'path';
import * as component from '../common/component';
const API_ROOT_URL: string = '/api/v1/nni';
@component.Singleton
class ExperimentStartupInfo { let singleton: ExperimentStartupInfo | null = null;
private readonly API_ROOT_URL: string = '/api/v1/nni';
export class ExperimentStartupInfo {
private experimentId: string = '';
private newExperiment: boolean = true; public experimentId: string = '';
private basePort: number = -1; public newExperiment: boolean = true;
private initialized: boolean = false; public basePort: number = -1;
private logDir: string = ''; public initialized: boolean = false;
private logLevel: string = ''; public logDir: string = '';
private readonly: boolean = false; public logLevel: string = '';
private dispatcherPipe: string | null = null; public readonly: boolean = false;
private platform: string = ''; public dispatcherPipe: string | null = null;
private urlprefix: string = ''; public platform: string = '';
public urlprefix: string = '';
public setStartupInfo(newExperiment: boolean, experimentId: string, basePort: number, platform: string, logDir?: string, logLevel?: string, readonly?: boolean, dispatcherPipe?: string, urlprefix?: string): void {
assert(!this.initialized); constructor(
assert(experimentId.trim().length > 0); newExperiment: boolean,
experimentId: string,
basePort: number,
platform: string,
logDir?: string,
logLevel?: string,
readonly?: boolean,
dispatcherPipe?: string,
urlprefix?: string) {
this.newExperiment = newExperiment; this.newExperiment = newExperiment;
this.experimentId = experimentId; this.experimentId = experimentId;
this.basePort = basePort; this.basePort = basePort;
this.initialized = true;
this.platform = platform; this.platform = platform;
if (logDir !== undefined && logDir.length > 0) { if (logDir !== undefined && logDir.length > 0) {
this.logDir = path.join(path.normalize(logDir), this.getExperimentId()); this.logDir = path.join(path.normalize(logDir), experimentId);
} else { } else {
this.logDir = path.join(os.homedir(), 'nni-experiments', this.getExperimentId()); this.logDir = path.join(os.homedir(), 'nni-experiments', experimentId);
} }
if (logLevel !== undefined && logLevel.length > 1) { if (logLevel !== undefined && logLevel.length > 1) {
...@@ -55,98 +62,67 @@ class ExperimentStartupInfo { ...@@ -55,98 +62,67 @@ class ExperimentStartupInfo {
} }
} }
public getExperimentId(): string { public get apiRootUrl(): string {
assert(this.initialized); return this.urlprefix === '' ? API_ROOT_URL : `/${this.urlprefix}${API_ROOT_URL}`;
return this.experimentId;
}
public getBasePort(): number {
assert(this.initialized);
return this.basePort;
}
public isNewExperiment(): boolean {
assert(this.initialized);
return this.newExperiment;
}
public getPlatform(): string {
assert(this.initialized);
return this.platform;
} }
public getLogDir(): string { public static getInstance(): ExperimentStartupInfo {
assert(this.initialized); assert(singleton !== null);
return singleton!;
return this.logDir;
}
public getLogLevel(): string {
assert(this.initialized);
return this.logLevel;
}
public isReadonly(): boolean {
assert(this.initialized);
return this.readonly;
}
public getDispatcherPipe(): string | null {
assert(this.initialized);
return this.dispatcherPipe;
}
public getAPIRootUrl(): string {
assert(this.initialized);
return this.urlprefix==''?this.API_ROOT_URL:`/${this.urlprefix}${this.API_ROOT_URL}`;
} }
} }
function getExperimentId(): string { export function getExperimentStartupInfo(): ExperimentStartupInfo {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getExperimentId(); return ExperimentStartupInfo.getInstance();
} }
function getBasePort(): number { export function setExperimentStartupInfo(
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getBasePort(); newExperiment: boolean,
experimentId: string,
basePort: number,
platform: string,
logDir?: string,
logLevel?: string,
readonly?: boolean,
dispatcherPipe?: string,
urlprefix?: string): void {
singleton = new ExperimentStartupInfo(
newExperiment,
experimentId,
basePort,
platform,
logDir,
logLevel,
readonly,
dispatcherPipe,
urlprefix
);
} }
function isNewExperiment(): boolean { export function getExperimentId(): string {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).isNewExperiment(); return getExperimentStartupInfo().experimentId;
} }
function getPlatform(): string { export function getBasePort(): number {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getPlatform(); return getExperimentStartupInfo().basePort;
} }
function getExperimentStartupInfo(): ExperimentStartupInfo { export function isNewExperiment(): boolean {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo); return getExperimentStartupInfo().newExperiment;
} }
function setExperimentStartupInfo( export function getPlatform(): string {
newExperiment: boolean, experimentId: string, basePort: number, platform: string, logDir?: string, logLevel?: string, readonly?: boolean, dispatcherPipe?: string, urlprefix?: string): void { return getExperimentStartupInfo().platform;
component.get<ExperimentStartupInfo>(ExperimentStartupInfo)
.setStartupInfo(newExperiment, experimentId, basePort, platform, logDir, logLevel, readonly, dispatcherPipe, urlprefix);
} }
function isReadonly(): boolean { export function isReadonly(): boolean {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).isReadonly(); return getExperimentStartupInfo().readonly;
} }
function getDispatcherPipe(): string | null { export function getDispatcherPipe(): string | null {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getDispatcherPipe(); return getExperimentStartupInfo().dispatcherPipe;
} }
function getAPIRootUrl(): string { export function getAPIRootUrl(): string {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).getAPIRootUrl(); return getExperimentStartupInfo().apiRootUrl;
} }
export {
ExperimentStartupInfo, getBasePort, getExperimentId, isNewExperiment, getPlatform, getExperimentStartupInfo,
setExperimentStartupInfo, isReadonly, getDispatcherPipe, getAPIRootUrl
};
...@@ -6,27 +6,32 @@ ...@@ -6,27 +6,32 @@
import { spawn } from 'child_process'; import { spawn } from 'child_process';
import { Logger, getLogger } from './log'; import { Logger, getLogger } from './log';
const python = process.platform === 'win32' ? 'python.exe' : 'python3'; const logger: Logger = getLogger('pythonScript');
export async function runPythonScript(script: string, logger?: Logger): Promise<string> { const python: string = process.platform === 'win32' ? 'python.exe' : 'python3';
export async function runPythonScript(script: string, logTag?: string): Promise<string> {
const proc = spawn(python, [ '-c', script ]); const proc = spawn(python, [ '-c', script ]);
let stdout: string = '';
let stderr: string = '';
proc.stdout.on('data', (data: string) => { stdout += data; });
proc.stderr.on('data', (data: string) => { stderr += data; });
const procPromise = new Promise<void>((resolve, reject) => { const procPromise = new Promise<void>((resolve, reject) => {
proc.on('error', (err: Error) => { reject(err); }); proc.on('error', (err: Error) => { reject(err); });
proc.on('exit', () => { resolve(); }); proc.on('exit', () => { resolve(); });
}); });
await procPromise; await procPromise;
const stdout = proc.stdout.read().toString();
const stderr = proc.stderr.read().toString();
if (stderr) { if (stderr) {
if (logger === undefined) { if (logTag) {
logger = getLogger('pythonScript'); logger.warning(`Python script [${logTag}] has stderr:`, stderr);
} else {
logger.warning('Python script has stderr.');
logger.warning(' script:', script);
logger.warning(' stderr:', stderr);
} }
logger.warning('python script has stderr.');
logger.warning('script:', script);
logger.warning('stderr:', stderr);
} }
return stdout; return stdout;
......
...@@ -19,13 +19,13 @@ import * as util from 'util'; ...@@ -19,13 +19,13 @@ import * as util from 'util';
import * as glob from 'glob'; import * as glob from 'glob';
import { Database, DataStore } from './datastore'; import { Database, DataStore } from './datastore';
import { ExperimentStartupInfo, getExperimentStartupInfo, setExperimentStartupInfo } from './experimentStartupInfo'; import { getExperimentStartupInfo, setExperimentStartupInfo } from './experimentStartupInfo';
import { ExperimentConfig, Manager } from './manager'; import { ExperimentConfig, Manager } from './manager';
import { ExperimentManager } from './experimentManager'; import { ExperimentManager } from './experimentManager';
import { HyperParameters, TrainingService, TrialJobStatus } from './trainingService'; import { HyperParameters, TrainingService, TrialJobStatus } from './trainingService';
function getExperimentRootDir(): string { function getExperimentRootDir(): string {
return getExperimentStartupInfo().getLogDir(); return getExperimentStartupInfo().logDir;
} }
function getLogDir(): string { function getLogDir(): string {
...@@ -33,7 +33,7 @@ function getLogDir(): string { ...@@ -33,7 +33,7 @@ function getLogDir(): string {
} }
function getLogLevel(): string { function getLogLevel(): string {
return getExperimentStartupInfo().getLogLevel(); return getExperimentStartupInfo().logLevel;
} }
function getDefaultDatabaseDir(): string { function getDefaultDatabaseDir(): string {
...@@ -184,7 +184,6 @@ function generateParamFileName(hyperParameters: HyperParameters): string { ...@@ -184,7 +184,6 @@ function generateParamFileName(hyperParameters: HyperParameters): string {
* Must be paired with `cleanupUnitTest()`. * Must be paired with `cleanupUnitTest()`.
*/ */
function prepareUnitTest(): void { function prepareUnitTest(): void {
Container.snapshot(ExperimentStartupInfo);
Container.snapshot(Database); Container.snapshot(Database);
Container.snapshot(DataStore); Container.snapshot(DataStore);
Container.snapshot(TrainingService); Container.snapshot(TrainingService);
...@@ -213,8 +212,9 @@ function cleanupUnitTest(): void { ...@@ -213,8 +212,9 @@ function cleanupUnitTest(): void {
Container.restore(TrainingService); Container.restore(TrainingService);
Container.restore(DataStore); Container.restore(DataStore);
Container.restore(Database); Container.restore(Database);
Container.restore(ExperimentStartupInfo);
Container.restore(ExperimentManager); Container.restore(ExperimentManager);
const logLevel: string = parseArg(['--log_level', '-ll']);
setExperimentStartupInfo(true, 'unittest', 8080, 'unittest', undefined, logLevel);
} }
let cachedipv4Address: string = ''; let cachedipv4Address: string = '';
......
...@@ -8,6 +8,7 @@ import * as path from 'path'; ...@@ -8,6 +8,7 @@ import * as path from 'path';
import * as component from '../../../common/component'; import * as component from '../../../common/component';
import { getLogger, Logger } from '../../../common/log'; import { getLogger, Logger } from '../../../common/log';
import { ExperimentConfig, AmlConfig, flattenConfig } from '../../../common/experimentConfig'; import { ExperimentConfig, AmlConfig, flattenConfig } from '../../../common/experimentConfig';
import { ExperimentStartupInfo } from '../../../common/experimentStartupInfo';
import { validateCodeDir } from '../../common/util'; import { validateCodeDir } from '../../common/util';
import { AMLClient } from '../aml/amlClient'; import { AMLClient } from '../aml/amlClient';
import { AMLEnvironmentInformation } from '../aml/amlConfig'; import { AMLEnvironmentInformation } from '../aml/amlConfig';
...@@ -29,10 +30,10 @@ export class AMLEnvironmentService extends EnvironmentService { ...@@ -29,10 +30,10 @@ export class AMLEnvironmentService extends EnvironmentService {
private experimentRootDir: string; private experimentRootDir: string;
private config: FlattenAmlConfig; private config: FlattenAmlConfig;
constructor(experimentRootDir: string, experimentId: string, config: ExperimentConfig) { constructor(config: ExperimentConfig, info: ExperimentStartupInfo) {
super(); super();
this.experimentId = experimentId; this.experimentId = info.experimentId;
this.experimentRootDir = experimentRootDir; this.experimentRootDir = info.logDir;
this.config = flattenConfig(config, 'aml'); this.config = flattenConfig(config, 'aml');
validateCodeDir(this.config.trialCodeDirectory); validateCodeDir(this.config.trialCodeDirectory);
} }
......
...@@ -4,24 +4,22 @@ import { LocalEnvironmentService } from './localEnvironmentService'; ...@@ -4,24 +4,22 @@ import { LocalEnvironmentService } from './localEnvironmentService';
import { RemoteEnvironmentService } from './remoteEnvironmentService'; import { RemoteEnvironmentService } from './remoteEnvironmentService';
import { EnvironmentService } from '../environment'; import { EnvironmentService } from '../environment';
import { ExperimentConfig } from '../../../common/experimentConfig'; import { ExperimentConfig } from '../../../common/experimentConfig';
import { getExperimentId } from '../../../common/experimentStartupInfo'; import { ExperimentStartupInfo } from '../../../common/experimentStartupInfo';
import { getCustomEnvironmentServiceConfig } from '../../../common/nniConfig'; import { getCustomEnvironmentServiceConfig } from '../../../common/nniConfig';
import { getExperimentRootDir, importModule } from '../../../common/utils'; import { importModule } from '../../../common/utils';
export async function createEnvironmentService(name: string, config: ExperimentConfig): Promise<EnvironmentService> { export async function createEnvironmentService(name: string, config: ExperimentConfig): Promise<EnvironmentService> {
const expId = getExperimentId(); const info = ExperimentStartupInfo.getInstance();
const rootDir = getExperimentRootDir();
switch(name) { switch(name) {
case 'local': case 'local':
return new LocalEnvironmentService(rootDir, expId, config); return new LocalEnvironmentService(config, info);
case 'remote': case 'remote':
return new RemoteEnvironmentService(rootDir, expId, config); return new RemoteEnvironmentService(config, info);
case 'aml': case 'aml':
return new AMLEnvironmentService(rootDir, expId, config); return new AMLEnvironmentService(config, info);
case 'openpai': case 'openpai':
return new OpenPaiEnvironmentService(rootDir, expId, config); return new OpenPaiEnvironmentService(config, info);
} }
const esConfig = await getCustomEnvironmentServiceConfig(name); const esConfig = await getCustomEnvironmentServiceConfig(name);
...@@ -30,5 +28,5 @@ export async function createEnvironmentService(name: string, config: ExperimentC ...@@ -30,5 +28,5 @@ export async function createEnvironmentService(name: string, config: ExperimentC
} }
const esModule = importModule(esConfig.nodeModulePath); const esModule = importModule(esConfig.nodeModulePath);
const esClass = esModule[esConfig.nodeClassName] as any; const esClass = esModule[esConfig.nodeClassName] as any;
return new esClass(rootDir, expId, config); return new esClass(config, info);
} }
...@@ -9,6 +9,7 @@ import * as tkill from 'tree-kill'; ...@@ -9,6 +9,7 @@ import * as tkill from 'tree-kill';
import * as component from '../../../common/component'; import * as component from '../../../common/component';
import { getLogger, Logger } from '../../../common/log'; import { getLogger, Logger } from '../../../common/log';
import { ExperimentConfig } from '../../../common/experimentConfig'; import { ExperimentConfig } from '../../../common/experimentConfig';
import { ExperimentStartupInfo } from '../../../common/experimentStartupInfo';
import { EnvironmentInformation, EnvironmentService } from '../environment'; import { EnvironmentInformation, EnvironmentService } from '../environment';
import { isAlive, getNewLine } from '../../../common/utils'; import { isAlive, getNewLine } from '../../../common/utils';
import { execMkdir, runScript, getScriptName, execCopydir } from '../../common/util'; import { execMkdir, runScript, getScriptName, execCopydir } from '../../common/util';
...@@ -21,10 +22,10 @@ export class LocalEnvironmentService extends EnvironmentService { ...@@ -21,10 +22,10 @@ export class LocalEnvironmentService extends EnvironmentService {
private experimentRootDir: string; private experimentRootDir: string;
private experimentId: string; private experimentId: string;
constructor(experimentRootDir: string, experimentId: string, _config: ExperimentConfig) { constructor(_config: ExperimentConfig, info: ExperimentStartupInfo) {
super(); super();
this.experimentId = experimentId; this.experimentId = info.experimentId;
this.experimentRootDir = experimentRootDir; this.experimentRootDir = info.logDir;
} }
public get environmentMaintenceLoopInterval(): number { public get environmentMaintenceLoopInterval(): number {
......
...@@ -8,6 +8,7 @@ import * as request from 'request'; ...@@ -8,6 +8,7 @@ import * as request from 'request';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import * as component from '../../../common/component'; import * as component from '../../../common/component';
import { ExperimentConfig, OpenpaiConfig, flattenConfig, toMegaBytes } from '../../../common/experimentConfig'; import { ExperimentConfig, OpenpaiConfig, flattenConfig, toMegaBytes } from '../../../common/experimentConfig';
import { ExperimentStartupInfo } from '../../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../../common/log'; import { getLogger, Logger } from '../../../common/log';
import { PAIClusterConfig } from '../../pai/paiConfig'; import { PAIClusterConfig } from '../../pai/paiConfig';
import { NNIPAITrialConfig } from '../../pai/paiConfig'; import { NNIPAITrialConfig } from '../../pai/paiConfig';
...@@ -31,9 +32,9 @@ export class OpenPaiEnvironmentService extends EnvironmentService { ...@@ -31,9 +32,9 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
private experimentId: string; private experimentId: string;
private config: FlattenOpenpaiConfig; private config: FlattenOpenpaiConfig;
constructor(_experimentRootDir: string, experimentId: string, config: ExperimentConfig) { constructor(config: ExperimentConfig, info: ExperimentStartupInfo) {
super(); super();
this.experimentId = experimentId; this.experimentId = info.experimentId;
this.config = flattenConfig(config, 'openpai'); this.config = flattenConfig(config, 'openpai');
this.paiToken = this.config.token; this.paiToken = this.config.token;
this.protocol = this.config.host.toLowerCase().startsWith('https://') ? 'https' : 'http'; this.protocol = this.config.host.toLowerCase().startsWith('https://') ? 'https' : 'http';
......
...@@ -10,6 +10,7 @@ import { getLogger, Logger } from '../../../common/log'; ...@@ -10,6 +10,7 @@ import { getLogger, Logger } from '../../../common/log';
import { EnvironmentInformation, EnvironmentService } from '../environment'; import { EnvironmentInformation, EnvironmentService } from '../environment';
import { getLogLevel } from '../../../common/utils'; import { getLogLevel } from '../../../common/utils';
import { ExperimentConfig, RemoteConfig, RemoteMachineConfig, flattenConfig } from '../../../common/experimentConfig'; import { ExperimentConfig, RemoteConfig, RemoteMachineConfig, flattenConfig } from '../../../common/experimentConfig';
import { ExperimentStartupInfo } from '../../../common/experimentStartupInfo';
import { execMkdir } from '../../common/util'; import { execMkdir } from '../../common/util';
import { ExecutorManager } from '../../remote_machine/remoteMachineData'; import { ExecutorManager } from '../../remote_machine/remoteMachineData';
import { ShellExecutor } from 'training_service/remote_machine/shellExecutor'; import { ShellExecutor } from 'training_service/remote_machine/shellExecutor';
...@@ -32,13 +33,13 @@ export class RemoteEnvironmentService extends EnvironmentService { ...@@ -32,13 +33,13 @@ export class RemoteEnvironmentService extends EnvironmentService {
private experimentId: string; private experimentId: string;
private config: FlattenRemoteConfig; private config: FlattenRemoteConfig;
constructor(experimentRootDir: string, experimentId: string, config: ExperimentConfig) { constructor(config: ExperimentConfig, info: ExperimentStartupInfo) {
super(); super();
this.experimentId = experimentId; this.experimentId = info.experimentId;
this.environmentExecutorManagerMap = new Map<string, ExecutorManager>(); this.environmentExecutorManagerMap = new Map<string, ExecutorManager>();
this.machineExecutorManagerMap = new Map<RemoteMachineConfig, ExecutorManager>(); this.machineExecutorManagerMap = new Map<RemoteMachineConfig, ExecutorManager>();
this.remoteMachineMetaOccupiedMap = new Map<RemoteMachineConfig, boolean>(); this.remoteMachineMetaOccupiedMap = new Map<RemoteMachineConfig, boolean>();
this.experimentRootDir = experimentRootDir; this.experimentRootDir = info.logDir;
this.log = getLogger('RemoteEnvironmentService'); this.log = getLogger('RemoteEnvironmentService');
this.config = flattenConfig(config, 'remote'); this.config = flattenConfig(config, 'remote');
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment