Unverified Commit 143c6615 authored by Chi Song's avatar Chi Song Committed by GitHub
Browse files

Reusable environment support GPU scheduler, add test cases and refactoring. (#2627)

parent 8a20c348
......@@ -222,15 +222,16 @@ function getIPV4Address(): string {
return cachedipv4Address;
}
if (os.networkInterfaces().eth0) {
for (const item of os.networkInterfaces().eth0) {
const networkInterfaces = os.networkInterfaces();
if (networkInterfaces.eth0) {
for (const item of networkInterfaces.eth0) {
if (item.family === 'IPv4') {
cachedipv4Address = item.address;
return cachedipv4Address;
}
}
} else {
throw Error('getIPV4Address() failed because os.networkInterfaces().eth0 is undefined.');
throw Error(`getIPV4Address() failed because os.networkInterfaces().eth0 is undefined. Please specify NNI manager IP in config.`);
}
throw Error('getIPV4Address() failed because no valid IPv4 address found.')
......
......@@ -39,6 +39,7 @@
"@types/express": "^4.16.0",
"@types/glob": "^7.1.1",
"@types/js-base64": "^2.3.1",
"@types/js-yaml": "^3.12.5",
"@types/mocha": "^5.2.5",
"@types/node": "10.12.18",
"@types/request": "^2.47.1",
......
......@@ -107,6 +107,11 @@ export namespace ValidationSchemas {
token: joi.string().min(1),
host: joi.string().min(1).required(),
reuse: joi.boolean(),
cpuNum: joi.number().min(1),
memoryMB: joi.number().min(100),
gpuNum: joi.number().min(1),
maxTrialNumPerGpu: joi.number(),
useActiveGpu: joi.boolean(),
}),
kubeflow_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
operator: joi.string().min(1).required(),
......
......@@ -3,6 +3,17 @@
'use strict';
export enum ScheduleResultType {
// Schedule succeeded
SUCCEED,
// Temporarily, no enough available GPU right now
TMP_NO_AVAILABLE_GPU,
// Cannot match requirement even if all GPU are a
REQUIRE_EXCEED_TOTAL
}
/**
* GPU Infromation class
* Representing the dynamic and static information retrieved from Nvidia-smi
......@@ -52,6 +63,19 @@ export class GPUSummary {
}
}
export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined {
if (gpuIndices !== undefined) {
const indices: number[] = gpuIndices.split(',')
.map((x: string) => parseInt(x, 10));
if (indices.length > 0) {
return new Set(indices);
} else {
throw new Error('gpuIndices can not be empty if specified.');
}
}
}
export const GPU_INFO_COLLECTOR_FORMAT_WINDOWS: string =
`
$env:METRIC_OUTPUT_DIR="{0}"
......
......@@ -17,6 +17,10 @@ export class TrialConfig {
// Required GPU number for trial job. The number should be in [0,100]
public readonly gpuNum: number;
// this flag uses for UT now.
// in future, all environments should be reusable, and this can be configurable by user.
public reuseEnvironment: boolean | undefined = true;
/**
* Constructor
* @param command Trail command
......
......@@ -12,6 +12,13 @@ export class PAIClusterConfig {
public readonly token?: string;
public readonly reuse?: boolean;
public cpuNum?: number;
public memoryMB?: number;
public gpuNum?: number;
public useActiveGpu?: boolean;
public maxTrialNumPerGpu?: number;
/**
* Constructor
* @param userName User name of PAI Cluster
......@@ -20,12 +27,16 @@ export class PAIClusterConfig {
* @param token PAI token of PAI Cluster
* @param reuse If job is reusable for multiple trials
*/
constructor(userName: string, host: string, passWord?: string, token?: string, reuse?: boolean) {
constructor(userName: string, host: string, passWord?: string, token?: string, reuse?: boolean,
cpuNum?: number, memoryMB?: number, gpuNum?: number) {
this.userName = userName;
this.passWord = passWord;
this.host = host;
this.token = token;
this.reuse = reuse;
this.cpuNum = cpuNum;
this.memoryMB = memoryMB;
this.gpuNum = gpuNum;
}
}
......
......@@ -6,10 +6,8 @@
import * as assert from 'assert';
import { getLogger, Logger } from '../../common/log';
import { randomSelect } from '../../common/utils';
import { GPUInfo } from '../common/gpuData';
import {
parseGpuIndices, RemoteMachineMeta, RemoteMachineScheduleResult, RemoteMachineTrialJobDetail, ScheduleResultType, ExecutorManager
} from './remoteMachineData';
import { GPUInfo, parseGpuIndices, ScheduleResultType } from '../common/gpuData';
import { ExecutorManager, RemoteMachineMeta, RemoteMachineScheduleResult, RemoteMachineTrialJobDetail } from './remoteMachineData';
type SCHEDULE_POLICY_NAME = 'random' | 'round-robin';
......@@ -39,7 +37,7 @@ export class GPUScheduler {
* @param requiredGPUNum required GPU number
*/
public scheduleMachine(requiredGPUNum: number | undefined, trialJobDetail: RemoteMachineTrialJobDetail): RemoteMachineScheduleResult {
if(requiredGPUNum === undefined) {
if (requiredGPUNum === undefined) {
requiredGPUNum = 0;
}
assert(requiredGPUNum >= 0);
......@@ -75,8 +73,8 @@ export class GPUScheduler {
this.log.warning(`Scheduler: trialJob id ${trialJobDetail.id}, no machine can be scheduled, return TMP_NO_AVAILABLE_GPU `);
return {
resultType : ScheduleResultType.TMP_NO_AVAILABLE_GPU,
scheduleInfo : undefined
resultType: ScheduleResultType.TMP_NO_AVAILABLE_GPU,
scheduleInfo: undefined
};
}
......
......@@ -4,7 +4,7 @@
'use strict';
import { TrialJobApplicationForm, TrialJobDetail, TrialJobStatus } from '../../common/trainingService';
import { GPUInfo, GPUSummary } from '../common/gpuData';
import { GPUInfo, GPUSummary, ScheduleResultType } from '../common/gpuData';
import { ShellExecutor } from './shellExecutor';
/**
......@@ -25,18 +25,6 @@ export class RemoteMachineMeta {
public readonly useActiveGpu?: boolean = false;
}
export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined {
if (gpuIndices !== undefined) {
const indices: number[] = gpuIndices.split(',')
.map((x: string) => parseInt(x, 10));
if (indices.length > 0) {
return new Set(indices);
} else {
throw new Error('gpuIndices can not be empty if specified.');
}
}
}
/**
* The execution result for command executed on remote machine
*/
......@@ -168,14 +156,3 @@ export class ExecutorManager {
export type RemoteMachineScheduleResult = { scheduleInfo: RemoteMachineScheduleInfo | undefined; resultType: ScheduleResultType };
export type RemoteMachineScheduleInfo = { rmMeta: RemoteMachineMeta; cudaVisibleDevice: string };
export enum ScheduleResultType {
// Schedule succeeded
SUCCEED,
// Temporarily, no enough available GPU right now
TMP_NO_AVAILABLE_GPU,
// Cannot match requirement even if all GPU are a
REQUIRE_EXCEED_TOTAL
}
......@@ -7,6 +7,7 @@ import * as assert from 'assert';
import { EventEmitter } from 'events';
import * as fs from 'fs';
import * as path from 'path';
import { ShellExecutor } from 'training_service/remote_machine/shellExecutor';
import { Deferred } from 'ts-deferred';
import * as component from '../../common/component';
import { NNIError, NNIErrorNames } from '../../common/errors';
......@@ -22,18 +23,16 @@ import {
getVersion, uniqueString
} from '../../common/utils';
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../common/containerJobData';
import { GPUSummary } from '../common/gpuData';
import { GPUSummary, ScheduleResultType } from '../common/gpuData';
import { TrialConfig } from '../common/trialConfig';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { execMkdir, validateCodeDir } from '../common/util';
import { GPUScheduler } from './gpuScheduler';
import {
RemoteMachineMeta,
RemoteMachineScheduleInfo, RemoteMachineScheduleResult, RemoteMachineTrialJobDetail,
ScheduleResultType, ExecutorManager
ExecutorManager, RemoteMachineMeta,
RemoteMachineScheduleInfo, RemoteMachineScheduleResult, RemoteMachineTrialJobDetail
} from './remoteMachineData';
import { RemoteMachineJobRestServer } from './remoteMachineJobRestServer';
import { ShellExecutor } from 'training_service/remote_machine/shellExecutor';
/**
* Training Service implementation for Remote Machine (Linux)
......
......@@ -3,7 +3,6 @@
'use strict';
import { EventEmitter } from 'events';
import { delay } from "../../../common/utils";
import { AMLEnvironmentInformation } from '../aml/amlConfig';
import { CommandChannel, RunnerConnection } from "../commandChannel";
......@@ -15,11 +14,7 @@ class AMLRunnerConnection extends RunnerConnection {
export class AMLCommandChannel extends CommandChannel {
private stopping: boolean = false;
private sendQueues: [EnvironmentInformation, string][] = [];
private readonly NNI_METRICS_PATTERN: string = `NNISDK_MEb'(?<metrics>.*?)'`;
public constructor(commandEmitter: EventEmitter) {
super(commandEmitter);
}
public get channelName(): Channel {
return "aml";
}
......@@ -99,11 +94,11 @@ export class AMLCommandChannel extends CommandChannel {
const messages = command['trial_runner'];
if (messages) {
if (messages instanceof Object && currentMessageIndex < messages.length - 1) {
for (let index = currentMessageIndex + 1; index < messages.length; index ++) {
for (let index = currentMessageIndex + 1; index < messages.length; index++) {
this.handleCommand(runnerConnection.environment, messages[index]);
}
currentMessageIndex = messages.length - 1;
} else if (currentMessageIndex === -1){
} else if (currentMessageIndex === -1) {
this.handleCommand(runnerConnection.environment, messages);
currentMessageIndex += 1;
}
......
......@@ -3,10 +3,10 @@
'use strict';
import { GPUSummary } from "training_service/common/gpuData";
import { EventEmitter } from "events";
import { getLogger, Logger } from "../../common/log";
import { TrialJobStatus } from "../../common/trainingService";
import { EventEmitter } from "events";
import { GPUInfo } from "../../training_service/common/gpuData";
import { WebCommandChannel } from "./channels/webCommandChannel";
import { CommandChannel } from "./commandChannel";
......@@ -14,24 +14,50 @@ import { CommandChannel } from "./commandChannel";
export type EnvironmentStatus = 'UNKNOWN' | 'WAITING' | 'RUNNING' | 'SUCCEEDED' | 'FAILED' | 'USER_CANCELED';
export type Channel = "web" | "file" | "aml" | "ut";
export class TrialGpuSummary {
// GPU count on the machine
public gpuCount: number;
// The timestamp when GPU summary data queried
public timestamp: string;
// The array of GPU information for each GPU card
public gpuInfos: GPUInfo[];
// GPU assigned status
public assignedGpuIndexMap: Map<number, number> = new Map<number, number>();
constructor(gpuCount: number, timestamp: string, gpuInfos: GPUInfo[]) {
this.gpuCount = gpuCount;
this.timestamp = timestamp;
this.gpuInfos = gpuInfos;
}
}
export class EnvironmentInformation {
// node id is 5 chars, so won't conflict.
private readonly defaultNodeId = "default";
private log: Logger;
// NNI environment ID
public id: string;
// training platform unique job ID.
public jobId: string;
// training platform job friendly name, in case it's different with job ID.
public jobName: string;
private isNoGpuWarned: boolean = false;
// key states
// true: environment is ready to run trial.
public isIdle: boolean = false;
// true: environment is running, waiting, or unknown.
public isAlive: boolean = true;
// true: Runner is initialized, and can receive trials.
public isRunnerReady: boolean = false;
// don't set status in environment directly, use setFinalState function to set a final state.
public status: EnvironmentStatus = "UNKNOWN";
// true: environment is ready to run trial.
public runningTrialCount: number = 0;
// uses to count how many trial runs on this environment.
// it can be used in many scenarios, but for now, it uses for reusable.
public assignedTrialCount: number = 0;
// NNI environment ID
public id: string;
// training platform unique job ID.
public envId: string;
// training platform job friendly name, in case it's different with job ID.
public name: string;
public trackingUrl: string = "";
public workingFolder: string = "";
public runnerWorkingFolder: string = "";
......@@ -40,41 +66,82 @@ export class EnvironmentInformation {
// it's used to aggregate node status for multiple node trial
public nodes: Map<string, NodeInfomation>;
public gpuSummary: Map<string, GPUSummary> = new Map<string, GPUSummary>();
public gpuSummaries: Map<string, TrialGpuSummary> = new Map<string, TrialGpuSummary>();
// use can specify which gpus can be used by NNI.
// it's usable for sharable environment like remote machine.
public usableGpus?: number[];
// user can specify how to use GPU resource for an environment, like local and remote.
public maxTrialNumberPerGpu?: number;
public useActiveGpu?: boolean;
constructor(id: string, jobName: string, jobId?: string) {
constructor(id: string, name: string, envId?: string) {
this.log = getLogger();
this.id = id;
this.jobName = jobName;
this.jobId = jobId ? jobId : jobName;
this.name = name;
this.envId = envId ? envId : name;
this.nodes = new Map<string, NodeInfomation>();
}
public setFinalStatus(status: EnvironmentStatus): void {
switch (status) {
case 'WAITING':
case 'SUCCEEDED':
case 'FAILED':
case 'USER_CANCELED':
public setStatus(status: EnvironmentStatus): void {
if (this.status !== status) {
this.log.info(`EnvironmentInformation: ${this.envId} change status from ${this.status} to ${status}.`)
this.status = status;
break;
default:
this.log.error(`Environment: job ${this.jobId} set an invalid final state ${status}.`);
break;
}
}
public setGpuSummary(nodeId: string, newGpuSummary: TrialGpuSummary): void {
if (nodeId === null || nodeId === undefined) {
nodeId = this.defaultNodeId;
}
const originalGpuSummary = this.gpuSummaries.get(nodeId);
if (undefined === originalGpuSummary) {
newGpuSummary.assignedGpuIndexMap = new Map<number, number>();
this.gpuSummaries.set(nodeId, newGpuSummary);
} else {
originalGpuSummary.gpuCount = newGpuSummary.gpuCount;
originalGpuSummary.timestamp = newGpuSummary.timestamp;
originalGpuSummary.gpuInfos = newGpuSummary.gpuInfos;
}
}
public get defaultGpuSummary(): TrialGpuSummary | undefined {
const gpuSummary = this.gpuSummaries.get(this.defaultNodeId);
if (gpuSummary === undefined) {
if (false === this.isNoGpuWarned) {
this.log.warning(`EnvironmentInformation: ${this.envId} no default gpu found. current gpu info ${JSON.stringify(this.gpuSummaries)}`);
this.isNoGpuWarned = true;
}
} else {
this.isNoGpuWarned = false;
}
return gpuSummary;
}
}
export abstract class EnvironmentService {
public abstract get hasStorageService(): boolean;
public abstract config(key: string, value: string): Promise<void>;
public abstract refreshEnvironmentsStatus(environments: EnvironmentInformation[]): Promise<void>;
public abstract startEnvironment(environment: EnvironmentInformation): Promise<void>;
public abstract stopEnvironment(environment: EnvironmentInformation): Promise<void>;
public getCommandChannel(commandEmitter: EventEmitter): CommandChannel {
// It depends on environment pressure and settings
// for example, OpenPAI relies on API calls, and there is an limitation for frequence, so it need to be bigger.
public get environmentMaintenceLoopInterval(): number {
return 5000;
}
// it's needed in two scenario
// 1. remote machine has fixed number, so it can return false, when all environment are assigned.
// 2. If there are consistent error on requested environments, for example, authentication failure on platform.
public get hasMoreEnvironments(): boolean {
return true;
}
public createCommandChannel(commandEmitter: EventEmitter): CommandChannel {
return new WebCommandChannel(commandEmitter);
}
......@@ -101,7 +168,7 @@ export class RunnerSettings {
public nniManagerVersion: string = "";
public logCollection: string = "none";
public command: string = "";
public enableGpuCollector: boolean = false;
public enableGpuCollector: boolean = true;
// specify which communication channel is used by runner.
// supported channel includes: rest, storage, aml
......
......@@ -3,24 +3,20 @@
'use strict';
import { EventEmitter } from "events";
import * as fs from 'fs';
import * as path from 'path';
import * as component from '../../../common/component';
import { getExperimentId } from '../../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../../common/log';
import { getExperimentRootDir } from '../../../common/utils';
import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey';
import { AMLClusterConfig, AMLTrialConfig } from '../aml/amlConfig';
import { EnvironmentInformation, EnvironmentService } from '../environment';
import { AMLEnvironmentInformation } from '../aml/amlConfig';
import { AMLClient } from '../aml/amlClient';
import {
NNIManagerIpConfig,
} from '../../../common/trainingService';
import { validateCodeDir } from '../../common/util';
import { getExperimentRootDir } from '../../../common/utils';
import { AMLClient } from '../aml/amlClient';
import { AMLClusterConfig, AMLEnvironmentInformation, AMLTrialConfig } from '../aml/amlConfig';
import { AMLCommandChannel } from '../channels/amlCommandChannel';
import { CommandChannel } from "../commandChannel";
import { EventEmitter } from "events";
import { EnvironmentInformation, EnvironmentService, EnvironmentStatus } from '../environment';
/**
......@@ -32,13 +28,7 @@ export class AMLEnvironmentService extends EnvironmentService {
private readonly log: Logger = getLogger();
public amlClusterConfig: AMLClusterConfig | undefined;
public amlTrialConfig: AMLTrialConfig | undefined;
private amlJobConfig: any;
private stopping: boolean = false;
private versionCheck: boolean = true;
private isMultiPhase: boolean = false;
private nniVersion?: string;
private experimentId: string;
private nniManagerIpConfig?: NNIManagerIpConfig;
private experimentRootDir: string;
constructor() {
......@@ -51,7 +41,7 @@ export class AMLEnvironmentService extends EnvironmentService {
return false;
}
public getCommandChannel(commandEmitter: EventEmitter): CommandChannel {
public createCommandChannel(commandEmitter: EventEmitter): CommandChannel {
return new AMLCommandChannel(commandEmitter);
}
......@@ -86,26 +76,28 @@ export class AMLEnvironmentService extends EnvironmentService {
if (!amlClient) {
throw new Error('AML client not initialized!');
}
const status = await amlClient.updateStatus(environment.status);
switch (status.toUpperCase()) {
const newStatus = await amlClient.updateStatus(environment.status);
switch (newStatus.toUpperCase()) {
case 'WAITING':
case 'RUNNING':
case 'QUEUED':
// RUNNING status is set by runner, and ignore waiting status
environment.setStatus('WAITING');
break;
case 'RUNNING':
environment.setStatus('RUNNING');
break;
case 'COMPLETED':
case 'SUCCEEDED':
environment.setFinalStatus('SUCCEEDED');
environment.setStatus('SUCCEEDED');
break;
case 'FAILED':
environment.setFinalStatus('FAILED');
environment.setStatus(newStatus.toUpperCase() as EnvironmentStatus);
break;
case 'STOPPED':
case 'STOPPING':
environment.setFinalStatus('USER_CANCELED');
environment.setStatus('USER_CANCELED');
break;
default:
environment.setFinalStatus('UNKNOWN');
environment.setStatus('UNKNOWN');
}
});
}
......@@ -120,7 +112,7 @@ export class AMLEnvironmentService extends EnvironmentService {
const amlEnvironment: AMLEnvironmentInformation = environment as AMLEnvironmentInformation;
const environmentLocalTempFolder = path.join(this.experimentRootDir, this.experimentId, "environment-temp");
environment.command = `import os\nos.system('${amlEnvironment.command}')`;
await fs.promises.writeFile(path.join(environmentLocalTempFolder, 'nni_script.py'), amlEnvironment.command ,{ encoding: 'utf8' });
await fs.promises.writeFile(path.join(environmentLocalTempFolder, 'nni_script.py'), amlEnvironment.command, { encoding: 'utf8' });
const amlClient = new AMLClient(
this.amlClusterConfig.subscriptionId,
this.amlClusterConfig.resourceGroup,
......
......@@ -4,6 +4,7 @@
'use strict';
import * as fs from 'fs';
import * as yaml from 'js-yaml';
import * as request from 'request';
import { Deferred } from 'ts-deferred';
import * as component from '../../../common/component';
......@@ -15,7 +16,6 @@ import { NNIPAIK8STrialConfig } from '../../pai/paiK8S/paiK8SConfig';
import { EnvironmentInformation, EnvironmentService } from '../environment';
import { StorageService } from '../storageService';
const yaml = require('js-yaml');
/**
* Collector PAI jobs info from PAI cluster, and update pai job status locally
......@@ -40,6 +40,10 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
this.experimentId = getExperimentId();
}
public get environmentMaintenceLoopInterval(): number {
return 5000;
}
public get hasStorageService(): boolean {
return true;
}
......@@ -72,6 +76,16 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
if (this.paiTrialConfig.paiConfigPath) {
this.paiJobConfig = yaml.safeLoad(fs.readFileSync(this.paiTrialConfig.paiConfigPath, 'utf8'));
}
if (this.paiClusterConfig.gpuNum === undefined) {
this.paiClusterConfig.gpuNum = this.paiTrialConfig.gpuNum;
}
if (this.paiClusterConfig.cpuNum === undefined) {
this.paiClusterConfig.cpuNum = this.paiTrialConfig.cpuNum;
}
if (this.paiClusterConfig.memoryMB === undefined) {
this.paiClusterConfig.memoryMB = this.paiTrialConfig.memoryMB;
}
break;
}
default:
......@@ -111,37 +125,35 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
});
environments.forEach((environment) => {
if (jobInfos.has(environment.jobId)) {
const jobResponse = jobInfos.get(environment.jobId);
if (jobInfos.has(environment.envId)) {
const jobResponse = jobInfos.get(environment.envId);
if (jobResponse && jobResponse.state) {
const oldEnvironmentStatus = environment.status;
switch (jobResponse.state) {
case 'RUNNING':
case 'WAITING':
// RUNNING status is set by runner, and ignore waiting status
break;
case 'SUCCEEDED':
case 'FAILED':
environment.setFinalStatus(jobResponse.state);
environment.setStatus(jobResponse.state);
break;
case 'STOPPED':
case 'STOPPING':
environment.setFinalStatus('USER_CANCELED');
environment.setStatus('USER_CANCELED');
break;
default:
this.log.error(`OpenPAI: job ${environment.jobId} returns unknown state ${jobResponse.state}.`);
environment.setFinalStatus('UNKNOWN');
this.log.error(`OpenPAI: job ${environment.envId} returns unknown state ${jobResponse.state}.`);
environment.setStatus('UNKNOWN');
}
if (oldEnvironmentStatus !== environment.status) {
this.log.debug(`OpenPAI: job ${environment.jobId} change status ${oldEnvironmentStatus} to ${environment.status} due to job is ${jobResponse.state}.`)
this.log.debug(`OpenPAI: job ${environment.envId} change status ${oldEnvironmentStatus} to ${environment.status} due to job is ${jobResponse.state}.`)
}
} else {
this.log.error(`OpenPAI: job ${environment.jobId} has no state returned. body:${JSON.stringify(jobResponse)}`);
this.log.error(`OpenPAI: job ${environment.envId} has no state returned. body:${JSON.stringify(jobResponse)}`);
// some error happens, and mark this environment
environment.status = 'FAILED';
}
} else {
this.log.error(`OpenPAI job ${environment.jobId} is not found in job list.`);
this.log.error(`OpenPAI job ${environment.envId} is not found in job list.`);
environment.status = 'UNKNOWN';
}
});
......@@ -169,8 +181,10 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
// Step 1. Prepare PAI job configuration
const environmentRoot = `${this.paiTrialConfig.containerNFSMountPath}/${this.experimentId}`;
environment.runnerWorkingFolder = `${environmentRoot}/envs/${environment.id}`;
environment.command = `cd ${environmentRoot} && ${environment.command}`
environment.trackingUrl = `${this.protocol}://${this.paiClusterConfig.host}/job-detail.html?username=${this.paiClusterConfig.userName}&jobName=${environment.jobId}`
environment.command = `cd ${environmentRoot} && ${environment.command}`;
environment.trackingUrl = `${this.protocol}://${this.paiClusterConfig.host}/job-detail.html?username=${this.paiClusterConfig.userName}&jobName=${environment.envId}`;
environment.useActiveGpu = this.paiClusterConfig.useActiveGpu;
environment.maxTrialNumberPerGpu = this.paiClusterConfig.maxTrialNumPerGpu;
// Step 2. Generate Job Configuration in yaml format
const paiJobConfig = this.generateJobConfigInYamlFormat(environment);
......@@ -189,7 +203,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
request(submitJobRequest, (error, response, body) => {
if ((error !== undefined && error !== null) || response.statusCode >= 400) {
const errorMessage: string = (error !== undefined && error !== null) ? error.message :
`start environment ${environment.jobId} failed, http code:${response.statusCode}, http body: ${body}`;
`start environment ${environment.envId} failed, http code:${response.statusCode}, http body: ${body}`;
this.log.error(errorMessage);
environment.status = 'FAILED';
......@@ -211,7 +225,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
}
const stopJobRequest: request.Options = {
uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v2/jobs/${this.paiClusterConfig.userName}~${environment.jobId}/executionType`,
uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v2/jobs/${this.paiClusterConfig.userName}~${environment.envId}/executionType`,
method: 'PUT',
json: true,
body: { value: 'STOP' },
......@@ -222,17 +236,17 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
}
};
this.log.debug(`stopping OpenPAI environment ${environment.jobId}, ${stopJobRequest.uri}`);
this.log.debug(`stopping OpenPAI environment ${environment.envId}, ${stopJobRequest.uri}`);
try {
request(stopJobRequest, (error, response, _body) => {
try {
if ((error !== undefined && error !== null) || (response && response.statusCode >= 400)) {
this.log.error(`OpenPAI: stop job ${environment.jobId} failed with ${response.statusCode}\n${error}`);
this.log.error(`OpenPAI: stop job ${environment.envId} failed with ${response.statusCode}\n${error}`);
deferred.reject((error !== undefined && error !== null) ? error :
`Stop trial failed, http code: ${response.statusCode}`);
} else {
this.log.info(`OpenPAI job ${environment.jobId} stopped.`);
this.log.info(`OpenPAI job ${environment.envId} stopped.`);
}
deferred.resolve();
} catch (error) {
......@@ -265,7 +279,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
if (this.paiTrialConfig === undefined) {
throw new Error('trial config is not initialized');
}
const jobName = environment.jobId;
const jobName = environment.envId;
let nniJobConfig: any = undefined;
if (this.paiTrialConfig.paiConfigPath) {
......@@ -284,7 +298,6 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
environment.nodeCount += instanceCount;
}
// Each taskRole will generate new command in NNI's command format
// Each command will be formatted to NNI style
for (const taskRoleName in nniJobConfig.taskRoles) {
......@@ -298,6 +311,19 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
}
} else {
if (this.paiClusterConfig === undefined) {
throw new Error('PAI Cluster config is not initialized');
}
if (this.paiClusterConfig.gpuNum === undefined) {
throw new Error('PAI Cluster gpuNum is not initialized');
}
if (this.paiClusterConfig.cpuNum === undefined) {
throw new Error('PAI Cluster cpuNum is not initialized');
}
if (this.paiClusterConfig.memoryMB === undefined) {
throw new Error('PAI Cluster memoryMB is not initialized');
}
nniJobConfig = {
protocolVersion: 2,
name: jobName,
......@@ -320,9 +346,9 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
taskRetryCount: 0,
dockerImage: 'docker_image_0',
resourcePerInstance: {
gpu: this.paiTrialConfig.gpuNum,
cpu: this.paiTrialConfig.cpuNum,
memoryMB: this.paiTrialConfig.memoryMB
gpu: this.paiClusterConfig.gpuNum,
cpu: this.paiClusterConfig.cpuNum,
memoryMB: this.paiClusterConfig.memoryMB
},
commands: [
environment.command
......
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'use strict';
import * as assert from 'assert';
import { getLogger, Logger } from '../../common/log';
import { randomSelect } from '../../common/utils';
import { GPUInfo, ScheduleResultType } from '../common/gpuData';
import { EnvironmentInformation } from './environment';
import { TrialDetail } from './trial';
type SCHEDULE_POLICY_NAME = 'random' | 'round-robin';
export class GpuSchedulerSetting {
public useActiveGpu: boolean = false;
public maxTrialNumberPerGpu: number = 1;
}
export type GpuScheduleResult = {
resultType: ScheduleResultType;
environment: EnvironmentInformation | undefined;
gpuIndices: GPUInfo[] | undefined;
};
/**
* A simple GPU scheduler implementation
*/
export class GpuScheduler {
// private readonly machineExecutorMap: Set<TrialDetail>;
private readonly log: Logger = getLogger();
private readonly policyName: SCHEDULE_POLICY_NAME = 'round-robin';
private defaultSetting: GpuSchedulerSetting;
private roundRobinIndex: number = 0;
/**
* Constructor
* @param environments map from remote machine to executor
*/
constructor(gpuSchedulerSetting: GpuSchedulerSetting | undefined = undefined) {
if (undefined === gpuSchedulerSetting) {
gpuSchedulerSetting = new GpuSchedulerSetting();
}
this.defaultSetting = gpuSchedulerSetting;
}
public setSettings(gpuSchedulerSetting: GpuSchedulerSetting): void {
this.defaultSetting = gpuSchedulerSetting;
}
/**
* Schedule a machine according to the constraints (requiredGPUNum)
* @param requiredGPUNum required GPU number
*/
public scheduleMachine(environments: EnvironmentInformation[], requiredGPUNum: number | undefined, trialDetail: TrialDetail): GpuScheduleResult {
if (requiredGPUNum === undefined) {
requiredGPUNum = 0;
}
assert(requiredGPUNum >= 0);
// Step 1: Check if required GPU number not exceeds the total GPU number in all machines
const eligibleEnvironments: EnvironmentInformation[] = environments.filter((environment: EnvironmentInformation) =>
environment.defaultGpuSummary === undefined || requiredGPUNum === 0 || (requiredGPUNum !== undefined && environment.defaultGpuSummary.gpuCount >= requiredGPUNum));
if (eligibleEnvironments.length === 0) {
// If the required gpu number exceeds the upper limit of all machine's GPU number
// Return REQUIRE_EXCEED_TOTAL directly
return ({
resultType: ScheduleResultType.REQUIRE_EXCEED_TOTAL,
gpuIndices: undefined,
environment: undefined,
});
}
// Step 2: Allocate Host/GPU for specified trial job
// Currenty the requireGPUNum parameter for all trial jobs are identical.
if (requiredGPUNum > 0) {
// Trial job requires GPU
const result: GpuScheduleResult | undefined = this.scheduleGPUHost(environments, requiredGPUNum, trialDetail);
if (result !== undefined) {
return result;
}
} else {
// Trail job does not need GPU
const allocatedRm: EnvironmentInformation = this.selectMachine(environments, environments);
return this.allocateHost(requiredGPUNum, allocatedRm, [], trialDetail);
}
return {
resultType: ScheduleResultType.TMP_NO_AVAILABLE_GPU,
gpuIndices: undefined,
environment: undefined,
};
}
/**
* remove the job's gpu reversion
*/
public removeGpuReservation(trial: TrialDetail): void {
if (trial.environment !== undefined &&
trial.environment.defaultGpuSummary !== undefined &&
trial.assignedGpus !== undefined &&
trial.assignedGpus.length > 0) {
for (const gpuInfo of trial.assignedGpus) {
const defaultGpuSummary = trial.environment.defaultGpuSummary;
const num: number | undefined = defaultGpuSummary.assignedGpuIndexMap.get(gpuInfo.index);
if (num !== undefined) {
if (num === 1) {
defaultGpuSummary.assignedGpuIndexMap.delete(gpuInfo.index);
} else {
defaultGpuSummary.assignedGpuIndexMap.set(gpuInfo.index, num - 1);
}
}
}
}
}
private scheduleGPUHost(environments: EnvironmentInformation[], requiredGPUNumber: number, trial: TrialDetail): GpuScheduleResult | undefined {
const totalResourceMap: Map<EnvironmentInformation, GPUInfo[]> = this.gpuResourceDetection(environments);
const qualifiedEnvironments: EnvironmentInformation[] = [];
totalResourceMap.forEach((gpuInfos: GPUInfo[], environment: EnvironmentInformation) => {
if (gpuInfos !== undefined && gpuInfos.length >= requiredGPUNumber) {
qualifiedEnvironments.push(environment);
}
});
if (qualifiedEnvironments.length > 0) {
const allocatedEnvironment: EnvironmentInformation = this.selectMachine(qualifiedEnvironments, environments);
const gpuInfos: GPUInfo[] | undefined = totalResourceMap.get(allocatedEnvironment);
if (gpuInfos !== undefined) { // should always true
return this.allocateHost(requiredGPUNumber, allocatedEnvironment, gpuInfos, trial);
} else {
assert(false, 'gpuInfos is undefined');
}
}
}
/**
* Detect available GPU resource for an environment
* @returns Available GPUs on environments
*/
private gpuResourceDetection(environments: EnvironmentInformation[]): Map<EnvironmentInformation, GPUInfo[]> {
const totalResourceMap: Map<EnvironmentInformation, GPUInfo[]> = new Map<EnvironmentInformation, GPUInfo[]>();
environments.forEach((environment: EnvironmentInformation) => {
// Assgin totoal GPU count as init available GPU number
if (environment.defaultGpuSummary !== undefined) {
const defaultGpuSummary = environment.defaultGpuSummary;
const availableGPUs: GPUInfo[] = [];
const designatedGpuIndices: Set<number> = new Set<number>(environment.usableGpus);
if (designatedGpuIndices.size > 0) {
for (const gpuIndex of designatedGpuIndices) {
if (gpuIndex >= environment.defaultGpuSummary.gpuCount) {
throw new Error(`Specified GPU index not found: ${gpuIndex}`);
}
}
}
if (undefined !== defaultGpuSummary.gpuInfos) {
defaultGpuSummary.gpuInfos.forEach((gpuInfo: GPUInfo) => {
// if the GPU has active process, OR be reserved by a job,
// or index not in gpuIndices configuration in machineList,
// or trial number on a GPU reach max number,
// We should NOT allocate this GPU
// if users set useActiveGpu, use the gpu whether there is another activeProcess
if (designatedGpuIndices.size === 0 || designatedGpuIndices.has(gpuInfo.index)) {
if (defaultGpuSummary.assignedGpuIndexMap !== undefined) {
const num: number | undefined = defaultGpuSummary.assignedGpuIndexMap.get(gpuInfo.index);
const maxTrialNumberPerGpu: number = environment.maxTrialNumberPerGpu ? environment.maxTrialNumberPerGpu : this.defaultSetting.maxTrialNumberPerGpu;
const useActiveGpu: boolean = environment.useActiveGpu ? environment.useActiveGpu : this.defaultSetting.useActiveGpu;
if ((num === undefined && (!useActiveGpu && gpuInfo.activeProcessNum === 0 || useActiveGpu)) ||
(num !== undefined && num < maxTrialNumberPerGpu)) {
availableGPUs.push(gpuInfo);
}
} else {
throw new Error(`occupiedGpuIndexMap is undefined!`);
}
}
});
}
totalResourceMap.set(environment, availableGPUs);
}
});
return totalResourceMap;
}
private selectMachine(qualifiedEnvironments: EnvironmentInformation[], allEnvironments: EnvironmentInformation[]): EnvironmentInformation {
assert(qualifiedEnvironments !== undefined && qualifiedEnvironments.length > 0);
if (this.policyName === 'random') {
return randomSelect(qualifiedEnvironments);
} else if (this.policyName === 'round-robin') {
return this.roundRobinSelect(qualifiedEnvironments, allEnvironments);
} else {
throw new Error(`Unsupported schedule policy: ${this.policyName}`);
}
}
private roundRobinSelect(qualifiedEnvironments: EnvironmentInformation[], allEnvironments: EnvironmentInformation[]): EnvironmentInformation {
while (!qualifiedEnvironments.includes(allEnvironments[this.roundRobinIndex % allEnvironments.length])) {
this.roundRobinIndex++;
}
return allEnvironments[this.roundRobinIndex++ % allEnvironments.length];
}
private selectGPUsForTrial(gpuInfos: GPUInfo[], requiredGPUNum: number): GPUInfo[] {
// Sequentially allocate GPUs
return gpuInfos.slice(0, requiredGPUNum);
}
private allocateHost(requiredGPUNum: number, environment: EnvironmentInformation,
gpuInfos: GPUInfo[], trialDetails: TrialDetail): GpuScheduleResult {
assert(gpuInfos.length >= requiredGPUNum);
const allocatedGPUs: GPUInfo[] = this.selectGPUsForTrial(gpuInfos, requiredGPUNum);
const defaultGpuSummary = environment.defaultGpuSummary;
if (undefined === defaultGpuSummary) {
throw new Error(`Environment ${environment.id} defaultGpuSummary shouldn't be undefined!`);
}
allocatedGPUs.forEach((gpuInfo: GPUInfo) => {
let num: number | undefined = defaultGpuSummary.assignedGpuIndexMap.get(gpuInfo.index);
if (num === undefined) {
num = 0;
}
defaultGpuSummary.assignedGpuIndexMap.set(gpuInfo.index, num + 1);
});
trialDetails.assignedGpus = allocatedGPUs;
return {
resultType: ScheduleResultType.SUCCEED,
environment: environment,
gpuIndices: allocatedGPUs,
};
}
}
......@@ -83,7 +83,7 @@ export abstract class StorageService {
localPath = this.expandPath(false, localPath);
remotePath = this.expandPath(true, remotePath);
this.logger.debug(`copy remotePath: ${remotePath} to localPath: ${localPath}`);
return await this.internalCopy(localPath, remotePath, true, true, false);
return await this.internalCopy(remotePath, localPath, true, true, false);
}
public async removeDirectory(remotePath: string, isRecursive: boolean): Promise<void> {
......@@ -151,7 +151,7 @@ export abstract class StorageService {
localPath = this.expandPath(false, localPath);
remotePath = this.expandPath(true, remotePath);
this.logger.debug(`copy file remotePath: ${remotePath} to localPath: ${localPath}`);
await this.internalCopy(localPath, remotePath, false, true, false);
await this.internalCopy(remotePath, localPath, false, true, false);
}
public async removeFile(remotePath: string): Promise<void> {
......
......@@ -17,12 +17,12 @@ export class MountedStorageService extends StorageService {
if (isRecursive) {
const children = await fs.promises.readdir(path);
for (const file of children) {
const stat = await fs.promises.lstat(file);
this.internalRemove(file, stat.isDirectory(), isRecursive);
const filePath = this.internalJoin(path, file);
const stat = await fs.promises.lstat(filePath);
await this.internalRemove(filePath, stat.isDirectory(), isRecursive);
}
} else {
await fs.promises.rmdir(path);
}
await fs.promises.rmdir(path);
} else {
await fs.promises.unlink(path);
}
......@@ -98,7 +98,7 @@ export class MountedStorageService extends StorageService {
{
encoding: "utf8",
start: current,
end: readLength + current,
end: readLength + current - 1,
}).on("data", (data) => {
result += data;
}).on("end", () => {
......
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'use strict';
import * as chai from 'chai';
import * as fs from 'fs';
import * as path from 'path';
import { getLogger, Logger } from "../../../common/log";
import { cleanupUnitTest, prepareUnitTest } from '../../../common/utils';
import { MountedStorageService } from "../storages/mountedStorageService";
import chaiAsPromised = require("chai-as-promised");
async function remove(removedPath: string, isDirectory: boolean, isRecursive: boolean): Promise<void> {
if (isDirectory) {
if (isRecursive) {
const children = await fs.promises.readdir(removedPath);
for (const fileName of children) {
const filePath = path.join(removedPath, fileName);
const stat = await fs.promises.lstat(filePath);
await remove(filePath, stat.isDirectory(), isRecursive);
}
}
await fs.promises.rmdir(removedPath);
} else {
await fs.promises.unlink(removedPath);
}
}
describe('Unit Test for MountedStorageService', () => {
let service: MountedStorageService;
let log: Logger;
let localPath = "reusableut/local";
let mountedPath = "reusableut/mounted";
const testPath = "testpath";
const testFileName = "testfile.txt";
let localCopiedPath: string;
let localFileName: string;
let mountedFileName: string;
before(() => {
chai.should();
chai.use(chaiAsPromised);
prepareUnitTest();
log = getLogger();
const testRoot = path.dirname(__filename);
localPath = path.join(testRoot, localPath);
mountedPath = path.join(testRoot, mountedPath);
service = new MountedStorageService();
service.initialize(localPath, mountedPath);
localCopiedPath = path.join(localPath, testPath);
localFileName = path.join(localCopiedPath, testFileName);
mountedFileName = path.join(testPath, testFileName);
});
after(() => {
cleanupUnitTest();
});
beforeEach(async () => {
if (!fs.existsSync(localPath)) {
await fs.promises.mkdir(localPath, { recursive: true });
}
if (!fs.existsSync(mountedPath)) {
await fs.promises.mkdir(mountedPath, { recursive: true });
}
log.info(`localFileName: ${localFileName}`);
await fs.promises.mkdir(localCopiedPath, { recursive: true });
await fs.promises.writeFile(localFileName, "hello world");
});
afterEach(async () => {
const testRootPath = path.normalize(`${localPath}/../../reusableut`);
await remove(testRootPath, true, true);
});
it('copyAndRename', async () => {
await service.copyDirectory(localCopiedPath, ".");
chai.expect(fs.existsSync(mountedPath));
const newName = `${testFileName}new`;
await service.rename(mountedFileName, newName);
chai.assert.isFalse(fs.existsSync(testPath));
const newTestPath = `${mountedFileName}new`;
chai.assert.isTrue(await service.exists(newTestPath));
await service.copyFileBack(newTestPath, ".");
const localNewFileName = `${localPath}/${newName}`;
chai.assert.isTrue(fs.existsSync(localNewFileName));
fs.unlinkSync(`${localFileName}`);
fs.rmdirSync(`${localPath}/${testPath}`);
await service.copyDirectoryBack(`${mountedPath}/${testPath}`, `.`);
const localNewName = `${localFileName}new`;
chai.assert.isTrue(fs.existsSync(localNewName));
})
it('FileContentTest', async () => {
const savedFileName = "savedfile.txt";
await service.save("01234", savedFileName);
chai.expect(fs.existsSync(savedFileName));
let content = await service.readFileContent(savedFileName, 0, -1);
chai.assert.equal(content, "01234");
await service.save("56789", savedFileName, true);
content = await service.readFileContent(savedFileName, 0, -1);
chai.assert.equal(content, "0123456789");
content = await service.readFileContent(savedFileName, -1, 1);
chai.assert.equal(content, "0");
content = await service.readFileContent(savedFileName, 5, 1);
chai.assert.equal(content, "5");
content = await service.readFileContent(savedFileName, 5, -1);
chai.assert.equal(content, "56789");
});
});
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
import * as chai from 'chai';
import * as path from 'path';
import { Scope } from "typescript-ioc";
import * as component from '../../../common/component';
import { getLogger, Logger } from "../../../common/log";
import { TrialJobApplicationForm, TrialJobStatus } from '../../../common/trainingService';
import { cleanupUnitTest, delay, prepareUnitTest, uniqueString } from '../../../common/utils';
import { INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, SEND_TRIAL_JOB_PARAMETER, TRIAL_END, GPU_INFO } from '../../../core/commands';
import { TrialConfigMetadataKey } from '../../../training_service/common/trialConfigMetadataKey';
import { Command } from '../commandChannel';
import { EnvironmentInformation, EnvironmentService } from "../environment";
import { TrialDetail } from '../trial';
import { TrialDispatcher } from "../trialDispatcher";
import { UtCommandChannel } from './utCommandChannel';
import { UtEnvironmentService } from "./utEnvironmentService";
import chaiAsPromised = require("chai-as-promised");
import { promises } from 'fs';
import { Deferred } from 'ts-deferred';
import { NNIErrorNames, NNIError, MethodNotImplementedError } from '../../../common/errors';
function createTrialForm(content: any = undefined): TrialJobApplicationForm {
if (content === undefined) {
content = {
"test": 1
};
}
const trialForm = {
sequenceId: 0,
hyperParameters: {
value: JSON.stringify(content),
index: 0
}
};
return trialForm;
}
async function waitResult<TResult>(callback: () => Promise<TResult | undefined>, waitMs: number = 1000, interval: number = 1, throwError: boolean = false): Promise<TResult | undefined> {
while (waitMs > 0) {
const result = await callback();
if (result !== undefined) {
return result;
}
await delay(interval);
waitMs -= interval;
};
if (throwError) {
throw new Error(`wait result timeout!\n${callback.toString()}`);
}
return undefined;
}
async function waitResultMust<TResult>(callback: () => Promise<TResult | undefined>, waitMs: number = 1000, interval: number = 1): Promise<TResult> {
const result = await waitResult(callback, waitMs, interval, true);
// this error should be thrown in waitResult already.
if (result === undefined) {
throw new Error(`wait result timeout!`);
}
return result;
}
async function newTrial(trialDispatcher: TrialDispatcher): Promise<TrialDetail> {
const trialDetail = await trialDispatcher.submitTrialJob(createTrialForm());
return trialDetail;
}
function newGpuInfo(gpuCount: Number = 2, nodeId: string | undefined = undefined): any {
let gpuInfos = [];
for (let index = 0; index < gpuCount; index++) {
gpuInfos.push({
index: index,
activeProcessNum: 0,
});
}
const gpuInfo = {
gpuInfos: gpuInfos,
gpuCount: gpuInfos.length,
node: nodeId
}
return gpuInfo;
}
async function verifyTrialRunning(commandChannel: UtCommandChannel, trialDetail: TrialDetail): Promise<Command> {
let command = await waitResultMust<Command>(async () => {
return await commandChannel.testReceiveCommandFromTrialDispatcher();
});
chai.assert.equal(command.command, NEW_TRIAL_JOB, "verifyTrialRunning command type");
chai.assert.equal(command.data["trialId"], trialDetail.id, "verifyTrialRunning trialDetail.id should be equal.");
return command;
}
async function verifyTrialResult(commandChannel: UtCommandChannel, trialDetail: TrialDetail, returnCode: number = 0): Promise<void> {
let trialResult = {
trial: trialDetail.id,
code: returnCode,
timestamp: Date.now(),
};
if (trialDetail.environment === undefined) {
throw new Error(`environment shouldn't be undefined.`)
}
await commandChannel.testSendCommandToTrialDispatcher(trialDetail.environment, TRIAL_END, trialResult);
await waitResultMust<boolean>(async () => {
return trialDetail.status !== 'RUNNING' ? true : undefined;
});
if (returnCode === 0) {
chai.assert.equal<TrialJobStatus>(trialDetail.status, 'SUCCEEDED', "trial should be succeeded");
} else {
chai.assert.equal<TrialJobStatus>(trialDetail.status, 'FAILED', "trial should be failed");
}
}
async function waitEnvironment(waitCount: number,
previousEnvironments: Map<string, EnvironmentInformation>,
environmentService: UtEnvironmentService, commandChannel: UtCommandChannel,
gpuCount: number = 2, nodeCount: number = 1,
callback: ((environment: EnvironmentInformation) => Promise<void>) | undefined = undefined): Promise<EnvironmentInformation> {
const waitRequestEnvironment = await waitResultMust<EnvironmentInformation>(async () => {
const environments = environmentService.testGetEnvironments();
if (environments.size === waitCount) {
for (const [id, environment] of environments) {
if (!previousEnvironments.has(id)) {
previousEnvironments.set(id, environment);
return environment;
}
}
}
return undefined;
});
if (waitRequestEnvironment === undefined) {
throw new Error(`waitRequestEnvironment is not defined.`);
}
const nodeIds = [];
waitRequestEnvironment.nodeCount = nodeCount;
if (nodeCount > 1) {
for (let index = 0; index < nodeCount; index++) {
nodeIds.push(uniqueString(5));
}
} else {
nodeIds.push(undefined);
}
for (const nodeId of nodeIds) {
// set runner is ready.
await commandChannel.testSendCommandToTrialDispatcher(waitRequestEnvironment, INITIALIZED, { node: nodeId });
if (gpuCount > 0) {
await commandChannel.testSendCommandToTrialDispatcher(waitRequestEnvironment, GPU_INFO, newGpuInfo(gpuCount, nodeId));
}
}
if (callback) {
await callback(waitRequestEnvironment);
}
// set env to running
environmentService.testSetEnvironmentStatus(waitRequestEnvironment, 'RUNNING');
await waitResultMust<boolean>(async () => {
return waitRequestEnvironment.isRunnerReady ? true : undefined;
});
return waitRequestEnvironment;
}
describe('Unit Test for TrialDispatcher', () => {
let trialRunPromise: Promise<void>;
let trialDispatcher: TrialDispatcher;
let commandChannel: UtCommandChannel;
let environmentService: UtEnvironmentService;
let log: Logger;
let previousEnvironments: Map<string, EnvironmentInformation> = new Map<string, EnvironmentInformation>();
const currentDir = path.dirname(__filename);
before(() => {
chai.should();
chai.use(chaiAsPromised);
prepareUnitTest();
log = getLogger();
});
after(() => {
cleanupUnitTest();
});
beforeEach(async () => {
const trialConfig = {
codeDir: currentDir,
command: "echo",
}
const nniManagerIpConfig = {
nniManagerIp: "127.0.0.1",
}
trialDispatcher = new TrialDispatcher();
component.Container.bind(EnvironmentService)
.to(UtEnvironmentService)
.scope(Scope.Singleton);
await trialDispatcher.setClusterMetadata(TrialConfigMetadataKey.TRIAL_CONFIG, JSON.stringify(trialConfig));
await trialDispatcher.setClusterMetadata(TrialConfigMetadataKey.NNI_MANAGER_IP, JSON.stringify(nniManagerIpConfig));
trialRunPromise = trialDispatcher.run();
environmentService = component.get(EnvironmentService) as UtEnvironmentService;
commandChannel = environmentService.testGetCommandChannel();
});
afterEach(async () => {
previousEnvironments.clear();
await trialDispatcher.cleanUp();
environmentService.testReset();
await trialRunPromise;
});
it('reuse env', async () => {
let trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
trialDetail = await newTrial(trialDispatcher);
await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, -1);
chai.assert.equal(environmentService.testGetEnvironments().size, 1, "as env reused, so only 1 env should be here.");
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 2 trials");
});
it('not reusable env', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
}));
let trialDetail = await newTrial(trialDispatcher);
let environment = await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
await waitResultMust<true>(async () => {
return environment.status === 'USER_CANCELED' ? true : undefined;
});
trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(2, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, -1);
await waitResultMust<true>(async () => {
return environment.status === 'USER_CANCELED' ? true : undefined;
});
chai.assert.equal(environmentService.testGetEnvironments().size, 2, "as env not reused, so only 2 envs should be here.");
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 2 trials");
});
it('no more env', async () => {
const trialDetail1 = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
// set to no more environment
environmentService.testSetNoMoreEnvironment(false);
const trialDetail2 = await newTrial(trialDispatcher);
await verifyTrialRunning(commandChannel, trialDetail1);
await verifyTrialResult(commandChannel, trialDetail1, 0);
await verifyTrialRunning(commandChannel, trialDetail2);
await verifyTrialResult(commandChannel, trialDetail2, -1);
chai.assert.equal(environmentService.testGetEnvironments().size, 1, "as env not reused, so only 1 envs should be here.");
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 2 trials");
});
it('2trial2env', async () => {
let trialDetail1 = await newTrial(trialDispatcher);
let trialDetail2 = await newTrial(trialDispatcher);
await waitEnvironment(2, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail1);
await verifyTrialResult(commandChannel, trialDetail1, 0);
await verifyTrialRunning(commandChannel, trialDetail2);
await verifyTrialResult(commandChannel, trialDetail2, 0);
chai.assert.equal(environmentService.testGetEnvironments().size, 2, "2 envs should be here.");
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 2 trials");
});
it('3trial2env', async () => {
let trialDetail1 = await newTrial(trialDispatcher);
let trialDetail2 = await newTrial(trialDispatcher);
await waitEnvironment(2, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail1);
await verifyTrialResult(commandChannel, trialDetail1, 0);
await verifyTrialRunning(commandChannel, trialDetail2);
await verifyTrialResult(commandChannel, trialDetail2, 0);
chai.assert.equal(environmentService.testGetEnvironments().size, 2, "2 envs should be here.");
let trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 2 trials");
let trialDetail3 = await newTrial(trialDispatcher);
await verifyTrialRunning(commandChannel, trialDetail3);
await verifyTrialResult(commandChannel, trialDetail3, 0);
chai.assert.equal(environmentService.testGetEnvironments().size, 2, "2 envs should be here.");
trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 3, "there should be 2 trials");
});
it('stop trial', async () => {
let trialDetail1 = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail1);
await trialDispatcher.cancelTrialJob(trialDetail1.id, false);
let command = await waitResultMust<Command>(async () => {
return await commandChannel.testReceiveCommandFromTrialDispatcher();
});
chai.assert.equal(command.command, KILL_TRIAL_JOB);
log.info(`command: ${JSON.stringify(command)}`);
chai.assert.equal(command.data, trialDetail1.id);
await waitResultMust<boolean>(async () => {
return trialDetail1.status !== 'RUNNING' ? true : undefined;
});
let trialDetail2 = await newTrial(trialDispatcher);
await verifyTrialRunning(commandChannel, trialDetail2);
await trialDispatcher.cancelTrialJob(trialDetail2.id, true);
command = await waitResultMust<Command>(async () => {
return await commandChannel.testReceiveCommandFromTrialDispatcher();
});
chai.assert.equal(command.command, KILL_TRIAL_JOB);
log.info(`command: ${JSON.stringify(command)}`);
chai.assert.equal(command.data, trialDetail2.id);
await waitResultMust<boolean>(async () => {
return trialDetail2.status !== 'RUNNING' ? true : undefined;
});
chai.assert.equal(environmentService.testGetEnvironments().size, 1, "only one trial, so one env");
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 1 stopped trial only");
let trial = await trialDispatcher.getTrialJob(trialDetail1.id);
chai.assert.equal<TrialJobStatus>(trial.status, 'USER_CANCELED', `trial is canceled.`);
trial = await trialDispatcher.getTrialJob(trialDetail2.id);
chai.assert.equal<TrialJobStatus>(trial.status, 'EARLY_STOPPED', `trial is earlier stopped.`);
});
it('multi phase', async () => {
let trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail);
let content = {
test: 2,
}
await trialDispatcher.updateTrialJob(trialDetail.id, createTrialForm(content));
let command = await waitResultMust<Command>(async () => {
return await commandChannel.testReceiveCommandFromTrialDispatcher();
});
chai.assert.equal(command.command, SEND_TRIAL_JOB_PARAMETER);
chai.assert.equal(command.data["trialId"], trialDetail.id);
chai.assert.equal(command.data.parameters.index, 0);
chai.assert.equal(command.data.parameters.value, JSON.stringify(content));
content = {
test: 3,
}
await trialDispatcher.updateTrialJob(trialDetail.id, createTrialForm(content));
command = await waitResultMust<Command>(async () => {
return await commandChannel.testReceiveCommandFromTrialDispatcher();
});
chai.assert.equal(command.command, SEND_TRIAL_JOB_PARAMETER);
chai.assert.equal(command.data["trialId"], trialDetail.id);
chai.assert.equal(command.data.parameters.index, 0);
chai.assert.equal(command.data.parameters.value, JSON.stringify(content));
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(environmentService.testGetEnvironments().size, 1, "only one trial, so one env");
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 1, "there should be 1 stopped trial only");
});
it('multi node', async () => {
let trialDetail = await newTrial(trialDispatcher);
const environment = await waitEnvironment(1, previousEnvironments, environmentService, commandChannel, 2, 2);
log.debug(`environment ${JSON.stringify(environment)}`);
await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(environment.nodes.size, 2);
let command = await waitResultMust<Command>(async () => {
return await commandChannel.testReceiveCommandFromTrialDispatcher();
});
chai.assert.equal(command.command, KILL_TRIAL_JOB);
chai.assert.equal(environmentService.testGetEnvironments().size, 1, "only one trial, so one env");
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 1, "there should be 1 stopped trial only");
});
it('env timeout', async () => {
let trialDetail = await newTrial(trialDispatcher);
let environment = await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
environmentService.testSetEnvironmentStatus(environment, 'SUCCEEDED');
await waitResultMust<boolean>(async () => {
return environment.status === 'SUCCEEDED' ? true : undefined;
});
trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(2, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(previousEnvironments.size, 2, "as an env timeout, so 2 envs should be here.");
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 2 trials");
});
it('env failed with trial', async () => {
let trialDetail = await newTrial(trialDispatcher);
let environment = await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
await verifyTrialRunning(commandChannel, trialDetail);
environmentService.testSetEnvironmentStatus(environment, 'FAILED');
await waitResultMust<boolean>(async () => {
return environment.status === 'FAILED' ? true : undefined;
});
await waitResultMust<boolean>(async () => {
return trialDetail.status === 'FAILED' ? true : undefined;
});
chai.assert.equal<TrialJobStatus>(trialDetail.status, 'FAILED', "env failed, so trial also failed.");
});
it('GPUScheduler disabled gpuNum === undefined', async () => {
let trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
const command = await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(command.data["gpuIndices"], undefined);
});
it('GPUScheduler disabled gpuNum === 0', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 0,
}));
let trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
const command = await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(command.data["gpuIndices"], "");
});
it('GPUScheduler enable no cluster gpu config', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 1,
}));
let trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
const command = await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(command.data["gpuIndices"], "0");
});
it('GPUScheduler skipped no GPU info', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
}));
let trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
const command = await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(command.data["gpuIndices"], undefined);
});
it('GPUScheduler disabled multi-node', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 0,
}));
let trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
const command = await verifyTrialRunning(commandChannel, trialDetail);
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(command.data["gpuIndices"], "");
});
it('GPUScheduler enabled 2 gpus 2 trial', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 1,
}));
const trialDetail1 = await newTrial(trialDispatcher);
const trialDetail2 = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
let command = await verifyTrialRunning(commandChannel, trialDetail1);
chai.assert.equal(command.data["gpuIndices"], "0");
command = await verifyTrialRunning(commandChannel, trialDetail2);
chai.assert.equal(command.data["gpuIndices"], "1");
await verifyTrialResult(commandChannel, trialDetail1, 0);
await verifyTrialResult(commandChannel, trialDetail2, 0);
chai.assert.equal(environmentService.testGetEnvironments().size, 1);
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 2 trials");
});
it('GPUScheduler enabled 4 gpus 2 trial(need 2 gpus)', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 2,
}));
const trialDetail1 = await newTrial(trialDispatcher);
const trialDetail2 = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel, 4);
let command = await verifyTrialRunning(commandChannel, trialDetail1);
chai.assert.equal(command.data["gpuIndices"], "0,1");
command = await verifyTrialRunning(commandChannel, trialDetail2);
chai.assert.equal(command.data["gpuIndices"], "2,3");
await verifyTrialResult(commandChannel, trialDetail1, 0);
await verifyTrialResult(commandChannel, trialDetail2, 0);
chai.assert.equal(environmentService.testGetEnvironments().size, 1);
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, "there should be 2 trials");
});
it('GPUScheduler enabled use 4 gpus but only 1 usable(4)', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 1,
}));
const trialDetail = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel, 4, 1, async (environment) => {
environment.usableGpus = [3];
});
let command = await verifyTrialRunning(commandChannel, trialDetail);
chai.assert.equal(command.data["gpuIndices"], "3");
await verifyTrialResult(commandChannel, trialDetail, 0);
chai.assert.equal(environmentService.testGetEnvironments().size, 1);
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 1);
});
it('GPUScheduler enabled TMP_NO_AVAILABLE_GPU, request new env', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 1,
}));
const trialDetail1 = await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel, 1);
let command = await verifyTrialRunning(commandChannel, trialDetail1);
chai.assert.equal(command.data["gpuIndices"], "0");
const trialDetail2 = await newTrial(trialDispatcher);
await waitEnvironment(2, previousEnvironments, environmentService, commandChannel, 1);
await verifyTrialResult(commandChannel, trialDetail1, 0);
command = await verifyTrialRunning(commandChannel, trialDetail2);
await verifyTrialResult(commandChannel, trialDetail2, 0);
chai.assert.equal(command.data["gpuIndices"], "0");
chai.assert.equal(environmentService.testGetEnvironments().size, 2, 'environments');
const trials = await trialDispatcher.listTrialJobs();
chai.assert.equal(trials.length, 2, 'trials');
});
it('GPUScheduler enabled REQUIRE_EXCEED_TOTAL, need fail', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 8,
}));
await newTrial(trialDispatcher);
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel);
await chai.expect(trialRunPromise).rejectedWith(NNIError, "REQUIRE_EXCEED_TOTAL");
const deferred = new Deferred<void>();
trialRunPromise = deferred.promise;
deferred.resolve();
});
it('GPUScheduler enabled maxTrialNumberPerGpu=2, 4 trials, 2 gpus', async () => {
trialDispatcher.setClusterMetadata(
TrialConfigMetadataKey.TRIAL_CONFIG,
JSON.stringify({
reuseEnvironment: false,
codeDir: currentDir,
gpuNum: 1,
}));
const trials = [];
// last two trials shouldn't be in first environment.
for (let index = 0; index < 6; index++) {
const trial = await newTrial(trialDispatcher);
trials.push(trial);
}
await waitEnvironment(1, previousEnvironments, environmentService, commandChannel, 2, 1, async (environment) => {
environment.maxTrialNumberPerGpu = 2;
});
await waitEnvironment(2, previousEnvironments, environmentService, commandChannel, 2, 1, async (environment) => {
environment.maxTrialNumberPerGpu = 2;
});
const gpuIndexMap = new Map<string, number>();
for (let index = 0; index < 6; index++) {
const trial = trials[index];
let command = await verifyTrialRunning(commandChannel, trial);
const gpuIndex = command.data["gpuIndices"];
const trialNumbers = gpuIndexMap.get(gpuIndex);
if (index < 4) {
if (undefined === trialNumbers) {
gpuIndexMap.set(gpuIndex, 1);
} else {
gpuIndexMap.set(gpuIndex, trialNumbers + 1);
}
}
}
chai.assert.equal(gpuIndexMap.size, 2);
chai.assert.equal(gpuIndexMap.get("0"), 2);
chai.assert.equal(gpuIndexMap.get("1"), 2);
for (let index = 0; index < 6; index++) {
const trial = trials[index];
await verifyTrialResult(commandChannel, trial, 0);
}
chai.assert.equal(environmentService.testGetEnvironments().size, 2);
const listedTrials = await trialDispatcher.listTrialJobs();
chai.assert.equal(listedTrials.length, 6);
});
});
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
import { encodeCommand } from "../../../core/ipcInterface";
import { Command, CommandChannel, RunnerConnection } from "../commandChannel";
import { Channel, EnvironmentInformation } from "../environment";
class UtRunnerConnection extends RunnerConnection {
}
export class UtCommandChannel extends CommandChannel {
private readonly receivedCommands: Command[] = [];
public get channelName(): Channel {
return "ut";
}
public async testSendCommandToTrialDispatcher(environment: EnvironmentInformation, commandType: string, commandData: any) {
const content = encodeCommand(commandType, JSON.stringify(commandData));
this.log.debug(`UtCommandChannel: env ${environment.id} send test command ${content}`);
this.handleCommand(environment, content.toString("utf8"));
}
public async testReceiveCommandFromTrialDispatcher(): Promise<Command | undefined> {
return this.receivedCommands.shift();
}
public async config(_key: string, value: any): Promise<void> {
// do nothing
}
public async start(): Promise<void> {
// do nothing
}
public async stop(): Promise<void> {
// do nothing
}
public async run(): Promise<void> {
// do nothing
}
protected async sendCommandInternal(environment: EnvironmentInformation, message: string): Promise<void> {
const parsedCommands = this.parseCommands(message);
for (const parsedCommand of parsedCommands) {
const command = new Command(environment, parsedCommand[0], parsedCommand[1]);
this.receivedCommands.push(command);
}
}
protected createRunnerConnection(environment: EnvironmentInformation): RunnerConnection {
// do nothing
return new UtRunnerConnection(environment);
}
}
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
import { EnvironmentInformation, EnvironmentService, EnvironmentStatus } from "../environment";
import { EventEmitter } from "events";
import { CommandChannel } from "../commandChannel";
import { UtCommandChannel } from "./utCommandChannel";
export class UtEnvironmentService extends EnvironmentService {
private commandChannel: UtCommandChannel | undefined;
private allEnvironments = new Map<string, EnvironmentInformation>();
private hasMoreEnvironmentsInternal = true;
constructor() {
super();
}
public get hasStorageService(): boolean {
// storage service is tested by integration testing.
return false;
}
public get environmentMaintenceLoopInterval(): number {
return 1;
}
public testSetEnvironmentStatus(environment: EnvironmentInformation, newStatus: EnvironmentStatus): void {
environment.status = newStatus;
}
public testReset(): void {
this.allEnvironments.clear();
}
public testGetEnvironments(): Map<string, EnvironmentInformation> {
return this.allEnvironments;
}
public testGetCommandChannel(): UtCommandChannel {
if (this.commandChannel === undefined) {
throw new Error(`command channel shouldn't be undefined.`);
}
return this.commandChannel;
}
public testSetNoMoreEnvironment(hasMore: boolean): void {
this.hasMoreEnvironmentsInternal = hasMore;
}
public get hasMoreEnvironments(): boolean {
return this.hasMoreEnvironmentsInternal;
}
public createCommandChannel(commandEmitter: EventEmitter): CommandChannel {
this.commandChannel = new UtCommandChannel(commandEmitter)
return this.commandChannel;
}
public async config(_key: string, _value: string): Promise<void> {
// do nothing
}
public async refreshEnvironmentsStatus(environments: EnvironmentInformation[]): Promise<void> {
// do nothing
}
public async startEnvironment(environment: EnvironmentInformation): Promise<void> {
if (!this.allEnvironments.has(environment.id)) {
this.allEnvironments.set(environment.id, environment);
environment.status = "WAITING";
}
}
public async stopEnvironment(environment: EnvironmentInformation): Promise<void> {
environment.status = "USER_CANCELED";
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment