"deployment/vscode:/vscode.git/clone" did not exist on "bcefce6a814c2c1228e25fcd98c88fea9e8111a9"
Unverified Commit 1328f412 authored by chicm-ms's avatar chicm-ms Committed by GitHub
Browse files

Fix eslint errors (#1836)

* update eslint rules
* auto fix eslint
* manually fix eslint (#1833)
parent 8c07cf41
......@@ -118,7 +118,7 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig {
export class KubeflowTrialConfigTemplate extends KubernetesTrialConfigTemplate {
public readonly replicas: number;
constructor(replicas: number, command : string, gpuNum : number,
constructor(replicas: number, command: string, gpuNum: number,
cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) {
super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryAuthPath);
this.replicas = replicas;
......
......@@ -17,7 +17,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
}
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> {
kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
if (!this.statusesNeedToCheck.includes(kubernetesTrialJob.status)) {
return Promise.resolve();
}
......@@ -40,7 +40,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
if (kubernetesJobInfo.status && kubernetesJobInfo.status.conditions) {
const latestCondition: any = kubernetesJobInfo.status.conditions[kubernetesJobInfo.status.conditions.length - 1];
const tfJobType : KubeflowJobStatus = <KubeflowJobStatus>latestCondition.type;
const tfJobType: KubeflowJobStatus = <KubeflowJobStatus>latestCondition.type;
switch (tfJobType) {
case 'Created':
kubernetesTrialJob.status = 'WAITING';
......
......@@ -17,7 +17,6 @@ import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../../common/containerJobData';
import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey';
import { validateCodeDir } from '../../common/util';
import { AzureStorageClientUtility } from '../azureStorageClientUtils';
import { NFSConfig } from '../kubernetesConfig';
import { KubernetesTrialJobDetail } from '../kubernetesData';
import { KubernetesTrainingService } from '../kubernetesTrainingService';
......@@ -116,7 +115,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.nniManagerIpConfig = <NNIManagerIpConfig>JSON.parse(value);
break;
case TrialConfigMetadataKey.KUBEFLOW_CLUSTER_CONFIG:
case TrialConfigMetadataKey.KUBEFLOW_CLUSTER_CONFIG: {
const kubeflowClusterJsonObject: object = JSON.parse(value);
this.kubeflowClusterConfig = KubeflowClusterConfigFactory.generateKubeflowClusterConfig(kubeflowClusterJsonObject);
if (this.kubeflowClusterConfig.storageType === 'azureStorage') {
......@@ -125,9 +124,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.azureStorageShare = azureKubeflowClusterConfig.azureStorage.azureShare;
await this.createAzureStorage(
azureKubeflowClusterConfig.keyVault.vaultName,
azureKubeflowClusterConfig.keyVault.name,
azureKubeflowClusterConfig.azureStorage.accountName,
azureKubeflowClusterConfig.azureStorage.azureShare
azureKubeflowClusterConfig.keyVault.name
);
} else if (this.kubeflowClusterConfig.storageType === 'nfs') {
const nfsKubeflowClusterConfig: KubeflowClusterConfigNFS = <KubeflowClusterConfigNFS>this.kubeflowClusterConfig;
......@@ -139,8 +136,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.kubernetesCRDClient = KubeflowOperatorClientFactory.createClient(
this.kubeflowClusterConfig.operator, this.kubeflowClusterConfig.apiVersion);
break;
case TrialConfigMetadataKey.TRIAL_CONFIG:
}
case TrialConfigMetadataKey.TRIAL_CONFIG: {
if (this.kubeflowClusterConfig === undefined) {
this.log.error('kubeflow cluster config is not initialized');
......@@ -163,6 +160,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
return Promise.reject(new Error(error));
}
break;
}
case TrialConfigMetadataKey.VERSION_CHECK:
this.versionCheck = (value === 'true' || value === 'True');
break;
......@@ -235,7 +233,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
//create tmp trial working folder locally.
await cpp.exec(`mkdir -p ${trialLocalTempFolder}`);
const runScriptContent : string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
const runScriptContent: string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
// Write NNI installation file to local tmp files
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), runScriptContent, { encoding: 'utf8' });
......@@ -293,14 +291,14 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
throw Error(`operator ${this.kubeflowClusterConfig.operator} is invalid`);
}
const workerPodResources : any = {};
const workerPodResources: any = {};
if (kubeflowTrialConfig.worker !== undefined) {
workerPodResources.requests = this.generatePodResource(kubeflowTrialConfig.worker.memoryMB, kubeflowTrialConfig.worker.cpuNum,
kubeflowTrialConfig.worker.gpuNum);
}
workerPodResources.limits = {...workerPodResources.requests};
const nonWorkerResources : any = {};
const nonWorkerResources: any = {};
if (this.kubeflowClusterConfig.operator === 'tf-operator') {
const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
if (tensorflowTrialConfig.ps !== undefined) {
......@@ -330,8 +328,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param workerPodResources worker pod template
* @param nonWorkerPodResources non-worker pod template, like ps or master
*/
private async generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName : string, workerPodResources : any,
nonWorkerPodResources?: any) : Promise<any> {
private async generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName: string, workerPodResources: any,
nonWorkerPodResources?: any): Promise<any> {
if (this.kubeflowClusterConfig === undefined) {
throw new Error('Kubeflow Cluster config is not initialized');
}
......@@ -348,11 +346,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
const replicaSpecsObjMap: Map<string, object> = new Map<string, object>();
if (this.kubeflowTrialConfig.operatorType === 'tf-operator') {
const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
let privateRegistrySecretName = await this.createRegistrySecret(tensorflowTrialConfig.worker.privateRegistryAuthPath);
const privateRegistrySecretName = await this.createRegistrySecret(tensorflowTrialConfig.worker.privateRegistryAuthPath);
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.worker.replicas,
tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName);
if (tensorflowTrialConfig.ps !== undefined) {
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(tensorflowTrialConfig.ps.privateRegistryAuthPath);
const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(tensorflowTrialConfig.ps.privateRegistryAuthPath);
replicaSpecsObj.Ps = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.ps.replicas,
tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources, privateRegistrySecretName);
}
......@@ -360,11 +358,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
} else if (this.kubeflowTrialConfig.operatorType === 'pytorch-operator') {
const pytorchTrialConfig: KubeflowTrialConfigPytorch = <KubeflowTrialConfigPytorch>this.kubeflowTrialConfig;
if (pytorchTrialConfig.worker !== undefined) {
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.worker.privateRegistryAuthPath);
const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.worker.privateRegistryAuthPath);
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.worker.replicas,
pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName);
}
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.master.privateRegistryAuthPath);
const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.master.privateRegistryAuthPath);
replicaSpecsObj.Master = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.master.replicas,
pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources, privateRegistrySecretName);
......@@ -448,7 +446,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
resources: podResources
}
]);
let spec: any = {
const spec: any = {
containers: containersSpecMap.get('containers'),
restartPolicy: 'ExitCode',
volumes: volumeSpecMap.get('nniVolumes')
......
......@@ -3,6 +3,7 @@
'use strict';
// eslint-disable-next-line @typescript-eslint/camelcase
import { Client1_10, config } from 'kubernetes-client';
import { getLogger, Logger } from '../../common/log';
......@@ -21,7 +22,7 @@ class GeneralK8sClient {
public async createSecret(secretManifest: any): Promise<boolean> {
let result: Promise<boolean>;
const response : any = await this.client.api.v1.namespaces('default').secrets
const response: any = await this.client.api.v1.namespaces('default').secrets
.post({body: secretManifest});
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(true);
......@@ -73,7 +74,7 @@ abstract class KubernetesCRDClient {
public async createKubernetesJob(jobManifest: any): Promise<boolean> {
let result: Promise<boolean>;
const response : any = await this.operator.post({body: jobManifest});
const response: any = await this.operator.post({body: jobManifest});
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(true);
} else {
......@@ -86,7 +87,7 @@ abstract class KubernetesCRDClient {
//TODO : replace any
public async getKubernetesJob(kubeflowJobName: string): Promise<any> {
let result: Promise<any>;
const response : any = await this.operator(kubeflowJobName)
const response: any = await this.operator(kubeflowJobName)
.get();
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(response.body);
......@@ -104,7 +105,7 @@ abstract class KubernetesCRDClient {
.map((labelKey: string) => `${labelKey}=${labels.get(labelKey)}`)
.join(',');
try {
const deleteResult : any = await this.operator()
const deleteResult: any = await this.operator()
.delete({
qs: {
labelSelector: matchQuery,
......
......@@ -113,11 +113,11 @@ export class KubernetesClusterConfigFactory {
*/
export class NFSConfig {
// IP Adress of NFS server
public readonly server : string;
public readonly server: string;
// exported NFS path on NFS server
public readonly path : string;
public readonly path: string;
constructor(server : string, path : string) {
constructor(server: string, path: string) {
this.server = server;
this.path = path;
}
......@@ -129,11 +129,11 @@ export class NFSConfig {
*/
export class KeyVaultConfig {
// The vault-name to specify vault
public readonly vaultName : string;
public readonly vaultName: string;
// The name to specify private key
public readonly name : string;
public readonly name: string;
constructor(vaultName : string, name : string) {
constructor(vaultName: string, name: string) {
this.vaultName = vaultName;
this.name = name;
}
......@@ -144,11 +144,11 @@ export class KeyVaultConfig {
*/
export class AzureStorage {
// The azure share to storage files
public readonly azureShare : string;
public readonly azureShare: string;
// The account name of sotrage service
public readonly accountName: string;
constructor(azureShare : string, accountName: string) {
constructor(azureShare: string, accountName: string) {
this.azureShare = azureShare;
this.accountName = accountName;
}
......@@ -171,12 +171,12 @@ export class KubernetesTrialConfigTemplate {
public readonly privateRegistryAuthPath?: string;
// Trail command
public readonly command : string;
public readonly command: string;
// Required GPU number for trial job. The number should be in [0,100]
public readonly gpuNum : number;
public readonly gpuNum: number;
constructor(command : string, gpuNum : number,
constructor(command: string, gpuNum: number,
cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) {
this.command = command;
this.gpuNum = gpuNum;
......
......@@ -14,7 +14,7 @@ import { KubernetesTrialJobDetail } from './kubernetesData';
* Collector Kubeflow jobs info from Kubernetes cluster, and update kubeflow job status locally
*/
export class KubernetesJobInfoCollector {
protected readonly trialJobsMap : Map<string, KubernetesTrialJobDetail>;
protected readonly trialJobsMap: Map<string, KubernetesTrialJobDetail>;
protected readonly log: Logger = getLogger();
protected readonly statusesNeedToCheck: TrialJobStatus[];
......@@ -23,9 +23,9 @@ export class KubernetesJobInfoCollector {
this.statusesNeedToCheck = ['RUNNING', 'WAITING'];
}
public async retrieveTrialStatus(kubernetesCRDClient: KubernetesCRDClient | undefined) : Promise<void> {
public async retrieveTrialStatus(kubernetesCRDClient: KubernetesCRDClient | undefined): Promise<void> {
assert(kubernetesCRDClient !== undefined);
const updateKubernetesTrialJobs : Promise<void>[] = [];
const updateKubernetesTrialJobs: Promise<void>[] = [];
for (const [trialJobId, kubernetesTrialJob] of this.trialJobsMap) {
if (kubernetesTrialJob === undefined) {
throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
......@@ -41,7 +41,7 @@ export class KubernetesJobInfoCollector {
}
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> {
kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
throw new MethodNotImplementedError();
}
}
......@@ -26,7 +26,7 @@ export class KubernetesJobRestServer extends ClusterJobRestServer {
}
// tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId : string, metrics : any[]) : void {
protected handleTrialMetrics(jobId: string, metrics: any[]): void {
if (this.kubernetesTrainingService === undefined) {
throw Error('kubernetesTrainingService not initialized!');
}
......
......@@ -22,8 +22,7 @@ import { KubernetesClusterConfig } from './kubernetesConfig';
import { kubernetesScriptFormat, KubernetesTrialJobDetail } from './kubernetesData';
import { KubernetesJobRestServer } from './kubernetesJobRestServer';
var yaml = require('js-yaml');
var fs = require('fs');
const fs = require('fs');
/**
* Training Service implementation for Kubernetes
......@@ -36,7 +35,7 @@ abstract class KubernetesTrainingService {
// experiment root dir in NFS
protected readonly trialLocalNFSTempFolder: string;
protected stopping: boolean = false;
protected experimentId! : string;
protected experimentId!: string;
protected kubernetesRestServerPort?: number;
protected readonly CONTAINER_MOUNT_PATH: string;
protected azureStorageClient?: azureStorage.FileService;
......@@ -113,12 +112,12 @@ abstract class KubernetesTrainingService {
return Promise.resolve('');
}
public get MetricsEmitter() : EventEmitter {
public get MetricsEmitter(): EventEmitter {
return this.metricsEmitter;
}
public async cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> {
const trialJobDetail : KubernetesTrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
const trialJobDetail: KubernetesTrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
if (trialJobDetail === undefined) {
const errorMessage: string = `CancelTrialJob: trial job id ${trialJobId} not found`;
this.log.error(errorMessage);
......@@ -208,7 +207,7 @@ abstract class KubernetesTrainingService {
}
// tslint:disable: no-unsafe-any no-any
protected async createAzureStorage(vaultName: string, valutKeyName: string, accountName: string, azureShare: string): Promise<void> {
protected async createAzureStorage(vaultName: string, valutKeyName: string): Promise<void> {
try {
const result: any = await cpp.exec(`az keyvault secret show --name ${valutKeyName} --vault-name ${vaultName}`);
if (result.stderr) {
......@@ -312,8 +311,8 @@ abstract class KubernetesTrainingService {
if(filePath === undefined || filePath === '') {
return undefined;
}
let body = fs.readFileSync(filePath).toString('base64');
let registrySecretName = String.Format('nni-secret-{0}', uniqueString(8)
const body = fs.readFileSync(filePath).toString('base64');
const registrySecretName = String.Format('nni-secret-{0}', uniqueString(8)
.toLowerCase());
await this.genericK8sClient.createSecret(
{
......@@ -336,7 +335,7 @@ abstract class KubernetesTrainingService {
return registrySecretName;
}
protected async uploadFilesToAzureStorage(trialJobId: string, trialLocalTempFolder: String, codeDir: String, uploadRetryCount: number | undefined): Promise<string> {
protected async uploadFilesToAzureStorage(trialJobId: string, trialLocalTempFolder: string, codeDir: string, uploadRetryCount: number | undefined): Promise<string> {
if (this.azureStorageClient === undefined) {
throw new Error('azureStorageClient is not initialized');
}
......
......@@ -4,11 +4,9 @@
'use strict';
import * as cpp from 'child-process-promise';
import * as cp from 'child_process';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { String } from 'typescript-string-operations';
import { getLogger, Logger } from '../../common/log';
import { delay } from '../../common/utils';
import { GPUInfo, GPUSummary } from '../common/gpuData';
......
......@@ -107,7 +107,7 @@ class LocalTrainingService implements TrainingService {
private initialized: boolean;
private stopping: boolean;
private rootDir!: string;
private readonly experimentId! : string;
private readonly experimentId!: string;
private gpuScheduler!: GPUScheduler;
private readonly occupiedGpuIndexNumMap: Map<number, number>;
private designatedGpuIndices!: Set<number>;
......@@ -299,7 +299,7 @@ class LocalTrainingService implements TrainingService {
public getClusterMetadata(key: string): Promise<string> {
switch (key) {
case TrialConfigMetadataKey.TRIAL_CONFIG:
case TrialConfigMetadataKey.TRIAL_CONFIG: {
let getResult: Promise<string>;
if (this.localTrialConfig === undefined) {
getResult = Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, `${key} is never set yet`));
......@@ -308,6 +308,7 @@ class LocalTrainingService implements TrainingService {
}
return getResult;
}
default:
return Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, 'Key not found'));
}
......
......@@ -48,10 +48,10 @@ export namespace HDFSClientUtility {
* @param hdfsClient hdfs client
*/
// tslint:disable: no-unsafe-any non-literal-fs-path no-any
export async function copyFileToHdfs(localFilePath : string, hdfsFilePath : string, hdfsClient : any) : Promise<void> {
export async function copyFileToHdfs(localFilePath: string, hdfsFilePath: string, hdfsClient: any): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
// tslint:disable-next-line:non-literal-fs-path
fs.exists(localFilePath, (exists : boolean) => {
fs.exists(localFilePath, (exists: boolean) => {
// Detect if local file exist
if (exists) {
const localFileStream: fs.ReadStream = fs.createReadStream(localFilePath);
......@@ -60,7 +60,7 @@ export namespace HDFSClientUtility {
hdfsFileStream.on('finish', () => {
deferred.resolve();
});
hdfsFileStream.on('error', (err : any) => {
hdfsFileStream.on('error', (err: any) => {
getLogger()
.error(`HDFSCientUtility:copyFileToHdfs, copy file failed, err is ${err.message}`);
deferred.reject(err);
......@@ -82,7 +82,7 @@ export namespace HDFSClientUtility {
* @param hdfsDirectory HDFS directory
* @param hdfsClient HDFS client
*/
export async function copyDirectoryToHdfs(localDirectory : string, hdfsDirectory : string, hdfsClient : any) : Promise<void> {
export async function copyDirectoryToHdfs(localDirectory: string, hdfsDirectory: string, hdfsClient: any): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
// TODO: fs.readdirSync doesn't support ~($HOME)
const fileNameArray: string[] = fs.readdirSync(localDirectory);
......@@ -108,28 +108,51 @@ export namespace HDFSClientUtility {
return deferred.promise;
}
/**
* Check if an HDFS path already exists
*
* @param hdfsPath target path need to check in HDFS
* @param hdfsClient HDFS client
*/
export async function pathExists(hdfsPath: string, hdfsClient: any): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
hdfsClient.exists(hdfsPath, (exist: boolean) => {
deferred.resolve(exist);
});
let timeoutId: NodeJS.Timer;
const delayTimeout: Promise<boolean> = new Promise<boolean>((resolve: Function, reject: Function): void => {
// Set timeout and reject the promise once reach timeout (5 seconds)
timeoutId = setTimeout(() => { reject(`Check HDFS path ${hdfsPath} exists timeout`); }, 5000);
});
return Promise.race([deferred.promise, delayTimeout])
.finally(() => { clearTimeout(timeoutId); });
}
/**
* Read content from HDFS file
*
* @param hdfsPath HDFS file path
* @param hdfsClient HDFS client
*/
export async function readFileFromHDFS(hdfsPath : string, hdfsClient : any) : Promise<Buffer> {
export async function readFileFromHDFS(hdfsPath: string, hdfsClient: any): Promise<Buffer> {
const deferred: Deferred<Buffer> = new Deferred<Buffer>();
let buffer : Buffer = Buffer.alloc(0);
let buffer: Buffer = Buffer.alloc(0);
const exist : boolean = await pathExists(hdfsPath, hdfsClient);
const exist: boolean = await pathExists(hdfsPath, hdfsClient);
if (!exist) {
deferred.reject(`${hdfsPath} doesn't exists`);
}
const remoteFileStream: any = hdfsClient.createReadStream(hdfsPath);
remoteFileStream.on('error', (err : any) => {
remoteFileStream.on('error', (err: any) => {
// Reject with the error
deferred.reject(err);
});
remoteFileStream.on('data', (chunk : any) => {
remoteFileStream.on('data', (chunk: any) => {
// Concat the data chunk to buffer
buffer = Buffer.concat([buffer, chunk]);
});
......@@ -142,39 +165,16 @@ export namespace HDFSClientUtility {
return deferred.promise;
}
/**
* Check if an HDFS path already exists
*
* @param hdfsPath target path need to check in HDFS
* @param hdfsClient HDFS client
*/
export async function pathExists(hdfsPath : string, hdfsClient : any) : Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>();
hdfsClient.exists(hdfsPath, (exist : boolean) => {
deferred.resolve(exist);
});
let timeoutId : NodeJS.Timer;
const delayTimeout : Promise<boolean> = new Promise<boolean>((resolve : Function, reject : Function) : void => {
// Set timeout and reject the promise once reach timeout (5 seconds)
timeoutId = setTimeout(() => { reject(`Check HDFS path ${hdfsPath} exists timeout`); }, 5000);
});
return Promise.race([deferred.promise, delayTimeout])
.finally(() => { clearTimeout(timeoutId); });
}
/**
* Mkdir in HDFS, use default permission 755
*
* @param hdfsPath the path in HDFS. It could be either file or directory
* @param hdfsClient HDFS client
*/
export function mkdir(hdfsPath : string, hdfsClient : any) : Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>();
export function mkdir(hdfsPath: string, hdfsClient: any): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
hdfsClient.mkdir(hdfsPath, (err : any) => {
hdfsClient.mkdir(hdfsPath, (err: any) => {
if (!err) {
deferred.resolve(true);
} else {
......@@ -191,14 +191,14 @@ export namespace HDFSClientUtility {
* @param hdfsPath the path in HDFS. It could be either file or directory
* @param hdfsClient HDFS client
*/
export async function readdir(hdfsPath : string, hdfsClient : any) : Promise<string[]> {
const deferred : Deferred<string[]> = new Deferred<string[]>();
const exist : boolean = await pathExists(hdfsPath, hdfsClient);
export async function readdir(hdfsPath: string, hdfsClient: any): Promise<string[]> {
const deferred: Deferred<string[]> = new Deferred<string[]>();
const exist: boolean = await pathExists(hdfsPath, hdfsClient);
if (!exist) {
deferred.reject(`${hdfsPath} doesn't exists`);
}
hdfsClient.readdir(hdfsPath, (err : any, files : any[]) => {
hdfsClient.readdir(hdfsPath, (err: any, files: any[]) => {
if (err) {
deferred.reject(err);
}
......@@ -215,9 +215,9 @@ export namespace HDFSClientUtility {
* @param hdfsClient HDFS client
* @param recursive Mark if need to delete recursively
*/
export function deletePath(hdfsPath : string, hdfsClient : any, recursive : boolean = true) : Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>();
hdfsClient.unlink(hdfsPath, recursive, (err : any) => {
export function deletePath(hdfsPath: string, hdfsClient: any, recursive: boolean = true): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
hdfsClient.unlink(hdfsPath, recursive, (err: any) => {
if (!err) {
deferred.resolve(true);
} else {
......
......@@ -24,7 +24,7 @@ export class PAITaskRole {
//Shared memory for one task in the task role
public readonly shmMB?: number;
//portList to specify the port used in container
public portList?: portListMetaData[];
public portList?: PortListMetaData[];
/**
* Constructor
......@@ -35,8 +35,8 @@ export class PAITaskRole {
* @param gpuNumber GPU number for one task in the task role, no less than 0
* @param command Executable command for tasks in the task role, can not be empty
*/
constructor(name : string, taskNumber : number, cpuNumber : number, memoryMB : number, gpuNumber : number,
command : string, shmMB?: number, portList?: portListMetaData[]) {
constructor(name: string, taskNumber: number, cpuNumber: number, memoryMB: number, gpuNumber: number,
command: string, shmMB?: number, portList?: PortListMetaData[]) {
this.name = name;
this.taskNumber = taskNumber;
this.cpuNumber = cpuNumber;
......@@ -75,8 +75,8 @@ export class PAIJobConfig {
* @param outputDir Output directory on HDFS
* @param taskRoles List of taskRole, one task role at least
*/
constructor(jobName: string, image : string, codeDir : string,
taskRoles : PAITaskRole[], virtualCluster: string, authFile?: string) {
constructor(jobName: string, image: string, codeDir: string,
taskRoles: PAITaskRole[], virtualCluster: string, authFile?: string) {
this.jobName = jobName;
this.image = image;
this.codeDir = codeDir;
......@@ -102,7 +102,7 @@ export class PAIClusterConfig {
* @param host Host IP of PAI Cluster
* @param token PAI token of PAI Cluster
*/
constructor(userName: string, host : string, passWord?: string, token?: string) {
constructor(userName: string, host: string, passWord?: string, token?: string) {
this.userName = userName;
this.passWord = passWord;
this.host = host;
......@@ -113,8 +113,8 @@ export class PAIClusterConfig {
/**
* portList data structure used in PAI taskRole
*/
export class portListMetaData {
public readonly label : string = '';
export class PortListMetaData {
public readonly label: string = '';
public readonly beginAt: number = 0;
public readonly portNumber: number = 0;
}
......@@ -135,10 +135,10 @@ export class NNIPAITrialConfig extends TrialConfig {
//authentication file used for private Docker registry
public authFile?: string;
//portList to specify the port used in container
public portList?: portListMetaData[];
public portList?: PortListMetaData[];
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number,
image: string, virtualCluster?: string, shmMB?: number, authFile?: string, portList?: portListMetaData[]) {
constructor(command: string, codeDir: string, gpuNum: number, cpuNum: number, memoryMB: number,
image: string, virtualCluster?: string, shmMB?: number, authFile?: string, portList?: PortListMetaData[]) {
super(command, codeDir, gpuNum);
this.cpuNum = cpuNum;
this.memoryMB = memoryMB;
......
......@@ -22,7 +22,7 @@ export class PAITrialJobDetail implements TrialJobDetail {
public hdfsLogPath: string;
public isEarlyStopped?: boolean;
constructor(id: string, status: TrialJobStatus, paiJobName : string,
constructor(id: string, status: TrialJobStatus, paiJobName: string,
submitTime: number, workingDirectory: string, form: TrialJobApplicationForm, hdfsLogPath: string) {
this.id = id;
this.status = status;
......
......@@ -16,10 +16,10 @@ import { PAITrialJobDetail } from './paiData';
* Collector PAI jobs info from PAI cluster, and update pai job status locally
*/
export class PAIJobInfoCollector {
private readonly trialJobsMap : Map<string, PAITrialJobDetail>;
private readonly trialJobsMap: Map<string, PAITrialJobDetail>;
private readonly log: Logger = getLogger();
private readonly statusesNeedToCheck : TrialJobStatus[];
private readonly finalStatuses : TrialJobStatus[];
private readonly statusesNeedToCheck: TrialJobStatus[];
private readonly finalStatuses: TrialJobStatus[];
constructor(jobMap: Map<string, PAITrialJobDetail>) {
this.trialJobsMap = jobMap;
......@@ -27,12 +27,12 @@ export class PAIJobInfoCollector {
this.finalStatuses = ['SUCCEEDED', 'FAILED', 'USER_CANCELED', 'SYS_CANCELED', 'EARLY_STOPPED'];
}
public async retrieveTrialStatus(paiToken? : string, paiClusterConfig?: PAIClusterConfig) : Promise<void> {
public async retrieveTrialStatus(paiToken? : string, paiClusterConfig?: PAIClusterConfig): Promise<void> {
if (paiClusterConfig === undefined || paiToken === undefined) {
return Promise.resolve();
}
const updatePaiTrialJobs : Promise<void>[] = [];
const updatePaiTrialJobs: Promise<void>[] = [];
for (const [trialJobId, paiTrialJob] of this.trialJobsMap) {
if (paiTrialJob === undefined) {
throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
......@@ -43,9 +43,8 @@ export class PAIJobInfoCollector {
await Promise.all(updatePaiTrialJobs);
}
private getSinglePAITrialJobInfo(paiTrialJob : PAITrialJobDetail, paiToken : string, paiClusterConfig: PAIClusterConfig)
: Promise<void> {
const deferred : Deferred<void> = new Deferred<void>();
private getSinglePAITrialJobInfo(paiTrialJob: PAITrialJobDetail, paiToken: string, paiClusterConfig: PAIClusterConfig): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
if (!this.statusesNeedToCheck.includes(paiTrialJob.status)) {
deferred.resolve();
......
......@@ -24,7 +24,7 @@ export class PAIJobRestServer extends ClusterJobRestServer {
private parameterFileMetaList: ParameterFileMeta[] = [];
@Inject
private readonly paiTrainingService : PAITrainingService;
private readonly paiTrainingService: PAITrainingService;
/**
* constructor to provide NNIRestServer's own rest property, e.g. port
......@@ -35,7 +35,7 @@ export class PAIJobRestServer extends ClusterJobRestServer {
}
// tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId : string, metrics : any[]) : void {
protected handleTrialMetrics(jobId: string, metrics: any[]): void {
// Split metrics array into single metric, then emit
// Warning: If not split metrics into single ones, the behavior will be UNKNOWN
for (const singleMetric of metrics) {
......
......@@ -3,7 +3,6 @@
'use strict';
import * as cpp from 'child-process-promise';
import * as fs from 'fs';
import * as path from 'path';
// tslint:disable-next-line:no-implicit-dependencies
......@@ -13,7 +12,6 @@ import * as component from '../../common/component';
import { EventEmitter } from 'events';
import { Deferred } from 'ts-deferred';
import { String } from 'typescript-string-operations';
import { MethodNotImplementedError } from '../../common/errors';
import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log';
import {
......@@ -52,8 +50,8 @@ class PAITrainingService implements TrainingService {
private paiToken? : string;
private paiTokenUpdateTime?: number;
private readonly paiTokenUpdateInterval: number;
private readonly experimentId! : string;
private readonly paiJobCollector : PAIJobInfoCollector;
private readonly experimentId!: string;
private readonly paiJobCollector: PAIJobInfoCollector;
private paiRestServerPort?: number;
private nniManagerIpConfig?: NNIManagerIpConfig;
private copyExpCodeDirPromise?: Promise<void>;
......@@ -126,7 +124,7 @@ class PAITrainingService implements TrainingService {
if (this.paiClusterConfig === undefined) {
throw new Error(`paiClusterConfig not initialized!`);
}
const deferred : Deferred<PAITrialJobDetail> = new Deferred<PAITrialJobDetail>();
const deferred: Deferred<PAITrialJobDetail> = new Deferred<PAITrialJobDetail>();
this.log.info(`submitTrialJob: form: ${JSON.stringify(form)}`);
......@@ -137,7 +135,7 @@ class PAITrainingService implements TrainingService {
const hdfsCodeDir: string = HDFSClientUtility.getHdfsTrialWorkDir(this.paiClusterConfig.userName, trialJobId);
const hdfsOutputDir: string = unixPathJoin(hdfsCodeDir, 'nnioutput');
const hdfsLogPath : string = String.Format(
const hdfsLogPath: string = String.Format(
PAI_LOG_PATH_FORMAT,
this.paiClusterConfig.host,
hdfsOutputDir
......@@ -175,8 +173,8 @@ class PAITrainingService implements TrainingService {
// tslint:disable:no-http-string
public cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> {
const trialJobDetail : PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
const deferred : Deferred<void> = new Deferred<void>();
const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
const deferred: Deferred<void> = new Deferred<void>();
if (trialJobDetail === undefined) {
this.log.error(`cancelTrialJob: trial job id ${trialJobId} not found`);
......@@ -222,7 +220,7 @@ class PAITrainingService implements TrainingService {
// tslint:disable: no-unsafe-any no-any
// tslint:disable-next-line:max-func-body-length
public async setClusterMetadata(key: string, value: string): Promise<void> {
const deferred : Deferred<void> = new Deferred<void>();
const deferred: Deferred<void> = new Deferred<void>();
switch (key) {
case TrialConfigMetadataKey.NNI_MANAGER_IP:
......@@ -303,7 +301,7 @@ class PAITrainingService implements TrainingService {
// tslint:enable: no-unsafe-any
public getClusterMetadata(key: string): Promise<string> {
const deferred : Deferred<string> = new Deferred<string>();
const deferred: Deferred<string> = new Deferred<string>();
deferred.resolve();
......@@ -314,7 +312,7 @@ class PAITrainingService implements TrainingService {
this.log.info('Stopping PAI training service...');
this.stopping = true;
const deferred : Deferred<void> = new Deferred<void>();
const deferred: Deferred<void> = new Deferred<void>();
const restServer: PAIJobRestServer = component.get(PAIJobRestServer);
try {
await restServer.stop();
......@@ -329,13 +327,13 @@ class PAITrainingService implements TrainingService {
return deferred.promise;
}
public get MetricsEmitter() : EventEmitter {
public get MetricsEmitter(): EventEmitter {
return this.metricsEmitter;
}
// tslint:disable-next-line:max-func-body-length
private async submitTrialJobToPAI(trialJobId: string): Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>();
const deferred: Deferred<boolean> = new Deferred<boolean>();
const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
if (trialJobDetail === undefined) {
......@@ -372,7 +370,7 @@ class PAITrainingService implements TrainingService {
//create tmp trial working folder locally.
await execMkdir(trialLocalTempFolder);
const runScriptContent : string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
const runScriptContent: string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
// Write NNI installation file to local tmp files
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), runScriptContent, { encoding: 'utf8' });
......@@ -388,7 +386,7 @@ class PAITrainingService implements TrainingService {
// tslint:disable-next-line: strict-boolean-expressions
const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address();
const version: string = this.versionCheck ? await getVersion() : '';
const nniPaiTrialCommand : string = String.Format(
const nniPaiTrialCommand: string = String.Format(
PAI_TRIAL_COMMAND_FORMAT,
// PAI will copy job's codeDir into /root directory
`$PWD/${trialJobId}`,
......@@ -411,7 +409,7 @@ class PAITrainingService implements TrainingService {
// tslint:disable-next-line:no-console
this.log.info(`nniPAItrial command is ${nniPaiTrialCommand.trim()}`);
const paiTaskRoles : PAITaskRole[] = [
const paiTaskRoles: PAITaskRole[] = [
new PAITaskRole(
`nni_trail_${trialJobId}`,
// Task role number
......@@ -431,7 +429,7 @@ class PAITrainingService implements TrainingService {
)
];
const paiJobConfig : PAIJobConfig = new PAIJobConfig(
const paiJobConfig: PAIJobConfig = new PAIJobConfig(
// Job name
trialJobDetail.paiJobName,
// Docker image
......@@ -472,7 +470,7 @@ class PAITrainingService implements TrainingService {
// tslint:disable:no-any no-unsafe-any
request(submitJobRequest, (error: Error, response: request.Response, body: any) => {
if ((error !== undefined && error !== null) || response.statusCode >= 400) {
const errorMessage : string = (error !== undefined && error !== null) ? error.message :
const errorMessage: string = (error !== undefined && error !== null) ? error.message :
`Submit trial ${trialJobId} failed, http code:${response.statusCode}, http body: ${response.body.message}`;
trialJobDetail.status = 'FAILED';
deferred.resolve(true);
......@@ -527,7 +525,7 @@ class PAITrainingService implements TrainingService {
* Update pai token by the interval time or initialize the pai token
*/
private async updatePaiToken(): Promise<void> {
const deferred : Deferred<void> = new Deferred<void>();
const deferred: Deferred<void> = new Deferred<void>();
const currentTime: number = new Date().getTime();
//If pai token initialized and not reach the interval time, do not update
......@@ -603,7 +601,7 @@ class PAITrainingService implements TrainingService {
}
private postParameterFileMeta(parameterFileMeta: ParameterFileMeta): Promise<void> {
const deferred : Deferred<void> = new Deferred<void>();
const deferred: Deferred<void> = new Deferred<void>();
const restServer: PAIJobRestServer = component.get(PAIJobRestServer);
const req: request.Options = {
uri: `${restServer.endPoint}${restServer.apiRootUrl}/parameter-file-meta`,
......
......@@ -15,7 +15,7 @@ export class PAITrialConfig extends TrialConfig {
public readonly dataDir: string;
public readonly outputDir: string;
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number,
constructor(command: string, codeDir: string, gpuNum: number, cpuNum: number, memoryMB: number,
image: string, dataDir: string, outputDir: string) {
super(command, codeDir, gpuNum);
this.cpuNum = cpuNum;
......
......@@ -5,7 +5,6 @@
import * as assert from 'assert';
import { getLogger, Logger } from '../../common/log';
import { TrialJobDetail } from '../../common/trainingService';
import { randomSelect } from '../../common/utils';
import { GPUInfo } from '../common/gpuData';
import {
......@@ -19,7 +18,7 @@ type SCHEDULE_POLICY_NAME = 'random' | 'round-robin';
*/
export class GPUScheduler {
private readonly machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>;
private readonly machineSSHClientMap: Map<RemoteMachineMeta, SSHClientManager>;
private readonly log: Logger = getLogger();
private readonly policyName: SCHEDULE_POLICY_NAME = 'round-robin';
private roundRobinIndex: number = 0;
......@@ -29,7 +28,7 @@ export class GPUScheduler {
* Constructor
* @param machineSSHClientMap map from remote machine to sshClient
*/
constructor(machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>) {
constructor(machineSSHClientMap: Map<RemoteMachineMeta, SSHClientManager>) {
assert(machineSSHClientMap.size > 0);
this.machineSSHClientMap = machineSSHClientMap;
this.configuredRMs = Array.from(machineSSHClientMap.keys());
......@@ -39,7 +38,7 @@ export class GPUScheduler {
* Schedule a machine according to the constraints (requiredGPUNum)
* @param requiredGPUNum required GPU number
*/
public scheduleMachine(requiredGPUNum: number | undefined, trialJobDetail : RemoteMachineTrialJobDetail) : RemoteMachineScheduleResult {
public scheduleMachine(requiredGPUNum: number | undefined, trialJobDetail: RemoteMachineTrialJobDetail): RemoteMachineScheduleResult {
if(requiredGPUNum === undefined) {
requiredGPUNum = 0;
}
......@@ -48,7 +47,7 @@ export class GPUScheduler {
assert(allRMs.length > 0);
// Step 1: Check if required GPU number not exceeds the total GPU number in all machines
const eligibleRM: RemoteMachineMeta[] = allRMs.filter((rmMeta : RemoteMachineMeta) =>
const eligibleRM: RemoteMachineMeta[] = allRMs.filter((rmMeta: RemoteMachineMeta) =>
rmMeta.gpuSummary === undefined || requiredGPUNum === 0 || (requiredGPUNum !== undefined && rmMeta.gpuSummary.gpuCount >= requiredGPUNum));
if (eligibleRM.length === 0) {
// If the required gpu number exceeds the upper limit of all machine's GPU number
......@@ -134,8 +133,8 @@ export class GPUScheduler {
* @param availableGPUMap available GPU resource filled by this detection
* @returns Available GPU number on this remote machine
*/
private gpuResourceDetection() : Map<RemoteMachineMeta, GPUInfo[]> {
const totalResourceMap : Map<RemoteMachineMeta, GPUInfo[]> = new Map<RemoteMachineMeta, GPUInfo[]>();
private gpuResourceDetection(): Map<RemoteMachineMeta, GPUInfo[]> {
const totalResourceMap: Map<RemoteMachineMeta, GPUInfo[]> = new Map<RemoteMachineMeta, GPUInfo[]>();
this.machineSSHClientMap.forEach((sshClientManager: SSHClientManager, rmMeta: RemoteMachineMeta) => {
// Assgin totoal GPU count as init available GPU number
if (rmMeta.gpuSummary !== undefined) {
......@@ -224,11 +223,11 @@ export class GPUScheduler {
resultType: ScheduleResultType.SUCCEED,
scheduleInfo: {
rmMeta: rmMeta,
cuda_visible_device: allocatedGPUs
.map((gpuInfo: GPUInfo) => {
return gpuInfo.index;
})
.join(',')
cudaVisibleDevice: allocatedGPUs
.map((gpuInfo: GPUInfo) => {
return gpuInfo.index;
})
.join(',')
}
};
}
......
......@@ -13,13 +13,13 @@ import { GPUInfo, GPUSummary } from '../common/gpuData';
* Metadata of remote machine for configuration and statuc query
*/
export class RemoteMachineMeta {
public readonly ip : string = '';
public readonly port : number = 22;
public readonly username : string = '';
public readonly ip: string = '';
public readonly port: number = 22;
public readonly username: string = '';
public readonly passwd: string = '';
public readonly sshKeyPath?: string;
public readonly passphrase?: string;
public gpuSummary : GPUSummary | undefined;
public gpuSummary: GPUSummary | undefined;
public readonly gpuIndices?: string;
public readonly maxTrialNumPerGpu?: number;
//TODO: initialize varialbe in constructor
......@@ -43,11 +43,11 @@ export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined {
* The execution result for command executed on remote machine
*/
export class RemoteCommandResult {
public readonly stdout : string;
public readonly stderr : string;
public readonly exitCode : number;
public readonly stdout: string;
public readonly stderr: string;
public readonly exitCode: number;
constructor(stdout : string, stderr : string, exitCode : number) {
constructor(stdout: string, stderr: string, exitCode: number) {
this.stdout = stdout;
this.stderr = stderr;
this.exitCode = exitCode;
......@@ -225,9 +225,9 @@ export class SSHClientManager {
}
}
export type RemoteMachineScheduleResult = { scheduleInfo : RemoteMachineScheduleInfo | undefined; resultType : ScheduleResultType};
export type RemoteMachineScheduleResult = { scheduleInfo: RemoteMachineScheduleInfo | undefined; resultType: ScheduleResultType};
export type RemoteMachineScheduleInfo = { rmMeta : RemoteMachineMeta; cuda_visible_device : string};
export type RemoteMachineScheduleInfo = { rmMeta: RemoteMachineMeta; cudaVisibleDevice: string};
export enum ScheduleResultType {
// Schedule succeeded
......
......@@ -15,7 +15,7 @@ import { RemoteMachineTrainingService } from './remoteMachineTrainingService';
@component.Singleton
export class RemoteMachineJobRestServer extends ClusterJobRestServer {
@Inject
private readonly remoteMachineTrainingService : RemoteMachineTrainingService;
private readonly remoteMachineTrainingService: RemoteMachineTrainingService;
/**
* constructor to provide NNIRestServer's own rest property, e.g. port
......@@ -26,7 +26,7 @@ export class RemoteMachineJobRestServer extends ClusterJobRestServer {
}
// tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId : string, metrics : any[]) : void {
protected handleTrialMetrics(jobId: string, metrics: any[]): void {
// Split metrics array into single metric, then emit
// Warning: If not split metrics into single ones, the behavior will be UNKNOWNls
for (const singleMetric of metrics) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment