Unverified Commit 1328f412 authored by chicm-ms's avatar chicm-ms Committed by GitHub
Browse files

Fix eslint errors (#1836)

* update eslint rules
* auto fix eslint
* manually fix eslint (#1833)
parent 8c07cf41
...@@ -118,7 +118,7 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig { ...@@ -118,7 +118,7 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig {
export class KubeflowTrialConfigTemplate extends KubernetesTrialConfigTemplate { export class KubeflowTrialConfigTemplate extends KubernetesTrialConfigTemplate {
public readonly replicas: number; public readonly replicas: number;
constructor(replicas: number, command : string, gpuNum : number, constructor(replicas: number, command: string, gpuNum: number,
cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) { cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) {
super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryAuthPath); super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryAuthPath);
this.replicas = replicas; this.replicas = replicas;
......
...@@ -17,7 +17,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector { ...@@ -17,7 +17,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
} }
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined, protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> { kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
if (!this.statusesNeedToCheck.includes(kubernetesTrialJob.status)) { if (!this.statusesNeedToCheck.includes(kubernetesTrialJob.status)) {
return Promise.resolve(); return Promise.resolve();
} }
...@@ -40,7 +40,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector { ...@@ -40,7 +40,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
if (kubernetesJobInfo.status && kubernetesJobInfo.status.conditions) { if (kubernetesJobInfo.status && kubernetesJobInfo.status.conditions) {
const latestCondition: any = kubernetesJobInfo.status.conditions[kubernetesJobInfo.status.conditions.length - 1]; const latestCondition: any = kubernetesJobInfo.status.conditions[kubernetesJobInfo.status.conditions.length - 1];
const tfJobType : KubeflowJobStatus = <KubeflowJobStatus>latestCondition.type; const tfJobType: KubeflowJobStatus = <KubeflowJobStatus>latestCondition.type;
switch (tfJobType) { switch (tfJobType) {
case 'Created': case 'Created':
kubernetesTrialJob.status = 'WAITING'; kubernetesTrialJob.status = 'WAITING';
......
...@@ -17,7 +17,6 @@ import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from ...@@ -17,7 +17,6 @@ import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../../common/containerJobData'; import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../../common/containerJobData';
import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey'; import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey';
import { validateCodeDir } from '../../common/util'; import { validateCodeDir } from '../../common/util';
import { AzureStorageClientUtility } from '../azureStorageClientUtils';
import { NFSConfig } from '../kubernetesConfig'; import { NFSConfig } from '../kubernetesConfig';
import { KubernetesTrialJobDetail } from '../kubernetesData'; import { KubernetesTrialJobDetail } from '../kubernetesData';
import { KubernetesTrainingService } from '../kubernetesTrainingService'; import { KubernetesTrainingService } from '../kubernetesTrainingService';
...@@ -116,7 +115,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -116,7 +115,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.nniManagerIpConfig = <NNIManagerIpConfig>JSON.parse(value); this.nniManagerIpConfig = <NNIManagerIpConfig>JSON.parse(value);
break; break;
case TrialConfigMetadataKey.KUBEFLOW_CLUSTER_CONFIG: case TrialConfigMetadataKey.KUBEFLOW_CLUSTER_CONFIG: {
const kubeflowClusterJsonObject: object = JSON.parse(value); const kubeflowClusterJsonObject: object = JSON.parse(value);
this.kubeflowClusterConfig = KubeflowClusterConfigFactory.generateKubeflowClusterConfig(kubeflowClusterJsonObject); this.kubeflowClusterConfig = KubeflowClusterConfigFactory.generateKubeflowClusterConfig(kubeflowClusterJsonObject);
if (this.kubeflowClusterConfig.storageType === 'azureStorage') { if (this.kubeflowClusterConfig.storageType === 'azureStorage') {
...@@ -125,9 +124,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -125,9 +124,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.azureStorageShare = azureKubeflowClusterConfig.azureStorage.azureShare; this.azureStorageShare = azureKubeflowClusterConfig.azureStorage.azureShare;
await this.createAzureStorage( await this.createAzureStorage(
azureKubeflowClusterConfig.keyVault.vaultName, azureKubeflowClusterConfig.keyVault.vaultName,
azureKubeflowClusterConfig.keyVault.name, azureKubeflowClusterConfig.keyVault.name
azureKubeflowClusterConfig.azureStorage.accountName,
azureKubeflowClusterConfig.azureStorage.azureShare
); );
} else if (this.kubeflowClusterConfig.storageType === 'nfs') { } else if (this.kubeflowClusterConfig.storageType === 'nfs') {
const nfsKubeflowClusterConfig: KubeflowClusterConfigNFS = <KubeflowClusterConfigNFS>this.kubeflowClusterConfig; const nfsKubeflowClusterConfig: KubeflowClusterConfigNFS = <KubeflowClusterConfigNFS>this.kubeflowClusterConfig;
...@@ -139,8 +136,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -139,8 +136,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.kubernetesCRDClient = KubeflowOperatorClientFactory.createClient( this.kubernetesCRDClient = KubeflowOperatorClientFactory.createClient(
this.kubeflowClusterConfig.operator, this.kubeflowClusterConfig.apiVersion); this.kubeflowClusterConfig.operator, this.kubeflowClusterConfig.apiVersion);
break; break;
}
case TrialConfigMetadataKey.TRIAL_CONFIG: case TrialConfigMetadataKey.TRIAL_CONFIG: {
if (this.kubeflowClusterConfig === undefined) { if (this.kubeflowClusterConfig === undefined) {
this.log.error('kubeflow cluster config is not initialized'); this.log.error('kubeflow cluster config is not initialized');
...@@ -163,6 +160,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -163,6 +160,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
return Promise.reject(new Error(error)); return Promise.reject(new Error(error));
} }
break; break;
}
case TrialConfigMetadataKey.VERSION_CHECK: case TrialConfigMetadataKey.VERSION_CHECK:
this.versionCheck = (value === 'true' || value === 'True'); this.versionCheck = (value === 'true' || value === 'True');
break; break;
...@@ -235,7 +233,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -235,7 +233,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
//create tmp trial working folder locally. //create tmp trial working folder locally.
await cpp.exec(`mkdir -p ${trialLocalTempFolder}`); await cpp.exec(`mkdir -p ${trialLocalTempFolder}`);
const runScriptContent : string = CONTAINER_INSTALL_NNI_SHELL_FORMAT; const runScriptContent: string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
// Write NNI installation file to local tmp files // Write NNI installation file to local tmp files
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), runScriptContent, { encoding: 'utf8' }); await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), runScriptContent, { encoding: 'utf8' });
...@@ -293,14 +291,14 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -293,14 +291,14 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
throw Error(`operator ${this.kubeflowClusterConfig.operator} is invalid`); throw Error(`operator ${this.kubeflowClusterConfig.operator} is invalid`);
} }
const workerPodResources : any = {}; const workerPodResources: any = {};
if (kubeflowTrialConfig.worker !== undefined) { if (kubeflowTrialConfig.worker !== undefined) {
workerPodResources.requests = this.generatePodResource(kubeflowTrialConfig.worker.memoryMB, kubeflowTrialConfig.worker.cpuNum, workerPodResources.requests = this.generatePodResource(kubeflowTrialConfig.worker.memoryMB, kubeflowTrialConfig.worker.cpuNum,
kubeflowTrialConfig.worker.gpuNum); kubeflowTrialConfig.worker.gpuNum);
} }
workerPodResources.limits = {...workerPodResources.requests}; workerPodResources.limits = {...workerPodResources.requests};
const nonWorkerResources : any = {}; const nonWorkerResources: any = {};
if (this.kubeflowClusterConfig.operator === 'tf-operator') { if (this.kubeflowClusterConfig.operator === 'tf-operator') {
const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig; const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
if (tensorflowTrialConfig.ps !== undefined) { if (tensorflowTrialConfig.ps !== undefined) {
...@@ -330,8 +328,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -330,8 +328,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param workerPodResources worker pod template * @param workerPodResources worker pod template
* @param nonWorkerPodResources non-worker pod template, like ps or master * @param nonWorkerPodResources non-worker pod template, like ps or master
*/ */
private async generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName : string, workerPodResources : any, private async generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName: string, workerPodResources: any,
nonWorkerPodResources?: any) : Promise<any> { nonWorkerPodResources?: any): Promise<any> {
if (this.kubeflowClusterConfig === undefined) { if (this.kubeflowClusterConfig === undefined) {
throw new Error('Kubeflow Cluster config is not initialized'); throw new Error('Kubeflow Cluster config is not initialized');
} }
...@@ -348,11 +346,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -348,11 +346,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
const replicaSpecsObjMap: Map<string, object> = new Map<string, object>(); const replicaSpecsObjMap: Map<string, object> = new Map<string, object>();
if (this.kubeflowTrialConfig.operatorType === 'tf-operator') { if (this.kubeflowTrialConfig.operatorType === 'tf-operator') {
const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig; const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
let privateRegistrySecretName = await this.createRegistrySecret(tensorflowTrialConfig.worker.privateRegistryAuthPath); const privateRegistrySecretName = await this.createRegistrySecret(tensorflowTrialConfig.worker.privateRegistryAuthPath);
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.worker.replicas, replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.worker.replicas,
tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName); tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName);
if (tensorflowTrialConfig.ps !== undefined) { if (tensorflowTrialConfig.ps !== undefined) {
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(tensorflowTrialConfig.ps.privateRegistryAuthPath); const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(tensorflowTrialConfig.ps.privateRegistryAuthPath);
replicaSpecsObj.Ps = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.ps.replicas, replicaSpecsObj.Ps = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.ps.replicas,
tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources, privateRegistrySecretName); tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources, privateRegistrySecretName);
} }
...@@ -360,11 +358,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -360,11 +358,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
} else if (this.kubeflowTrialConfig.operatorType === 'pytorch-operator') { } else if (this.kubeflowTrialConfig.operatorType === 'pytorch-operator') {
const pytorchTrialConfig: KubeflowTrialConfigPytorch = <KubeflowTrialConfigPytorch>this.kubeflowTrialConfig; const pytorchTrialConfig: KubeflowTrialConfigPytorch = <KubeflowTrialConfigPytorch>this.kubeflowTrialConfig;
if (pytorchTrialConfig.worker !== undefined) { if (pytorchTrialConfig.worker !== undefined) {
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.worker.privateRegistryAuthPath); const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.worker.privateRegistryAuthPath);
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.worker.replicas, replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.worker.replicas,
pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName); pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName);
} }
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.master.privateRegistryAuthPath); const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.master.privateRegistryAuthPath);
replicaSpecsObj.Master = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.master.replicas, replicaSpecsObj.Master = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.master.replicas,
pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources, privateRegistrySecretName); pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources, privateRegistrySecretName);
...@@ -448,7 +446,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber ...@@ -448,7 +446,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
resources: podResources resources: podResources
} }
]); ]);
let spec: any = { const spec: any = {
containers: containersSpecMap.get('containers'), containers: containersSpecMap.get('containers'),
restartPolicy: 'ExitCode', restartPolicy: 'ExitCode',
volumes: volumeSpecMap.get('nniVolumes') volumes: volumeSpecMap.get('nniVolumes')
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
'use strict'; 'use strict';
// eslint-disable-next-line @typescript-eslint/camelcase
import { Client1_10, config } from 'kubernetes-client'; import { Client1_10, config } from 'kubernetes-client';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
...@@ -21,7 +22,7 @@ class GeneralK8sClient { ...@@ -21,7 +22,7 @@ class GeneralK8sClient {
public async createSecret(secretManifest: any): Promise<boolean> { public async createSecret(secretManifest: any): Promise<boolean> {
let result: Promise<boolean>; let result: Promise<boolean>;
const response : any = await this.client.api.v1.namespaces('default').secrets const response: any = await this.client.api.v1.namespaces('default').secrets
.post({body: secretManifest}); .post({body: secretManifest});
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) { if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(true); result = Promise.resolve(true);
...@@ -73,7 +74,7 @@ abstract class KubernetesCRDClient { ...@@ -73,7 +74,7 @@ abstract class KubernetesCRDClient {
public async createKubernetesJob(jobManifest: any): Promise<boolean> { public async createKubernetesJob(jobManifest: any): Promise<boolean> {
let result: Promise<boolean>; let result: Promise<boolean>;
const response : any = await this.operator.post({body: jobManifest}); const response: any = await this.operator.post({body: jobManifest});
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) { if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(true); result = Promise.resolve(true);
} else { } else {
...@@ -86,7 +87,7 @@ abstract class KubernetesCRDClient { ...@@ -86,7 +87,7 @@ abstract class KubernetesCRDClient {
//TODO : replace any //TODO : replace any
public async getKubernetesJob(kubeflowJobName: string): Promise<any> { public async getKubernetesJob(kubeflowJobName: string): Promise<any> {
let result: Promise<any>; let result: Promise<any>;
const response : any = await this.operator(kubeflowJobName) const response: any = await this.operator(kubeflowJobName)
.get(); .get();
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) { if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(response.body); result = Promise.resolve(response.body);
...@@ -104,7 +105,7 @@ abstract class KubernetesCRDClient { ...@@ -104,7 +105,7 @@ abstract class KubernetesCRDClient {
.map((labelKey: string) => `${labelKey}=${labels.get(labelKey)}`) .map((labelKey: string) => `${labelKey}=${labels.get(labelKey)}`)
.join(','); .join(',');
try { try {
const deleteResult : any = await this.operator() const deleteResult: any = await this.operator()
.delete({ .delete({
qs: { qs: {
labelSelector: matchQuery, labelSelector: matchQuery,
......
...@@ -113,11 +113,11 @@ export class KubernetesClusterConfigFactory { ...@@ -113,11 +113,11 @@ export class KubernetesClusterConfigFactory {
*/ */
export class NFSConfig { export class NFSConfig {
// IP Adress of NFS server // IP Adress of NFS server
public readonly server : string; public readonly server: string;
// exported NFS path on NFS server // exported NFS path on NFS server
public readonly path : string; public readonly path: string;
constructor(server : string, path : string) { constructor(server: string, path: string) {
this.server = server; this.server = server;
this.path = path; this.path = path;
} }
...@@ -129,11 +129,11 @@ export class NFSConfig { ...@@ -129,11 +129,11 @@ export class NFSConfig {
*/ */
export class KeyVaultConfig { export class KeyVaultConfig {
// The vault-name to specify vault // The vault-name to specify vault
public readonly vaultName : string; public readonly vaultName: string;
// The name to specify private key // The name to specify private key
public readonly name : string; public readonly name: string;
constructor(vaultName : string, name : string) { constructor(vaultName: string, name: string) {
this.vaultName = vaultName; this.vaultName = vaultName;
this.name = name; this.name = name;
} }
...@@ -144,11 +144,11 @@ export class KeyVaultConfig { ...@@ -144,11 +144,11 @@ export class KeyVaultConfig {
*/ */
export class AzureStorage { export class AzureStorage {
// The azure share to storage files // The azure share to storage files
public readonly azureShare : string; public readonly azureShare: string;
// The account name of sotrage service // The account name of sotrage service
public readonly accountName: string; public readonly accountName: string;
constructor(azureShare : string, accountName: string) { constructor(azureShare: string, accountName: string) {
this.azureShare = azureShare; this.azureShare = azureShare;
this.accountName = accountName; this.accountName = accountName;
} }
...@@ -171,12 +171,12 @@ export class KubernetesTrialConfigTemplate { ...@@ -171,12 +171,12 @@ export class KubernetesTrialConfigTemplate {
public readonly privateRegistryAuthPath?: string; public readonly privateRegistryAuthPath?: string;
// Trail command // Trail command
public readonly command : string; public readonly command: string;
// Required GPU number for trial job. The number should be in [0,100] // Required GPU number for trial job. The number should be in [0,100]
public readonly gpuNum : number; public readonly gpuNum: number;
constructor(command : string, gpuNum : number, constructor(command: string, gpuNum: number,
cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) { cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) {
this.command = command; this.command = command;
this.gpuNum = gpuNum; this.gpuNum = gpuNum;
......
...@@ -14,7 +14,7 @@ import { KubernetesTrialJobDetail } from './kubernetesData'; ...@@ -14,7 +14,7 @@ import { KubernetesTrialJobDetail } from './kubernetesData';
* Collector Kubeflow jobs info from Kubernetes cluster, and update kubeflow job status locally * Collector Kubeflow jobs info from Kubernetes cluster, and update kubeflow job status locally
*/ */
export class KubernetesJobInfoCollector { export class KubernetesJobInfoCollector {
protected readonly trialJobsMap : Map<string, KubernetesTrialJobDetail>; protected readonly trialJobsMap: Map<string, KubernetesTrialJobDetail>;
protected readonly log: Logger = getLogger(); protected readonly log: Logger = getLogger();
protected readonly statusesNeedToCheck: TrialJobStatus[]; protected readonly statusesNeedToCheck: TrialJobStatus[];
...@@ -23,9 +23,9 @@ export class KubernetesJobInfoCollector { ...@@ -23,9 +23,9 @@ export class KubernetesJobInfoCollector {
this.statusesNeedToCheck = ['RUNNING', 'WAITING']; this.statusesNeedToCheck = ['RUNNING', 'WAITING'];
} }
public async retrieveTrialStatus(kubernetesCRDClient: KubernetesCRDClient | undefined) : Promise<void> { public async retrieveTrialStatus(kubernetesCRDClient: KubernetesCRDClient | undefined): Promise<void> {
assert(kubernetesCRDClient !== undefined); assert(kubernetesCRDClient !== undefined);
const updateKubernetesTrialJobs : Promise<void>[] = []; const updateKubernetesTrialJobs: Promise<void>[] = [];
for (const [trialJobId, kubernetesTrialJob] of this.trialJobsMap) { for (const [trialJobId, kubernetesTrialJob] of this.trialJobsMap) {
if (kubernetesTrialJob === undefined) { if (kubernetesTrialJob === undefined) {
throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`); throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
...@@ -41,7 +41,7 @@ export class KubernetesJobInfoCollector { ...@@ -41,7 +41,7 @@ export class KubernetesJobInfoCollector {
} }
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined, protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> { kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
throw new MethodNotImplementedError(); throw new MethodNotImplementedError();
} }
} }
...@@ -26,7 +26,7 @@ export class KubernetesJobRestServer extends ClusterJobRestServer { ...@@ -26,7 +26,7 @@ export class KubernetesJobRestServer extends ClusterJobRestServer {
} }
// tslint:disable-next-line:no-any // tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId : string, metrics : any[]) : void { protected handleTrialMetrics(jobId: string, metrics: any[]): void {
if (this.kubernetesTrainingService === undefined) { if (this.kubernetesTrainingService === undefined) {
throw Error('kubernetesTrainingService not initialized!'); throw Error('kubernetesTrainingService not initialized!');
} }
......
...@@ -22,8 +22,7 @@ import { KubernetesClusterConfig } from './kubernetesConfig'; ...@@ -22,8 +22,7 @@ import { KubernetesClusterConfig } from './kubernetesConfig';
import { kubernetesScriptFormat, KubernetesTrialJobDetail } from './kubernetesData'; import { kubernetesScriptFormat, KubernetesTrialJobDetail } from './kubernetesData';
import { KubernetesJobRestServer } from './kubernetesJobRestServer'; import { KubernetesJobRestServer } from './kubernetesJobRestServer';
var yaml = require('js-yaml'); const fs = require('fs');
var fs = require('fs');
/** /**
* Training Service implementation for Kubernetes * Training Service implementation for Kubernetes
...@@ -36,7 +35,7 @@ abstract class KubernetesTrainingService { ...@@ -36,7 +35,7 @@ abstract class KubernetesTrainingService {
// experiment root dir in NFS // experiment root dir in NFS
protected readonly trialLocalNFSTempFolder: string; protected readonly trialLocalNFSTempFolder: string;
protected stopping: boolean = false; protected stopping: boolean = false;
protected experimentId! : string; protected experimentId!: string;
protected kubernetesRestServerPort?: number; protected kubernetesRestServerPort?: number;
protected readonly CONTAINER_MOUNT_PATH: string; protected readonly CONTAINER_MOUNT_PATH: string;
protected azureStorageClient?: azureStorage.FileService; protected azureStorageClient?: azureStorage.FileService;
...@@ -113,12 +112,12 @@ abstract class KubernetesTrainingService { ...@@ -113,12 +112,12 @@ abstract class KubernetesTrainingService {
return Promise.resolve(''); return Promise.resolve('');
} }
public get MetricsEmitter() : EventEmitter { public get MetricsEmitter(): EventEmitter {
return this.metricsEmitter; return this.metricsEmitter;
} }
public async cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> { public async cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> {
const trialJobDetail : KubernetesTrialJobDetail | undefined = this.trialJobsMap.get(trialJobId); const trialJobDetail: KubernetesTrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
if (trialJobDetail === undefined) { if (trialJobDetail === undefined) {
const errorMessage: string = `CancelTrialJob: trial job id ${trialJobId} not found`; const errorMessage: string = `CancelTrialJob: trial job id ${trialJobId} not found`;
this.log.error(errorMessage); this.log.error(errorMessage);
...@@ -208,7 +207,7 @@ abstract class KubernetesTrainingService { ...@@ -208,7 +207,7 @@ abstract class KubernetesTrainingService {
} }
// tslint:disable: no-unsafe-any no-any // tslint:disable: no-unsafe-any no-any
protected async createAzureStorage(vaultName: string, valutKeyName: string, accountName: string, azureShare: string): Promise<void> { protected async createAzureStorage(vaultName: string, valutKeyName: string): Promise<void> {
try { try {
const result: any = await cpp.exec(`az keyvault secret show --name ${valutKeyName} --vault-name ${vaultName}`); const result: any = await cpp.exec(`az keyvault secret show --name ${valutKeyName} --vault-name ${vaultName}`);
if (result.stderr) { if (result.stderr) {
...@@ -312,8 +311,8 @@ abstract class KubernetesTrainingService { ...@@ -312,8 +311,8 @@ abstract class KubernetesTrainingService {
if(filePath === undefined || filePath === '') { if(filePath === undefined || filePath === '') {
return undefined; return undefined;
} }
let body = fs.readFileSync(filePath).toString('base64'); const body = fs.readFileSync(filePath).toString('base64');
let registrySecretName = String.Format('nni-secret-{0}', uniqueString(8) const registrySecretName = String.Format('nni-secret-{0}', uniqueString(8)
.toLowerCase()); .toLowerCase());
await this.genericK8sClient.createSecret( await this.genericK8sClient.createSecret(
{ {
...@@ -336,7 +335,7 @@ abstract class KubernetesTrainingService { ...@@ -336,7 +335,7 @@ abstract class KubernetesTrainingService {
return registrySecretName; return registrySecretName;
} }
protected async uploadFilesToAzureStorage(trialJobId: string, trialLocalTempFolder: String, codeDir: String, uploadRetryCount: number | undefined): Promise<string> { protected async uploadFilesToAzureStorage(trialJobId: string, trialLocalTempFolder: string, codeDir: string, uploadRetryCount: number | undefined): Promise<string> {
if (this.azureStorageClient === undefined) { if (this.azureStorageClient === undefined) {
throw new Error('azureStorageClient is not initialized'); throw new Error('azureStorageClient is not initialized');
} }
......
...@@ -4,11 +4,9 @@ ...@@ -4,11 +4,9 @@
'use strict'; 'use strict';
import * as cpp from 'child-process-promise'; import * as cpp from 'child-process-promise';
import * as cp from 'child_process';
import * as fs from 'fs'; import * as fs from 'fs';
import * as os from 'os'; import * as os from 'os';
import * as path from 'path'; import * as path from 'path';
import { String } from 'typescript-string-operations';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { delay } from '../../common/utils'; import { delay } from '../../common/utils';
import { GPUInfo, GPUSummary } from '../common/gpuData'; import { GPUInfo, GPUSummary } from '../common/gpuData';
......
...@@ -107,7 +107,7 @@ class LocalTrainingService implements TrainingService { ...@@ -107,7 +107,7 @@ class LocalTrainingService implements TrainingService {
private initialized: boolean; private initialized: boolean;
private stopping: boolean; private stopping: boolean;
private rootDir!: string; private rootDir!: string;
private readonly experimentId! : string; private readonly experimentId!: string;
private gpuScheduler!: GPUScheduler; private gpuScheduler!: GPUScheduler;
private readonly occupiedGpuIndexNumMap: Map<number, number>; private readonly occupiedGpuIndexNumMap: Map<number, number>;
private designatedGpuIndices!: Set<number>; private designatedGpuIndices!: Set<number>;
...@@ -299,7 +299,7 @@ class LocalTrainingService implements TrainingService { ...@@ -299,7 +299,7 @@ class LocalTrainingService implements TrainingService {
public getClusterMetadata(key: string): Promise<string> { public getClusterMetadata(key: string): Promise<string> {
switch (key) { switch (key) {
case TrialConfigMetadataKey.TRIAL_CONFIG: case TrialConfigMetadataKey.TRIAL_CONFIG: {
let getResult: Promise<string>; let getResult: Promise<string>;
if (this.localTrialConfig === undefined) { if (this.localTrialConfig === undefined) {
getResult = Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, `${key} is never set yet`)); getResult = Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, `${key} is never set yet`));
...@@ -308,6 +308,7 @@ class LocalTrainingService implements TrainingService { ...@@ -308,6 +308,7 @@ class LocalTrainingService implements TrainingService {
} }
return getResult; return getResult;
}
default: default:
return Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, 'Key not found')); return Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, 'Key not found'));
} }
......
...@@ -48,10 +48,10 @@ export namespace HDFSClientUtility { ...@@ -48,10 +48,10 @@ export namespace HDFSClientUtility {
* @param hdfsClient hdfs client * @param hdfsClient hdfs client
*/ */
// tslint:disable: no-unsafe-any non-literal-fs-path no-any // tslint:disable: no-unsafe-any non-literal-fs-path no-any
export async function copyFileToHdfs(localFilePath : string, hdfsFilePath : string, hdfsClient : any) : Promise<void> { export async function copyFileToHdfs(localFilePath: string, hdfsFilePath: string, hdfsClient: any): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
// tslint:disable-next-line:non-literal-fs-path // tslint:disable-next-line:non-literal-fs-path
fs.exists(localFilePath, (exists : boolean) => { fs.exists(localFilePath, (exists: boolean) => {
// Detect if local file exist // Detect if local file exist
if (exists) { if (exists) {
const localFileStream: fs.ReadStream = fs.createReadStream(localFilePath); const localFileStream: fs.ReadStream = fs.createReadStream(localFilePath);
...@@ -60,7 +60,7 @@ export namespace HDFSClientUtility { ...@@ -60,7 +60,7 @@ export namespace HDFSClientUtility {
hdfsFileStream.on('finish', () => { hdfsFileStream.on('finish', () => {
deferred.resolve(); deferred.resolve();
}); });
hdfsFileStream.on('error', (err : any) => { hdfsFileStream.on('error', (err: any) => {
getLogger() getLogger()
.error(`HDFSCientUtility:copyFileToHdfs, copy file failed, err is ${err.message}`); .error(`HDFSCientUtility:copyFileToHdfs, copy file failed, err is ${err.message}`);
deferred.reject(err); deferred.reject(err);
...@@ -82,7 +82,7 @@ export namespace HDFSClientUtility { ...@@ -82,7 +82,7 @@ export namespace HDFSClientUtility {
* @param hdfsDirectory HDFS directory * @param hdfsDirectory HDFS directory
* @param hdfsClient HDFS client * @param hdfsClient HDFS client
*/ */
export async function copyDirectoryToHdfs(localDirectory : string, hdfsDirectory : string, hdfsClient : any) : Promise<void> { export async function copyDirectoryToHdfs(localDirectory: string, hdfsDirectory: string, hdfsClient: any): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
// TODO: fs.readdirSync doesn't support ~($HOME) // TODO: fs.readdirSync doesn't support ~($HOME)
const fileNameArray: string[] = fs.readdirSync(localDirectory); const fileNameArray: string[] = fs.readdirSync(localDirectory);
...@@ -108,28 +108,51 @@ export namespace HDFSClientUtility { ...@@ -108,28 +108,51 @@ export namespace HDFSClientUtility {
return deferred.promise; return deferred.promise;
} }
/**
* Check if an HDFS path already exists
*
* @param hdfsPath target path need to check in HDFS
* @param hdfsClient HDFS client
*/
export async function pathExists(hdfsPath: string, hdfsClient: any): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
hdfsClient.exists(hdfsPath, (exist: boolean) => {
deferred.resolve(exist);
});
let timeoutId: NodeJS.Timer;
const delayTimeout: Promise<boolean> = new Promise<boolean>((resolve: Function, reject: Function): void => {
// Set timeout and reject the promise once reach timeout (5 seconds)
timeoutId = setTimeout(() => { reject(`Check HDFS path ${hdfsPath} exists timeout`); }, 5000);
});
return Promise.race([deferred.promise, delayTimeout])
.finally(() => { clearTimeout(timeoutId); });
}
/** /**
* Read content from HDFS file * Read content from HDFS file
* *
* @param hdfsPath HDFS file path * @param hdfsPath HDFS file path
* @param hdfsClient HDFS client * @param hdfsClient HDFS client
*/ */
export async function readFileFromHDFS(hdfsPath : string, hdfsClient : any) : Promise<Buffer> { export async function readFileFromHDFS(hdfsPath: string, hdfsClient: any): Promise<Buffer> {
const deferred: Deferred<Buffer> = new Deferred<Buffer>(); const deferred: Deferred<Buffer> = new Deferred<Buffer>();
let buffer : Buffer = Buffer.alloc(0); let buffer: Buffer = Buffer.alloc(0);
const exist : boolean = await pathExists(hdfsPath, hdfsClient); const exist: boolean = await pathExists(hdfsPath, hdfsClient);
if (!exist) { if (!exist) {
deferred.reject(`${hdfsPath} doesn't exists`); deferred.reject(`${hdfsPath} doesn't exists`);
} }
const remoteFileStream: any = hdfsClient.createReadStream(hdfsPath); const remoteFileStream: any = hdfsClient.createReadStream(hdfsPath);
remoteFileStream.on('error', (err : any) => { remoteFileStream.on('error', (err: any) => {
// Reject with the error // Reject with the error
deferred.reject(err); deferred.reject(err);
}); });
remoteFileStream.on('data', (chunk : any) => { remoteFileStream.on('data', (chunk: any) => {
// Concat the data chunk to buffer // Concat the data chunk to buffer
buffer = Buffer.concat([buffer, chunk]); buffer = Buffer.concat([buffer, chunk]);
}); });
...@@ -142,39 +165,16 @@ export namespace HDFSClientUtility { ...@@ -142,39 +165,16 @@ export namespace HDFSClientUtility {
return deferred.promise; return deferred.promise;
} }
/**
* Check if an HDFS path already exists
*
* @param hdfsPath target path need to check in HDFS
* @param hdfsClient HDFS client
*/
export async function pathExists(hdfsPath : string, hdfsClient : any) : Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>();
hdfsClient.exists(hdfsPath, (exist : boolean) => {
deferred.resolve(exist);
});
let timeoutId : NodeJS.Timer;
const delayTimeout : Promise<boolean> = new Promise<boolean>((resolve : Function, reject : Function) : void => {
// Set timeout and reject the promise once reach timeout (5 seconds)
timeoutId = setTimeout(() => { reject(`Check HDFS path ${hdfsPath} exists timeout`); }, 5000);
});
return Promise.race([deferred.promise, delayTimeout])
.finally(() => { clearTimeout(timeoutId); });
}
/** /**
* Mkdir in HDFS, use default permission 755 * Mkdir in HDFS, use default permission 755
* *
* @param hdfsPath the path in HDFS. It could be either file or directory * @param hdfsPath the path in HDFS. It could be either file or directory
* @param hdfsClient HDFS client * @param hdfsClient HDFS client
*/ */
export function mkdir(hdfsPath : string, hdfsClient : any) : Promise<boolean> { export function mkdir(hdfsPath: string, hdfsClient: any): Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>(); const deferred: Deferred<boolean> = new Deferred<boolean>();
hdfsClient.mkdir(hdfsPath, (err : any) => { hdfsClient.mkdir(hdfsPath, (err: any) => {
if (!err) { if (!err) {
deferred.resolve(true); deferred.resolve(true);
} else { } else {
...@@ -191,14 +191,14 @@ export namespace HDFSClientUtility { ...@@ -191,14 +191,14 @@ export namespace HDFSClientUtility {
* @param hdfsPath the path in HDFS. It could be either file or directory * @param hdfsPath the path in HDFS. It could be either file or directory
* @param hdfsClient HDFS client * @param hdfsClient HDFS client
*/ */
export async function readdir(hdfsPath : string, hdfsClient : any) : Promise<string[]> { export async function readdir(hdfsPath: string, hdfsClient: any): Promise<string[]> {
const deferred : Deferred<string[]> = new Deferred<string[]>(); const deferred: Deferred<string[]> = new Deferred<string[]>();
const exist : boolean = await pathExists(hdfsPath, hdfsClient); const exist: boolean = await pathExists(hdfsPath, hdfsClient);
if (!exist) { if (!exist) {
deferred.reject(`${hdfsPath} doesn't exists`); deferred.reject(`${hdfsPath} doesn't exists`);
} }
hdfsClient.readdir(hdfsPath, (err : any, files : any[]) => { hdfsClient.readdir(hdfsPath, (err: any, files: any[]) => {
if (err) { if (err) {
deferred.reject(err); deferred.reject(err);
} }
...@@ -215,9 +215,9 @@ export namespace HDFSClientUtility { ...@@ -215,9 +215,9 @@ export namespace HDFSClientUtility {
* @param hdfsClient HDFS client * @param hdfsClient HDFS client
* @param recursive Mark if need to delete recursively * @param recursive Mark if need to delete recursively
*/ */
export function deletePath(hdfsPath : string, hdfsClient : any, recursive : boolean = true) : Promise<boolean> { export function deletePath(hdfsPath: string, hdfsClient: any, recursive: boolean = true): Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>(); const deferred: Deferred<boolean> = new Deferred<boolean>();
hdfsClient.unlink(hdfsPath, recursive, (err : any) => { hdfsClient.unlink(hdfsPath, recursive, (err: any) => {
if (!err) { if (!err) {
deferred.resolve(true); deferred.resolve(true);
} else { } else {
......
...@@ -24,7 +24,7 @@ export class PAITaskRole { ...@@ -24,7 +24,7 @@ export class PAITaskRole {
//Shared memory for one task in the task role //Shared memory for one task in the task role
public readonly shmMB?: number; public readonly shmMB?: number;
//portList to specify the port used in container //portList to specify the port used in container
public portList?: portListMetaData[]; public portList?: PortListMetaData[];
/** /**
* Constructor * Constructor
...@@ -35,8 +35,8 @@ export class PAITaskRole { ...@@ -35,8 +35,8 @@ export class PAITaskRole {
* @param gpuNumber GPU number for one task in the task role, no less than 0 * @param gpuNumber GPU number for one task in the task role, no less than 0
* @param command Executable command for tasks in the task role, can not be empty * @param command Executable command for tasks in the task role, can not be empty
*/ */
constructor(name : string, taskNumber : number, cpuNumber : number, memoryMB : number, gpuNumber : number, constructor(name: string, taskNumber: number, cpuNumber: number, memoryMB: number, gpuNumber: number,
command : string, shmMB?: number, portList?: portListMetaData[]) { command: string, shmMB?: number, portList?: PortListMetaData[]) {
this.name = name; this.name = name;
this.taskNumber = taskNumber; this.taskNumber = taskNumber;
this.cpuNumber = cpuNumber; this.cpuNumber = cpuNumber;
...@@ -75,8 +75,8 @@ export class PAIJobConfig { ...@@ -75,8 +75,8 @@ export class PAIJobConfig {
* @param outputDir Output directory on HDFS * @param outputDir Output directory on HDFS
* @param taskRoles List of taskRole, one task role at least * @param taskRoles List of taskRole, one task role at least
*/ */
constructor(jobName: string, image : string, codeDir : string, constructor(jobName: string, image: string, codeDir: string,
taskRoles : PAITaskRole[], virtualCluster: string, authFile?: string) { taskRoles: PAITaskRole[], virtualCluster: string, authFile?: string) {
this.jobName = jobName; this.jobName = jobName;
this.image = image; this.image = image;
this.codeDir = codeDir; this.codeDir = codeDir;
...@@ -102,7 +102,7 @@ export class PAIClusterConfig { ...@@ -102,7 +102,7 @@ export class PAIClusterConfig {
* @param host Host IP of PAI Cluster * @param host Host IP of PAI Cluster
* @param token PAI token of PAI Cluster * @param token PAI token of PAI Cluster
*/ */
constructor(userName: string, host : string, passWord?: string, token?: string) { constructor(userName: string, host: string, passWord?: string, token?: string) {
this.userName = userName; this.userName = userName;
this.passWord = passWord; this.passWord = passWord;
this.host = host; this.host = host;
...@@ -113,8 +113,8 @@ export class PAIClusterConfig { ...@@ -113,8 +113,8 @@ export class PAIClusterConfig {
/** /**
* portList data structure used in PAI taskRole * portList data structure used in PAI taskRole
*/ */
export class portListMetaData { export class PortListMetaData {
public readonly label : string = ''; public readonly label: string = '';
public readonly beginAt: number = 0; public readonly beginAt: number = 0;
public readonly portNumber: number = 0; public readonly portNumber: number = 0;
} }
...@@ -135,10 +135,10 @@ export class NNIPAITrialConfig extends TrialConfig { ...@@ -135,10 +135,10 @@ export class NNIPAITrialConfig extends TrialConfig {
//authentication file used for private Docker registry //authentication file used for private Docker registry
public authFile?: string; public authFile?: string;
//portList to specify the port used in container //portList to specify the port used in container
public portList?: portListMetaData[]; public portList?: PortListMetaData[];
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number, constructor(command: string, codeDir: string, gpuNum: number, cpuNum: number, memoryMB: number,
image: string, virtualCluster?: string, shmMB?: number, authFile?: string, portList?: portListMetaData[]) { image: string, virtualCluster?: string, shmMB?: number, authFile?: string, portList?: PortListMetaData[]) {
super(command, codeDir, gpuNum); super(command, codeDir, gpuNum);
this.cpuNum = cpuNum; this.cpuNum = cpuNum;
this.memoryMB = memoryMB; this.memoryMB = memoryMB;
......
...@@ -22,7 +22,7 @@ export class PAITrialJobDetail implements TrialJobDetail { ...@@ -22,7 +22,7 @@ export class PAITrialJobDetail implements TrialJobDetail {
public hdfsLogPath: string; public hdfsLogPath: string;
public isEarlyStopped?: boolean; public isEarlyStopped?: boolean;
constructor(id: string, status: TrialJobStatus, paiJobName : string, constructor(id: string, status: TrialJobStatus, paiJobName: string,
submitTime: number, workingDirectory: string, form: TrialJobApplicationForm, hdfsLogPath: string) { submitTime: number, workingDirectory: string, form: TrialJobApplicationForm, hdfsLogPath: string) {
this.id = id; this.id = id;
this.status = status; this.status = status;
......
...@@ -16,10 +16,10 @@ import { PAITrialJobDetail } from './paiData'; ...@@ -16,10 +16,10 @@ import { PAITrialJobDetail } from './paiData';
* Collector PAI jobs info from PAI cluster, and update pai job status locally * Collector PAI jobs info from PAI cluster, and update pai job status locally
*/ */
export class PAIJobInfoCollector { export class PAIJobInfoCollector {
private readonly trialJobsMap : Map<string, PAITrialJobDetail>; private readonly trialJobsMap: Map<string, PAITrialJobDetail>;
private readonly log: Logger = getLogger(); private readonly log: Logger = getLogger();
private readonly statusesNeedToCheck : TrialJobStatus[]; private readonly statusesNeedToCheck: TrialJobStatus[];
private readonly finalStatuses : TrialJobStatus[]; private readonly finalStatuses: TrialJobStatus[];
constructor(jobMap: Map<string, PAITrialJobDetail>) { constructor(jobMap: Map<string, PAITrialJobDetail>) {
this.trialJobsMap = jobMap; this.trialJobsMap = jobMap;
...@@ -27,12 +27,12 @@ export class PAIJobInfoCollector { ...@@ -27,12 +27,12 @@ export class PAIJobInfoCollector {
this.finalStatuses = ['SUCCEEDED', 'FAILED', 'USER_CANCELED', 'SYS_CANCELED', 'EARLY_STOPPED']; this.finalStatuses = ['SUCCEEDED', 'FAILED', 'USER_CANCELED', 'SYS_CANCELED', 'EARLY_STOPPED'];
} }
public async retrieveTrialStatus(paiToken? : string, paiClusterConfig?: PAIClusterConfig) : Promise<void> { public async retrieveTrialStatus(paiToken? : string, paiClusterConfig?: PAIClusterConfig): Promise<void> {
if (paiClusterConfig === undefined || paiToken === undefined) { if (paiClusterConfig === undefined || paiToken === undefined) {
return Promise.resolve(); return Promise.resolve();
} }
const updatePaiTrialJobs : Promise<void>[] = []; const updatePaiTrialJobs: Promise<void>[] = [];
for (const [trialJobId, paiTrialJob] of this.trialJobsMap) { for (const [trialJobId, paiTrialJob] of this.trialJobsMap) {
if (paiTrialJob === undefined) { if (paiTrialJob === undefined) {
throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`); throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
...@@ -43,9 +43,8 @@ export class PAIJobInfoCollector { ...@@ -43,9 +43,8 @@ export class PAIJobInfoCollector {
await Promise.all(updatePaiTrialJobs); await Promise.all(updatePaiTrialJobs);
} }
private getSinglePAITrialJobInfo(paiTrialJob : PAITrialJobDetail, paiToken : string, paiClusterConfig: PAIClusterConfig) private getSinglePAITrialJobInfo(paiTrialJob: PAITrialJobDetail, paiToken: string, paiClusterConfig: PAIClusterConfig): Promise<void> {
: Promise<void> { const deferred: Deferred<void> = new Deferred<void>();
const deferred : Deferred<void> = new Deferred<void>();
if (!this.statusesNeedToCheck.includes(paiTrialJob.status)) { if (!this.statusesNeedToCheck.includes(paiTrialJob.status)) {
deferred.resolve(); deferred.resolve();
......
...@@ -24,7 +24,7 @@ export class PAIJobRestServer extends ClusterJobRestServer { ...@@ -24,7 +24,7 @@ export class PAIJobRestServer extends ClusterJobRestServer {
private parameterFileMetaList: ParameterFileMeta[] = []; private parameterFileMetaList: ParameterFileMeta[] = [];
@Inject @Inject
private readonly paiTrainingService : PAITrainingService; private readonly paiTrainingService: PAITrainingService;
/** /**
* constructor to provide NNIRestServer's own rest property, e.g. port * constructor to provide NNIRestServer's own rest property, e.g. port
...@@ -35,7 +35,7 @@ export class PAIJobRestServer extends ClusterJobRestServer { ...@@ -35,7 +35,7 @@ export class PAIJobRestServer extends ClusterJobRestServer {
} }
// tslint:disable-next-line:no-any // tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId : string, metrics : any[]) : void { protected handleTrialMetrics(jobId: string, metrics: any[]): void {
// Split metrics array into single metric, then emit // Split metrics array into single metric, then emit
// Warning: If not split metrics into single ones, the behavior will be UNKNOWN // Warning: If not split metrics into single ones, the behavior will be UNKNOWN
for (const singleMetric of metrics) { for (const singleMetric of metrics) {
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
'use strict'; 'use strict';
import * as cpp from 'child-process-promise';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
// tslint:disable-next-line:no-implicit-dependencies // tslint:disable-next-line:no-implicit-dependencies
...@@ -13,7 +12,6 @@ import * as component from '../../common/component'; ...@@ -13,7 +12,6 @@ import * as component from '../../common/component';
import { EventEmitter } from 'events'; import { EventEmitter } from 'events';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import { String } from 'typescript-string-operations'; import { String } from 'typescript-string-operations';
import { MethodNotImplementedError } from '../../common/errors';
import { getExperimentId } from '../../common/experimentStartupInfo'; import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { import {
...@@ -52,8 +50,8 @@ class PAITrainingService implements TrainingService { ...@@ -52,8 +50,8 @@ class PAITrainingService implements TrainingService {
private paiToken? : string; private paiToken? : string;
private paiTokenUpdateTime?: number; private paiTokenUpdateTime?: number;
private readonly paiTokenUpdateInterval: number; private readonly paiTokenUpdateInterval: number;
private readonly experimentId! : string; private readonly experimentId!: string;
private readonly paiJobCollector : PAIJobInfoCollector; private readonly paiJobCollector: PAIJobInfoCollector;
private paiRestServerPort?: number; private paiRestServerPort?: number;
private nniManagerIpConfig?: NNIManagerIpConfig; private nniManagerIpConfig?: NNIManagerIpConfig;
private copyExpCodeDirPromise?: Promise<void>; private copyExpCodeDirPromise?: Promise<void>;
...@@ -126,7 +124,7 @@ class PAITrainingService implements TrainingService { ...@@ -126,7 +124,7 @@ class PAITrainingService implements TrainingService {
if (this.paiClusterConfig === undefined) { if (this.paiClusterConfig === undefined) {
throw new Error(`paiClusterConfig not initialized!`); throw new Error(`paiClusterConfig not initialized!`);
} }
const deferred : Deferred<PAITrialJobDetail> = new Deferred<PAITrialJobDetail>(); const deferred: Deferred<PAITrialJobDetail> = new Deferred<PAITrialJobDetail>();
this.log.info(`submitTrialJob: form: ${JSON.stringify(form)}`); this.log.info(`submitTrialJob: form: ${JSON.stringify(form)}`);
...@@ -137,7 +135,7 @@ class PAITrainingService implements TrainingService { ...@@ -137,7 +135,7 @@ class PAITrainingService implements TrainingService {
const hdfsCodeDir: string = HDFSClientUtility.getHdfsTrialWorkDir(this.paiClusterConfig.userName, trialJobId); const hdfsCodeDir: string = HDFSClientUtility.getHdfsTrialWorkDir(this.paiClusterConfig.userName, trialJobId);
const hdfsOutputDir: string = unixPathJoin(hdfsCodeDir, 'nnioutput'); const hdfsOutputDir: string = unixPathJoin(hdfsCodeDir, 'nnioutput');
const hdfsLogPath : string = String.Format( const hdfsLogPath: string = String.Format(
PAI_LOG_PATH_FORMAT, PAI_LOG_PATH_FORMAT,
this.paiClusterConfig.host, this.paiClusterConfig.host,
hdfsOutputDir hdfsOutputDir
...@@ -175,8 +173,8 @@ class PAITrainingService implements TrainingService { ...@@ -175,8 +173,8 @@ class PAITrainingService implements TrainingService {
// tslint:disable:no-http-string // tslint:disable:no-http-string
public cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> { public cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> {
const trialJobDetail : PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId); const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
const deferred : Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
if (trialJobDetail === undefined) { if (trialJobDetail === undefined) {
this.log.error(`cancelTrialJob: trial job id ${trialJobId} not found`); this.log.error(`cancelTrialJob: trial job id ${trialJobId} not found`);
...@@ -222,7 +220,7 @@ class PAITrainingService implements TrainingService { ...@@ -222,7 +220,7 @@ class PAITrainingService implements TrainingService {
// tslint:disable: no-unsafe-any no-any // tslint:disable: no-unsafe-any no-any
// tslint:disable-next-line:max-func-body-length // tslint:disable-next-line:max-func-body-length
public async setClusterMetadata(key: string, value: string): Promise<void> { public async setClusterMetadata(key: string, value: string): Promise<void> {
const deferred : Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
switch (key) { switch (key) {
case TrialConfigMetadataKey.NNI_MANAGER_IP: case TrialConfigMetadataKey.NNI_MANAGER_IP:
...@@ -303,7 +301,7 @@ class PAITrainingService implements TrainingService { ...@@ -303,7 +301,7 @@ class PAITrainingService implements TrainingService {
// tslint:enable: no-unsafe-any // tslint:enable: no-unsafe-any
public getClusterMetadata(key: string): Promise<string> { public getClusterMetadata(key: string): Promise<string> {
const deferred : Deferred<string> = new Deferred<string>(); const deferred: Deferred<string> = new Deferred<string>();
deferred.resolve(); deferred.resolve();
...@@ -314,7 +312,7 @@ class PAITrainingService implements TrainingService { ...@@ -314,7 +312,7 @@ class PAITrainingService implements TrainingService {
this.log.info('Stopping PAI training service...'); this.log.info('Stopping PAI training service...');
this.stopping = true; this.stopping = true;
const deferred : Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
const restServer: PAIJobRestServer = component.get(PAIJobRestServer); const restServer: PAIJobRestServer = component.get(PAIJobRestServer);
try { try {
await restServer.stop(); await restServer.stop();
...@@ -329,13 +327,13 @@ class PAITrainingService implements TrainingService { ...@@ -329,13 +327,13 @@ class PAITrainingService implements TrainingService {
return deferred.promise; return deferred.promise;
} }
public get MetricsEmitter() : EventEmitter { public get MetricsEmitter(): EventEmitter {
return this.metricsEmitter; return this.metricsEmitter;
} }
// tslint:disable-next-line:max-func-body-length // tslint:disable-next-line:max-func-body-length
private async submitTrialJobToPAI(trialJobId: string): Promise<boolean> { private async submitTrialJobToPAI(trialJobId: string): Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>(); const deferred: Deferred<boolean> = new Deferred<boolean>();
const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId); const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
if (trialJobDetail === undefined) { if (trialJobDetail === undefined) {
...@@ -372,7 +370,7 @@ class PAITrainingService implements TrainingService { ...@@ -372,7 +370,7 @@ class PAITrainingService implements TrainingService {
//create tmp trial working folder locally. //create tmp trial working folder locally.
await execMkdir(trialLocalTempFolder); await execMkdir(trialLocalTempFolder);
const runScriptContent : string = CONTAINER_INSTALL_NNI_SHELL_FORMAT; const runScriptContent: string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
// Write NNI installation file to local tmp files // Write NNI installation file to local tmp files
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), runScriptContent, { encoding: 'utf8' }); await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), runScriptContent, { encoding: 'utf8' });
...@@ -388,7 +386,7 @@ class PAITrainingService implements TrainingService { ...@@ -388,7 +386,7 @@ class PAITrainingService implements TrainingService {
// tslint:disable-next-line: strict-boolean-expressions // tslint:disable-next-line: strict-boolean-expressions
const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address(); const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address();
const version: string = this.versionCheck ? await getVersion() : ''; const version: string = this.versionCheck ? await getVersion() : '';
const nniPaiTrialCommand : string = String.Format( const nniPaiTrialCommand: string = String.Format(
PAI_TRIAL_COMMAND_FORMAT, PAI_TRIAL_COMMAND_FORMAT,
// PAI will copy job's codeDir into /root directory // PAI will copy job's codeDir into /root directory
`$PWD/${trialJobId}`, `$PWD/${trialJobId}`,
...@@ -411,7 +409,7 @@ class PAITrainingService implements TrainingService { ...@@ -411,7 +409,7 @@ class PAITrainingService implements TrainingService {
// tslint:disable-next-line:no-console // tslint:disable-next-line:no-console
this.log.info(`nniPAItrial command is ${nniPaiTrialCommand.trim()}`); this.log.info(`nniPAItrial command is ${nniPaiTrialCommand.trim()}`);
const paiTaskRoles : PAITaskRole[] = [ const paiTaskRoles: PAITaskRole[] = [
new PAITaskRole( new PAITaskRole(
`nni_trail_${trialJobId}`, `nni_trail_${trialJobId}`,
// Task role number // Task role number
...@@ -431,7 +429,7 @@ class PAITrainingService implements TrainingService { ...@@ -431,7 +429,7 @@ class PAITrainingService implements TrainingService {
) )
]; ];
const paiJobConfig : PAIJobConfig = new PAIJobConfig( const paiJobConfig: PAIJobConfig = new PAIJobConfig(
// Job name // Job name
trialJobDetail.paiJobName, trialJobDetail.paiJobName,
// Docker image // Docker image
...@@ -472,7 +470,7 @@ class PAITrainingService implements TrainingService { ...@@ -472,7 +470,7 @@ class PAITrainingService implements TrainingService {
// tslint:disable:no-any no-unsafe-any // tslint:disable:no-any no-unsafe-any
request(submitJobRequest, (error: Error, response: request.Response, body: any) => { request(submitJobRequest, (error: Error, response: request.Response, body: any) => {
if ((error !== undefined && error !== null) || response.statusCode >= 400) { if ((error !== undefined && error !== null) || response.statusCode >= 400) {
const errorMessage : string = (error !== undefined && error !== null) ? error.message : const errorMessage: string = (error !== undefined && error !== null) ? error.message :
`Submit trial ${trialJobId} failed, http code:${response.statusCode}, http body: ${response.body.message}`; `Submit trial ${trialJobId} failed, http code:${response.statusCode}, http body: ${response.body.message}`;
trialJobDetail.status = 'FAILED'; trialJobDetail.status = 'FAILED';
deferred.resolve(true); deferred.resolve(true);
...@@ -527,7 +525,7 @@ class PAITrainingService implements TrainingService { ...@@ -527,7 +525,7 @@ class PAITrainingService implements TrainingService {
* Update pai token by the interval time or initialize the pai token * Update pai token by the interval time or initialize the pai token
*/ */
private async updatePaiToken(): Promise<void> { private async updatePaiToken(): Promise<void> {
const deferred : Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
const currentTime: number = new Date().getTime(); const currentTime: number = new Date().getTime();
//If pai token initialized and not reach the interval time, do not update //If pai token initialized and not reach the interval time, do not update
...@@ -603,7 +601,7 @@ class PAITrainingService implements TrainingService { ...@@ -603,7 +601,7 @@ class PAITrainingService implements TrainingService {
} }
private postParameterFileMeta(parameterFileMeta: ParameterFileMeta): Promise<void> { private postParameterFileMeta(parameterFileMeta: ParameterFileMeta): Promise<void> {
const deferred : Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
const restServer: PAIJobRestServer = component.get(PAIJobRestServer); const restServer: PAIJobRestServer = component.get(PAIJobRestServer);
const req: request.Options = { const req: request.Options = {
uri: `${restServer.endPoint}${restServer.apiRootUrl}/parameter-file-meta`, uri: `${restServer.endPoint}${restServer.apiRootUrl}/parameter-file-meta`,
......
...@@ -15,7 +15,7 @@ export class PAITrialConfig extends TrialConfig { ...@@ -15,7 +15,7 @@ export class PAITrialConfig extends TrialConfig {
public readonly dataDir: string; public readonly dataDir: string;
public readonly outputDir: string; public readonly outputDir: string;
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number, constructor(command: string, codeDir: string, gpuNum: number, cpuNum: number, memoryMB: number,
image: string, dataDir: string, outputDir: string) { image: string, dataDir: string, outputDir: string) {
super(command, codeDir, gpuNum); super(command, codeDir, gpuNum);
this.cpuNum = cpuNum; this.cpuNum = cpuNum;
......
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
import * as assert from 'assert'; import * as assert from 'assert';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { TrialJobDetail } from '../../common/trainingService';
import { randomSelect } from '../../common/utils'; import { randomSelect } from '../../common/utils';
import { GPUInfo } from '../common/gpuData'; import { GPUInfo } from '../common/gpuData';
import { import {
...@@ -19,7 +18,7 @@ type SCHEDULE_POLICY_NAME = 'random' | 'round-robin'; ...@@ -19,7 +18,7 @@ type SCHEDULE_POLICY_NAME = 'random' | 'round-robin';
*/ */
export class GPUScheduler { export class GPUScheduler {
private readonly machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>; private readonly machineSSHClientMap: Map<RemoteMachineMeta, SSHClientManager>;
private readonly log: Logger = getLogger(); private readonly log: Logger = getLogger();
private readonly policyName: SCHEDULE_POLICY_NAME = 'round-robin'; private readonly policyName: SCHEDULE_POLICY_NAME = 'round-robin';
private roundRobinIndex: number = 0; private roundRobinIndex: number = 0;
...@@ -29,7 +28,7 @@ export class GPUScheduler { ...@@ -29,7 +28,7 @@ export class GPUScheduler {
* Constructor * Constructor
* @param machineSSHClientMap map from remote machine to sshClient * @param machineSSHClientMap map from remote machine to sshClient
*/ */
constructor(machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>) { constructor(machineSSHClientMap: Map<RemoteMachineMeta, SSHClientManager>) {
assert(machineSSHClientMap.size > 0); assert(machineSSHClientMap.size > 0);
this.machineSSHClientMap = machineSSHClientMap; this.machineSSHClientMap = machineSSHClientMap;
this.configuredRMs = Array.from(machineSSHClientMap.keys()); this.configuredRMs = Array.from(machineSSHClientMap.keys());
...@@ -39,7 +38,7 @@ export class GPUScheduler { ...@@ -39,7 +38,7 @@ export class GPUScheduler {
* Schedule a machine according to the constraints (requiredGPUNum) * Schedule a machine according to the constraints (requiredGPUNum)
* @param requiredGPUNum required GPU number * @param requiredGPUNum required GPU number
*/ */
public scheduleMachine(requiredGPUNum: number | undefined, trialJobDetail : RemoteMachineTrialJobDetail) : RemoteMachineScheduleResult { public scheduleMachine(requiredGPUNum: number | undefined, trialJobDetail: RemoteMachineTrialJobDetail): RemoteMachineScheduleResult {
if(requiredGPUNum === undefined) { if(requiredGPUNum === undefined) {
requiredGPUNum = 0; requiredGPUNum = 0;
} }
...@@ -48,7 +47,7 @@ export class GPUScheduler { ...@@ -48,7 +47,7 @@ export class GPUScheduler {
assert(allRMs.length > 0); assert(allRMs.length > 0);
// Step 1: Check if required GPU number not exceeds the total GPU number in all machines // Step 1: Check if required GPU number not exceeds the total GPU number in all machines
const eligibleRM: RemoteMachineMeta[] = allRMs.filter((rmMeta : RemoteMachineMeta) => const eligibleRM: RemoteMachineMeta[] = allRMs.filter((rmMeta: RemoteMachineMeta) =>
rmMeta.gpuSummary === undefined || requiredGPUNum === 0 || (requiredGPUNum !== undefined && rmMeta.gpuSummary.gpuCount >= requiredGPUNum)); rmMeta.gpuSummary === undefined || requiredGPUNum === 0 || (requiredGPUNum !== undefined && rmMeta.gpuSummary.gpuCount >= requiredGPUNum));
if (eligibleRM.length === 0) { if (eligibleRM.length === 0) {
// If the required gpu number exceeds the upper limit of all machine's GPU number // If the required gpu number exceeds the upper limit of all machine's GPU number
...@@ -134,8 +133,8 @@ export class GPUScheduler { ...@@ -134,8 +133,8 @@ export class GPUScheduler {
* @param availableGPUMap available GPU resource filled by this detection * @param availableGPUMap available GPU resource filled by this detection
* @returns Available GPU number on this remote machine * @returns Available GPU number on this remote machine
*/ */
private gpuResourceDetection() : Map<RemoteMachineMeta, GPUInfo[]> { private gpuResourceDetection(): Map<RemoteMachineMeta, GPUInfo[]> {
const totalResourceMap : Map<RemoteMachineMeta, GPUInfo[]> = new Map<RemoteMachineMeta, GPUInfo[]>(); const totalResourceMap: Map<RemoteMachineMeta, GPUInfo[]> = new Map<RemoteMachineMeta, GPUInfo[]>();
this.machineSSHClientMap.forEach((sshClientManager: SSHClientManager, rmMeta: RemoteMachineMeta) => { this.machineSSHClientMap.forEach((sshClientManager: SSHClientManager, rmMeta: RemoteMachineMeta) => {
// Assgin totoal GPU count as init available GPU number // Assgin totoal GPU count as init available GPU number
if (rmMeta.gpuSummary !== undefined) { if (rmMeta.gpuSummary !== undefined) {
...@@ -224,7 +223,7 @@ export class GPUScheduler { ...@@ -224,7 +223,7 @@ export class GPUScheduler {
resultType: ScheduleResultType.SUCCEED, resultType: ScheduleResultType.SUCCEED,
scheduleInfo: { scheduleInfo: {
rmMeta: rmMeta, rmMeta: rmMeta,
cuda_visible_device: allocatedGPUs cudaVisibleDevice: allocatedGPUs
.map((gpuInfo: GPUInfo) => { .map((gpuInfo: GPUInfo) => {
return gpuInfo.index; return gpuInfo.index;
}) })
......
...@@ -13,13 +13,13 @@ import { GPUInfo, GPUSummary } from '../common/gpuData'; ...@@ -13,13 +13,13 @@ import { GPUInfo, GPUSummary } from '../common/gpuData';
* Metadata of remote machine for configuration and statuc query * Metadata of remote machine for configuration and statuc query
*/ */
export class RemoteMachineMeta { export class RemoteMachineMeta {
public readonly ip : string = ''; public readonly ip: string = '';
public readonly port : number = 22; public readonly port: number = 22;
public readonly username : string = ''; public readonly username: string = '';
public readonly passwd: string = ''; public readonly passwd: string = '';
public readonly sshKeyPath?: string; public readonly sshKeyPath?: string;
public readonly passphrase?: string; public readonly passphrase?: string;
public gpuSummary : GPUSummary | undefined; public gpuSummary: GPUSummary | undefined;
public readonly gpuIndices?: string; public readonly gpuIndices?: string;
public readonly maxTrialNumPerGpu?: number; public readonly maxTrialNumPerGpu?: number;
//TODO: initialize varialbe in constructor //TODO: initialize varialbe in constructor
...@@ -43,11 +43,11 @@ export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined { ...@@ -43,11 +43,11 @@ export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined {
* The execution result for command executed on remote machine * The execution result for command executed on remote machine
*/ */
export class RemoteCommandResult { export class RemoteCommandResult {
public readonly stdout : string; public readonly stdout: string;
public readonly stderr : string; public readonly stderr: string;
public readonly exitCode : number; public readonly exitCode: number;
constructor(stdout : string, stderr : string, exitCode : number) { constructor(stdout: string, stderr: string, exitCode: number) {
this.stdout = stdout; this.stdout = stdout;
this.stderr = stderr; this.stderr = stderr;
this.exitCode = exitCode; this.exitCode = exitCode;
...@@ -225,9 +225,9 @@ export class SSHClientManager { ...@@ -225,9 +225,9 @@ export class SSHClientManager {
} }
} }
export type RemoteMachineScheduleResult = { scheduleInfo : RemoteMachineScheduleInfo | undefined; resultType : ScheduleResultType}; export type RemoteMachineScheduleResult = { scheduleInfo: RemoteMachineScheduleInfo | undefined; resultType: ScheduleResultType};
export type RemoteMachineScheduleInfo = { rmMeta : RemoteMachineMeta; cuda_visible_device : string}; export type RemoteMachineScheduleInfo = { rmMeta: RemoteMachineMeta; cudaVisibleDevice: string};
export enum ScheduleResultType { export enum ScheduleResultType {
// Schedule succeeded // Schedule succeeded
......
...@@ -15,7 +15,7 @@ import { RemoteMachineTrainingService } from './remoteMachineTrainingService'; ...@@ -15,7 +15,7 @@ import { RemoteMachineTrainingService } from './remoteMachineTrainingService';
@component.Singleton @component.Singleton
export class RemoteMachineJobRestServer extends ClusterJobRestServer { export class RemoteMachineJobRestServer extends ClusterJobRestServer {
@Inject @Inject
private readonly remoteMachineTrainingService : RemoteMachineTrainingService; private readonly remoteMachineTrainingService: RemoteMachineTrainingService;
/** /**
* constructor to provide NNIRestServer's own rest property, e.g. port * constructor to provide NNIRestServer's own rest property, e.g. port
...@@ -26,7 +26,7 @@ export class RemoteMachineJobRestServer extends ClusterJobRestServer { ...@@ -26,7 +26,7 @@ export class RemoteMachineJobRestServer extends ClusterJobRestServer {
} }
// tslint:disable-next-line:no-any // tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId : string, metrics : any[]) : void { protected handleTrialMetrics(jobId: string, metrics: any[]): void {
// Split metrics array into single metric, then emit // Split metrics array into single metric, then emit
// Warning: If not split metrics into single ones, the behavior will be UNKNOWNls // Warning: If not split metrics into single ones, the behavior will be UNKNOWNls
for (const singleMetric of metrics) { for (const singleMetric of metrics) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment