Unverified Commit 543239c6 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Merge pull request #220 from microsoft/master

merge master
parents 32efaa36 659480f2
......@@ -10,7 +10,6 @@ import * as os from 'os';
import * as path from 'path';
import { String } from 'typescript-string-operations';
import { countFilesRecursively, getNewLine, validateFileNameRecursively } from '../../common/utils';
import { file } from '../../node_modules/@types/tmp';
import { GPU_INFO_COLLECTOR_FORMAT_WINDOWS } from './gpuData';
/**
......@@ -19,8 +18,7 @@ import { GPU_INFO_COLLECTOR_FORMAT_WINDOWS } from './gpuData';
* @param codeDir codeDir in nni config file
* @returns file number under codeDir
*/
// tslint:disable: no-redundant-jsdoc
export async function validateCodeDir(codeDir: string) : Promise<number> {
export async function validateCodeDir(codeDir: string): Promise<number> {
let fileCount: number | undefined;
let fileNameValid: boolean = true;
try {
......
......@@ -11,7 +11,6 @@ import { String } from 'typescript-string-operations';
import { getLogger } from '../../common/log';
import { mkDirP } from '../../common/utils';
// tslint:disable: no-redundant-jsdoc no-any no-unsafe-any
export namespace AzureStorageClientUtility {
/**
......@@ -66,7 +65,7 @@ export namespace AzureStorageClientUtility {
let rootDirectory: string = '';
for (const directory of directories) {
rootDirectory += directory;
let result:boolean = await createDirectory(fileServerClient, rootDirectory, azureShare);
const result: boolean = await createDirectory(fileServerClient, rootDirectory, azureShare);
if (!result) {
deferred.resolve(false);
return deferred.promise;
......@@ -114,7 +113,6 @@ export namespace AzureStorageClientUtility {
async function downloadFile(fileServerClient: any, azureDirectory: string, azureFileName: any, azureShare: any,
localFilePath: string): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
// tslint:disable-next-line:non-literal-fs-path
await fileServerClient.getFileToStream(azureShare, azureDirectory, azureFileName, fs.createWriteStream(localFilePath),
(error: any, result: any, response: any) => {
if (error) {
......@@ -136,12 +134,11 @@ export namespace AzureStorageClientUtility {
* @param azureShare : the azure share used
* @param localDirectory : local directory to be uploaded
*/
// tslint:disable:non-literal-fs-path
export async function uploadDirectory(fileServerClient: azureStorage.FileService, azureDirectory: string, azureShare: any,
localDirectory: string): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
const fileNameArray: string[] = fs.readdirSync(localDirectory);
let result: boolean = await createDirectoryRecursive(fileServerClient, azureDirectory, azureShare);
const result: boolean = await createDirectoryRecursive(fileServerClient, azureDirectory, azureShare);
if (!result) {
deferred.resolve(false);
return deferred.promise;
......@@ -221,4 +218,3 @@ export namespace AzureStorageClientUtility {
return deferred.promise;
}
}
// tslint:enable: no-redundant-jsdoc no-any no-unsafe-any
......@@ -6,19 +6,6 @@
import * as fs from 'fs';
import { GeneralK8sClient, KubernetesCRDClient } from '../kubernetesApiClient';
/**
* FrameworkController Client
* Factory class exposing a single static method that builds the CRD client for FrameworkController.
*/
class FrameworkControllerClientFactory {
/**
* Factory method to generate operator client
* @returns a newly constructed FrameworkControllerClientV1, typed as KubernetesCRDClient
*/
// tslint:disable-next-line:function-name
public static createClient(): KubernetesCRDClient {
return new FrameworkControllerClientV1();
}
}
/**
* FrameworkController ClientV1
*/
......@@ -26,7 +13,6 @@ class FrameworkControllerClientV1 extends KubernetesCRDClient {
/**
* constructor, to initialize frameworkcontroller CRD definition
*/
// tslint:disable: no-unsafe-any no-any
public constructor() {
super();
this.crdSchema = JSON.parse(fs.readFileSync('./config/frameworkcontroller/frameworkcontrollerjob-crd-v1.json', 'utf8'));
......@@ -36,11 +22,22 @@ class FrameworkControllerClientV1 extends KubernetesCRDClient {
/**
 * Resolves the `frameworks` resource of the `frameworkcontroller.microsoft.com`
 * API group (version `v1`) inside the `default` namespace of `this.client`.
 * @returns the `frameworks` endpoint object exposed by the underlying client
 */
protected get operator(): any {
    // Walk the client's API tree one level at a time instead of in a single chain.
    const apiGroup: any = this.client.apis['frameworkcontroller.microsoft.com'];
    const defaultNamespace: any = apiGroup.v1.namespaces('default');

    return defaultNamespace.frameworks;
}
// tslint:enable: no-unsafe-any no-any
/**
* Container name reported by this client.
* @returns the fixed string 'framework'
*/
public get containerName(): string {
return 'framework';
}
}
/**
 * Factory for the FrameworkController CRD client.
 */
class FrameworkControllerClientFactory {
    /**
     * Builds the operator client.
     * @returns a new FrameworkControllerClientV1 instance, typed as KubernetesCRDClient
     */
    public static createClient(): KubernetesCRDClient {
        const client: KubernetesCRDClient = new FrameworkControllerClientV1();

        return client;
    }
}
export { FrameworkControllerClientFactory, GeneralK8sClient };
......@@ -9,7 +9,6 @@ import { AzureStorage, KeyVaultConfig, KubernetesClusterConfig, KubernetesCluste
KubernetesStorageKind, KubernetesTrialConfig, KubernetesTrialConfigTemplate, NFSConfig, StorageConfig
} from '../kubernetesConfig';
// tslint:disable:completed-docs
export class FrameworkAttemptCompletionPolicy {
public readonly minFailedTaskCount: number;
public readonly minSucceededTaskCount: number;
......@@ -26,7 +25,7 @@ export class FrameworkControllerTrialConfigTemplate extends KubernetesTrialConfi
public readonly frameworkAttemptCompletionPolicy: FrameworkAttemptCompletionPolicy;
public readonly name: string;
public readonly taskNum: number;
constructor(taskNum: number, command : string, gpuNum : number,
constructor(taskNum: number, command: string, gpuNum: number,
cpuNum: number, memoryMB: number, image: string,
frameworkAttemptCompletionPolicy: FrameworkAttemptCompletionPolicy, privateRegistryFilePath?: string | undefined) {
super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryFilePath);
......@@ -54,7 +53,6 @@ export class FrameworkControllerClusterConfig extends KubernetesClusterConfig {
}
}
// tslint:disable:function-name
export class FrameworkControllerClusterConfigNFS extends KubernetesClusterConfigNFS {
public readonly serviceAccountName: string;
constructor(
......
......@@ -17,7 +17,7 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
}
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> {
kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
if (!this.statusesNeedToCheck.includes(kubernetesTrialJob.status)) {
return Promise.resolve();
}
......@@ -26,7 +26,6 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
return Promise.reject('kubernetesCRDClient is undefined');
}
// tslint:disable-next-line:no-any
let kubernetesJobInfo: any;
try {
kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName);
......@@ -37,9 +36,9 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
return Promise.resolve();
}
// tslint:disable: no-unsafe-any
if (kubernetesJobInfo.status && kubernetesJobInfo.status.state) {
const frameworkJobType: FrameworkControllerJobStatus = <FrameworkControllerJobStatus>kubernetesJobInfo.status.state;
/* eslint-disable require-atomic-updates */
switch (frameworkJobType) {
case 'AttemptCreationPending':
case 'AttemptCreationRequested':
......@@ -52,8 +51,8 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
kubernetesTrialJob.startTime = Date.parse(<string>kubernetesJobInfo.status.startTime);
}
break;
case 'Completed':
const completedJobType : FrameworkControllerJobCompleteStatus =
case 'Completed': {
const completedJobType: FrameworkControllerJobCompleteStatus =
<FrameworkControllerJobCompleteStatus>kubernetesJobInfo.status.attemptStatus.completionStatus.type.name;
switch (completedJobType) {
case 'Succeeded':
......@@ -66,11 +65,12 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
}
kubernetesTrialJob.endTime = Date.parse(<string>kubernetesJobInfo.status.completionTime);
break;
}
default:
}
/* eslint-enable require-atomic-updates */
}
return Promise.resolve();
}
// tslint:enable: no-unsafe-any
}
......@@ -15,7 +15,6 @@ import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../../common/containerJobData';
import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey';
import { validateCodeDir } from '../../common/util';
import { AzureStorageClientUtility } from '../azureStorageClientUtils';
import { NFSConfig } from '../kubernetesConfig';
import { KubernetesTrialJobDetail } from '../kubernetesData';
import { KubernetesTrainingService } from '../kubernetesTrainingService';
......@@ -102,7 +101,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
this.trialJobsMap.set(trialJobId, trialJobDetail);
// Create frameworkcontroller job based on generated frameworkcontroller job resource config
// tslint:disable-next-line:no-any
const frameworkcontrollerJobConfig: any = await this.prepareFrameworkControllerConfig(
trialJobId, trialWorkingFolder, frameworkcontrollerJobName);
await this.kubernetesCRDClient.createKubernetesJob(frameworkcontrollerJobConfig);
......@@ -113,13 +111,12 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
return Promise.resolve(trialJobDetail);
}
// tslint:disable:no-redundant-jsdoc no-any no-unsafe-any
public async setClusterMetadata(key: string, value: string): Promise<void> {
switch (key) {
case TrialConfigMetadataKey.NNI_MANAGER_IP:
this.nniManagerIpConfig = <NNIManagerIpConfig>JSON.parse(value);
break;
case TrialConfigMetadataKey.FRAMEWORKCONTROLLER_CLUSTER_CONFIG:
case TrialConfigMetadataKey.FRAMEWORKCONTROLLER_CLUSTER_CONFIG: {
const frameworkcontrollerClusterJsonObject: any = JSON.parse(value);
this.fcClusterConfig = FrameworkControllerClusterConfigFactory
.generateFrameworkControllerClusterConfig(frameworkcontrollerClusterJsonObject);
......@@ -130,9 +127,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
this.azureStorageShare = azureFrameworkControllerClusterConfig.azureStorage.azureShare;
await this.createAzureStorage(
azureFrameworkControllerClusterConfig.keyVault.vaultName,
azureFrameworkControllerClusterConfig.keyVault.name,
azureFrameworkControllerClusterConfig.azureStorage.accountName,
azureFrameworkControllerClusterConfig.azureStorage.azureShare
azureFrameworkControllerClusterConfig.keyVault.name
);
} else if (this.fcClusterConfig.storageType === 'nfs') {
const nfsFrameworkControllerClusterConfig: FrameworkControllerClusterConfigNFS =
......@@ -144,7 +139,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
this.kubernetesCRDClient = FrameworkControllerClientFactory.createClient();
break;
case TrialConfigMetadataKey.TRIAL_CONFIG:
}
case TrialConfigMetadataKey.TRIAL_CONFIG: {
const frameworkcontrollerTrialJsonObjsect: any = JSON.parse(value);
this.fcTrialConfig = new FrameworkControllerTrialConfig(
......@@ -161,6 +157,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
return Promise.reject(new Error(error));
}
break;
}
case TrialConfigMetadataKey.VERSION_CHECK:
this.versionCheck = (value === 'true' || value === 'True');
break;
......@@ -172,7 +169,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
return Promise.resolve();
}
// tslint:enable: no-any no-unsafe-any
/**
* upload code files to nfs or azureStorage
......@@ -237,7 +233,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
await cpp.exec(`mkdir -p ${trialLocalTempFolder}`);
const installScriptContent : string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
const installScriptContent: string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
// Write NNI installation file to local tmp files
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), installScriptContent, { encoding: 'utf8' });
// Create tmp trial working folder locally.
......@@ -251,14 +247,12 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
// Write file content ( parameter.cfg ) to local tmp folders
const trialForm : TrialJobApplicationForm = (<TrialJobApplicationForm>form);
if (form !== undefined) {
await fs.promises.writeFile(path.join(trialLocalTempFolder, generateParamFileName(form.hyperParameters)),
form.hyperParameters.value, { encoding: 'utf8' });
}
}
// tslint:disable: no-any no-unsafe-any
private async prepareFrameworkControllerConfig(trialJobId: string, trialWorkingFolder: string, frameworkcontrollerJobName: string):
Promise<any> {
......@@ -266,7 +260,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
throw new Error('frameworkcontroller trial config is not initialized');
}
const podResources : any = [];
const podResources: any = [];
for (const taskRole of this.fcTrialConfig.taskRoles) {
const resource: any = {};
resource.requests = this.generatePodResource(taskRole.memoryMB, taskRole.cpuNum, taskRole.gpuNum);
......@@ -300,7 +294,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
* @param podResources pod template
*/
private async generateFrameworkControllerJobConfig(trialJobId: string, trialWorkingFolder: string,
frameworkcontrollerJobName : string, podResources : any) : Promise<any> {
frameworkcontrollerJobName: string, podResources: any): Promise<any> {
if (this.fcClusterConfig === undefined) {
throw new Error('frameworkcontroller Cluster config is not initialized');
}
......@@ -424,7 +418,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}]
}];
let spec: any = {
const spec: any = {
containers: containers,
initContainers: initContainers,
restartPolicy: 'OnFailure',
......@@ -449,7 +443,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
};
}
// tslint:enable: no-any no-unsafe-any
}
export { FrameworkControllerTrainingService };
......@@ -8,7 +8,6 @@ import { GeneralK8sClient, KubernetesCRDClient } from '../kubernetesApiClient';
import { KubeflowOperator } from './kubeflowConfig';
// tslint:disable: no-unsafe-any no-any completed-docs
class TFOperatorClientV1Alpha2 extends KubernetesCRDClient {
/**
* constructor, to initialize tfjob CRD definition
......@@ -130,7 +129,6 @@ class KubeflowOperatorClientFactory {
/**
* Factory method to generate operator client
*/
// tslint:disable-next-line:function-name
public static createClient(kubeflowOperator: KubeflowOperator, operatorApiVersion: string): KubernetesCRDClient {
switch (kubeflowOperator) {
case 'tf-operator': {
......@@ -169,5 +167,4 @@ class KubeflowOperatorClientFactory {
}
}
// tslint:enable: no-unsafe-any
export { KubeflowOperatorClientFactory, GeneralK8sClient };
......@@ -26,7 +26,6 @@ export class KubeflowClusterConfig extends KubernetesClusterConfig {
}
}
// tslint:disable:completed-docs
export class KubeflowClusterConfigNFS extends KubernetesClusterConfigNFS {
public readonly operator: KubeflowOperator;
constructor(
......@@ -43,7 +42,6 @@ export class KubeflowClusterConfigNFS extends KubernetesClusterConfigNFS {
return 'nfs';
}
// tslint:disable-next-line:function-name
public static getInstance(jsonObject: object): KubeflowClusterConfigNFS {
const kubeflowClusterConfigObjectNFS: KubeflowClusterConfigNFS = <KubeflowClusterConfigNFS>jsonObject;
assert (kubeflowClusterConfigObjectNFS !== undefined);
......@@ -75,7 +73,6 @@ export class KubeflowClusterConfigAzure extends KubernetesClusterConfigAzure {
return 'azureStorage';
}
// tslint:disable-next-line:function-name
public static getInstance(jsonObject: object): KubeflowClusterConfigAzure {
const kubeflowClusterConfigObjectAzure: KubeflowClusterConfigAzure = <KubeflowClusterConfigAzure>jsonObject;
......@@ -91,7 +88,6 @@ export class KubeflowClusterConfigAzure extends KubernetesClusterConfigAzure {
export class KubeflowClusterConfigFactory {
// tslint:disable-next-line:function-name
public static generateKubeflowClusterConfig(jsonObject: object): KubeflowClusterConfig {
const storageConfig: StorageConfig = <StorageConfig>jsonObject;
if (storageConfig === undefined) {
......@@ -118,7 +114,7 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig {
export class KubeflowTrialConfigTemplate extends KubernetesTrialConfigTemplate {
public readonly replicas: number;
constructor(replicas: number, command : string, gpuNum : number,
constructor(replicas: number, command: string, gpuNum: number,
cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) {
super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryAuthPath);
this.replicas = replicas;
......@@ -156,8 +152,6 @@ export class KubeflowTrialConfigPytorch extends KubeflowTrialConfig {
}
export class KubeflowTrialConfigFactory {
// tslint:disable-next-line:function-name
public static generateKubeflowTrialConfig(jsonObject: object, operator: KubeflowOperator): KubeflowTrialConfig {
if (operator === 'tf-operator') {
const kubeflowTrialConfigObject: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>jsonObject;
......
......@@ -17,7 +17,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
}
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> {
kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
if (!this.statusesNeedToCheck.includes(kubernetesTrialJob.status)) {
return Promise.resolve();
}
......@@ -26,7 +26,6 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
return Promise.reject('kubernetesCRDClient is undefined');
}
// tslint:disable:no-any no-unsafe-any
let kubernetesJobInfo: any;
try {
kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName);
......@@ -37,10 +36,10 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
//This is not treated as an error status
return Promise.resolve();
}
/* eslint-disable require-atomic-updates */
if (kubernetesJobInfo.status && kubernetesJobInfo.status.conditions) {
const latestCondition: any = kubernetesJobInfo.status.conditions[kubernetesJobInfo.status.conditions.length - 1];
const tfJobType : KubeflowJobStatus = <KubeflowJobStatus>latestCondition.type;
const tfJobType: KubeflowJobStatus = <KubeflowJobStatus>latestCondition.type;
switch (tfJobType) {
case 'Created':
kubernetesTrialJob.status = 'WAITING';
......@@ -63,7 +62,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
default:
}
}
// tslint:enable:no-any no-unsafe-any
/* eslint-enable require-atomic-updates */
return Promise.resolve();
}
......
......@@ -17,7 +17,6 @@ import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../../common/containerJobData';
import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey';
import { validateCodeDir } from '../../common/util';
import { AzureStorageClientUtility } from '../azureStorageClientUtils';
import { NFSConfig } from '../kubernetesConfig';
import { KubernetesTrialJobDetail } from '../kubernetesData';
import { KubernetesTrainingService } from '../kubernetesTrainingService';
......@@ -28,7 +27,6 @@ import { KubeflowClusterConfig, KubeflowClusterConfigAzure, KubeflowClusterConfi
import { KubeflowJobInfoCollector } from './kubeflowJobInfoCollector';
import { KubeflowJobRestServer } from './kubeflowJobRestServer';
// tslint:disable: no-unsafe-any no-any
/**
* Training Service implementation for Kubeflow
* Refer https://github.com/kubeflow/kubeflow for more info about Kubeflow
......@@ -109,14 +107,13 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
return Promise.resolve(trialJobDetail);
}
// tslint:disable:no-redundant-jsdoc
public async setClusterMetadata(key: string, value: string): Promise<void> {
switch (key) {
case TrialConfigMetadataKey.NNI_MANAGER_IP:
this.nniManagerIpConfig = <NNIManagerIpConfig>JSON.parse(value);
break;
case TrialConfigMetadataKey.KUBEFLOW_CLUSTER_CONFIG:
case TrialConfigMetadataKey.KUBEFLOW_CLUSTER_CONFIG: {
const kubeflowClusterJsonObject: object = JSON.parse(value);
this.kubeflowClusterConfig = KubeflowClusterConfigFactory.generateKubeflowClusterConfig(kubeflowClusterJsonObject);
if (this.kubeflowClusterConfig.storageType === 'azureStorage') {
......@@ -125,9 +122,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.azureStorageShare = azureKubeflowClusterConfig.azureStorage.azureShare;
await this.createAzureStorage(
azureKubeflowClusterConfig.keyVault.vaultName,
azureKubeflowClusterConfig.keyVault.name,
azureKubeflowClusterConfig.azureStorage.accountName,
azureKubeflowClusterConfig.azureStorage.azureShare
azureKubeflowClusterConfig.keyVault.name
);
} else if (this.kubeflowClusterConfig.storageType === 'nfs') {
const nfsKubeflowClusterConfig: KubeflowClusterConfigNFS = <KubeflowClusterConfigNFS>this.kubeflowClusterConfig;
......@@ -139,8 +134,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.kubernetesCRDClient = KubeflowOperatorClientFactory.createClient(
this.kubeflowClusterConfig.operator, this.kubeflowClusterConfig.apiVersion);
break;
case TrialConfigMetadataKey.TRIAL_CONFIG:
}
case TrialConfigMetadataKey.TRIAL_CONFIG: {
if (this.kubeflowClusterConfig === undefined) {
this.log.error('kubeflow cluster config is not initialized');
......@@ -163,6 +158,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
return Promise.reject(new Error(error));
}
break;
}
case TrialConfigMetadataKey.VERSION_CHECK:
this.versionCheck = (value === 'true' || value === 'True');
break;
......@@ -235,7 +231,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
//create tmp trial working folder locally.
await cpp.exec(`mkdir -p ${trialLocalTempFolder}`);
const runScriptContent : string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
const runScriptContent: string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
// Write NNI installation file to local tmp files
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'install_nni.sh'), runScriptContent, { encoding: 'utf8' });
......@@ -293,14 +289,14 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
throw Error(`operator ${this.kubeflowClusterConfig.operator} is invalid`);
}
const workerPodResources : any = {};
const workerPodResources: any = {};
if (kubeflowTrialConfig.worker !== undefined) {
workerPodResources.requests = this.generatePodResource(kubeflowTrialConfig.worker.memoryMB, kubeflowTrialConfig.worker.cpuNum,
kubeflowTrialConfig.worker.gpuNum);
}
workerPodResources.limits = {...workerPodResources.requests};
const nonWorkerResources : any = {};
const nonWorkerResources: any = {};
if (this.kubeflowClusterConfig.operator === 'tf-operator') {
const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
if (tensorflowTrialConfig.ps !== undefined) {
......@@ -330,8 +326,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param workerPodResources worker pod template
* @param nonWorkerPodResources non-worker pod template, like ps or master
*/
private async generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName : string, workerPodResources : any,
nonWorkerPodResources?: any) : Promise<any> {
private async generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName: string, workerPodResources: any,
nonWorkerPodResources?: any): Promise<any> {
if (this.kubeflowClusterConfig === undefined) {
throw new Error('Kubeflow Cluster config is not initialized');
}
......@@ -348,11 +344,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
const replicaSpecsObjMap: Map<string, object> = new Map<string, object>();
if (this.kubeflowTrialConfig.operatorType === 'tf-operator') {
const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
let privateRegistrySecretName = await this.createRegistrySecret(tensorflowTrialConfig.worker.privateRegistryAuthPath);
const privateRegistrySecretName = await this.createRegistrySecret(tensorflowTrialConfig.worker.privateRegistryAuthPath);
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.worker.replicas,
tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName);
if (tensorflowTrialConfig.ps !== undefined) {
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(tensorflowTrialConfig.ps.privateRegistryAuthPath);
const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(tensorflowTrialConfig.ps.privateRegistryAuthPath);
replicaSpecsObj.Ps = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.ps.replicas,
tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources, privateRegistrySecretName);
}
......@@ -360,11 +356,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
} else if (this.kubeflowTrialConfig.operatorType === 'pytorch-operator') {
const pytorchTrialConfig: KubeflowTrialConfigPytorch = <KubeflowTrialConfigPytorch>this.kubeflowTrialConfig;
if (pytorchTrialConfig.worker !== undefined) {
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.worker.privateRegistryAuthPath);
const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.worker.privateRegistryAuthPath);
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.worker.replicas,
pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName);
}
let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.master.privateRegistryAuthPath);
const privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.master.privateRegistryAuthPath);
replicaSpecsObj.Master = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.master.replicas,
pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources, privateRegistrySecretName);
......@@ -448,7 +444,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
resources: podResources
}
]);
let spec: any = {
const spec: any = {
containers: containersSpecMap.get('containers'),
restartPolicy: 'ExitCode',
volumes: volumeSpecMap.get('nniVolumes')
......@@ -463,7 +459,6 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
replicas: replicaNumber,
template: {
metadata: {
// tslint:disable-next-line:no-null-keyword
creationTimestamp: null
},
spec: spec
......@@ -471,5 +466,4 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
}
}
// tslint:enable: no-unsafe-any no-any
export { KubeflowTrainingService };
......@@ -3,13 +3,13 @@
'use strict';
// eslint-disable-next-line @typescript-eslint/camelcase
import { Client1_10, config } from 'kubernetes-client';
import { getLogger, Logger } from '../../common/log';
/**
* Generic Kubernetes client, target version >= 1.9
*/
// tslint:disable: no-any no-unsafe-any
class GeneralK8sClient {
protected readonly client: any;
protected readonly log: Logger = getLogger();
......@@ -21,7 +21,7 @@ class GeneralK8sClient {
public async createSecret(secretManifest: any): Promise<boolean> {
let result: Promise<boolean>;
const response : any = await this.client.api.v1.namespaces('default').secrets
const response: any = await this.client.api.v1.namespaces('default').secrets
.post({body: secretManifest});
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(true);
......@@ -73,7 +73,7 @@ abstract class KubernetesCRDClient {
public async createKubernetesJob(jobManifest: any): Promise<boolean> {
let result: Promise<boolean>;
const response : any = await this.operator.post({body: jobManifest});
const response: any = await this.operator.post({body: jobManifest});
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(true);
} else {
......@@ -86,7 +86,7 @@ abstract class KubernetesCRDClient {
//TODO : replace any
public async getKubernetesJob(kubeflowJobName: string): Promise<any> {
let result: Promise<any>;
const response : any = await this.operator(kubeflowJobName)
const response: any = await this.operator(kubeflowJobName)
.get();
if (response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(response.body);
......@@ -104,7 +104,7 @@ abstract class KubernetesCRDClient {
.map((labelKey: string) => `${labelKey}=${labels.get(labelKey)}`)
.join(',');
try {
const deleteResult : any = await this.operator()
const deleteResult: any = await this.operator()
.delete({
qs: {
labelSelector: matchQuery,
......
......@@ -6,7 +6,6 @@
export type KubernetesStorageKind = 'nfs' | 'azureStorage';
import { MethodNotImplementedError } from '../../common/errors';
// tslint:disable: completed-docs function-name
export abstract class KubernetesClusterConfig {
public readonly storage?: KubernetesStorageKind;
public readonly apiVersion: string;
......@@ -91,7 +90,6 @@ export class KubernetesClusterConfigAzure extends KubernetesClusterConfig {
}
}
// tslint:disable-next-line:no-unnecessary-class
export class KubernetesClusterConfigFactory {
public static generateKubernetesClusterConfig(jsonObject: object): KubernetesClusterConfig {
......@@ -113,11 +111,11 @@ export class KubernetesClusterConfigFactory {
*/
export class NFSConfig {
// IP address of NFS server
public readonly server : string;
public readonly server: string;
// exported NFS path on NFS server
public readonly path : string;
public readonly path: string;
constructor(server : string, path : string) {
constructor(server: string, path: string) {
this.server = server;
this.path = path;
}
......@@ -129,11 +127,11 @@ export class NFSConfig {
*/
export class KeyVaultConfig {
// The vault-name to specify vault
public readonly vaultName : string;
public readonly vaultName: string;
// The name to specify private key
public readonly name : string;
public readonly name: string;
constructor(vaultName : string, name : string) {
constructor(vaultName: string, name: string) {
this.vaultName = vaultName;
this.name = name;
}
......@@ -144,11 +142,11 @@ export class KeyVaultConfig {
*/
export class AzureStorage {
// The azure share used to store files
public readonly azureShare : string;
public readonly azureShare: string;
// The account name of the storage service
public readonly accountName: string;
constructor(azureShare : string, accountName: string) {
constructor(azureShare: string, accountName: string) {
this.azureShare = azureShare;
this.accountName = accountName;
}
......@@ -171,12 +169,12 @@ export class KubernetesTrialConfigTemplate {
public readonly privateRegistryAuthPath?: string;
// Trial command
public readonly command : string;
public readonly command: string;
// Required GPU number for trial job. The number should be in [0,100]
public readonly gpuNum : number;
public readonly gpuNum: number;
constructor(command : string, gpuNum : number,
constructor(command: string, gpuNum: number,
cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) {
this.command = command;
this.gpuNum = gpuNum;
......
......@@ -14,7 +14,7 @@ import { KubernetesTrialJobDetail } from './kubernetesData';
* Collects Kubeflow job info from the Kubernetes cluster and updates Kubeflow job status locally
*/
export class KubernetesJobInfoCollector {
protected readonly trialJobsMap : Map<string, KubernetesTrialJobDetail>;
protected readonly trialJobsMap: Map<string, KubernetesTrialJobDetail>;
protected readonly log: Logger = getLogger();
protected readonly statusesNeedToCheck: TrialJobStatus[];
......@@ -23,9 +23,9 @@ export class KubernetesJobInfoCollector {
this.statusesNeedToCheck = ['RUNNING', 'WAITING'];
}
public async retrieveTrialStatus(kubernetesCRDClient: KubernetesCRDClient | undefined) : Promise<void> {
public async retrieveTrialStatus(kubernetesCRDClient: KubernetesCRDClient | undefined): Promise<void> {
assert(kubernetesCRDClient !== undefined);
const updateKubernetesTrialJobs : Promise<void>[] = [];
const updateKubernetesTrialJobs: Promise<void>[] = [];
for (const [trialJobId, kubernetesTrialJob] of this.trialJobsMap) {
if (kubernetesTrialJob === undefined) {
throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
......@@ -41,7 +41,7 @@ export class KubernetesJobInfoCollector {
}
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> {
kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
throw new MethodNotImplementedError();
}
}
......@@ -25,8 +25,7 @@ export class KubernetesJobRestServer extends ClusterJobRestServer {
this.kubernetesTrainingService = kubernetesTrainingService;
}
// tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId : string, metrics : any[]) : void {
protected handleTrialMetrics(jobId: string, metrics: any[]): void {
if (this.kubernetesTrainingService === undefined) {
throw Error('kubernetesTrainingService not initialized!');
}
......
......@@ -22,8 +22,7 @@ import { KubernetesClusterConfig } from './kubernetesConfig';
import { kubernetesScriptFormat, KubernetesTrialJobDetail } from './kubernetesData';
import { KubernetesJobRestServer } from './kubernetesJobRestServer';
var yaml = require('js-yaml');
var fs = require('fs');
const fs = require('fs');
/**
* Training Service implementation for Kubernetes
......@@ -36,7 +35,7 @@ abstract class KubernetesTrainingService {
// experiment root dir in NFS
protected readonly trialLocalNFSTempFolder: string;
protected stopping: boolean = false;
protected experimentId! : string;
protected experimentId!: string;
protected kubernetesRestServerPort?: number;
protected readonly CONTAINER_MOUNT_PATH: string;
protected azureStorageClient?: azureStorage.FileService;
......@@ -62,14 +61,18 @@ abstract class KubernetesTrainingService {
this.logCollection = 'none';
}
// tslint:disable:no-any
public generatePodResource(memory: number, cpuNum: number, gpuNum: number): any {
return {
const resources: any = {
memory: `${memory}Mi`,
cpu: `${cpuNum}`,
'nvidia.com/gpu': `${gpuNum}`
cpu: `${cpuNum}`
};
} // tslint:enable:no-any
if (gpuNum !== 0) {
resources['nvidia.com/gpu'] = `${gpuNum}`;
}
return resources;
}
public async listTrialJobs(): Promise<TrialJobDetail[]> {
const jobs: TrialJobDetail[] = [];
......@@ -108,12 +111,12 @@ abstract class KubernetesTrainingService {
return Promise.resolve('');
}
public get MetricsEmitter() : EventEmitter {
public get MetricsEmitter(): EventEmitter {
return this.metricsEmitter;
}
public async cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> {
const trialJobDetail : KubernetesTrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
const trialJobDetail: KubernetesTrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
if (trialJobDetail === undefined) {
const errorMessage: string = `CancelTrialJob: trial job id ${trialJobId} not found`;
this.log.error(errorMessage);
......@@ -193,7 +196,6 @@ abstract class KubernetesTrainingService {
await this.kubernetesJobRestServer.stop();
this.log.info('Kubernetes Training service rest server stopped successfully.');
} catch (error) {
// tslint:disable-next-line: no-unsafe-any
this.log.error(`Kubernetes Training service rest server stopped failed, error: ${error.message}`);
return Promise.reject(error);
......@@ -202,8 +204,7 @@ abstract class KubernetesTrainingService {
return Promise.resolve();
}
// tslint:disable: no-unsafe-any no-any
protected async createAzureStorage(vaultName: string, valutKeyName: string, accountName: string, azureShare: string): Promise<void> {
protected async createAzureStorage(vaultName: string, valutKeyName: string): Promise<void> {
try {
const result: any = await cpp.exec(`az keyvault secret show --name ${valutKeyName} --vault-name ${vaultName}`);
if (result.stderr) {
......@@ -249,7 +250,6 @@ abstract class KubernetesTrainingService {
return Promise.resolve();
}
// tslint:enable: no-unsafe-any no-any
/**
* Generate run script for different roles (like worker or ps)
......@@ -265,9 +265,8 @@ abstract class KubernetesTrainingService {
// Refer https://github.com/NVIDIA/k8s-device-plugin/issues/61
// So we have to explicitly set CUDA_VISIBLE_DEVICES to empty if user sets gpuNum to 0 in NNI config file
if (gpuNum === 0) {
nvidiaScript = `export CUDA_VISIBLE_DEVICES='0'`;
nvidiaScript = 'export CUDA_VISIBLE_DEVICES=';
}
// tslint:disable-next-line: strict-boolean-expressions
const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address();
const version: string = this.versionCheck ? await getVersion() : '';
const runScript: string = String.Format(
......@@ -307,8 +306,8 @@ abstract class KubernetesTrainingService {
if(filePath === undefined || filePath === '') {
return undefined;
}
let body = fs.readFileSync(filePath).toString('base64');
let registrySecretName = String.Format('nni-secret-{0}', uniqueString(8)
const body = fs.readFileSync(filePath).toString('base64');
const registrySecretName = String.Format('nni-secret-{0}', uniqueString(8)
.toLowerCase());
await this.genericK8sClient.createSecret(
{
......@@ -331,7 +330,7 @@ abstract class KubernetesTrainingService {
return registrySecretName;
}
protected async uploadFilesToAzureStorage(trialJobId: string, trialLocalTempFolder: String, codeDir: String, uploadRetryCount: number | undefined): Promise<string> {
protected async uploadFilesToAzureStorage(trialJobId: string, trialLocalTempFolder: string, codeDir: string, uploadRetryCount: number | undefined): Promise<string> {
if (this.azureStorageClient === undefined) {
throw new Error('azureStorageClient is not initialized');
}
......
......@@ -4,11 +4,9 @@
'use strict';
import * as cpp from 'child-process-promise';
import * as cp from 'child_process';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { String } from 'typescript-string-operations';
import { getLogger, Logger } from '../../common/log';
import { delay } from '../../common/utils';
import { GPUInfo, GPUSummary } from '../common/gpuData';
......@@ -88,7 +86,6 @@ class GPUScheduler {
runGpuMetricsCollector(this.gpuMetricCollectorScriptFolder);
}
// tslint:disable:non-literal-fs-path
private async updateGPUSummary(): Promise<void> {
const gpuMetricPath: string = path.join(this.gpuMetricCollectorScriptFolder, 'gpu_metrics');
if (fs.existsSync(gpuMetricPath)) {
......
......@@ -31,7 +31,6 @@ import { GPUScheduler } from './gpuScheduler';
* success: true if the buffer contains at least one complete command; otherwise false
* remain: remaining data after the first command
*/
// tslint:disable:newline-per-chained-call informative-docs
function decodeCommand(data: Buffer): [boolean, string, string, Buffer] {
if (data.length < 8) {
return [false, '', '', data];
......@@ -46,7 +45,6 @@ function decodeCommand(data: Buffer): [boolean, string, string, Buffer] {
return [true, commandType, content, remain];
}
// tslint:enable:newline-per-chained-call informative-docs
/**
* LocalTrialJobDetail
......@@ -107,7 +105,7 @@ class LocalTrainingService implements TrainingService {
private initialized: boolean;
private stopping: boolean;
private rootDir!: string;
private readonly experimentId! : string;
private readonly experimentId!: string;
private gpuScheduler!: GPUScheduler;
private readonly occupiedGpuIndexNumMap: Map<number, number>;
private designatedGpuIndices!: Set<number>;
......@@ -252,7 +250,6 @@ class LocalTrainingService implements TrainingService {
public async setClusterMetadata(key: string, value: string): Promise<void> {
if (!this.initialized) {
this.rootDir = getExperimentRootDir();
// tslint:disable-next-line:non-literal-fs-path
if (!fs.existsSync(this.rootDir)) {
await cpp.exec(`powershell.exe mkdir ${this.rootDir}`);
}
......@@ -299,7 +296,7 @@ class LocalTrainingService implements TrainingService {
public getClusterMetadata(key: string): Promise<string> {
switch (key) {
case TrialConfigMetadataKey.TRIAL_CONFIG:
case TrialConfigMetadataKey.TRIAL_CONFIG: {
let getResult: Promise<string>;
if (this.localTrialConfig === undefined) {
getResult = Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, `${key} is never set yet`));
......@@ -308,6 +305,7 @@ class LocalTrainingService implements TrainingService {
}
return getResult;
}
default:
return Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, 'Key not found'));
}
......@@ -523,8 +521,8 @@ class LocalTrainingService implements TrainingService {
await this.writeParameterFile(trialJobDetail.workingDirectory, trialJobDetail.form.hyperParameters);
const trialJobProcess: cp.ChildProcess = runScript(path.join(trialJobDetail.workingDirectory, scriptName));
this.setTrialJobStatus(trialJobDetail, 'RUNNING');
trialJobDetail.startTime = Date.now();
trialJobDetail.pid = trialJobProcess.pid;
trialJobDetail.startTime = Date.now(); // eslint-disable-line require-atomic-updates
trialJobDetail.pid = trialJobProcess.pid; // eslint-disable-line require-atomic-updates
this.setExtraProperties(trialJobDetail, resource);
let buffer: Buffer = Buffer.alloc(0);
......
......@@ -17,7 +17,6 @@ export namespace HDFSClientUtility {
* @param hdfsUserName HDFS user name
*/
export function hdfsExpRootDir(hdfsUserName: string): string {
    // Join the per-experiment segments first, then anchor the result with a leading '/'.
    const experimentSubPath: string = unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId());

    return `/${experimentSubPath}`;
}
......@@ -47,11 +46,9 @@ export namespace HDFSClientUtility {
* @param hdfsFilePath hdfs file path(target)
* @param hdfsClient hdfs client
*/
// tslint:disable: no-unsafe-any non-literal-fs-path no-any
export async function copyFileToHdfs(localFilePath : string, hdfsFilePath : string, hdfsClient : any) : Promise<void> {
export async function copyFileToHdfs(localFilePath: string, hdfsFilePath: string, hdfsClient: any): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
// tslint:disable-next-line:non-literal-fs-path
fs.exists(localFilePath, (exists : boolean) => {
fs.exists(localFilePath, (exists: boolean) => {
// Detect if local file exist
if (exists) {
const localFileStream: fs.ReadStream = fs.createReadStream(localFilePath);
......@@ -60,7 +57,7 @@ export namespace HDFSClientUtility {
hdfsFileStream.on('finish', () => {
deferred.resolve();
});
hdfsFileStream.on('error', (err : any) => {
hdfsFileStream.on('error', (err: any) => {
getLogger()
.error(`HDFSCientUtility:copyFileToHdfs, copy file failed, err is ${err.message}`);
deferred.reject(err);
......@@ -82,7 +79,7 @@ export namespace HDFSClientUtility {
* @param hdfsDirectory HDFS directory
* @param hdfsClient HDFS client
*/
export async function copyDirectoryToHdfs(localDirectory : string, hdfsDirectory : string, hdfsClient : any) : Promise<void> {
export async function copyDirectoryToHdfs(localDirectory: string, hdfsDirectory: string, hdfsClient: any): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
// TODO: fs.readdirSync doesn't support ~($HOME)
const fileNameArray: string[] = fs.readdirSync(localDirectory);
......@@ -90,7 +87,6 @@ export namespace HDFSClientUtility {
for (const fileName of fileNameArray) {
const fullFilePath: string = path.join(localDirectory, fileName);
try {
// tslint:disable-next-line:non-literal-fs-path
if (fs.lstatSync(fullFilePath)
.isFile()) {
await copyFileToHdfs(fullFilePath, path.join(hdfsDirectory, fileName), hdfsClient);
......@@ -108,28 +104,51 @@ export namespace HDFSClientUtility {
return deferred.promise;
}
/**
 * Ask the HDFS client whether a path exists, giving up after five seconds.
 *
 * @param hdfsPath path to look up in HDFS
 * @param hdfsClient HDFS client; only its callback-style `exists(path, cb)` method is used here
 * @returns promise resolved with the flag reported by the client, or rejected with
 *          a message string if the client has not answered within the timeout
 */
export async function pathExists(hdfsPath: string, hdfsClient: any): Promise<boolean> {
    const lookup: Deferred<boolean> = new Deferred<boolean>();
    hdfsClient.exists(hdfsPath, (found: boolean) => {
        lookup.resolve(found);
    });

    // Guard promise that never resolves; it only rejects when the 5000 ms timer fires.
    let timer: NodeJS.Timer;
    const timeoutGuard: Promise<boolean> = new Promise<boolean>((_resolve: Function, reject: Function): void => {
        timer = setTimeout(
            () => {
                reject(`Check HDFS path ${hdfsPath} exists timeout`);
            },
            5000
        );
    });

    // Whichever settles first decides the outcome; the timer is cancelled either way.
    const firstSettled: Promise<boolean> = Promise.race([lookup.promise, timeoutGuard]);

    return firstSettled.finally(() => {
        clearTimeout(timer);
    });
}
/**
* Read content from HDFS file
*
* @param hdfsPath HDFS file path
* @param hdfsClient HDFS client
*/
export async function readFileFromHDFS(hdfsPath : string, hdfsClient : any) : Promise<Buffer> {
export async function readFileFromHDFS(hdfsPath: string, hdfsClient: any): Promise<Buffer> {
const deferred: Deferred<Buffer> = new Deferred<Buffer>();
let buffer : Buffer = Buffer.alloc(0);
let buffer: Buffer = Buffer.alloc(0);
const exist : boolean = await pathExists(hdfsPath, hdfsClient);
const exist: boolean = await pathExists(hdfsPath, hdfsClient);
if (!exist) {
deferred.reject(`${hdfsPath} doesn't exists`);
}
const remoteFileStream: any = hdfsClient.createReadStream(hdfsPath);
remoteFileStream.on('error', (err : any) => {
remoteFileStream.on('error', (err: any) => {
// Reject with the error
deferred.reject(err);
});
remoteFileStream.on('data', (chunk : any) => {
remoteFileStream.on('data', (chunk: any) => {
// Concat the data chunk to buffer
buffer = Buffer.concat([buffer, chunk]);
});
......@@ -142,39 +161,16 @@ export namespace HDFSClientUtility {
return deferred.promise;
}
/**
 * Ask the HDFS client whether a path exists, giving up after five seconds.
 *
 * @param hdfsPath path to look up in HDFS
 * @param hdfsClient HDFS client; only its callback-style `exists(path, cb)` method is used here
 * @returns promise resolved with the flag reported by the client, or rejected with
 *          a message string if the client has not answered within the timeout
 */
export async function pathExists(hdfsPath: string, hdfsClient: any): Promise<boolean> {
    const lookup: Deferred<boolean> = new Deferred<boolean>();
    hdfsClient.exists(hdfsPath, (found: boolean) => {
        lookup.resolve(found);
    });

    // Guard promise that never resolves; it only rejects when the 5000 ms timer fires.
    let timer: NodeJS.Timer;
    const timeoutGuard: Promise<boolean> = new Promise<boolean>((_resolve: Function, reject: Function): void => {
        timer = setTimeout(
            () => {
                reject(`Check HDFS path ${hdfsPath} exists timeout`);
            },
            5000
        );
    });

    // Whichever settles first decides the outcome; the timer is cancelled either way.
    const firstSettled: Promise<boolean> = Promise.race([lookup.promise, timeoutGuard]);

    return firstSettled.finally(() => {
        clearTimeout(timer);
    });
}
/**
* Mkdir in HDFS, use default permission 755
*
* @param hdfsPath the path in HDFS. It could be either file or directory
* @param hdfsClient HDFS client
*/
export function mkdir(hdfsPath : string, hdfsClient : any) : Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>();
export function mkdir(hdfsPath: string, hdfsClient: any): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
hdfsClient.mkdir(hdfsPath, (err : any) => {
hdfsClient.mkdir(hdfsPath, (err: any) => {
if (!err) {
deferred.resolve(true);
} else {
......@@ -191,14 +187,14 @@ export namespace HDFSClientUtility {
* @param hdfsPath the path in HDFS. It could be either file or directory
* @param hdfsClient HDFS client
*/
export async function readdir(hdfsPath : string, hdfsClient : any) : Promise<string[]> {
const deferred : Deferred<string[]> = new Deferred<string[]>();
const exist : boolean = await pathExists(hdfsPath, hdfsClient);
export async function readdir(hdfsPath: string, hdfsClient: any): Promise<string[]> {
const deferred: Deferred<string[]> = new Deferred<string[]>();
const exist: boolean = await pathExists(hdfsPath, hdfsClient);
if (!exist) {
deferred.reject(`${hdfsPath} doesn't exists`);
}
hdfsClient.readdir(hdfsPath, (err : any, files : any[]) => {
hdfsClient.readdir(hdfsPath, (err: any, files: any[]) => {
if (err) {
deferred.reject(err);
}
......@@ -215,9 +211,9 @@ export namespace HDFSClientUtility {
* @param hdfsClient HDFS client
* @param recursive Mark if need to delete recursively
*/
export function deletePath(hdfsPath : string, hdfsClient : any, recursive : boolean = true) : Promise<boolean> {
const deferred : Deferred<boolean> = new Deferred<boolean>();
hdfsClient.unlink(hdfsPath, recursive, (err : any) => {
export function deletePath(hdfsPath: string, hdfsClient: any, recursive: boolean = true): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
hdfsClient.unlink(hdfsPath, recursive, (err: any) => {
if (!err) {
deferred.resolve(true);
} else {
......@@ -227,5 +223,4 @@ export namespace HDFSClientUtility {
return deferred.promise;
}
// tslint:enable: no-unsafe-any non-literal-fs-path no-any
}
......@@ -24,7 +24,7 @@ export class PAITaskRole {
//Shared memory for one task in the task role
public readonly shmMB?: number;
//portList to specify the port used in container
public portList?: portListMetaData[];
public portList?: PortListMetaData[];
/**
* Constructor
......@@ -35,8 +35,8 @@ export class PAITaskRole {
* @param gpuNumber GPU number for one task in the task role, no less than 0
* @param command Executable command for tasks in the task role, can not be empty
*/
constructor(name : string, taskNumber : number, cpuNumber : number, memoryMB : number, gpuNumber : number,
command : string, shmMB?: number, portList?: portListMetaData[]) {
constructor(name: string, taskNumber: number, cpuNumber: number, memoryMB: number, gpuNumber: number,
command: string, shmMB?: number, portList?: PortListMetaData[]) {
this.name = name;
this.taskNumber = taskNumber;
this.cpuNumber = cpuNumber;
......@@ -75,8 +75,8 @@ export class PAIJobConfig {
* @param outputDir Output directory on HDFS
* @param taskRoles List of taskRole, one task role at least
*/
constructor(jobName: string, image : string, codeDir : string,
taskRoles : PAITaskRole[], virtualCluster: string, authFile?: string) {
constructor(jobName: string, image: string, codeDir: string,
taskRoles: PAITaskRole[], virtualCluster: string, authFile?: string) {
this.jobName = jobName;
this.image = image;
this.codeDir = codeDir;
......@@ -102,7 +102,7 @@ export class PAIClusterConfig {
* @param host Host IP of PAI Cluster
* @param token PAI token of PAI Cluster
*/
constructor(userName: string, host : string, passWord?: string, token?: string) {
constructor(userName: string, host: string, passWord?: string, token?: string) {
this.userName = userName;
this.passWord = passWord;
this.host = host;
......@@ -113,8 +113,8 @@ export class PAIClusterConfig {
/**
* portList data structure used in PAI taskRole
*/
export class portListMetaData {
public readonly label : string = '';
export class PortListMetaData {
public readonly label: string = '';
public readonly beginAt: number = 0;
public readonly portNumber: number = 0;
}
......@@ -135,10 +135,10 @@ export class NNIPAITrialConfig extends TrialConfig {
//authentication file used for private Docker registry
public authFile?: string;
//portList to specify the port used in container
public portList?: portListMetaData[];
public portList?: PortListMetaData[];
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number,
image: string, virtualCluster?: string, shmMB?: number, authFile?: string, portList?: portListMetaData[]) {
constructor(command: string, codeDir: string, gpuNum: number, cpuNum: number, memoryMB: number,
image: string, virtualCluster?: string, shmMB?: number, authFile?: string, portList?: PortListMetaData[]) {
super(command, codeDir, gpuNum);
this.cpuNum = cpuNum;
this.memoryMB = memoryMB;
......
......@@ -22,7 +22,7 @@ export class PAITrialJobDetail implements TrialJobDetail {
public hdfsLogPath: string;
public isEarlyStopped?: boolean;
constructor(id: string, status: TrialJobStatus, paiJobName : string,
constructor(id: string, status: TrialJobStatus, paiJobName: string,
submitTime: number, workingDirectory: string, form: TrialJobApplicationForm, hdfsLogPath: string) {
this.id = id;
this.status = status;
......@@ -52,6 +52,5 @@ export const PAI_TRIAL_COMMAND_FORMAT: string =
--pai_hdfs_output_dir '{9}' --pai_hdfs_host '{10}' --pai_user_name {11} --nni_hdfs_exp_dir '{12}' --webhdfs_path '/webhdfs/api/v1' \
--nni_manager_version '{13}' --log_collection '{14}'`;
// tslint:disable:no-http-string
/**
 * Format string for an HTTP link into the WebHDFS explorer page.
 * `{0}` and `{1}` are positional placeholders filled in by the caller
 * (presumably the HDFS host and the log path -- confirm at the call site).
 */
export const PAI_LOG_PATH_FORMAT: string = 'http://{0}/webhdfs/explorer.html#{1}';
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment