Commit ae7a72bc authored by Hongarc's avatar Hongarc Committed by Chi Song
Browse files

Remove all whitespace at end of line (#1162)

parent 14c1b31c
......@@ -32,7 +32,7 @@ import { Writable } from 'stream';
/**
* Cluster Job Training service Rest server, provides rest API to support Cluster job metrics update
*
*
*/
@component.Singleton
export abstract class ClusterJobRestServer extends RestServer{
......@@ -52,8 +52,8 @@ export abstract class ClusterJobRestServer extends RestServer{
super();
const basePort: number = getBasePort();
assert(basePort && basePort > 1024);
this.port = basePort + 1;
this.port = basePort + 1;
}
public get clusterRestServerPort(): number {
......@@ -62,11 +62,11 @@ export abstract class ClusterJobRestServer extends RestServer{
}
return this.port;
}
/**
 * Accessor exposing the stored error message.
 * @returns the current value of `this.errorMessage`, which may be `undefined`
 */
public get getErrorMessage(): string | undefined{
return this.errorMessage;
}
/**
 * Setter that stores the given flag in `this.enableVersionCheck`.
 * @param versionCheck value assigned to `this.enableVersionCheck`
 */
public set setEnableVersionCheck(versionCheck: boolean) {
this.enableVersionCheck = versionCheck;
}
......
......@@ -19,12 +19,12 @@
'use strict';
export const CONTAINER_INSTALL_NNI_SHELL_FORMAT: string =
export const CONTAINER_INSTALL_NNI_SHELL_FORMAT: string =
`#!/bin/bash
if python3 -c 'import nni' > /dev/null 2>&1; then
# nni module is already installed, skip
return
else
# Install nni
python3 -m pip install --user --upgrade nni
python3 -m pip install --user --upgrade nni
fi`;
\ No newline at end of file
......@@ -59,7 +59,7 @@ export class GPUSummary {
}
}
export const GPU_INFO_COLLECTOR_FORMAT_LINUX: string =
export const GPU_INFO_COLLECTOR_FORMAT_LINUX: string =
`
#!/bin/bash
export METRIC_OUTPUT_DIR={0}
......@@ -67,7 +67,7 @@ echo $$ >{1}
python3 -m nni_gpu_tool.gpu_metrics_collector
`
export const GPU_INFO_COLLECTOR_FORMAT_WINDOWS: string =
export const GPU_INFO_COLLECTOR_FORMAT_WINDOWS: string =
`
$env:METRIC_OUTPUT_DIR="{0}"
$app = Start-Process "python" -ArgumentList "-m nni_gpu_tool.gpu_metrics_collector" -passthru -NoNewWindow
......
......@@ -34,7 +34,7 @@ import { file } from "../../node_modules/@types/tmp";
/**
* Validate codeDir, calculate file count recursively under codeDir, and throw error if any rule is broken
*
*
* @param codeDir codeDir in nni config file
* @returns file number under codeDir
*/
......@@ -48,9 +48,9 @@ export async function validateCodeDir(codeDir: string) : Promise<number> {
}
if(fileCount && fileCount > 1000) {
const errMessage: string = `Too many files(${fileCount} found}) in ${codeDir},`
const errMessage: string = `Too many files(${fileCount} found}) in ${codeDir},`
+ ` please check if it's a valid code dir`;
throw new Error(errMessage);
throw new Error(errMessage);
}
return fileCount;
......@@ -58,7 +58,7 @@ export async function validateCodeDir(codeDir: string) : Promise<number> {
/**
* create a new directory
* @param directory
* @param directory
*/
export async function execMkdir(directory: string): Promise<void> {
if (process.platform === 'win32') {
......@@ -85,7 +85,7 @@ export async function execCopydir(source: string, destination: string): Promise<
/**
* create a new file
* @param filename
* @param filename
*/
export async function execNewFile(filename: string): Promise<void> {
if (process.platform === 'win32') {
......@@ -110,7 +110,7 @@ export function execScript(filePath: string): cp.ChildProcess {
/**
* output the last line of a file
* @param filePath
* @param filePath
*/
export async function execTail(filePath: string): Promise<cpp.childProcessPromise.Result> {
let cmdresult: cpp.childProcessPromise.Result;
......@@ -124,7 +124,7 @@ export async function execTail(filePath: string): Promise<cpp.childProcessPromis
/**
* delete a directory
* @param directory
* @param directory
*/
export async function execRemove(directory: string): Promise<void> {
if (process.platform === 'win32') {
......@@ -137,7 +137,7 @@ export async function execRemove(directory: string): Promise<void> {
/**
* kill a process
* @param pid
* @param pid
*/
export async function execKill(pid: string): Promise<void> {
if (process.platform === 'win32') {
......@@ -151,7 +151,7 @@ export async function execKill(pid: string): Promise<void> {
/**
* set environment variable
* @param variable
* @returns command string
* @returns command string
*/
export function setEnvironmentVariable(variable: { key: string; value: string }): string {
if (process.platform === 'win32') {
......@@ -191,7 +191,7 @@ export async function tarAdd(tar_path: string, source_path: string): Promise<voi
/**
* generate script file name
* @param fileNamePrefix
* @param fileNamePrefix
*/
export function getScriptName(fileNamePrefix: string): string {
if (process.platform === 'win32') {
......@@ -203,7 +203,7 @@ export function getScriptName(fileNamePrefix: string): string {
/**
* generate script file
* @param gpuMetricCollectorScriptFolder
* @param gpuMetricCollectorScriptFolder
*/
export function getgpuMetricsCollectorScriptContent(gpuMetricCollectorScriptFolder: string): string {
if(process.platform === 'win32') {
......
......@@ -26,11 +26,11 @@ import { getLogger } from '../../common/log';
import { mkDirP } from '../../common/utils';
export namespace AzureStorageClientUtility {
/**
* create azure share
* @param fileServerClient
* @param azureShare
* @param fileServerClient
* @param azureShare
*/
export async function createShare(fileServerClient: any, azureShare: any): Promise<void>{
const deferred: Deferred<void> = new Deferred<void>();
......@@ -44,12 +44,12 @@ export namespace AzureStorageClientUtility {
})
return deferred.promise;
}
/**
* Create a new directory (NOT recursively) in azure file storage.
* @param fileServerClient
* @param azureFoler
* @param azureShare
* @param fileServerClient
* @param azureFoler
* @param azureShare
*/
export async function createDirectory(fileServerClient: any, azureFoler: any, azureShare: any): Promise<void>{
const deferred: Deferred<void> = new Deferred<void>();
......@@ -67,7 +67,7 @@ export namespace AzureStorageClientUtility {
/**
* Create a new directory recursively in azure file storage
* @param fileServerClient
* @param azureDirectory
* @param azureDirectory
*/
export async function createDirectoryRecursive(fileServerClient: any, azureDirectory: any, azureShare: any): Promise<void>{
const deferred: Deferred<void> = new Deferred<void>();
......@@ -81,14 +81,14 @@ export namespace AzureStorageClientUtility {
deferred.resolve();
return deferred.promise;
}
/**
* upload a file to azure storage
* @param fileServerClient
* @param azureDirectory
* @param azureFileName
* @param azureShare
* @param localFilePath
* @param fileServerClient
* @param azureDirectory
* @param azureFileName
* @param azureShare
* @param localFilePath
*/
async function uploadFileToAzure(fileServerClient: any, azureDirectory: any, azureFileName: any, azureShare: any, localFilePath: any): Promise<void>{
const deferred: Deferred<void> = new Deferred<void>();
......@@ -96,20 +96,20 @@ export namespace AzureStorageClientUtility {
if(error){
getLogger().error(`Upload file failed:, ${error}`);
deferred.reject(error);
}else{
}else{
deferred.resolve();
}
})
return deferred.promise;
}
/**
* download a file from azure storage
* @param fileServerClient
* @param azureDirectory
* @param azureFileName
* @param azureShare
* @param localFilePath
* @param fileServerClient
* @param azureDirectory
* @param azureFileName
* @param azureShare
* @param localFilePath
*/
async function downloadFile(fileServerClient: any, azureDirectory: any, azureFileName: any, azureShare: any, localFilePath: any): Promise<void>{
const deferred: Deferred<void> = new Deferred<void>();
......@@ -118,7 +118,7 @@ export namespace AzureStorageClientUtility {
getLogger().error(`Download file failed:, ${error}`);
deferred.reject(error);
}else{
deferred.resolve();
deferred.resolve();
}
})
return deferred.promise;
......@@ -153,13 +153,13 @@ export namespace AzureStorageClientUtility {
deferred.resolve();
return deferred.promise;
}
/**
* download a directory from azure
* @param fileServerClient
* @param azureDirectory
* @param azureShare
* @param localDirectory
* @param fileServerClient
* @param azureDirectory
* @param azureShare
* @param localDirectory
*/
export async function downloadDirectory(fileServerClient: any, azureDirectory:any, azureShare: any, localDirectory: any): Promise<void>{
const deferred: Deferred<void> = new Deferred<void>();
......@@ -184,7 +184,7 @@ export namespace AzureStorageClientUtility {
const fullFilePath: string = path.join(localDirectory, fileName.name);
downloadFile(fileServerClient, azureDirectory, fileName.name, azureShare, fullFilePath)
}
for(var directoryName of result['entries']['directories']){
const fullDirectoryPath: string = path.join(localDirectory, directoryName.name)
const fullAzureDirectory: string = path.join(azureDirectory, directoryName.name)
......
......@@ -47,7 +47,7 @@ class FrameworkControllerClientV1 extends FrameworkControllerClient {
/**
 * @returns the fixed container name `'framework'`
 */
public get containerName(): string {
return 'framework';
}
}
}
export { FrameworkControllerClient, GeneralK8sClient };
......
......@@ -40,8 +40,8 @@ export class FrameworkControllerTrialConfigTemplate extends KubernetesTrialConfi
public readonly frameworkAttemptCompletionPolicy: FrameworkAttemptCompletionPolicy;
public readonly name: string;
public readonly taskNum: number;
constructor(taskNum: number, command : string, gpuNum : number,
cpuNum: number, memoryMB: number, image: string,
constructor(taskNum: number, command : string, gpuNum : number,
cpuNum: number, memoryMB: number, image: string,
frameworkAttemptCompletionPolicy: FrameworkAttemptCompletionPolicy) {
super(command, gpuNum, cpuNum, memoryMB, image);
this.frameworkAttemptCompletionPolicy = frameworkAttemptCompletionPolicy;
......@@ -71,8 +71,8 @@ export class FrameworkControllerClusterConfig extends KubernetesClusterConfig {
export class FrameworkControllerClusterConfigNFS extends KubernetesClusterConfigNFS {
public readonly serviceAccountName: string;
constructor(
serviceAccountName: string,
apiVersion: string,
serviceAccountName: string,
apiVersion: string,
nfs: NFSConfig,
storage?: KubernetesStorageKind
) {
......@@ -94,12 +94,12 @@ export class FrameworkControllerClusterConfigNFS extends KubernetesClusterConfig
export class FrameworkControllerClusterConfigAzure extends KubernetesClusterConfigAzure {
public readonly serviceAccountName: string;
constructor(
serviceAccountName: string,
apiVersion: string,
keyVault: keyVaultConfig,
azureStorage: AzureStorage,
serviceAccountName: string,
apiVersion: string,
keyVault: keyVaultConfig,
azureStorage: AzureStorage,
storage?: KubernetesStorageKind
) {
super(apiVersion, keyVault, azureStorage,storage);
......
......@@ -32,7 +32,7 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
super(jobMap);
}
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> {
if (!this.statusesNeedToCheck.includes(kubernetesTrialJob.status)) {
return Promise.resolve();
......@@ -44,7 +44,7 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
let kubernetesJobInfo: any;
try {
kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName);
kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName);
} catch(error) {
this.log.error(`Get job ${kubernetesTrialJob.kubernetesJobName} info failed, error is ${error}`);
// This is not treated as an error status
......@@ -71,9 +71,9 @@ export class FrameworkControllerJobInfoCollector extends KubernetesJobInfoCollec
break;
case 'Failed':
kubernetesTrialJob.status = 'FAILED';
break;
break;
}
kubernetesTrialJob.endTime = Date.parse(<string>kubernetesJobInfo.status.completionTime);
kubernetesTrialJob.endTime = Date.parse(<string>kubernetesJobInfo.status.completionTime);
break;
default:
break;
......
......@@ -25,11 +25,11 @@ import { KubernetesJobRestServer } from '../kubernetesJobRestServer'
/**
* frameworkcontroller Training service Rest server, provides rest API to support frameworkcontroller job metrics update
*
*
*/
@component.Singleton
export class FrameworkControllerJobRestServer extends KubernetesJobRestServer{
/**
 * Forwards the FrameworkControllerTrainingService instance returned by
 * component.get() to the KubernetesJobRestServer base constructor.
 */
constructor() {
super(component.get(FrameworkControllerTrainingService));
}
}
}
\ No newline at end of file
......@@ -37,7 +37,7 @@ import { KubernetesTrialJobDetail } from '../kubernetesData';
import { validateCodeDir } from '../../common/util';
import { AzureStorageClientUtility } from '../azureStorageClientUtils';
import { KubernetesTrainingService } from '../kubernetesTrainingService';
import { FrameworkControllerTrialConfig, FrameworkControllerClusterConfig, FrameworkControllerClusterConfigAzure, FrameworkControllerClusterConfigNFS,
import { FrameworkControllerTrialConfig, FrameworkControllerClusterConfig, FrameworkControllerClusterConfigAzure, FrameworkControllerClusterConfigNFS,
FrameworkControllerClusterConfigFactory} from './frameworkcontrollerConfig';
import { FrameworkControllerJobRestServer } from './frameworkcontrollerJobRestServer';
import { FrameworkControllerClient } from './frameworkcontrollerApiClient';
......@@ -56,7 +56,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
constructor() {
super();
this.fcJobInfoCollector = new FrameworkControllerJobInfoCollector(this.trialJobsMap);
this.experimentId = getExperimentId();
this.experimentId = getExperimentId();
this.nextTrialSequenceId = -1;
}
......@@ -69,7 +69,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
this.kubernetesJobRestServer.setEnableVersionCheck = this.versionCheck;
this.log.info(`frameworkcontroller Training service rest server listening on: ${this.kubernetesJobRestServer.endPoint}`);
while (!this.stopping) {
// collect metrics for frameworkcontroller jobs by interacting with Kubernetes API server
// collect metrics for frameworkcontroller jobs by interacting with Kubernetes API server
await delay(3000);
await this.fcJobInfoCollector.retrieveTrialStatus(this.kubernetesCRDClient);
if(this.kubernetesJobRestServer.getErrorMessage) {
......@@ -101,10 +101,10 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
//Generate the port used for taskRole
this.generateContainerPort();
await this.prepareRunScript(trialLocalTempFolder, curTrialSequenceId, trialJobId, trialWorkingFolder, form);
//upload code files
let trialJobOutputUrl: string = await this.uploadCodeFiles(trialJobId, trialLocalTempFolder);
const trialJobDetail: KubernetesTrialJobDetail = new KubernetesTrialJobDetail(
trialJobId,
'WAITING',
......@@ -116,14 +116,14 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
trialJobOutputUrl
);
// Set trial job detail until create frameworkcontroller job successfully
// Set trial job detail until create frameworkcontroller job successfully
this.trialJobsMap.set(trialJobId, trialJobDetail);
// Create frameworkcontroller job based on generated frameworkcontroller job resource config
const frameworkcontrollerJobConfig = await this.prepareFrameworkControllerConfig(trialJobId, trialWorkingFolder, frameworkcontrollerJobName);
await this.kubernetesCRDClient.createKubernetesJob(frameworkcontrollerJobConfig);
// Set trial job detail until create frameworkcontroller job successfully
// Set trial job detail until create frameworkcontroller job successfully
this.trialJobsMap.set(trialJobId, trialJobDetail);
return Promise.resolve(trialJobDetail);
......@@ -131,8 +131,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
/**
* upload code files to nfs or azureStorage
* @param trialJobId
* @param trialLocalTempFolder
* @param trialJobId
* @param trialLocalTempFolder
* return: trialJobOutputUrl
*/
private async uploadCodeFiles(trialJobId: string, trialLocalTempFolder: string): Promise<string> {
......@@ -145,7 +145,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
if(this.fcClusterConfig.storageType === 'azureStorage') {
try{
//upload local files to azure storage
await AzureStorageClientUtility.uploadDirectory(this.azureStorageClient,
await AzureStorageClientUtility.uploadDirectory(this.azureStorageClient,
`nni/${getExperimentId()}/${trialJobId}`, this.azureStorageShare, `${trialLocalTempFolder}`);
trialJobOutputUrl = `https://${this.azureStorageAccountName}.file.core.windows.net/${this.azureStorageShare}/${path.join('nni', getExperimentId(), trialJobId, 'output')}`
......@@ -155,21 +155,21 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
} else if(this.fcClusterConfig.storageType === 'nfs') {
let nfsFrameworkControllerClusterConfig: FrameworkControllerClusterConfigNFS = <FrameworkControllerClusterConfigNFS>this.fcClusterConfig;
// Create work dir for current trial in NFS directory
// Create work dir for current trial in NFS directory
await cpp.exec(`mkdir -p ${this.trialLocalNFSTempFolder}/nni/${getExperimentId()}/${trialJobId}`);
// Copy code files from local dir to NFS mounted dir
await cpp.exec(`cp -r ${trialLocalTempFolder}/* ${this.trialLocalNFSTempFolder}/nni/${getExperimentId()}/${trialJobId}/.`);
const nfsConfig: NFSConfig = nfsFrameworkControllerClusterConfig.nfs;
trialJobOutputUrl = `nfs://${nfsConfig.server}:${path.join(nfsConfig.path, 'nni', getExperimentId(), trialJobId, 'output')}`
}
return Promise.resolve(trialJobOutputUrl);
}
/**
* generate trial's command for frameworkcontroller
* expose port and execute injector.sh before executing user's command
* @param command
* @param command
*/
private generateCommandScript(command: string): string {
let portScript = '';
......@@ -181,7 +181,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
return `${portScript} . /mnt/frameworkbarrier/injector.sh && ${command}`;
}
private async prepareRunScript(trialLocalTempFolder: string, curTrialSequenceId: number, trialJobId: string, trialWorkingFolder: string, form: JobApplicationForm): Promise<void> {
if(!this.fcTrialConfig) {
throw new Error('frameworkcontroller trial config is not initialized');
......@@ -196,7 +196,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
await cpp.exec(`mkdir -p ${trialLocalTempFolder}`);
for(let taskRole of this.fcTrialConfig.taskRoles) {
const runScriptContent: string = await this.generateRunScript('frameworkcontroller', trialJobId, trialWorkingFolder,
const runScriptContent: string = await this.generateRunScript('frameworkcontroller', trialJobId, trialWorkingFolder,
this.generateCommandScript(taskRole.command), curTrialSequenceId.toString(), taskRole.name, taskRole.gpuNum);
await fs.promises.writeFile(path.join(trialLocalTempFolder, `run_${taskRole.name}.sh`), runScriptContent, { encoding: 'utf8' });
}
......@@ -204,11 +204,11 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
// Write file content ( parameter.cfg ) to local tmp folders
const trialForm : TrialJobApplicationForm = (<TrialJobApplicationForm>form)
if(trialForm && trialForm.hyperParameters) {
await fs.promises.writeFile(path.join(trialLocalTempFolder, generateParamFileName(trialForm.hyperParameters)),
await fs.promises.writeFile(path.join(trialLocalTempFolder, generateParamFileName(trialForm.hyperParameters)),
trialForm.hyperParameters.value, { encoding: 'utf8' });
}
}
private async prepareFrameworkControllerConfig(trialJobId: string, trialWorkingFolder: string, frameworkcontrollerJobName: string): Promise<any> {
if(!this.fcTrialConfig) {
......@@ -222,18 +222,18 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
resource.limits = Object.assign({}, resource.requests);
podResources.push(resource);
}
// Generate frameworkcontroller job resource config object
// Generate frameworkcontroller job resource config object
const frameworkcontrollerJobConfig: any = this.generateFrameworkControllerJobConfig(trialJobId, trialWorkingFolder, frameworkcontrollerJobName, podResources);
return Promise.resolve(frameworkcontrollerJobConfig);
}
}
public async setClusterMetadata(key: string, value: string): Promise<void> {
switch (key) {
case TrialConfigMetadataKey.NNI_MANAGER_IP:
this.nniManagerIpConfig = <NNIManagerIpConfig>JSON.parse(value);
break;
case TrialConfigMetadataKey.FRAMEWORKCONTROLLER_CLUSTER_CONFIG:
let frameworkcontrollerClusterJsonObject = JSON.parse(value);
this.fcClusterConfig = FrameworkControllerClusterConfigFactory.generateFrameworkControllerClusterConfig(frameworkcontrollerClusterJsonObject);
......@@ -253,7 +253,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
nfsFrameworkControllerClusterConfig.nfs.server,
nfsFrameworkControllerClusterConfig.nfs.path
);
}
}
this.kubernetesCRDClient = FrameworkControllerClient.generateFrameworkControllerClient();
break;
case TrialConfigMetadataKey.TRIAL_CONFIG:
......@@ -269,7 +269,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
await validateCodeDir(this.fcTrialConfig.codeDir);
} catch(error) {
this.log.error(error);
return Promise.reject(new Error(error));
return Promise.reject(new Error(error));
}
break;
case TrialConfigMetadataKey.VERSION_CHECK:
......@@ -284,7 +284,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
return Promise.resolve();
}
private generateContainerPort() {
if(!this.fcTrialConfig) {
throw new Error('frameworkcontroller trial config is not initialized');
......@@ -312,7 +312,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
if(!this.fcTrialConfig) {
throw new Error('frameworkcontroller trial config is not initialized');
}
let taskRoles = [];
for(let index in this.fcTrialConfig.taskRoles) {
let containerPort = this.fcContainerPortMap.get(this.fcTrialConfig.taskRoles[index].name);
......@@ -320,8 +320,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
throw new Error('Container port is not initialized');
}
let taskRole = this.generateTaskRoleConfig(
trialWorkingFolder,
this.fcTrialConfig.taskRoles[index].image,
trialWorkingFolder,
this.fcTrialConfig.taskRoles[index].image,
`run_${this.fcTrialConfig.taskRoles[index].name}.sh`,
podResources[index],
containerPort
......@@ -330,17 +330,17 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
name: this.fcTrialConfig.taskRoles[index].name,
taskNumber: this.fcTrialConfig.taskRoles[index].taskNum,
frameworkAttemptCompletionPolicy: {
minFailedTaskCount: this.fcTrialConfig.taskRoles[index].frameworkAttemptCompletionPolicy.minFailedTaskCount,
minFailedTaskCount: this.fcTrialConfig.taskRoles[index].frameworkAttemptCompletionPolicy.minFailedTaskCount,
minSucceededTaskCount: this.fcTrialConfig.taskRoles[index].frameworkAttemptCompletionPolicy.minSucceededTaskCount
},
task: taskRole
});
}
return {
apiVersion: `frameworkcontroller.microsoft.com/v1`,
kind: 'Framework',
metadata: {
metadata: {
name: frameworkcontrollerJobName,
namespace: 'default',
labels: {
......@@ -356,7 +356,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
};
}
private generateTaskRoleConfig(trialWorkingFolder: string, replicaImage: string, runScriptFile: string, podResources: any, containerPort: number): any {
if(!this.fcClusterConfig) {
......@@ -366,7 +366,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
if(!this.fcTrialConfig) {
throw new Error('frameworkcontroller trial config is not initialized');
}
let volumeSpecMap = new Map<string, object>();
if(this.fcClusterConfig.storageType === 'azureStorage'){
volumeSpecMap.set('nniVolumes', [
......@@ -395,7 +395,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
emptyDir: {}
}])
}
let containers = [
{
name: 'framework',
......@@ -420,7 +420,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
name: 'frameworkbarrier',
image: 'frameworkcontroller/frameworkbarrier',
volumeMounts: [
{
{
name: 'frameworkbarrier-volume',
mountPath: '/mnt/frameworkbarrier'
}]
......@@ -432,8 +432,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
volumes: volumeSpecMap.get('nniVolumes'),
hostNetwork: false
};
if(this.fcClusterConfig.serviceAccountName) {
spec.serviceAccountName = this.fcClusterConfig.serviceAccountName;
if(this.fcClusterConfig.serviceAccountName) {
spec.serviceAccountName = this.fcClusterConfig.serviceAccountName;
}
let taskRole = {
pod: {
......
......@@ -27,7 +27,7 @@ abstract class KubeflowOperatorClient extends KubernetesCRDClient{
/**
* Factory method to generate operator client
*/
public static generateOperatorClient(kubeflowOperator: KubeflowOperator,
public static generateOperatorClient(kubeflowOperator: KubeflowOperator,
operatorApiVersion: string): KubernetesCRDClient {
switch(kubeflowOperator) {
case 'tf-operator': {
......@@ -78,7 +78,7 @@ class TFOperatorClientV1Alpha2 extends KubeflowOperatorClient {
/**
 * @returns the fixed container name `'tensorflow'`
 */
public get containerName(): string {
return 'tensorflow';
}
}
}
class TFOperatorClientV1Beta1 extends KubernetesCRDClient {
......@@ -97,7 +97,7 @@ class TFOperatorClientV1Beta1 extends KubernetesCRDClient {
/**
 * @returns the fixed container name `'tensorflow'`
 */
public get containerName(): string {
return 'tensorflow';
}
}
}
class TFOperatorClientV1Beta2 extends KubernetesCRDClient {
......
......@@ -41,8 +41,8 @@ export class KubeflowClusterConfig extends KubernetesClusterConfig {
export class KubeflowClusterConfigNFS extends KubernetesClusterConfigNFS {
public readonly operator: KubeflowOperator;
constructor(
operator: KubeflowOperator,
apiVersion: string,
operator: KubeflowOperator,
apiVersion: string,
nfs: NFSConfig,
storage?: KubernetesStorageKind
) {
......@@ -68,12 +68,12 @@ export class KubeflowClusterConfigNFS extends KubernetesClusterConfigNFS {
export class KubeflowClusterConfigAzure extends KubernetesClusterConfigAzure{
public readonly operator: KubeflowOperator;
constructor(
operator: KubeflowOperator,
apiVersion: string,
keyVault: keyVaultConfig,
azureStorage: AzureStorage,
operator: KubeflowOperator,
apiVersion: string,
keyVault: keyVaultConfig,
azureStorage: AzureStorage,
storage?: KubernetesStorageKind
) {
super(apiVersion, keyVault, azureStorage,storage);
......@@ -124,7 +124,7 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig {
export class KubeflowTrialConfigTemplate extends KubernetesTrialConfigTemplate{
public readonly replicas: number;
constructor(replicas: number, command : string, gpuNum : number,
constructor(replicas: number, command : string, gpuNum : number,
cpuNum: number, memoryMB: number, image: string) {
super(command, gpuNum, cpuNum, memoryMB, image);
this.replicas = replicas;
......
......@@ -32,7 +32,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector{
super(jobMap);
}
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> {
if (!this.statusesNeedToCheck.includes(kubernetesTrialJob.status)) {
return Promise.resolve();
......@@ -44,9 +44,9 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector{
let kubernetesJobInfo: any;
try {
kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName);
kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName);
} catch(error) {
// Notice: it may not be a 'real' error, since cancelling a trial job can also cause getKubernetesJob to fail.
// Notice: it may not be a 'real' error, since cancelling a trial job can also cause getKubernetesJob to fail.
this.log.error(`Get job ${kubernetesTrialJob.kubernetesJobName} info failed, error is ${error}`);
// This is not treated as an error status
return Promise.resolve();
......@@ -58,8 +58,8 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector{
switch(tfJobType) {
case 'Created':
kubernetesTrialJob.status = 'WAITING';
kubernetesTrialJob.startTime = Date.parse(<string>latestCondition.lastUpdateTime);
break;
kubernetesTrialJob.startTime = Date.parse(<string>latestCondition.lastUpdateTime);
break;
case 'Running':
kubernetesTrialJob.status = 'RUNNING';
if(!kubernetesTrialJob.startTime) {
......@@ -68,11 +68,11 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector{
break;
case 'Failed':
kubernetesTrialJob.status = 'FAILED';
kubernetesTrialJob.endTime = Date.parse(<string>latestCondition.lastUpdateTime);
kubernetesTrialJob.endTime = Date.parse(<string>latestCondition.lastUpdateTime);
break;
case 'Succeeded':
kubernetesTrialJob.status = 'SUCCEEDED';
kubernetesTrialJob.endTime = Date.parse(<string>latestCondition.lastUpdateTime);
kubernetesTrialJob.endTime = Date.parse(<string>latestCondition.lastUpdateTime);
break;
default:
break;
......
......@@ -25,7 +25,7 @@ import { KubernetesJobRestServer } from '../kubernetesJobRestServer'
/**
* Kubeflow Training service Rest server, provides rest API to support kubeflow job metrics update
*
*
*/
@component.Singleton
export class KubeflowJobRestServer extends KubernetesJobRestServer{
......@@ -34,5 +34,5 @@ export class KubeflowJobRestServer extends KubernetesJobRestServer{
*/
constructor() {
super(component.get(KubeflowTrainingService));
}
}
}
\ No newline at end of file
......@@ -57,9 +57,9 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
private kubeflowJobInfoCollector: KubeflowJobInfoCollector;
constructor() {
super();
super();
this.kubeflowJobInfoCollector = new KubeflowJobInfoCollector(this.trialJobsMap);
this.experimentId = getExperimentId();
this.experimentId = getExperimentId();
this.nextTrialSequenceId = -1;
this.log.info('Construct Kubeflow training service.');
}
......@@ -74,7 +74,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
this.kubernetesJobRestServer.setEnableVersionCheck = this.versionCheck;
this.log.info(`Kubeflow Training service rest server listening on: ${this.kubernetesJobRestServer.endPoint}`);
while (!this.stopping) {
// collect metrics for Kubeflow jobs by interacting with Kubernetes API server
// collect metrics for Kubeflow jobs by interacting with Kubernetes API server
await delay(3000);
await this.kubeflowJobInfoCollector.retrieveTrialStatus(this.kubernetesCRDClient);
if(this.kubernetesJobRestServer.getErrorMessage) {
......@@ -113,22 +113,22 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
curTrialSequenceId,
trialJobOutputUrl
);
// Generate kubeflow job resource config object
// Generate kubeflow job resource config object
const kubeflowJobConfig: any = await this.prepareKubeflowConfig(trialJobId, trialWorkingFolder, kubeflowJobName);
// Create kubeflow job based on generated kubeflow job resource config
await this.kubernetesCRDClient.createKubernetesJob(kubeflowJobConfig);
// Set trial job detail until create Kubeflow job successfully
// Set trial job detail until create Kubeflow job successfully
this.trialJobsMap.set(trialJobId, trialJobDetail);
return Promise.resolve(trialJobDetail);
}
/**
* upload code files to nfs or azureStorage
* @param trialJobId
* @param trialLocalTempFolder
* @param trialJobId
* @param trialLocalTempFolder
* return: trialJobOutputUrl
*/
private async uploadCodeFiles(trialJobId: string, trialLocalTempFolder: string): Promise<string> {
......@@ -138,14 +138,14 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
let trialJobOutputUrl: string = '';
assert(!this.kubeflowClusterConfig.storage
|| this.kubeflowClusterConfig.storage === 'azureStorage'
assert(!this.kubeflowClusterConfig.storage
|| this.kubeflowClusterConfig.storage === 'azureStorage'
|| this.kubeflowClusterConfig.storage === 'nfs');
if(this.kubeflowClusterConfig.storage === 'azureStorage') {
try{
//upload local files to azure storage
await AzureStorageClientUtility.uploadDirectory(this.azureStorageClient,
await AzureStorageClientUtility.uploadDirectory(this.azureStorageClient,
`nni/${getExperimentId()}/${trialJobId}`, this.azureStorageShare, `${trialLocalTempFolder}`);
trialJobOutputUrl = `https://${this.azureStorageAccountName}.file.core.windows.net/${this.azureStorageShare}/${path.join('nni', getExperimentId(), trialJobId, 'output')}`
......@@ -155,18 +155,18 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
} else if(this.kubeflowClusterConfig.storage === 'nfs' || this.kubeflowClusterConfig.storage === undefined) {
let nfsKubeflowClusterConfig: KubeflowClusterConfigNFS = <KubeflowClusterConfigNFS>this.kubeflowClusterConfig;
// Create work dir for current trial in NFS directory
// Create work dir for current trial in NFS directory
await cpp.exec(`mkdir -p ${this.trialLocalNFSTempFolder}/nni/${getExperimentId()}/${trialJobId}`);
// Copy code files from local dir to NFS mounted dir
await cpp.exec(`cp -r ${trialLocalTempFolder}/* ${this.trialLocalNFSTempFolder}/nni/${getExperimentId()}/${trialJobId}/.`);
const nfsConfig: NFSConfig = nfsKubeflowClusterConfig.nfs;
trialJobOutputUrl = `nfs://${nfsConfig.server}:${path.join(nfsConfig.path, 'nni', getExperimentId(), trialJobId, 'output')}`
}
return Promise.resolve(trialJobOutputUrl);
}
private async prepareRunScript(trialLocalTempFolder: string, trialJobId: string, trialWorkingFolder: string, curTrialSequenceId: number, form: JobApplicationForm): Promise<void> {
if(!this.kubeflowClusterConfig) {
throw new Error('Kubeflow Cluster config is not initialized');
......@@ -181,7 +181,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}else {
throw Error(`operator ${this.kubeflowClusterConfig.operator} is invalid`)
}
//create tmp trial working folder locally.
await cpp.exec(`mkdir -p ${path.dirname(trialLocalTempFolder)}`);
await cpp.exec(`cp -r ${kubeflowTrialConfig.codeDir} ${trialLocalTempFolder}`);
......@@ -193,7 +193,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
// Write worker file content run_worker.sh to local tmp folders
if(kubeflowTrialConfig.worker) {
const workerRunScriptContent: string = await this.generateRunScript('kubeflow', trialJobId, trialWorkingFolder,
const workerRunScriptContent: string = await this.generateRunScript('kubeflow', trialJobId, trialWorkingFolder,
kubeflowTrialConfig.worker.command, curTrialSequenceId.toString(), 'worker', kubeflowTrialConfig.worker.gpuNum);
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'run_worker.sh'), workerRunScriptContent, { encoding: 'utf8' });
......@@ -202,7 +202,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
if(this.kubeflowClusterConfig.operator === 'tf-operator') {
let tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
if(tensorflowTrialConfig.ps){
const psRunScriptContent: string = await this.generateRunScript('kubeflow', trialJobId, trialWorkingFolder,
const psRunScriptContent: string = await this.generateRunScript('kubeflow', trialJobId, trialWorkingFolder,
tensorflowTrialConfig.ps.command, curTrialSequenceId.toString(), 'ps', tensorflowTrialConfig.ps.gpuNum);
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'run_ps.sh'), psRunScriptContent, { encoding: 'utf8' });
}
......@@ -210,7 +210,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
else if(this.kubeflowClusterConfig.operator === 'pytorch-operator') {
let pytorchTrialConfig: KubeflowTrialConfigPytorch = <KubeflowTrialConfigPytorch>this.kubeflowTrialConfig;
if(pytorchTrialConfig.master){
const masterRunScriptContent: string = await this.generateRunScript('kubeflow', trialJobId, trialWorkingFolder,
const masterRunScriptContent: string = await this.generateRunScript('kubeflow', trialJobId, trialWorkingFolder,
pytorchTrialConfig.master.command, curTrialSequenceId.toString(), 'master', pytorchTrialConfig.master.gpuNum);
await fs.promises.writeFile(path.join(trialLocalTempFolder, 'run_master.sh'), masterRunScriptContent, { encoding: 'utf8' });
}
......@@ -218,11 +218,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
// Write file content ( parameter.cfg ) to local tmp folders
const trialForm : TrialJobApplicationForm = (<TrialJobApplicationForm>form)
if(trialForm && trialForm.hyperParameters) {
await fs.promises.writeFile(path.join(trialLocalTempFolder, generateParamFileName(trialForm.hyperParameters)),
await fs.promises.writeFile(path.join(trialLocalTempFolder, generateParamFileName(trialForm.hyperParameters)),
trialForm.hyperParameters.value, { encoding: 'utf8' });
}
}
private async prepareKubeflowConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName: string): Promise<any> {
if(!this.kubeflowClusterConfig) {
throw new Error('Kubeflow Cluster config is not initialized');
......@@ -241,10 +241,10 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}else {
throw Error(`operator ${this.kubeflowClusterConfig.operator} is invalid`)
}
const workerPodResources : any = {};
if(kubeflowTrialConfig.worker) {
workerPodResources.requests = this.generatePodResource(kubeflowTrialConfig.worker.memoryMB, kubeflowTrialConfig.worker.cpuNum,
workerPodResources.requests = this.generatePodResource(kubeflowTrialConfig.worker.memoryMB, kubeflowTrialConfig.worker.cpuNum,
kubeflowTrialConfig.worker.gpuNum)
}
workerPodResources.limits = Object.assign({}, workerPodResources.requests);
......@@ -253,30 +253,30 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
if(this.kubeflowClusterConfig.operator === 'tf-operator') {
let tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
if (tensorflowTrialConfig.ps) {
nonWorkerResources.requests = this.generatePodResource(tensorflowTrialConfig.ps.memoryMB, tensorflowTrialConfig.ps.cpuNum,
nonWorkerResources.requests = this.generatePodResource(tensorflowTrialConfig.ps.memoryMB, tensorflowTrialConfig.ps.cpuNum,
tensorflowTrialConfig.ps.gpuNum)
nonWorkerResources.limits = Object.assign({}, nonWorkerResources.requests);
nonWorkerResources.limits = Object.assign({}, nonWorkerResources.requests);
}
}else if(this.kubeflowClusterConfig.operator === 'pytorch-operator'){
let pyTorchTrialConfig: KubeflowTrialConfigPytorch = <KubeflowTrialConfigPytorch>this.kubeflowTrialConfig;
nonWorkerResources.requests = this.generatePodResource(pyTorchTrialConfig.master.memoryMB, pyTorchTrialConfig.master.cpuNum,
nonWorkerResources.requests = this.generatePodResource(pyTorchTrialConfig.master.memoryMB, pyTorchTrialConfig.master.cpuNum,
pyTorchTrialConfig.master.gpuNum)
nonWorkerResources.limits = Object.assign({}, nonWorkerResources.requests);
}
nonWorkerResources.limits = Object.assign({}, nonWorkerResources.requests);
// Generate kubeflow job resource config object
}
// Generate kubeflow job resource config object
const kubeflowJobConfig: any = this.generateKubeflowJobConfig(trialJobId, trialWorkingFolder, kubeflowJobName, workerPodResources, nonWorkerResources);
return Promise.resolve(kubeflowJobConfig);
}
}
public async setClusterMetadata(key: string, value: string): Promise<void> {
switch (key) {
case TrialConfigMetadataKey.NNI_MANAGER_IP:
this.nniManagerIpConfig = <NNIManagerIpConfig>JSON.parse(value);
break;
case TrialConfigMetadataKey.KUBEFLOW_CLUSTER_CONFIG:
let kubeflowClusterJsonObject = JSON.parse(value);
this.kubeflowClusterConfig = KubeflowClusterConfigFactory.generateKubeflowClusterConfig(kubeflowClusterJsonObject);
......@@ -296,7 +296,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
nfsKubeflowClusterConfig.nfs.server,
nfsKubeflowClusterConfig.nfs.path
);
}
}
this.kubernetesCRDClient = KubeflowOperatorClient.generateOperatorClient(this.kubeflowClusterConfig.operator,
this.kubeflowClusterConfig.apiVersion);
break;
......@@ -304,13 +304,13 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
case TrialConfigMetadataKey.TRIAL_CONFIG:
if (!this.kubeflowClusterConfig){
this.log.error('kubeflow cluster config is not initialized');
return Promise.reject(new Error('kubeflow cluster config is not initialized'));
return Promise.reject(new Error('kubeflow cluster config is not initialized'));
}
assert(this.kubeflowClusterConfig !== undefined)
let kubeflowTrialJsonObjsect = JSON.parse(value);
this.kubeflowTrialConfig = KubeflowTrialConfigFactory.generateKubeflowTrialConfig(
kubeflowTrialJsonObjsect,
kubeflowTrialJsonObjsect,
this.kubeflowClusterConfig.operator
);
......@@ -319,7 +319,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
await validateCodeDir(this.kubeflowTrialConfig.codeDir);
} catch(error) {
this.log.error(error);
return Promise.reject(new Error(error));
return Promise.reject(new Error(error));
}
break;
case TrialConfigMetadataKey.VERSION_CHECK:
......@@ -361,11 +361,11 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
if(this.kubeflowTrialConfig.operatorType === 'tf-operator') {
let tensorflowTrialConfig: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>this.kubeflowTrialConfig;
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.worker.replicas,
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.worker.replicas,
tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources);
if (tensorflowTrialConfig.ps){
replicaSpecsObj.Ps = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.ps.replicas,
replicaSpecsObj.Ps = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.ps.replicas,
tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources);
}
replicaSpecsObjMap.set(this.kubernetesCRDClient.jobKind, {'tfReplicaSpecs': replicaSpecsObj})
......@@ -373,19 +373,19 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
else if(this.kubeflowTrialConfig.operatorType === 'pytorch-operator') {
let pytorchTrialConfig: KubeflowTrialConfigPytorch = <KubeflowTrialConfigPytorch>this.kubeflowTrialConfig;
if(pytorchTrialConfig.worker) {
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.worker.replicas,
replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.worker.replicas,
pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources);
}
replicaSpecsObj.Master = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.master.replicas,
replicaSpecsObj.Master = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.master.replicas,
pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources);
replicaSpecsObjMap.set(this.kubernetesCRDClient.jobKind, {'pytorchReplicaSpecs': replicaSpecsObj})
}
return {
apiVersion: `kubeflow.org/${this.kubernetesCRDClient.apiVersion}`,
kind: this.kubernetesCRDClient.jobKind,
metadata: {
metadata: {
name: kubeflowJobName,
namespace: 'default',
labels: {
......@@ -395,7 +395,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
},
spec: replicaSpecsObjMap.get(this.kubernetesCRDClient.jobKind)
};
};
}
/**
......
......@@ -39,7 +39,7 @@ class GeneralK8sClient {
}
public async createSecret(secretManifest: any): Promise<boolean> {
let result: Promise<boolean>;
let result: Promise<boolean>;
const response : any = await this.client.api.v1.namespaces('default').secrets.post({body: secretManifest});
if(response.statusCode && (response.statusCode >= 200 && response.statusCode <= 299)) {
result = Promise.resolve(true);
......@@ -65,8 +65,8 @@ abstract class KubernetesCRDClient {
public abstract get containerName(): string;
public get jobKind(): string {
if(this.crdSchema
&& this.crdSchema.spec
if(this.crdSchema
&& this.crdSchema.spec
&& this.crdSchema.spec.names
&& this.crdSchema.spec.names.kind) {
return this.crdSchema.spec.names.kind;
......@@ -76,15 +76,15 @@ abstract class KubernetesCRDClient {
}
public get apiVersion(): string {
if(this.crdSchema
&& this.crdSchema.spec
if(this.crdSchema
&& this.crdSchema.spec
&& this.crdSchema.spec.version) {
return this.crdSchema.spec.version;
} else {
throw new Error('KubeflowOperatorClient: get apiVersion failed, version is undefined in crd schema!');
}
}
public async createKubernetesJob(jobManifest: any): Promise<boolean> {
let result: Promise<boolean>;
const response : any = await this.operator.post({body: jobManifest});
......@@ -117,7 +117,7 @@ abstract class KubernetesCRDClient {
qs: {
labelSelector: matchQuery,
propagationPolicy: "Background"
}
}
});
if(deleteResult.statusCode && deleteResult.statusCode >= 200 && deleteResult.statusCode <= 299) {
result = Promise.resolve(true);
......
......@@ -25,7 +25,7 @@ import { MethodNotImplementedError } from '../../common/errors';
export abstract class KubernetesClusterConfig {
public readonly storage?: KubernetesStorageKind;
public readonly apiVersion: string;
constructor(apiVersion: string, storage?: KubernetesStorageKind) {
this.storage = storage;
this.apiVersion = apiVersion;
......@@ -48,7 +48,7 @@ export class KubernetesClusterConfigNFS extends KubernetesClusterConfig {
public readonly nfs: NFSConfig;
constructor(
apiVersion: string,
apiVersion: string,
nfs: NFSConfig,
storage?: KubernetesStorageKind
) {
......@@ -73,11 +73,11 @@ export class KubernetesClusterConfigNFS extends KubernetesClusterConfig {
export class KubernetesClusterConfigAzure extends KubernetesClusterConfig {
public readonly keyVault: keyVaultConfig;
public readonly azureStorage: AzureStorage;
constructor(
apiVersion: string,
keyVault: keyVaultConfig,
azureStorage: AzureStorage,
apiVersion: string,
keyVault: keyVaultConfig,
azureStorage: AzureStorage,
storage?: KubernetesStorageKind
) {
super(apiVersion, storage);
......@@ -151,7 +151,7 @@ export class keyVaultConfig {
export class AzureStorage {
/**The azure share to store files */
public readonly azureShare : string;
/**The account name of storage service */
public readonly accountName: string;
constructor(azureShare : string, accountName: string){
......@@ -178,8 +178,8 @@ export class KubernetesTrialConfigTemplate {
/** Required GPU number for trial job. The number should be in [0,100] */
public readonly gpuNum : number;
constructor(command : string, gpuNum : number,
constructor(command : string, gpuNum : number,
cpuNum: number, memoryMB: number, image: string) {
this.command = command;
this.gpuNum = gpuNum;
......
......@@ -40,7 +40,7 @@ export class KubernetesTrialJobDetail implements TrialJobDetail {
public queryJobFailedCount: number;
constructor(id: string, status: TrialJobStatus, submitTime: number,
workingDirectory: string, form: JobApplicationForm,
workingDirectory: string, form: JobApplicationForm,
kubernetesJobName: string, sequenceId: number, url: string) {
this.id = id;
this.status = status;
......
......@@ -57,7 +57,7 @@ export class KubernetesJobInfoCollector {
await Promise.all(updateKubernetesTrialJobs);
}
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
protected async retrieveSingleTrialJobInfo(kubernetesCRDClient: KubernetesCRDClient | undefined,
kubernetesTrialJob : KubernetesTrialJobDetail) : Promise<void> {
throw new MethodNotImplementedError();
}
......
......@@ -26,7 +26,7 @@ import { ClusterJobRestServer } from '../common/clusterJobRestServer'
/**
* Kubeflow Training service Rest server, provides rest API to support kubeflow job metrics update
*
*
*/
@component.Singleton
export class KubernetesJobRestServer extends ClusterJobRestServer{
......@@ -53,5 +53,5 @@ export class KubernetesJobRestServer extends ClusterJobRestServer{
data : singleMetric
});
}
}
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment