Unverified Commit cb15be49 authored by chicm-ms's avatar chicm-ms Committed by GitHub
Browse files

Enable eslint for nni_manager (#1845)

* enable eslint

* remove tslint
parent a2210436
......@@ -101,7 +101,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
this.trialJobsMap.set(trialJobId, trialJobDetail);
// Create frameworkcontroller job based on generated frameworkcontroller job resource config
// tslint:disable-next-line:no-any
const frameworkcontrollerJobConfig: any = await this.prepareFrameworkControllerConfig(
trialJobId, trialWorkingFolder, frameworkcontrollerJobName);
await this.kubernetesCRDClient.createKubernetesJob(frameworkcontrollerJobConfig);
......@@ -112,7 +111,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
return Promise.resolve(trialJobDetail);
}
// tslint:disable:no-redundant-jsdoc no-any no-unsafe-any
public async setClusterMetadata(key: string, value: string): Promise<void> {
switch (key) {
case TrialConfigMetadataKey.NNI_MANAGER_IP:
......@@ -171,7 +169,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
return Promise.resolve();
}
// tslint:enable: no-any no-unsafe-any
/**
* Upload code files to NFS or Azure Storage
......@@ -256,7 +253,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
}
// tslint:disable: no-any no-unsafe-any
private async prepareFrameworkControllerConfig(trialJobId: string, trialWorkingFolder: string, frameworkcontrollerJobName: string):
Promise<any> {
......@@ -447,7 +443,6 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
};
}
// tslint:enable: no-any no-unsafe-any
}
export { FrameworkControllerTrainingService };
......@@ -8,7 +8,6 @@ import { GeneralK8sClient, KubernetesCRDClient } from '../kubernetesApiClient';
import { KubeflowOperator } from './kubeflowConfig';
// tslint:disable: no-unsafe-any no-any completed-docs
class TFOperatorClientV1Alpha2 extends KubernetesCRDClient {
/**
* constructor, to initialize tfjob CRD definition
......@@ -130,7 +129,6 @@ class KubeflowOperatorClientFactory {
/**
* Factory method to generate operator client
*/
// tslint:disable-next-line:function-name
public static createClient(kubeflowOperator: KubeflowOperator, operatorApiVersion: string): KubernetesCRDClient {
switch (kubeflowOperator) {
case 'tf-operator': {
......@@ -169,5 +167,4 @@ class KubeflowOperatorClientFactory {
}
}
// tslint:enable: no-unsafe-any
export { KubeflowOperatorClientFactory, GeneralK8sClient };
......@@ -26,7 +26,6 @@ export class KubeflowClusterConfig extends KubernetesClusterConfig {
}
}
// tslint:disable:completed-docs
export class KubeflowClusterConfigNFS extends KubernetesClusterConfigNFS {
public readonly operator: KubeflowOperator;
constructor(
......@@ -43,7 +42,6 @@ export class KubeflowClusterConfigNFS extends KubernetesClusterConfigNFS {
return 'nfs';
}
// tslint:disable-next-line:function-name
public static getInstance(jsonObject: object): KubeflowClusterConfigNFS {
const kubeflowClusterConfigObjectNFS: KubeflowClusterConfigNFS = <KubeflowClusterConfigNFS>jsonObject;
assert (kubeflowClusterConfigObjectNFS !== undefined);
......@@ -75,7 +73,6 @@ export class KubeflowClusterConfigAzure extends KubernetesClusterConfigAzure {
return 'azureStorage';
}
// tslint:disable-next-line:function-name
public static getInstance(jsonObject: object): KubeflowClusterConfigAzure {
const kubeflowClusterConfigObjectAzure: KubeflowClusterConfigAzure = <KubeflowClusterConfigAzure>jsonObject;
......@@ -91,7 +88,6 @@ export class KubeflowClusterConfigAzure extends KubernetesClusterConfigAzure {
export class KubeflowClusterConfigFactory {
// tslint:disable-next-line:function-name
public static generateKubeflowClusterConfig(jsonObject: object): KubeflowClusterConfig {
const storageConfig: StorageConfig = <StorageConfig>jsonObject;
if (storageConfig === undefined) {
......@@ -156,8 +152,6 @@ export class KubeflowTrialConfigPytorch extends KubeflowTrialConfig {
}
export class KubeflowTrialConfigFactory {
// tslint:disable-next-line:function-name
public static generateKubeflowTrialConfig(jsonObject: object, operator: KubeflowOperator): KubeflowTrialConfig {
if (operator === 'tf-operator') {
const kubeflowTrialConfigObject: KubeflowTrialConfigTensorflow = <KubeflowTrialConfigTensorflow>jsonObject;
......
......@@ -26,7 +26,6 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
return Promise.reject('kubernetesCRDClient is undefined');
}
// tslint:disable:no-any no-unsafe-any
let kubernetesJobInfo: any;
try {
kubernetesJobInfo = await kubernetesCRDClient.getKubernetesJob(kubernetesTrialJob.kubernetesJobName);
......@@ -37,7 +36,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
// This is not treated as an error status
return Promise.resolve();
}
/* eslint-disable require-atomic-updates */
if (kubernetesJobInfo.status && kubernetesJobInfo.status.conditions) {
const latestCondition: any = kubernetesJobInfo.status.conditions[kubernetesJobInfo.status.conditions.length - 1];
const tfJobType: KubeflowJobStatus = <KubeflowJobStatus>latestCondition.type;
......@@ -63,7 +62,7 @@ export class KubeflowJobInfoCollector extends KubernetesJobInfoCollector {
default:
}
}
// tslint:enable:no-any no-unsafe-any
/* eslint-enable require-atomic-updates */
return Promise.resolve();
}
......
......@@ -27,7 +27,6 @@ import { KubeflowClusterConfig, KubeflowClusterConfigAzure, KubeflowClusterConfi
import { KubeflowJobInfoCollector } from './kubeflowJobInfoCollector';
import { KubeflowJobRestServer } from './kubeflowJobRestServer';
// tslint:disable: no-unsafe-any no-any
/**
* Training Service implementation for Kubeflow
* Refer https://github.com/kubeflow/kubeflow for more info about Kubeflow
......@@ -108,7 +107,6 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
return Promise.resolve(trialJobDetail);
}
// tslint:disable:no-redundant-jsdoc
public async setClusterMetadata(key: string, value: string): Promise<void> {
switch (key) {
case TrialConfigMetadataKey.NNI_MANAGER_IP:
......@@ -461,7 +459,6 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
replicas: replicaNumber,
template: {
metadata: {
// tslint:disable-next-line:no-null-keyword
creationTimestamp: null
},
spec: spec
......@@ -469,5 +466,4 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
}
}
// tslint:enable: no-unsafe-any no-any
export { KubeflowTrainingService };
......@@ -10,7 +10,6 @@ import { getLogger, Logger } from '../../common/log';
/**
* Generic Kubernetes client, target version >= 1.9
*/
// tslint:disable: no-any no-unsafe-any
class GeneralK8sClient {
protected readonly client: any;
protected readonly log: Logger = getLogger();
......
......@@ -6,7 +6,6 @@
export type KubernetesStorageKind = 'nfs' | 'azureStorage';
import { MethodNotImplementedError } from '../../common/errors';
// tslint:disable: completed-docs function-name
export abstract class KubernetesClusterConfig {
public readonly storage?: KubernetesStorageKind;
public readonly apiVersion: string;
......@@ -91,7 +90,6 @@ export class KubernetesClusterConfigAzure extends KubernetesClusterConfig {
}
}
// tslint:disable-next-line:no-unnecessary-class
export class KubernetesClusterConfigFactory {
public static generateKubernetesClusterConfig(jsonObject: object): KubernetesClusterConfig {
......
......@@ -25,7 +25,6 @@ export class KubernetesJobRestServer extends ClusterJobRestServer {
this.kubernetesTrainingService = kubernetesTrainingService;
}
// tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId: string, metrics: any[]): void {
if (this.kubernetesTrainingService === undefined) {
throw Error('kubernetesTrainingService not initialized!');
......
......@@ -61,7 +61,6 @@ abstract class KubernetesTrainingService {
this.logCollection = 'none';
}
// tslint:disable:no-any
public generatePodResource(memory: number, cpuNum: number, gpuNum: number): any {
const resources: any = {
memory: `${memory}Mi`,
......@@ -73,7 +72,7 @@ abstract class KubernetesTrainingService {
}
return resources;
} // tslint:enable:no-any
}
public async listTrialJobs(): Promise<TrialJobDetail[]> {
const jobs: TrialJobDetail[] = [];
......@@ -197,7 +196,6 @@ abstract class KubernetesTrainingService {
await this.kubernetesJobRestServer.stop();
this.log.info('Kubernetes Training service rest server stopped successfully.');
} catch (error) {
// tslint:disable-next-line: no-unsafe-any
this.log.error(`Kubernetes Training service rest server stopped failed, error: ${error.message}`);
return Promise.reject(error);
......@@ -206,7 +204,6 @@ abstract class KubernetesTrainingService {
return Promise.resolve();
}
// tslint:disable: no-unsafe-any no-any
protected async createAzureStorage(vaultName: string, valutKeyName: string): Promise<void> {
try {
const result: any = await cpp.exec(`az keyvault secret show --name ${valutKeyName} --vault-name ${vaultName}`);
......@@ -253,7 +250,6 @@ abstract class KubernetesTrainingService {
return Promise.resolve();
}
// tslint:enable: no-unsafe-any no-any
/**
* Generate run script for different roles (like worker or ps)
......@@ -271,7 +267,6 @@ abstract class KubernetesTrainingService {
if (gpuNum === 0) {
nvidiaScript = 'export CUDA_VISIBLE_DEVICES=';
}
// tslint:disable-next-line: strict-boolean-expressions
const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address();
const version: string = this.versionCheck ? await getVersion() : '';
const runScript: string = String.Format(
......
......@@ -86,7 +86,6 @@ class GPUScheduler {
runGpuMetricsCollector(this.gpuMetricCollectorScriptFolder);
}
// tslint:disable:non-literal-fs-path
private async updateGPUSummary(): Promise<void> {
const gpuMetricPath: string = path.join(this.gpuMetricCollectorScriptFolder, 'gpu_metrics');
if (fs.existsSync(gpuMetricPath)) {
......
......@@ -31,7 +31,6 @@ import { GPUScheduler } from './gpuScheduler';
* success: true if the buffer contains at least one complete command; otherwise false
* remain: remaining data after the first command
*/
// tslint:disable:newline-per-chained-call informative-docs
function decodeCommand(data: Buffer): [boolean, string, string, Buffer] {
if (data.length < 8) {
return [false, '', '', data];
......@@ -46,7 +45,6 @@ function decodeCommand(data: Buffer): [boolean, string, string, Buffer] {
return [true, commandType, content, remain];
}
// tslint:enable:newline-per-chained-call informative-docs
/**
* LocalTrialJobDetail
......@@ -252,7 +250,6 @@ class LocalTrainingService implements TrainingService {
public async setClusterMetadata(key: string, value: string): Promise<void> {
if (!this.initialized) {
this.rootDir = getExperimentRootDir();
// tslint:disable-next-line:non-literal-fs-path
if (!fs.existsSync(this.rootDir)) {
await cpp.exec(`powershell.exe mkdir ${this.rootDir}`);
}
......@@ -524,8 +521,8 @@ class LocalTrainingService implements TrainingService {
await this.writeParameterFile(trialJobDetail.workingDirectory, trialJobDetail.form.hyperParameters);
const trialJobProcess: cp.ChildProcess = runScript(path.join(trialJobDetail.workingDirectory, scriptName));
this.setTrialJobStatus(trialJobDetail, 'RUNNING');
trialJobDetail.startTime = Date.now();
trialJobDetail.pid = trialJobProcess.pid;
trialJobDetail.startTime = Date.now(); // eslint-disable-line require-atomic-updates
trialJobDetail.pid = trialJobProcess.pid; // eslint-disable-line require-atomic-updates
this.setExtraProperties(trialJobDetail, resource);
let buffer: Buffer = Buffer.alloc(0);
......
......@@ -17,7 +17,6 @@ export namespace HDFSClientUtility {
* @param hdfsUserName HDFS user name
*/
export function hdfsExpRootDir(hdfsUserName: string): string {
    // Join the user name, 'nni', 'experiments' and the current experiment id,
    // then prefix the joined path with a leading '/'.
    const relativeRoot: string = unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId());

    return `/${relativeRoot}`;
}
......@@ -47,10 +46,8 @@ export namespace HDFSClientUtility {
* @param hdfsFilePath hdfs file path(target)
* @param hdfsClient hdfs client
*/
// tslint:disable: no-unsafe-any non-literal-fs-path no-any
export async function copyFileToHdfs(localFilePath: string, hdfsFilePath: string, hdfsClient: any): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
// tslint:disable-next-line:non-literal-fs-path
fs.exists(localFilePath, (exists: boolean) => {
// Detect if local file exist
if (exists) {
......@@ -90,7 +87,6 @@ export namespace HDFSClientUtility {
for (const fileName of fileNameArray) {
const fullFilePath: string = path.join(localDirectory, fileName);
try {
// tslint:disable-next-line:non-literal-fs-path
if (fs.lstatSync(fullFilePath)
.isFile()) {
await copyFileToHdfs(fullFilePath, path.join(hdfsDirectory, fileName), hdfsClient);
......@@ -227,5 +223,4 @@ export namespace HDFSClientUtility {
return deferred.promise;
}
// tslint:enable: no-unsafe-any non-literal-fs-path no-any
}
......@@ -52,6 +52,5 @@ export const PAI_TRIAL_COMMAND_FORMAT: string =
--pai_hdfs_output_dir '{9}' --pai_hdfs_host '{10}' --pai_user_name {11} --nni_hdfs_exp_dir '{12}' --webhdfs_path '/webhdfs/api/v1' \
--nni_manager_version '{13}' --log_collection '{14}'`;
// tslint:disable:no-http-string
/**
 * Format string for a WebHDFS explorer URL.
 * `{0}` and `{1}` are positional placeholders filled in by the caller
 * (presumably the HDFS host and the log path; confirm against the call site).
 */
export const PAI_LOG_PATH_FORMAT: string =
`http://{0}/webhdfs/explorer.html#{1}`;
......@@ -3,7 +3,6 @@
'use strict';
// tslint:disable-next-line:no-implicit-dependencies
import * as request from 'request';
import { Deferred } from 'ts-deferred';
import { NNIError, NNIErrorNames } from '../../common/errors';
......@@ -54,7 +53,6 @@ export class PAIJobInfoCollector {
// Rest call to get PAI job info and update status
// Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
const getJobInfoRequest: request.Options = {
// tslint:disable-next-line:no-http-string
uri: `http://${paiClusterConfig.host}/rest-server/api/v1/user/${paiClusterConfig.userName}/jobs/${paiTrialJob.paiJobName}`,
method: 'GET',
json: true,
......@@ -64,7 +62,6 @@ export class PAIJobInfoCollector {
}
};
// tslint:disable: no-unsafe-any no-any cyclomatic-complexity
//TODO : pass in request timeout param?
request(getJobInfoRequest, (error: Error, response: request.Response, body: any) => {
if ((error !== undefined && error !== null) || response.statusCode >= 500) {
......@@ -128,5 +125,4 @@ export class PAIJobInfoCollector {
return deferred.promise;
}
// tslint:enable: no-unsafe-any no-any
}
......@@ -34,7 +34,6 @@ export class PAIJobRestServer extends ClusterJobRestServer {
this.paiTrainingService = component.get(PAITrainingService);
}
// tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId: string, metrics: any[]): void {
// Split metrics array into single metric, then emit
// Warning: If not split metrics into single ones, the behavior will be UNKNOWN
......
......@@ -5,7 +5,6 @@
import * as fs from 'fs';
import * as path from 'path';
// tslint:disable-next-line:no-implicit-dependencies
import * as request from 'request';
import * as component from '../../common/component';
......@@ -45,7 +44,6 @@ class PAITrainingService implements TrainingService {
private paiClusterConfig?: PAIClusterConfig;
private readonly jobQueue: string[];
private stopping: boolean = false;
// tslint:disable-next-line:no-any
private hdfsClient: any;
private paiToken? : string;
private paiTokenUpdateTime?: number;
......@@ -171,7 +169,6 @@ class PAITrainingService implements TrainingService {
return true;
}
// tslint:disable:no-http-string
public cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> {
const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
const deferred: Deferred<void> = new Deferred<void>();
......@@ -203,7 +200,6 @@ class PAITrainingService implements TrainingService {
// Set trialjobDetail's early stopped field, to mark the job's cancellation source
trialJobDetail.isEarlyStopped = isEarlyStopped;
// tslint:disable-next-line:no-any
request(stopJobRequest, (error: Error, response: request.Response, body: any) => {
if ((error !== undefined && error !== null) || response.statusCode >= 400) {
this.log.error(`PAI Training service: stop trial ${trialJobId} to PAI Cluster failed!`);
......@@ -217,8 +213,6 @@ class PAITrainingService implements TrainingService {
return deferred.promise;
}
// tslint:disable: no-unsafe-any no-any
// tslint:disable-next-line:max-func-body-length
public async setClusterMetadata(key: string, value: string): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
......@@ -298,7 +292,6 @@ class PAITrainingService implements TrainingService {
return deferred.promise;
}
// tslint:enable: no-unsafe-any
public getClusterMetadata(key: string): Promise<string> {
const deferred: Deferred<string> = new Deferred<string>();
......@@ -319,7 +312,6 @@ class PAITrainingService implements TrainingService {
deferred.resolve();
this.log.info('PAI Training service rest server stopped successfully.');
} catch (error) {
// tslint:disable-next-line: no-unsafe-any
this.log.error(`PAI Training service rest server stopped failed, error: ${error.message}`);
deferred.reject(error);
}
......@@ -331,7 +323,6 @@ class PAITrainingService implements TrainingService {
return this.metricsEmitter;
}
// tslint:disable-next-line:max-func-body-length
private async submitTrialJobToPAI(trialJobId: string): Promise<boolean> {
const deferred: Deferred<boolean> = new Deferred<boolean>();
const trialJobDetail: PAITrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
......@@ -383,7 +374,6 @@ class PAITrainingService implements TrainingService {
}
const hdfsCodeDir: string = HDFSClientUtility.getHdfsTrialWorkDir(this.paiClusterConfig.userName, trialJobId);
const hdfsOutputDir: string = unixPathJoin(hdfsCodeDir, 'nnioutput');
// tslint:disable-next-line: strict-boolean-expressions
const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address();
const version: string = this.versionCheck ? await getVersion() : '';
const nniPaiTrialCommand: string = String.Format(
......@@ -407,7 +397,6 @@ class PAITrainingService implements TrainingService {
)
.replace(/\r\n|\n|\r/gm, '');
// tslint:disable-next-line:no-console
this.log.info(`nniPAItrial command is ${nniPaiTrialCommand.trim()}`);
const paiTaskRoles: PAITaskRole[] = [
new PAITaskRole(
......@@ -449,7 +438,7 @@ class PAITrainingService implements TrainingService {
await HDFSClientUtility.copyDirectoryToHdfs(trialLocalTempFolder, hdfsCodeDir, this.hdfsClient);
} catch (error) {
this.log.error(`PAI Training service: copy ${this.paiTrialConfig.codeDir} to HDFS ${hdfsCodeDir} failed, error is ${error}`);
trialJobDetail.status = 'FAILED';
trialJobDetail.status = 'FAILED'; // eslint-disable-line require-atomic-updates
deferred.resolve(true);
return deferred.promise;
......@@ -467,7 +456,6 @@ class PAITrainingService implements TrainingService {
Authorization: `Bearer ${this.paiToken}`
}
};
// tslint:disable:no-any no-unsafe-any
request(submitJobRequest, (error: Error, response: request.Response, body: any) => {
if ((error !== undefined && error !== null) || response.statusCode >= 400) {
const errorMessage: string = (error !== undefined && error !== null) ? error.message :
......
......@@ -148,7 +148,6 @@ export class GPUScheduler {
}
}
this.log.debug(`designated gpu indices: ${designatedGpuIndices}`);
// tslint:disable: strict-boolean-expressions
rmMeta.gpuSummary.gpuInfos.forEach((gpuInfo: GPUInfo) => {
// if the GPU has active process, OR be reserved by a job,
// or index not in gpuIndices configuration in machineList,
......@@ -174,7 +173,6 @@ export class GPUScheduler {
return totalResourceMap;
}
// tslint:enable: strict-boolean-expressions
private selectMachine(rmMetas: RemoteMachineMeta[]): RemoteMachineMeta {
assert(rmMetas !== undefined && rmMetas.length > 0);
......
......@@ -186,7 +186,6 @@ export class SSHClientManager {
/**
* Create a new ssh connection client and initialize it
*/
// tslint:disable:non-literal-fs-path
private initNewSSHClient(): Promise<Client> {
const deferred: Deferred<Client> = new Deferred<Client>();
const conn: Client = new Client();
......
......@@ -25,7 +25,6 @@ export class RemoteMachineJobRestServer extends ClusterJobRestServer {
this.remoteMachineTrainingService = component.get(RemoteMachineTrainingService);
}
// tslint:disable-next-line:no-any
protected handleTrialMetrics(jobId: string, metrics: any[]): void {
// Split metrics array into single metric, then emit
// Warning: If the metrics are not split into single ones, the behavior will be UNKNOWN
......
......@@ -206,7 +206,6 @@ class RemoteMachineTrainingService implements TrainingService {
* Submit trial job
* @param form trial job description form
*/
// tslint:disable-next-line:informative-docs
public async submitTrialJob(form: TrialJobApplicationForm): Promise<TrialJobDetail> {
if (this.trialConfig === undefined) {
throw new Error('trial config is not initialized');
......@@ -255,7 +254,6 @@ class RemoteMachineTrainingService implements TrainingService {
* Cancel trial job
* @param trialJobId ID of trial job
*/
// tslint:disable:informative-docs no-unsafe-any
public async cancelTrialJob(trialJobId: string, isEarlyStopped: boolean = false): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
const trialJob: RemoteMachineTrialJobDetail | undefined = this.trialJobsMap.get(trialJobId);
......@@ -319,7 +317,6 @@ class RemoteMachineTrainingService implements TrainingService {
throw new Error('trial config parsed failed');
}
// codeDir is not a valid directory, throw Error
// tslint:disable-next-line:non-literal-fs-path
if (!fs.lstatSync(remoteMachineTrailConfig.codeDir)
.isDirectory()) {
throw new Error(`codeDir ${remoteMachineTrailConfig.codeDir} is not a directory`);
......@@ -438,7 +435,6 @@ class RemoteMachineTrainingService implements TrainingService {
await SSHClientUtility.remoteExeCommand(`chmod 777 ${nniRootDir} ${nniRootDir}/* ${nniRootDir}/scripts/*`, conn);
//Begin to execute gpu_metrics_collection scripts
// tslint:disable-next-line: no-floating-promises
const script = getGpuMetricsCollectorBashScriptContent(remoteGpuScriptCollectorDir);
SSHClientUtility.remoteExeCommand(`bash -c '${script}'`, conn);
......@@ -549,7 +545,6 @@ class RemoteMachineTrainingService implements TrainingService {
command = `CUDA_VISIBLE_DEVICES=" " ${this.trialConfig.command}`;
}
}
// tslint:disable-next-line: strict-boolean-expressions
const nniManagerIp: string = this.nniManagerIpConfig ? this.nniManagerIpConfig.nniManagerIp : getIPV4Address();
if (this.remoteRestServerPort === undefined) {
const restServer: RemoteMachineJobRestServer = component.get(RemoteMachineJobRestServer);
......@@ -587,7 +582,6 @@ class RemoteMachineTrainingService implements TrainingService {
// Copy files in codeDir to remote working directory
await SSHClientUtility.copyDirectoryToRemote(trialLocalTempFolder, trialWorkingFolder, sshClient, this.remoteOS);
// Execute command in remote machine
// tslint:disable-next-line: no-floating-promises
SSHClientUtility.remoteExeCommand(`bash ${unixPathJoin(trialWorkingFolder, 'run.sh')}`, sshClient);
}
......@@ -604,6 +598,7 @@ class RemoteMachineTrainingService implements TrainingService {
const deferred: Deferred<TrialJobDetail> = new Deferred<TrialJobDetail>();
const jobpidPath: string = this.getJobPidPath(trialJob.id);
const trialReturnCodeFilePath: string = unixPathJoin(this.remoteExpRootDir, 'trials', trialJob.id, '.nni', 'code');
/* eslint-disable require-atomic-updates */
try {
const killResult: number = (await SSHClientUtility.remoteExeCommand(`kill -0 \`cat ${jobpidPath}\``, sshClient)).exitCode;
// if the process of jobpid is not alive any more
......@@ -640,7 +635,7 @@ class RemoteMachineTrainingService implements TrainingService {
deferred.resolve(trialJob);
}
}
/* eslint-enable require-atomic-updates */
return deferred.promise;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment