Unverified commit 8b388fca, authored by Ni Hao, committed by GitHub
Browse files

fix trial duration time incorrect (#4044)

parent bab02efb
...@@ -547,14 +547,23 @@ class NNIManager implements Manager { ...@@ -547,14 +547,23 @@ class NNIManager implements Manager {
} }
} }
/**
 * Side-by-side diff of a single NNIManager method. On lines that exist in both
 * versions, the first copy of the text is the old code and the second copy is
 * the new code; lines that appear only once were added by this commit.
 *
 * Old version, stopTrialJobIfOverMaxDurationTimer(trialJobId): looks up one
 * trial by id and cancels it if it is RUNNING and has a defined startTime.
 *
 * New version, stopTrialIfOverMaxDurationLimit(): takes no argument. It returns
 * immediately when this.maxTrialDuration is Infinity. Otherwise it visits every
 * key currently in this.trialJobs and, for each trial that is RUNNING and has a
 * defined startTime, compares the trial's elapsed time with
 * this.maxTrialDuration and cancels the trial through
 * this.trainingService.cancelTrialJob when the limit is exceeded.
 * Cancellations are awaited one at a time inside the loop.
 *
 * @returns a promise that resolves once all over-limit trials found in this
 *          pass have been cancelled.
 */
private async stopTrialJobIfOverMaxDurationTimer(trialJobId: string): Promise<void> { private async stopTrialIfOverMaxDurationLimit(): Promise<void> {
// New: no limit configured, so there is nothing to check.
if(this.maxTrialDuration === Infinity){
return;
}
// New: iterate over a snapshot (Array.from) of the ids currently in the map.
for (const trialJobId of Array.from(this.trialJobs.keys())) {
const trialJobDetail: TrialJobDetail | undefined = this.trialJobs.get(trialJobId); const trialJobDetail: TrialJobDetail | undefined = this.trialJobs.get(trialJobId);
if (undefined !== trialJobDetail && if(undefined !== trialJobDetail &&
trialJobDetail.status === 'RUNNING' && trialJobDetail.status === 'RUNNING' &&
trialJobDetail.startTime !== undefined) { trialJobDetail.startTime !== undefined){
// New: elapsed time since startTime, divided by 1000 to get seconds
// (assumes startTime is an epoch timestamp in milliseconds — TODO confirm).
const currentTrialDuration = (new Date().getTime() - trialJobDetail.startTime) / 1000;
// New: strict comparison; a trial exactly at the limit is not cancelled.
if(currentTrialDuration>this.maxTrialDuration) {
// isEarlyStopped = true is passed as the second argument to cancelTrialJob.
const isEarlyStopped = true; const isEarlyStopped = true;
await this.trainingService.cancelTrialJob(trialJobId, isEarlyStopped); await this.trainingService.cancelTrialJob(trialJobId, isEarlyStopped);
this.log.info(`Trial job ${trialJobId} has stoped because it is over maxTrialDuration.`); this.log.info(`Trial job ${trialJobDetail.id} has been canceled because it is over max trial duration.`);
}
}
} }
} }
...@@ -622,6 +631,8 @@ class NNIManager implements Manager { ...@@ -622,6 +631,8 @@ class NNIManager implements Manager {
let allFinishedTrialJobNum: number = this.currSubmittedTrialNum; let allFinishedTrialJobNum: number = this.currSubmittedTrialNum;
let waitSubmittedToFinish: number; let waitSubmittedToFinish: number;
while (!['ERROR', 'STOPPING', 'STOPPED'].includes(this.status.status)) { while (!['ERROR', 'STOPPING', 'STOPPED'].includes(this.status.status)) {
await this.stopTrialIfOverMaxDurationLimit();
const finishedTrialJobNum: number = await this.requestTrialJobsStatus(); const finishedTrialJobNum: number = await this.requestTrialJobsStatus();
allFinishedTrialJobNum += finishedTrialJobNum; allFinishedTrialJobNum += finishedTrialJobNum;
...@@ -681,12 +692,6 @@ class NNIManager implements Manager { ...@@ -681,12 +692,6 @@ class NNIManager implements Manager {
this.currSubmittedTrialNum++; this.currSubmittedTrialNum++;
this.log.info('submitTrialJob: form:', form); this.log.info('submitTrialJob: form:', form);
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(form); const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(form);
if(this.maxTrialDuration !== Infinity){
// Fix timeout warning : Infinity does not fit into a 32-bit signed integer(2147483647).
const duration = (this.maxTrialDuration * 1000) > 2147483647 ? 2147483647 : this.maxTrialDuration * 1000;
setTimeout(async () => this.stopTrialJobIfOverMaxDurationTimer(trialJobDetail.id), duration);
}
const Snapshot: TrialJobDetail = Object.assign({}, trialJobDetail); const Snapshot: TrialJobDetail = Object.assign({}, trialJobDetail);
await this.storeExperimentProfile(); await this.storeExperimentProfile();
this.trialJobs.set(trialJobDetail.id, Snapshot); this.trialJobs.set(trialJobDetail.id, Snapshot);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment