Unverified Commit e99c579b authored by Ni Hao's avatar Ni Hao Committed by GitHub
Browse files

Fix incorrect trial status when a trial is early stopped (#4005)

parent e7eab833
...@@ -681,7 +681,12 @@ class NNIManager implements Manager { ...@@ -681,7 +681,12 @@ class NNIManager implements Manager {
this.currSubmittedTrialNum++; this.currSubmittedTrialNum++;
this.log.info('submitTrialJob: form:', form); this.log.info('submitTrialJob: form:', form);
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(form); const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(form);
setTimeout(async () => this.stopTrialJobIfOverMaxDurationTimer(trialJobDetail.id), 1000 * this.maxTrialDuration); if(this.maxTrialDuration !== Infinity){
// Fix timeout warning : Infinity does not fit into a 32-bit signed integer(2147483647).
const duration = (this.maxTrialDuration * 1000) > 2147483647 ? 2147483647 : this.maxTrialDuration * 1000;
setTimeout(async () => this.stopTrialJobIfOverMaxDurationTimer(trialJobDetail.id), duration);
}
const Snapshot: TrialJobDetail = Object.assign({}, trialJobDetail); const Snapshot: TrialJobDetail = Object.assign({}, trialJobDetail);
await this.storeExperimentProfile(); await this.storeExperimentProfile();
this.trialJobs.set(trialJobDetail.id, Snapshot); this.trialJobs.set(trialJobDetail.id, Snapshot);
......
...@@ -237,8 +237,10 @@ class LocalTrainingService implements TrainingService { ...@@ -237,8 +237,10 @@ class LocalTrainingService implements TrainingService {
return Promise.resolve(); return Promise.resolve();
} }
tkill(trialJob.pid, 'SIGTERM'); tkill(trialJob.pid, 'SIGTERM');
this.setTrialJobStatus(trialJob, getJobCancelStatus(isEarlyStopped));
const startTime = Date.now(); const startTime = Date.now();
while(await isAlive(trialJob.pid)) { while(await isAlive(trialJob.pid)) {
if (Date.now() - startTime > 4999) { if (Date.now() - startTime > 4999) {
tkill(trialJob.pid, 'SIGKILL', (err) => { tkill(trialJob.pid, 'SIGKILL', (err) => {
if (err) { if (err) {
...@@ -250,8 +252,6 @@ class LocalTrainingService implements TrainingService { ...@@ -250,8 +252,6 @@ class LocalTrainingService implements TrainingService {
await delay(500); await delay(500);
} }
this.setTrialJobStatus(trialJob, getJobCancelStatus(isEarlyStopped));
return Promise.resolve(); return Promise.resolve();
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment