"git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "c0154dca1e73871c89835b8bc2a9840f6910d60d"
Unverified commit a272da9e, authored by J-shang, committed by GitHub
Browse files

Hotfix: handle the previously unhandled `TrainingService is not assigned` error and extend execution time in the pipeline (#3442)

parent 62af469b
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxExecDuration: 10m
maxTrialNum: 8
trialConcurrency: 8
searchSpacePath: ../naive_trial/search_space.json
......
......@@ -260,9 +260,9 @@ def run(args):
continue
# remote mode need more time to cleanup
if args.ts == 'remote':
wait_for_port_available(8080, 180)
wait_for_port_available(8080, 240)
else:
wait_for_port_available(8080, 30)
wait_for_port_available(8080, 60)
# adl mode need more time to cleanup PVC
if args.ts == 'adl' and name == 'nnictl-resume-2':
......
......@@ -326,6 +326,7 @@ class NNIManager implements Manager {
}
public async stopExperimentBottomHalf(): Promise<void> {
try {
const trialJobList: TrialJobDetail[] = await this.trainingService.listTrialJobs();
// DON'T try to make it in parallel, the training service may not handle it well.
......@@ -342,6 +343,9 @@ class NNIManager implements Manager {
}
}
await this.trainingService.cleanUp();
} catch (err) {
this.log.error(`${err.stack}`);
}
if (this.experimentProfile.endTime === undefined) {
this.setEndtime();
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment