Unverified Commit 8f716170 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Fix nni stop (#368)

Fix "nnictl stop"
parent d6c07948
......@@ -26,6 +26,7 @@ import { Deferred } from 'ts-deferred';
import { getLogger, Logger } from './log';
import { getBasePort } from './experimentStartupInfo';
/**
* Abstraction class to create a RestServer
* A module that wants to use a RestServer should <b>extend</b> this abstract class
......@@ -90,6 +91,10 @@ export abstract class RestServer {
} else {
this.startTask.promise.then(
() => { // Started
//close() stops the server from accepting new connections but keeps existing connections open.
//The call is asynchronous: the server is finally closed only once all connections
//have ended, at which point the server emits a 'close' event.
//See https://nodejs.org/docs/latest/api/net.html#net_server_close_callback
this.server.close().on('close', () => {
this.log.info('Rest server stopped.');
this.stopTask.resolve();
......@@ -103,7 +108,7 @@ export abstract class RestServer {
}
);
}
this.stopTask.resolve()
return this.stopTask.promise;
}
......
......@@ -219,7 +219,6 @@ class NNIManager implements Manager {
/**
 * Stops the experiment.
 *
 * Sets the manager status to 'STOPPING' and then waits for
 * experimentDoneCleanUp() to complete.
 *
 * @returns a promise that settles once experimentDoneCleanUp() has settled.
 */
public async stopExperiment(): Promise<void> {
// The status is updated before the cleanup is awaited.
this.status.status = 'STOPPING';
await this.experimentDoneCleanUp();
}
public async getMetricData(trialJobId?: string, metricType?: MetricType): Promise<MetricDataRecord[]> {
......
......@@ -110,10 +110,20 @@ mkDirP(getLogDir()).then(async () => {
});
/**
 * SIGTERM handler: performs a single, ordered shutdown and then exits.
 *
 * Order: stop the experiment, close the data store, stop the REST server.
 * Every step runs inside one try block so that a failure in any of them is
 * logged instead of becoming an unhandled rejection. The logger is closed
 * only in `finally`, after any error has been written, and the process exits
 * with code 1 if a step failed and 0 otherwise.
 */
process.on('SIGTERM', async () => {
    // Obtain the logger first so it is available to the catch/finally paths.
    const log: Logger = getLogger();
    let hasError: boolean = false;
    try {
        const nniManager: Manager = component.get(Manager);
        await nniManager.stopExperiment();
        const ds: DataStore = component.get(DataStore);
        await ds.close();
        const restServer: NNIRestServer = component.get(NNIRestServer);
        await restServer.stop();
    } catch (err) {
        hasError = true;
        // A thrown value is not guaranteed to be an Error; fall back to its
        // string form rather than logging "undefined".
        const detail: string = err instanceof Error && err.stack !== undefined ? err.stack : String(err);
        log.error(detail);
    } finally {
        await log.close();
        process.exit(hasError ? 1 : 0);
    }
});
\ No newline at end of file
......@@ -65,7 +65,6 @@ class NNIRestHandler {
this.getExperimentProfile(router);
this.updateExperimentProfile(router);
this.startExperiment(router);
this.stopExperiment(router);
this.getTrialJobStatistics(router);
this.setClusterMetaData(router);
this.listTrialJobs(router);
......@@ -158,18 +157,6 @@ class NNIRestHandler {
});
}
/**
 * Registers the `DELETE /experiment` route on the given router.
 *
 * The route handler awaits `this.tb.cleanUp()`, then
 * `this.nniManager.stopExperiment()`, and sends an empty response on success.
 * Any error thrown along the way is passed to `handle_error` together with
 * the response object.
 *
 * @param router router on which the route is registered
 */
private stopExperiment(router: Router): void {
    const onDelete = async (_req: Request, response: Response): Promise<void> => {
        try {
            await this.tb.cleanUp();
            await this.nniManager.stopExperiment();
            response.send();
        } catch (failure) {
            this.handle_error(failure, response);
        }
    };
    router.delete('/experiment', onDelete);
}
private getTrialJobStatistics(router: Router): void {
router.get('/job-statistics', (req: Request, res: Response) => {
this.nniManager.getTrialJobStatistics().then((statistics: TrialJobStatistics[]) => {
......
......@@ -179,18 +179,6 @@ def stop_experiment(args):
print_normal('Experiment is not running...')
experiment_config.update_experiment(experiment_id, 'status', 'stopped')
return
running, _ = check_rest_server_quick(rest_port)
stop_rest_result = True
if running:
response = rest_delete(experiment_url(rest_port), 20)
if not response or not check_response(response):
if response:
print_error(response.text)
else:
print_error('No response from restful server!')
stop_rest_result = False
#sleep to wait rest handler done
time.sleep(3)
rest_pid = nni_config.get_config('restServerPid')
if rest_pid:
stop_rest_cmds = ['kill', str(rest_pid)]
......@@ -204,8 +192,7 @@ def stop_experiment(args):
except Exception as exception:
print_error(exception)
nni_config.set_config('tensorboardPidList', [])
if stop_rest_result:
print_normal('Stop experiment success!')
print_normal('Stop experiment success!')
experiment_config.update_experiment(experiment_id, 'status', 'stopped')
time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
experiment_config.update_experiment(experiment_id, 'endTime', str(time_now))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment