"git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "da4d1ef4f070f98c73e2ba309498231bd733748c"
Unverified Commit 5d51637b authored by J-shang's avatar J-shang Committed by GitHub
Browse files

add doc and fix bug in experiment resume/view (#3524)

parent 08986c6b
...@@ -106,6 +106,12 @@ Please refer to `example usage <./python_api_connect.rst>`__ and code file :gith ...@@ -106,6 +106,12 @@ Please refer to `example usage <./python_api_connect.rst>`__ and code file :gith
.. Note:: You can use ``stop()`` to stop the experiment when connecting to an existing experiment. .. Note:: You can use ``stop()`` to stop the experiment when connecting to an existing experiment.
Resume/View and Manage a Stopped Experiment
-------------------------------------------
You can use ``Experiment.resume()`` and ``Experiment.view()`` to resume and view a stopped experiment; these functions behave like ``nnictl resume`` and ``nnictl view``.
If you want to manage the experiment, set ``wait_completion`` to ``False`` and the functions will return an ``Experiment`` instance. For more parameters, please refer to the API section below.
API API
--- ---
......
...@@ -202,8 +202,15 @@ class Experiment: ...@@ -202,8 +202,15 @@ class Experiment:
---------- ----------
experiment_id experiment_id
The stopped experiment id. The stopped experiment id.
port
The port of web UI.
wait_completion
If true, run in the foreground. If false, run in the background.
debug
Whether to start in debug mode.
""" """
experiment = Experiment() experiment = Experiment()
experiment.id = experiment_id
experiment.mode = 'resume' experiment.mode = 'resume'
if wait_completion: if wait_completion:
experiment.run(port, debug) experiment.run(port, debug)
...@@ -212,7 +219,7 @@ class Experiment: ...@@ -212,7 +219,7 @@ class Experiment:
return experiment return experiment
@classmethod @classmethod
def view(cls, experiment_id: str, port: int, wait_completion: bool = True, debug: bool = False): def view(cls, experiment_id: str, port: int, wait_completion: bool = True):
""" """
View a stopped experiment. View a stopped experiment.
...@@ -220,8 +227,14 @@ class Experiment: ...@@ -220,8 +227,14 @@ class Experiment:
---------- ----------
experiment_id experiment_id
The stopped experiment id. The stopped experiment id.
port
The port of web UI.
wait_completion
If true, run in the foreground. If false, run in the background.
""" """
debug = False
experiment = Experiment() experiment = Experiment()
experiment.id = experiment_id
experiment.mode = 'view' experiment.mode = 'view'
if wait_completion: if wait_completion:
experiment.run(port, debug) experiment.run(port, debug)
......
...@@ -43,10 +43,9 @@ def start_experiment(exp_id: str, config: ExperimentConfig, port: int, debug: bo ...@@ -43,10 +43,9 @@ def start_experiment(exp_id: str, config: ExperimentConfig, port: int, debug: bo
_check_rest_server(port) _check_rest_server(port)
platform = 'hybrid' if isinstance(config.training_service, list) else config.training_service.platform platform = 'hybrid' if isinstance(config.training_service, list) else config.training_service.platform
_save_experiment_information(exp_id, port, start_time, platform, _save_experiment_information(exp_id, port, start_time, platform,
config.experiment_name, proc.pid, config.experiment_working_directory) config.experiment_name, proc.pid, str(config.experiment_working_directory))
if mode != 'view': _logger.info('Setting up...')
_logger.info('Setting up...') rest.post(port, '/experiment', config.json())
rest.post(port, '/experiment', config.json())
return proc return proc
except Exception as e: except Exception as e:
......
...@@ -19,7 +19,7 @@ def request(method: str, port: Optional[int], api: str, data: Any = None) -> Any ...@@ -19,7 +19,7 @@ def request(method: str, port: Optional[int], api: str, data: Any = None) -> Any
if not resp.ok: if not resp.ok:
_logger.error('rest request %s %s failed: %s %s', method.upper(), url, resp.status_code, resp.text) _logger.error('rest request %s %s failed: %s %s', method.upper(), url, resp.status_code, resp.text)
resp.raise_for_status() resp.raise_for_status()
if method.lower() in ['get', 'post']: if method.lower() in ['get', 'post'] and len(resp.content) > 0:
return resp.json() return resp.json()
def get(port: Optional[int], api: str) -> Any: def get(port: Optional[int], api: str) -> Any:
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# Licensed under the MIT license. # Licensed under the MIT license.
import os import os
import json import json_tricks
import shutil import shutil
import sqlite3 import sqlite3
import time import time
...@@ -92,7 +92,7 @@ class Config: ...@@ -92,7 +92,7 @@ class Config:
'''refresh to get latest config''' '''refresh to get latest config'''
sql = 'select params from ExperimentProfile where id=? order by revision DESC' sql = 'select params from ExperimentProfile where id=? order by revision DESC'
args = (self.experiment_id,) args = (self.experiment_id,)
self.config = config_v0_to_v1(json.loads(self.conn.cursor().execute(sql, args).fetchone()[0])) self.config = config_v0_to_v1(json_tricks.loads(self.conn.cursor().execute(sql, args).fetchone()[0]))
def get_config(self): def get_config(self):
'''get a value according to key''' '''get a value according to key'''
...@@ -155,7 +155,7 @@ class Experiments: ...@@ -155,7 +155,7 @@ class Experiments:
'''save config to local file''' '''save config to local file'''
try: try:
with open(self.experiment_file, 'w') as file: with open(self.experiment_file, 'w') as file:
json.dump(self.experiments, file, indent=4) json_tricks.dump(self.experiments, file, indent=4)
except IOError as error: except IOError as error:
print('Error:', error) print('Error:', error)
return '' return ''
...@@ -165,7 +165,7 @@ class Experiments: ...@@ -165,7 +165,7 @@ class Experiments:
if os.path.exists(self.experiment_file): if os.path.exists(self.experiment_file):
try: try:
with open(self.experiment_file, 'r') as file: with open(self.experiment_file, 'r') as file:
return json.load(file) return json_tricks.load(file)
except ValueError: except ValueError:
return {} return {}
return {} return {}
...@@ -198,9 +198,9 @@ class NNIManager implements Manager { ...@@ -198,9 +198,9 @@ class NNIManager implements Manager {
} }
public async resumeExperiment(readonly: boolean): Promise<void> { public async resumeExperiment(readonly: boolean): Promise<void> {
this.log.info(`Resuming experiment: ${this.experimentProfile.id}`);
//Fetch back the experiment profile //Fetch back the experiment profile
const experimentId: string = getExperimentId(); const experimentId: string = getExperimentId();
this.log.info(`Resuming experiment: ${experimentId}`);
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId); this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
this.readonly = readonly; this.readonly = readonly;
if (readonly) { if (readonly) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment