Unverified commit 1278c439, authored by J-shang and committed by GitHub
Browse files

modify run/resume/view api & fix logDir rewrite (#3545)

parent 1800646c
......@@ -40,7 +40,7 @@ class Experiment:
"""
Prepare an experiment.
Use `Experiment.start()` to launch it.
Use `Experiment.run()` to launch it.
Parameters
----------
......@@ -60,7 +60,7 @@ class Experiment:
experiment.config.trial_command = 'python3 trial.py'
experiment.config.machines.append(RemoteMachineConfig(ip=..., user_name=...))
...
experiment.start(8080)
experiment.run(8080)
Parameters
----------
......@@ -149,27 +149,30 @@ class Experiment:
self._proc = None
_logger.info('Experiment stopped')
def run(self, port: int = 8080, debug: bool = False) -> bool:
def run(self, port: int = 8080, wait_completion: bool = True, debug: bool = False) -> bool:
"""
Run the experiment.
This function will block until experiment finish or error.
If wait_completion is True, this function will block until experiment finish or error.
Return `True` when experiment done; or return `False` when experiment failed.
Otherwise, if wait_completion is False, this function does not block and returns None immediately.
"""
self.start(port, debug)
try:
while True:
time.sleep(10)
status = self.get_status()
if status == 'DONE' or status == 'STOPPED':
return True
if status == 'ERROR':
return False
except KeyboardInterrupt:
_logger.warning('KeyboardInterrupt detected')
finally:
self.stop()
if wait_completion:
try:
while True:
time.sleep(10)
status = self.get_status()
if status == 'DONE' or status == 'STOPPED':
return True
if status == 'ERROR':
return False
except KeyboardInterrupt:
_logger.warning('KeyboardInterrupt detected')
finally:
self.stop()
@classmethod
def connect(cls, port: int):
......@@ -194,7 +197,7 @@ class Experiment:
return experiment
@classmethod
def resume(cls, experiment_id: str, port: int, wait_completion: bool = True, debug: bool = False):
def resume(cls, experiment_id: str, port: int = 8080, wait_completion: bool = True, debug: bool = False):
"""
Resume a stopped experiment.
......@@ -212,14 +215,12 @@ class Experiment:
experiment = Experiment()
experiment.id = experiment_id
experiment.mode = 'resume'
if wait_completion:
experiment.run(port, debug)
else:
experiment.start(port, debug)
experiment.run(port=port, wait_completion=wait_completion, debug=debug)
if not wait_completion:
return experiment
@classmethod
def view(cls, experiment_id: str, port: int, wait_completion: bool = True):
def view(cls, experiment_id: str, port: int = 8080, non_blocking: bool = False):
"""
View a stopped experiment.
......@@ -229,18 +230,24 @@ class Experiment:
The stopped experiment id.
port
The port of web UI.
wait_completion
If true, run in the foreground. If false, run in the background.
non_blocking
If false, run in the foreground. If true, run in the background.
"""
debug = False
experiment = Experiment()
experiment.id = experiment_id
experiment.mode = 'view'
if wait_completion:
experiment.run(port, debug)
else:
experiment.start(port, debug)
experiment.start(port=port, debug=debug)
if non_blocking:
return experiment
else:
try:
while True:
time.sleep(10)
except KeyboardInterrupt:
_logger.warning('KeyboardInterrupt detected')
finally:
experiment.stop()
def get_status(self) -> str:
"""
......
......@@ -319,7 +319,10 @@ def launch_experiment(args, experiment_config, mode, experiment_id, config_versi
if package_name in ['SMAC', 'BOHB', 'PPOTuner']:
print_error(f'The dependencies for {package_name} can be installed through pip install nni[{package_name}]')
raise
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else NNI_HOME_DIR
if config_version == 1:
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else NNI_HOME_DIR
else:
log_dir = experiment_config['experimentWorkingDirectory'] if experiment_config.get('experimentWorkingDirectory') else NNI_HOME_DIR
log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None
#view experiment mode do not need debug function, when view an experiment, there will be no new logs created
foreground = False
......@@ -486,8 +489,10 @@ def manage_stopped_experiment(args, mode):
assert 'trainingService' in experiment_config or 'trainingServicePlatform' in experiment_config
try:
if 'trainingService' in experiment_config:
experiment_config['experimentWorkingDirectory'] = experiments_dict[args.id]['logDir']
launch_experiment(args, experiment_config, mode, experiment_id, 2)
else:
experiment_config['logDir'] = experiments_dict[args.id]['logDir']
launch_experiment(args, experiment_config, mode, experiment_id, 1)
except Exception as exception:
restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid')
......
......@@ -8,7 +8,7 @@ import { TrialJobStatus, LogType } from './trainingService';
import { ExperimentConfig } from './experimentConfig';
type ProfileUpdateType = 'TRIAL_CONCURRENCY' | 'MAX_EXEC_DURATION' | 'SEARCH_SPACE' | 'MAX_TRIAL_NUM';
type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL';
type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL' | 'VIEWED';
namespace ExperimentStartUpMode {
export const NEW = 'new';
export const RESUME = 'resume';
......
......@@ -204,6 +204,7 @@ class NNIManager implements Manager {
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
this.readonly = readonly;
if (readonly) {
this.setStatus('VIEWED');
return Promise.resolve();
}
......
......@@ -237,7 +237,7 @@ class App extends React.Component<{}, AppState> {
}
// experiment status and /trial-jobs api's status could decide website update
if (['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status) || TRIALS.jobListError()) {
if (['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status) || TRIALS.jobListError()) {
// experiment finished, refresh once more to ensure consistency
this.setState(() => ({ interval: 0, isUpdate: false }));
return;
......
......@@ -54,7 +54,7 @@ class ExperimentSummaryPanel extends React.Component<ExpDrawerProps, ExpDrawerSt
this.setState({ experiment: JSON.stringify(result, null, 4) });
}
if (['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status)) {
if (['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status)) {
if (this.refreshId !== null || this.refreshId !== undefined) {
window.clearInterval(this.refreshId);
}
......
......@@ -419,7 +419,7 @@ class TableList extends React.Component<TableListProps, TableListState> {
private _renderOperationColumn(record: any): React.ReactNode {
const runningTrial: boolean = ['RUNNING', 'UNKNOWN'].includes(record.status) ? false : true;
const disabledAddCustomizedTrial = ['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status);
const disabledAddCustomizedTrial = ['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status);
return (
<Stack className='detail-button' horizontal>
<PrimaryButton
......
......@@ -21,6 +21,7 @@ const EXPERIMENTSTATUS = [
'ERROR',
'STOPPING',
'STOPPED',
'VIEWED',
'DONE',
'NO_MORE_TRIAL',
'TUNER_NO_MORE_TRIAL'
......
......@@ -19,7 +19,8 @@ $error: #a4262c;
}
.DONE,
.STOPPED {
.STOPPED,
.VIEWED {
color: $done;
.ms-ProgressIndicator-progressBar {
......@@ -37,7 +38,8 @@ $error: #a4262c;
.bestMetric {
.DONE,
.STOPPED {
.STOPPED,
.VIEWED {
color: $done;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment