Unverified commit 1278c439, authored by J-shang, committed by GitHub
Browse files

modify run/resume/view api & fix logDir rewrite (#3545)

parent 1800646c
...@@ -40,7 +40,7 @@ class Experiment: ...@@ -40,7 +40,7 @@ class Experiment:
""" """
Prepare an experiment. Prepare an experiment.
Use `Experiment.start()` to launch it. Use `Experiment.run()` to launch it.
Parameters Parameters
---------- ----------
...@@ -60,7 +60,7 @@ class Experiment: ...@@ -60,7 +60,7 @@ class Experiment:
experiment.config.trial_command = 'python3 trial.py' experiment.config.trial_command = 'python3 trial.py'
experiment.config.machines.append(RemoteMachineConfig(ip=..., user_name=...)) experiment.config.machines.append(RemoteMachineConfig(ip=..., user_name=...))
... ...
experiment.start(8080) experiment.run(8080)
Parameters Parameters
---------- ----------
...@@ -149,27 +149,30 @@ class Experiment: ...@@ -149,27 +149,30 @@ class Experiment:
self._proc = None self._proc = None
_logger.info('Experiment stopped') _logger.info('Experiment stopped')
def run(self, port: int = 8080, debug: bool = False) -> bool: def run(self, port: int = 8080, wait_completion: bool = True, debug: bool = False) -> bool:
""" """
Run the experiment. Run the experiment.
This function will block until experiment finish or error. If wait_completion is True, this function will block until experiment finish or error.
Return `True` when experiment done; or return `False` when experiment failed. Return `True` when experiment done; or return `False` when experiment failed.
Otherwise, if wait_completion is False, this function does not block and returns None immediately.
""" """
self.start(port, debug) self.start(port, debug)
try: if wait_completion:
while True: try:
time.sleep(10) while True:
status = self.get_status() time.sleep(10)
if status == 'DONE' or status == 'STOPPED': status = self.get_status()
return True if status == 'DONE' or status == 'STOPPED':
if status == 'ERROR': return True
return False if status == 'ERROR':
except KeyboardInterrupt: return False
_logger.warning('KeyboardInterrupt detected') except KeyboardInterrupt:
finally: _logger.warning('KeyboardInterrupt detected')
self.stop() finally:
self.stop()
@classmethod @classmethod
def connect(cls, port: int): def connect(cls, port: int):
...@@ -194,7 +197,7 @@ class Experiment: ...@@ -194,7 +197,7 @@ class Experiment:
return experiment return experiment
@classmethod @classmethod
def resume(cls, experiment_id: str, port: int, wait_completion: bool = True, debug: bool = False): def resume(cls, experiment_id: str, port: int = 8080, wait_completion: bool = True, debug: bool = False):
""" """
Resume a stopped experiment. Resume a stopped experiment.
...@@ -212,14 +215,12 @@ class Experiment: ...@@ -212,14 +215,12 @@ class Experiment:
experiment = Experiment() experiment = Experiment()
experiment.id = experiment_id experiment.id = experiment_id
experiment.mode = 'resume' experiment.mode = 'resume'
if wait_completion: experiment.run(port=port, wait_completion=wait_completion, debug=debug)
experiment.run(port, debug) if not wait_completion:
else:
experiment.start(port, debug)
return experiment return experiment
@classmethod @classmethod
def view(cls, experiment_id: str, port: int, wait_completion: bool = True): def view(cls, experiment_id: str, port: int = 8080, non_blocking: bool = False):
""" """
View a stopped experiment. View a stopped experiment.
...@@ -229,18 +230,24 @@ class Experiment: ...@@ -229,18 +230,24 @@ class Experiment:
The stopped experiment id. The stopped experiment id.
port port
The port of web UI. The port of web UI.
wait_completion non_blocking
If true, run in the foreground. If false, run in the background. If false, run in the foreground. If true, run in the background.
""" """
debug = False debug = False
experiment = Experiment() experiment = Experiment()
experiment.id = experiment_id experiment.id = experiment_id
experiment.mode = 'view' experiment.mode = 'view'
if wait_completion: experiment.start(port=port, debug=debug)
experiment.run(port, debug) if non_blocking:
else:
experiment.start(port, debug)
return experiment return experiment
else:
try:
while True:
time.sleep(10)
except KeyboardInterrupt:
_logger.warning('KeyboardInterrupt detected')
finally:
experiment.stop()
def get_status(self) -> str: def get_status(self) -> str:
""" """
......
...@@ -319,7 +319,10 @@ def launch_experiment(args, experiment_config, mode, experiment_id, config_versi ...@@ -319,7 +319,10 @@ def launch_experiment(args, experiment_config, mode, experiment_id, config_versi
if package_name in ['SMAC', 'BOHB', 'PPOTuner']: if package_name in ['SMAC', 'BOHB', 'PPOTuner']:
print_error(f'The dependencies for {package_name} can be installed through pip install nni[{package_name}]') print_error(f'The dependencies for {package_name} can be installed through pip install nni[{package_name}]')
raise raise
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else NNI_HOME_DIR if config_version == 1:
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else NNI_HOME_DIR
else:
log_dir = experiment_config['experimentWorkingDirectory'] if experiment_config.get('experimentWorkingDirectory') else NNI_HOME_DIR
log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None
#view experiment mode do not need debug function, when view an experiment, there will be no new logs created #view experiment mode do not need debug function, when view an experiment, there will be no new logs created
foreground = False foreground = False
...@@ -486,8 +489,10 @@ def manage_stopped_experiment(args, mode): ...@@ -486,8 +489,10 @@ def manage_stopped_experiment(args, mode):
assert 'trainingService' in experiment_config or 'trainingServicePlatform' in experiment_config assert 'trainingService' in experiment_config or 'trainingServicePlatform' in experiment_config
try: try:
if 'trainingService' in experiment_config: if 'trainingService' in experiment_config:
experiment_config['experimentWorkingDirectory'] = experiments_dict[args.id]['logDir']
launch_experiment(args, experiment_config, mode, experiment_id, 2) launch_experiment(args, experiment_config, mode, experiment_id, 2)
else: else:
experiment_config['logDir'] = experiments_dict[args.id]['logDir']
launch_experiment(args, experiment_config, mode, experiment_id, 1) launch_experiment(args, experiment_config, mode, experiment_id, 1)
except Exception as exception: except Exception as exception:
restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid') restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid')
......
...@@ -8,7 +8,7 @@ import { TrialJobStatus, LogType } from './trainingService'; ...@@ -8,7 +8,7 @@ import { TrialJobStatus, LogType } from './trainingService';
import { ExperimentConfig } from './experimentConfig'; import { ExperimentConfig } from './experimentConfig';
type ProfileUpdateType = 'TRIAL_CONCURRENCY' | 'MAX_EXEC_DURATION' | 'SEARCH_SPACE' | 'MAX_TRIAL_NUM'; type ProfileUpdateType = 'TRIAL_CONCURRENCY' | 'MAX_EXEC_DURATION' | 'SEARCH_SPACE' | 'MAX_TRIAL_NUM';
type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL'; type ExperimentStatus = 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL' | 'VIEWED';
namespace ExperimentStartUpMode { namespace ExperimentStartUpMode {
export const NEW = 'new'; export const NEW = 'new';
export const RESUME = 'resume'; export const RESUME = 'resume';
......
...@@ -204,6 +204,7 @@ class NNIManager implements Manager { ...@@ -204,6 +204,7 @@ class NNIManager implements Manager {
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId); this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
this.readonly = readonly; this.readonly = readonly;
if (readonly) { if (readonly) {
this.setStatus('VIEWED');
return Promise.resolve(); return Promise.resolve();
} }
......
...@@ -237,7 +237,7 @@ class App extends React.Component<{}, AppState> { ...@@ -237,7 +237,7 @@ class App extends React.Component<{}, AppState> {
} }
// experiment status and /trial-jobs api's status could decide website update // experiment status and /trial-jobs api's status could decide website update
if (['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status) || TRIALS.jobListError()) { if (['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status) || TRIALS.jobListError()) {
// experiment finished, refresh once more to ensure consistency // experiment finished, refresh once more to ensure consistency
this.setState(() => ({ interval: 0, isUpdate: false })); this.setState(() => ({ interval: 0, isUpdate: false }));
return; return;
......
...@@ -54,7 +54,7 @@ class ExperimentSummaryPanel extends React.Component<ExpDrawerProps, ExpDrawerSt ...@@ -54,7 +54,7 @@ class ExperimentSummaryPanel extends React.Component<ExpDrawerProps, ExpDrawerSt
this.setState({ experiment: JSON.stringify(result, null, 4) }); this.setState({ experiment: JSON.stringify(result, null, 4) });
} }
if (['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status)) { if (['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status)) {
if (this.refreshId !== null || this.refreshId !== undefined) { if (this.refreshId !== null || this.refreshId !== undefined) {
window.clearInterval(this.refreshId); window.clearInterval(this.refreshId);
} }
......
...@@ -419,7 +419,7 @@ class TableList extends React.Component<TableListProps, TableListState> { ...@@ -419,7 +419,7 @@ class TableList extends React.Component<TableListProps, TableListState> {
private _renderOperationColumn(record: any): React.ReactNode { private _renderOperationColumn(record: any): React.ReactNode {
const runningTrial: boolean = ['RUNNING', 'UNKNOWN'].includes(record.status) ? false : true; const runningTrial: boolean = ['RUNNING', 'UNKNOWN'].includes(record.status) ? false : true;
const disabledAddCustomizedTrial = ['DONE', 'ERROR', 'STOPPED'].includes(EXPERIMENT.status); const disabledAddCustomizedTrial = ['DONE', 'ERROR', 'STOPPED', 'VIEWED'].includes(EXPERIMENT.status);
return ( return (
<Stack className='detail-button' horizontal> <Stack className='detail-button' horizontal>
<PrimaryButton <PrimaryButton
......
...@@ -21,6 +21,7 @@ const EXPERIMENTSTATUS = [ ...@@ -21,6 +21,7 @@ const EXPERIMENTSTATUS = [
'ERROR', 'ERROR',
'STOPPING', 'STOPPING',
'STOPPED', 'STOPPED',
'VIEWED',
'DONE', 'DONE',
'NO_MORE_TRIAL', 'NO_MORE_TRIAL',
'TUNER_NO_MORE_TRIAL' 'TUNER_NO_MORE_TRIAL'
......
...@@ -19,7 +19,8 @@ $error: #a4262c; ...@@ -19,7 +19,8 @@ $error: #a4262c;
} }
.DONE, .DONE,
.STOPPED { .STOPPED,
.VIEWED {
color: $done; color: $done;
.ms-ProgressIndicator-progressBar { .ms-ProgressIndicator-progressBar {
...@@ -37,7 +38,8 @@ $error: #a4262c; ...@@ -37,7 +38,8 @@ $error: #a4262c;
.bestMetric { .bestMetric {
.DONE, .DONE,
.STOPPED { .STOPPED,
.VIEWED {
color: $done; color: $done;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment