Unverified Commit f82ef623 authored by Junwei Sun's avatar Junwei Sun Committed by GitHub
Browse files

update nnicli (#2713)

parent 44954e0c
...@@ -262,7 +262,7 @@ Debug mode will disable version check function in Trialkeeper. ...@@ -262,7 +262,7 @@ Debug mode will disable version check function in Trialkeeper.
|Name, shorthand|Required|Default|Description| |Name, shorthand|Required|Default|Description|
|------|------|------ |------| |------|------|------ |------|
|id| False| |ID of the experiment you want to set| |id| False| |ID of the experiment you want to set|
|--value, -v| True| |the experiment duration will be NUMBER seconds. SUFFIX may be 's' for seconds (the default), 'm' for minutes, 'h' for hours or 'd' for days.| |--value, -v| True| | Strings like '1m' for one minute or '2h' for two hours. SUFFIX may be 's' for seconds, 'm' for minutes, 'h' for hours or 'd' for days.|
* Example * Example
......
...@@ -17,6 +17,7 @@ from recommonmark.parser import CommonMarkParser ...@@ -17,6 +17,7 @@ from recommonmark.parser import CommonMarkParser
import os import os
import sys import sys
sys.path.insert(0, os.path.abspath('../../src/sdk/pynni')) sys.path.insert(0, os.path.abspath('../../src/sdk/pynni'))
sys.path.insert(1, os.path.abspath('../../src/sdk/pycli'))
# -- Project information --------------------------------------------------- # -- Project information ---------------------------------------------------
......
# NNI Client
NNI client is a Python API for `nnictl` that implements its most commonly used commands. With this API, users can control their experiments, collect experiment results, and run advanced analyses on those results directly from Python code instead of from the command line. Here is an example:
```
from nnicli import Experiment
# create an experiment instance
exp = Experiment()
# start an experiment, then connect the instance to this experiment
# you can also use `resume_experiment`, `view_experiment` or `connect_experiment`
# only one of them should be called in one instance
exp.start_experiment('nni/examples/trials/mnist-pytorch/config.yml', port=9090)
# update the experiment's concurrency
exp.update_concurrency(3)
# get some information about the experiment
print(exp.get_experiment_status())
print(exp.get_job_statistics())
print(exp.list_trial_jobs())
# stop the experiment, then disconnect the instance from the experiment.
exp.stop_experiment()
```
## References
```eval_rst
.. autoclass:: nnicli.Experiment
:members:
.. autoclass:: nnicli.TrialJob
:members:
.. autoclass:: nnicli.TrialHyperParameters
:members:
.. autoclass:: nnicli.TrialMetricData
:members:
.. autoclass:: nnicli.TrialResult
:members:
```
...@@ -8,4 +8,5 @@ Python API Reference ...@@ -8,4 +8,5 @@ Python API Reference
Auto Tune <autotune_ref> Auto Tune <autotune_ref>
NAS <NAS/NasReference> NAS <NAS/NasReference>
Compression Utilities <Compressor/CompressionReference> Compression Utilities <Compressor/CompressionReference>
\ No newline at end of file NNI Client <nnicli_ref>
\ No newline at end of file
...@@ -5,67 +5,47 @@ ...@@ -5,67 +5,47 @@
Example: Example:
import nnicli as nc from nnicli import Experiment
nc.start_nni('../../../../examples/trials/mnist/config.yml') exp = Experiment()
exp.start_experiment('../../../../examples/trials/mnist-pytorch/config.yml')
nc.set_endpoint('http://localhost:8080') exp.update_concurrency(3)
print(nc.version()) print(exp.get_experiment_status())
print(nc.get_experiment_status()) print(exp.get_job_statistics())
print(exp.list_trial_jobs())
print(nc.get_job_statistics()) exp.stop_experiment()
print(nc.list_trial_jobs())
nc.stop_nni()
""" """
import sys import sys
import os import os
import subprocess import subprocess
import re
import json
import requests import requests
__all__ = [ __all__ = [
'start_nni', 'Experiment',
'stop_nni', 'TrialResult',
'set_endpoint', 'TrialMetricData',
'version', 'TrialHyperParameters',
'get_experiment_status', 'TrialJob'
'get_experiment_profile',
'get_trial_job',
'list_trial_jobs',
'get_job_statistics',
'get_job_metrics',
'export_data'
] ]
EXPERIMENT_PATH = 'experiment' EXPERIMENT_PATH = 'experiment'
VERSION_PATH = 'version'
STATUS_PATH = 'check-status' STATUS_PATH = 'check-status'
JOB_STATISTICS_PATH = 'job-statistics' JOB_STATISTICS_PATH = 'job-statistics'
TRIAL_JOBS_PATH = 'trial-jobs' TRIAL_JOBS_PATH = 'trial-jobs'
METRICS_PATH = 'metric-data' METRICS_PATH = 'metric-data'
EXPORT_DATA_PATH = 'export-data' EXPORT_DATA_PATH = 'export-data'
API_ROOT_PATH = 'api/v1/nni' API_ROOT_PATH = 'api/v1/nni'
_api_endpoint = None def _nni_rest_get(endpoint, api_path, response_type='json'):
_check_endpoint(endpoint)
def set_endpoint(endpoint): uri = '{}/{}/{}'.format(endpoint.strip('/'), API_ROOT_PATH, api_path)
"""set endpoint of nni rest server for nnicli, for example:
http://localhost:8080
"""
global _api_endpoint
_api_endpoint = endpoint
def _check_endpoint():
if _api_endpoint is None:
raise AssertionError("Please call set_endpoint to specify nni endpoint")
def _nni_rest_get(api_path, response_type='json'):
_check_endpoint()
uri = '{}/{}/{}'.format(_api_endpoint, API_ROOT_PATH, api_path)
res = requests.get(uri) res = requests.get(uri)
if _http_succeed(res.status_code): if _http_succeed(res.status_code):
if response_type == 'json': if response_type == 'json':
...@@ -73,7 +53,7 @@ def _nni_rest_get(api_path, response_type='json'): ...@@ -73,7 +53,7 @@ def _nni_rest_get(api_path, response_type='json'):
elif response_type == 'text': elif response_type == 'text':
return res.text return res.text
else: else:
raise AssertionError('Incorrect response_type') raise RuntimeError('Incorrect response_type')
else: else:
return None return None
...@@ -92,48 +72,444 @@ def _create_process(cmd): ...@@ -92,48 +72,444 @@ def _create_process(cmd):
print(output.decode('utf-8').strip()) print(output.decode('utf-8').strip())
return process.returncode return process.returncode
def start_nni(config_file): def _check_endpoint(endpoint):
"""start nni experiment with specified configuration file""" if endpoint is None:
cmd = 'nnictl create --config {}'.format(config_file).split(' ') raise RuntimeError("This instance hasn't been connect to an experiment.")
if _create_process(cmd) != 0:
raise RuntimeError('Failed to start nni.') class TrialResult:
"""
def stop_nni(): TrialResult stores the result information of a trial job.
"""stop nni experiment"""
cmd = 'nnictl stop'.split(' ') Parameters
if _create_process(cmd) != 0: ----------
raise RuntimeError('Failed to stop nni.') json_obj: dict
Json object that stores the result information.
def version():
"""return version of nni""" Attributes
return _nni_rest_get(VERSION_PATH, 'text') ----------
parameter: dict
def get_experiment_status(): Hyper parameters for this trial.
"""return experiment status as a dict""" value: serializable object, usually a number, or a dict with key "default" and other extra keys
return _nni_rest_get(STATUS_PATH) Final result.
trialJobId: str
def get_experiment_profile(): Trial job id.
"""return experiment profile as a dict""" """
return _nni_rest_get(EXPERIMENT_PATH) def __init__(self, json_obj):
self.parameter = None
def get_trial_job(trial_job_id): self.value = None
"""return trial job information as a dict""" self.trialJobId = None
assert trial_job_id is not None for key in json_obj.keys():
return _nni_rest_get(os.path.join(TRIAL_JOBS_PATH, trial_job_id)) if key == 'id':
setattr(self, 'trialJobId', json_obj[key])
def list_trial_jobs(): elif hasattr(self, key):
"""return information for all trial jobs as a list""" setattr(self, key, json_obj[key])
return _nni_rest_get(TRIAL_JOBS_PATH) self.value = json.loads(self.value)
def get_job_statistics(): def __repr__(self):
"""return trial job statistics information as a dict""" return "TrialResult(parameter: {} value: {} trialJobId: {})".format(self.parameter, self.value, self.trialJobId)
return _nni_rest_get(JOB_STATISTICS_PATH)
class TrialMetricData:
def get_job_metrics(trial_job_id=None): """
"""return trial job metrics""" TrialMetricData stores the metric data of a trial job.
api_path = METRICS_PATH if trial_job_id is None else os.path.join(METRICS_PATH, trial_job_id) A trial job may have both intermediate metric and final metric.
return _nni_rest_get(api_path)
Parameters
def export_data(): ----------
"""return exported information for all trial jobs""" json_obj: dict
return _nni_rest_get(EXPORT_DATA_PATH) Json object that stores the metric data.
Attributes
----------
timestamp: int
Time stamp.
trialJobId: str
Trial job id.
parameterId: int
Parameter id.
type: str
Metric type, `PERIODICAL` for intermediate result and `FINAL` for final result.
sequence: int
Sequence number in this trial.
data: serializable object, usually a number, or a dict with key "default" and other extra keys
Metric data.
"""
def __init__(self, json_obj):
self.timestamp = None
self.trialJobId = None
self.parameterId = None
self.type = None
self.sequence = None
self.data = None
for key in json_obj.keys():
setattr(self, key, json_obj[key])
self.data = json.loads(json.loads(self.data))
def __repr__(self):
return "TrialMetricData(timestamp: {} trialJobId: {} parameterId: {} type: {} sequence: {} data: {})" \
.format(self.timestamp, self.trialJobId, self.parameterId, self.type, self.sequence, self.data)
class TrialHyperParameters:
    """
    Container for the hyper parameters handed to one trial job.

    Parameters
    ----------
    json_obj: dict
        Decoded JSON object describing the hyper parameters. Keys that do not
        correspond to an attribute of this object are ignored.

    Attributes
    ----------
    parameter_id: int
        Parameter id.
    parameter_source: str
        Parameter source.
    parameters: dict
        Hyper parameters.
    parameter_index: int
        Parameter index.
    """

    def __init__(self, json_obj):
        # Every known field starts out as None so that a partial JSON object
        # still yields a fully populated instance.
        self.parameter_id = None
        self.parameter_source = None
        self.parameters = None
        self.parameter_index = None

        for name, content in json_obj.items():
            if not hasattr(self, name):
                # Not an attribute of this object: drop it silently.
                continue
            setattr(self, name, content)

    def __repr__(self):
        template = ("TrialHyperParameters(parameter_id: {} parameter_source: {} "
                    "parameters: {} parameter_index: {})")
        fields = (self.parameter_id, self.parameter_source,
                  self.parameters, self.parameter_index)
        return template.format(*fields)
class TrialJob:
    """
    Container for the information the REST API reports about one trial job.

    Parameters
    ----------
    json_obj: dict
        Decoded JSON object describing the trial job. The key ``id`` is stored
        as ``trialJobId``; keys that do not correspond to an attribute of this
        object are ignored.

    Attributes
    ----------
    trialJobId: str
        Trial job id.
    status: str
        Job status.
    hyperParameters: list of `nnicli.TrialHyperParameters`
        See `nnicli.TrialHyperParameters`. Each incoming element is a JSON
        string that is decoded before being wrapped.
    logPath: str
        Log path.
    startTime: int
        Job start time (timestamp).
    endTime: int
        Job end time (timestamp).
    finalMetricData: list of `nnicli.TrialMetricData`
        See `nnicli.TrialMetricData`.
    stderrPath: str
        Stderr path reported for the trial job.
    """

    def __init__(self, json_obj):
        self.trialJobId = None
        self.status = None
        self.hyperParameters = None
        self.logPath = None
        self.startTime = None
        self.endTime = None
        self.finalMetricData = None
        self.stderrPath = None

        for name, content in json_obj.items():
            if name == 'id':
                # The REST API calls the job id "id"; expose it as trialJobId.
                self.trialJobId = content
                continue
            if hasattr(self, name):
                setattr(self, name, content)

        # Wrap the raw payloads only when something was actually delivered.
        if self.hyperParameters:
            wrapped_params = []
            for raw in self.hyperParameters:
                wrapped_params.append(TrialHyperParameters(json.loads(raw)))
            self.hyperParameters = wrapped_params
        if self.finalMetricData:
            wrapped_metrics = []
            for raw in self.finalMetricData:
                wrapped_metrics.append(TrialMetricData(raw))
            self.finalMetricData = wrapped_metrics

    def __repr__(self):
        template = ("TrialJob(trialJobId: {} status: {} hyperParameters: {} logPath: {} startTime: {} "
                    "endTime: {} finalMetricData: {} stderrPath: {})")
        fields = (self.trialJobId, self.status, self.hyperParameters, self.logPath,
                  self.startTime, self.endTime, self.finalMetricData, self.stderrPath)
        return template.format(*fields)
class Experiment:
    """
    Handle to a single NNI experiment, driven through `nnictl` and the NNI REST API.

    A new instance is not connected to anything. Call exactly one of
    `start_experiment`, `resume_experiment`, `view_experiment` or
    `connect_experiment` to bind it to an experiment; `stop_experiment`
    releases the binding again.
    """

    def __init__(self):
        self._endpoint = None
        self._exp_id = None
        self._port = None

    @property
    def endpoint(self):
        """URL of the REST server this instance is connected to, or None."""
        return self._endpoint

    @property
    def exp_id(self):
        """Id of the connected experiment, or None."""
        return self._exp_id

    @property
    def port(self):
        """Port of the REST server this instance is connected to, or None."""
        return self._port

    def _reset_connection(self):
        """Forget the current experiment so the instance can be connected again."""
        self._endpoint = None
        self._exp_id = None
        self._port = None

    @staticmethod
    def _launch_command(args, port=None, debug=False):
        """Build the `nnictl` argument list for a create/resume/view command.

        The command is assembled as a list rather than by splitting a formatted
        string, so arguments that contain spaces (e.g. a config path) stay intact.
        """
        cmd = ['nnictl'] + list(args)
        if port:
            cmd += ['--port', str(port)]
        if debug:
            cmd.append('--debug')
        return cmd

    @staticmethod
    def _port_from_endpoint(endpoint):
        """Return the explicit port of `endpoint` as an int, or None if it has none."""
        from urllib.parse import urlsplit
        try:
            return urlsplit(endpoint).port
        except ValueError:
            # Raised by urlsplit for a non-numeric or out-of-range port.
            return None

    def _fetch(self, api_path, description):
        """GET `api_path` from the connected REST server and fail loudly on an error response."""
        _check_endpoint(self._endpoint)
        response = _nni_rest_get(self._endpoint, api_path)
        if response is None:
            raise RuntimeError('Failed to get {} from {}.'.format(description, self._endpoint))
        return response

    def _exec_command(self, cmd, port=None):
        """Run a launching `nnictl` command and connect this instance to the result.

        Raises
        ------
        RuntimeError
            If the instance is already connected, or if the command fails.
        """
        if self._endpoint is not None:
            raise RuntimeError('This instance has been connected to an experiment.')
        if _create_process(cmd) != 0:
            raise RuntimeError('Failed to establish experiment, please check your config.')
        # Without an explicit port the REST server is assumed to listen on 8080.
        self._port = port if port else 8080
        self._endpoint = 'http://localhost:{}'.format(self._port)
        try:
            self._exp_id = self.get_experiment_profile()['id']
        except Exception:
            # Do not stay half-connected (endpoint set, no experiment id).
            self._reset_connection()
            raise

    def start_experiment(self, config_file, port=None, debug=False):
        """
        Start an experiment with specified configuration file and connect to it.

        Parameters
        ----------
        config_file: str
            Path to the config file.
        port: int
            The port of restful server, bigger than 1024.
        debug: boolean
            Set debug mode.
        """
        cmd = self._launch_command(['create', '--config', str(config_file)], port, debug)
        self._exec_command(cmd, port)

    def resume_experiment(self, exp_id, port=None, debug=False):
        """
        Resume a stopped experiment with specified experiment id and connect to it.

        Parameters
        ----------
        exp_id: str
            Experiment id.
        port: int
            The port of restful server, bigger than 1024.
        debug: boolean
            Set debug mode.
        """
        cmd = self._launch_command(['resume', str(exp_id)], port, debug)
        self._exec_command(cmd, port)

    def view_experiment(self, exp_id, port=None):
        """
        View a stopped experiment with specified experiment id and connect to it.

        Parameters
        ----------
        exp_id: str
            Experiment id.
        port: int
            The port of restful server, bigger than 1024.
        """
        cmd = self._launch_command(['view', str(exp_id)], port)
        self._exec_command(cmd, port)

    def connect_experiment(self, endpoint):
        """
        Connect to an existing experiment.

        Parameters
        ----------
        endpoint: str
            The endpoint of nni rest server, i.e., the url of Web UI. Should be a format like `http://ip:port`.

        Raises
        ------
        RuntimeError
            If the instance is already connected, or if no experiment profile
            can be read from `endpoint`.
        """
        if self._endpoint is not None:
            raise RuntimeError('This instance has been connected to an experiment.')
        self._endpoint = endpoint
        try:
            self._exp_id = self.get_experiment_profile()['id']
        except TypeError:
            # The profile request returned nothing usable.
            self._reset_connection()
            raise RuntimeError('Invalid experiment endpoint.')
        except Exception:
            # E.g. a connection error: roll back so the instance can be reused.
            self._reset_connection()
            raise
        self._port = self._port_from_endpoint(endpoint)

    def stop_experiment(self):
        """Stop the experiment, then disconnect this instance from it.

        Raises
        ------
        RuntimeError
            If the instance is not connected, or if `nnictl stop` fails.
        """
        _check_endpoint(self._endpoint)
        cmd = ['nnictl', 'stop', str(self._exp_id)]
        if _create_process(cmd) != 0:
            raise RuntimeError('Failed to stop experiment.')
        self._reset_connection()

    def update_searchspace(self, filename):
        """
        Update the experiment's search space.

        Parameters
        ----------
        filename: str
            Path to the searchspace file.
        """
        _check_endpoint(self._endpoint)
        cmd = ['nnictl', 'update', 'searchspace', str(self._exp_id), '--filename', str(filename)]
        if _create_process(cmd) != 0:
            raise RuntimeError('Failed to update searchspace.')

    def update_concurrency(self, value):
        """
        Update an experiment's concurrency.

        Parameters
        ----------
        value: int
            New concurrency value.
        """
        _check_endpoint(self._endpoint)
        cmd = ['nnictl', 'update', 'concurrency', str(self._exp_id), '--value', str(value)]
        if _create_process(cmd) != 0:
            raise RuntimeError('Failed to update concurrency.')

    def update_duration(self, value):
        """
        Update an experiment's duration.

        Parameters
        ----------
        value: str
            Strings like '1m' for one minute or '2h' for two hours.
            SUFFIX may be 's' for seconds, 'm' for minutes, 'h' for hours or 'd' for days.
        """
        _check_endpoint(self._endpoint)
        cmd = ['nnictl', 'update', 'duration', str(self._exp_id), '--value', str(value)]
        if _create_process(cmd) != 0:
            raise RuntimeError('Failed to update duration.')

    def update_trialnum(self, value):
        """
        Update an experiment's maximum trial number.

        Parameters
        ----------
        value: int
            New trialnum value.
        """
        _check_endpoint(self._endpoint)
        cmd = ['nnictl', 'update', 'trialnum', str(self._exp_id), '--value', str(value)]
        if _create_process(cmd) != 0:
            raise RuntimeError('Failed to update trialnum.')

    def update_trailnum(self, value):
        """
        Misspelled alias of `update_trialnum`, kept for backward compatibility.

        Parameters
        ----------
        value: int
            New trialnum value.
        """
        self.update_trialnum(value)

    def get_experiment_status(self):
        """
        Return experiment status as a dict.

        Returns
        -------
        dict
            Experiment status.
        """
        _check_endpoint(self._endpoint)
        return _nni_rest_get(self._endpoint, STATUS_PATH)

    def get_trial_job(self, trial_job_id):
        """
        Return a trial job.

        Parameters
        ----------
        trial_job_id: str
            Trial job id.

        Returns
        -------
        nnicli.TrialJob
            A `nnicli.TrialJob` instance corresponding to `trial_job_id`.
        """
        _check_endpoint(self._endpoint)
        if trial_job_id is None:
            # Raised explicitly so the check survives `python -O`; the exception
            # type is kept as AssertionError for backward compatibility.
            raise AssertionError('trial_job_id must not be None')
        # URL segments are always joined with '/', independent of the host OS.
        api_path = '{}/{}'.format(TRIAL_JOBS_PATH, trial_job_id)
        return TrialJob(self._fetch(api_path, 'trial job {}'.format(trial_job_id)))

    def list_trial_jobs(self):
        """
        Return information for all trial jobs as a list.

        Returns
        -------
        list
            List of `nnicli.TrialJob`.
        """
        return [TrialJob(e) for e in self._fetch(TRIAL_JOBS_PATH, 'trial jobs')]

    def get_job_statistics(self):
        """
        Return trial job statistics information.

        Returns
        -------
        object
            Job statistics information, as decoded from the REST response.
        """
        _check_endpoint(self._endpoint)
        return _nni_rest_get(self._endpoint, JOB_STATISTICS_PATH)

    def get_job_metrics(self, trial_job_id=None):
        """
        Return trial job metrics.

        Parameters
        ----------
        trial_job_id: str
            Trial job id. If this parameter is None, all trial jobs' metrics will be returned.

        Returns
        -------
        dict
            Each key is a trialJobId, the corresponding value is a list of `nnicli.TrialMetricData`.
        """
        if trial_job_id is None:
            api_path = METRICS_PATH
        else:
            # URL segments are always joined with '/', independent of the host OS.
            api_path = '{}/{}'.format(METRICS_PATH, trial_job_id)
        output = {}
        for metric in self._fetch(api_path, 'job metrics'):
            output.setdefault(metric["trialJobId"], []).append(TrialMetricData(metric))
        return output

    def export_data(self):
        """
        Return exported information for all trial jobs.

        Returns
        -------
        list
            List of `nnicli.TrialResult`.
        """
        return [TrialResult(e) for e in self._fetch(EXPORT_DATA_PATH, 'exported data')]

    def get_experiment_profile(self):
        """
        Return experiment profile as a dict.

        Returns
        -------
        dict
            The profile of the experiment.
        """
        _check_endpoint(self._endpoint)
        return _nni_rest_get(self._endpoint, EXPERIMENT_PATH)
...@@ -140,8 +140,8 @@ testCases: ...@@ -140,8 +140,8 @@ testCases:
config: config:
maxTrialNum: 4 maxTrialNum: 4
trialConcurrency: 4 trialConcurrency: 4
launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")' launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator: validator:
class: NnicliValidator class: NnicliValidator
platform: linux darwin platform: linux darwin
......
...@@ -110,8 +110,8 @@ testCases: ...@@ -110,8 +110,8 @@ testCases:
config: config:
maxTrialNum: 4 maxTrialNum: 4
trialConcurrency: 4 trialConcurrency: 4
launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")' launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator: validator:
class: NnicliValidator class: NnicliValidator
platform: linux darwin platform: linux darwin
......
...@@ -45,10 +45,10 @@ testCases: ...@@ -45,10 +45,10 @@ testCases:
- name: nnicli - name: nnicli
configFile: test/config/examples/sklearn-regression.yml configFile: test/config/examples/sklearn-regression.yml
config: config:
maxTrialNum: 2 maxTrialNum: 4
trialConcurrency: 2 trialConcurrency: 4
launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")' launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()' stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator: validator:
class: NnicliValidator class: NnicliValidator
platform: linux darwin platform: linux darwin
......
...@@ -6,7 +6,7 @@ from os import remove ...@@ -6,7 +6,7 @@ from os import remove
import subprocess import subprocess
import json import json
import requests import requests
import nnicli as nc from nnicli import Experiment
from utils import METRICS_URL from utils import METRICS_URL
...@@ -80,8 +80,8 @@ class MetricsValidator(ITValidator): ...@@ -80,8 +80,8 @@ class MetricsValidator(ITValidator):
class NnicliValidator(ITValidator): class NnicliValidator(ITValidator):
def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs): def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
print(rest_endpoint) print(rest_endpoint)
nc.set_endpoint(rest_endpoint) exp = Experiment()
#print(nc.version()) exp.connect_experiment(rest_endpoint)
print(nc.get_job_statistics()) print(exp.get_job_statistics())
print(nc.get_experiment_status()) print(exp.get_experiment_status())
print(nc.list_trial_jobs()) print(exp.list_trial_jobs())
...@@ -14,7 +14,7 @@ from .constants import REST_TIME_OUT, TUNERS_SUPPORTING_IMPORT_DATA, TUNERS_NO_N ...@@ -14,7 +14,7 @@ from .constants import REST_TIME_OUT, TUNERS_SUPPORTING_IMPORT_DATA, TUNERS_NO_N
def validate_digit(value, start, end): def validate_digit(value, start, end):
'''validate if a digit is valid''' '''validate if a digit is valid'''
if not str(value).isdigit() or int(value) < start or int(value) > end: if not str(value).isdigit() or int(value) < start or int(value) > end:
raise ValueError('%s must be a digit from %s to %s' % (value, start, end)) raise ValueError('value (%s) must be a digit from %s to %s' % (value, start, end))
def validate_file(path): def validate_file(path):
'''validate if a file exist''' '''validate if a file exist'''
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment