"git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "459a0c9811235b1543323d68e55d9e7ad615b95e"
Unverified Commit f82ef623 authored by Junwei Sun's avatar Junwei Sun Committed by GitHub
Browse files

update nnicli (#2713)

parent 44954e0c
......@@ -262,7 +262,7 @@ Debug mode will disable version check function in Trialkeeper.
|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|id| False| |ID of the experiment you want to set|
|--value, -v| True| |the experiment duration will be NUMBER seconds. SUFFIX may be 's' for seconds (the default), 'm' for minutes, 'h' for hours or 'd' for days.|
|--value, -v| True| | Strings like '1m' for one minute or '2h' for two hours. SUFFIX may be 's' for seconds, 'm' for minutes, 'h' for hours or 'd' for days.|
* Example
......
......@@ -17,6 +17,7 @@ from recommonmark.parser import CommonMarkParser
import os
import sys
sys.path.insert(0, os.path.abspath('../../src/sdk/pynni'))
sys.path.insert(1, os.path.abspath('../../src/sdk/pycli'))
# -- Project information ---------------------------------------------------
......
# NNI Client
NNI client is a Python API for `nnictl` that implements the most commonly used commands. With this API, users can control their experiments, collect experiment results, and conduct advanced analyses based on those results directly in Python code instead of using the command line. Here is an example:
```
from nnicli import Experiment
# create an experiment instance
exp = Experiment()
# start an experiment, then connect the instance to this experiment
# you can also use `resume_experiment`, `view_experiment` or `connect_experiment`
# only one of them should be called in one instance
exp.start_experiment('nni/examples/trials/mnist-pytorch/config.yml', port=9090)
# update the experiment's concurrency
exp.update_concurrency(3)
# get some information about the experiment
print(exp.get_experiment_status())
print(exp.get_job_statistics())
print(exp.list_trial_jobs())
# stop the experiment, then disconnect the instance from the experiment.
exp.stop_experiment()
```
## References
```eval_rst
.. autoclass:: nnicli.Experiment
:members:
.. autoclass:: nnicli.TrialJob
:members:
.. autoclass:: nnicli.TrialHyperParameters
:members:
.. autoclass:: nnicli.TrialMetricData
:members:
.. autoclass:: nnicli.TrialResult
:members:
```
......@@ -9,3 +9,4 @@ Python API Reference
Auto Tune <autotune_ref>
NAS <NAS/NasReference>
Compression Utilities <Compressor/CompressionReference>
NNI Client <nnicli_ref>
\ No newline at end of file
......@@ -5,67 +5,47 @@
Example:
import nnicli as nc
from nnicli import Experiment
nc.start_nni('../../../../examples/trials/mnist/config.yml')
exp = Experiment()
exp.start_experiment('../../../../examples/trials/mnist-pytorch/config.yml')
nc.set_endpoint('http://localhost:8080')
exp.update_concurrency(3)
print(nc.version())
print(nc.get_experiment_status())
print(exp.get_experiment_status())
print(exp.get_job_statistics())
print(exp.list_trial_jobs())
print(nc.get_job_statistics())
print(nc.list_trial_jobs())
nc.stop_nni()
exp.stop_experiment()
"""
import sys
import os
import subprocess
import re
import json
import requests
__all__ = [
'start_nni',
'stop_nni',
'set_endpoint',
'version',
'get_experiment_status',
'get_experiment_profile',
'get_trial_job',
'list_trial_jobs',
'get_job_statistics',
'get_job_metrics',
'export_data'
'Experiment',
'TrialResult',
'TrialMetricData',
'TrialHyperParameters',
'TrialJob'
]
EXPERIMENT_PATH = 'experiment'
VERSION_PATH = 'version'
STATUS_PATH = 'check-status'
JOB_STATISTICS_PATH = 'job-statistics'
TRIAL_JOBS_PATH = 'trial-jobs'
METRICS_PATH = 'metric-data'
EXPORT_DATA_PATH = 'export-data'
API_ROOT_PATH = 'api/v1/nni'
_api_endpoint = None
def set_endpoint(endpoint):
"""set endpoint of nni rest server for nnicli, for example:
http://localhost:8080
"""
global _api_endpoint
_api_endpoint = endpoint
def _check_endpoint():
if _api_endpoint is None:
raise AssertionError("Please call set_endpoint to specify nni endpoint")
def _nni_rest_get(api_path, response_type='json'):
_check_endpoint()
uri = '{}/{}/{}'.format(_api_endpoint, API_ROOT_PATH, api_path)
def _nni_rest_get(endpoint, api_path, response_type='json'):
_check_endpoint(endpoint)
uri = '{}/{}/{}'.format(endpoint.strip('/'), API_ROOT_PATH, api_path)
res = requests.get(uri)
if _http_succeed(res.status_code):
if response_type == 'json':
......@@ -73,7 +53,7 @@ def _nni_rest_get(api_path, response_type='json'):
elif response_type == 'text':
return res.text
else:
raise AssertionError('Incorrect response_type')
raise RuntimeError('Incorrect response_type')
else:
return None
......@@ -92,48 +72,444 @@ def _create_process(cmd):
print(output.decode('utf-8').strip())
return process.returncode
def start_nni(config_file):
"""start nni experiment with specified configuration file"""
def _check_endpoint(endpoint):
    """
    Ensure that an experiment endpoint has been set.

    Parameters
    ----------
    endpoint: str or None
        Endpoint held by the caller; None means no experiment is attached.

    Raises
    ------
    RuntimeError
        If `endpoint` is None.
    """
    if endpoint is None:
        raise RuntimeError("This instance hasn't been connected to an experiment.")
class TrialResult:
    """
    TrialResult stores the result information of a trial job.

    Parameters
    ----------
    json_obj: dict
        Json object that stores the result information. The key ``id`` is
        stored as ``trialJobId``; keys that match no attribute are ignored.

    Attributes
    ----------
    parameter: dict
        Hyper parameters for this trial.
    value: serializable object, usually a number, or a dict with key "default" and other extra keys
        Final result, decoded from its JSON string. None when the json
        object carries no value.
    trialJobId: str
        Trial job id.
    """
    def __init__(self, json_obj):
        self.parameter = None
        self.value = None
        self.trialJobId = None
        for key, item in json_obj.items():
            if key == 'id':
                self.trialJobId = item
            elif hasattr(self, key):
                setattr(self, key, item)
        # json.loads(None) raises TypeError, so only decode a value that is
        # actually present in the record.
        if self.value is not None:
            self.value = json.loads(self.value)

    def __repr__(self):
        return "TrialResult(parameter: {} value: {} trialJobId: {})".format(self.parameter, self.value, self.trialJobId)
class TrialMetricData:
    """
    Metric record reported by a trial job.

    A trial job may report intermediate metrics as well as a final metric.

    Parameters
    ----------
    json_obj: dict
        Json object that stores the metric data. Every key of the object is
        copied onto the instance as an attribute.

    Attributes
    ----------
    timestamp: int
        Time stamp.
    trialJobId: str
        Trial job id.
    parameterId: int
        Parameter id.
    type: str
        Metric type, `PERIODICAL` for intermediate result and `FINAL` for final result.
    sequence: int
        Sequence number in this trial.
    data: serializable object, usually a number, or a dict with key "default" and other extra keys
        Metric data.
    """
    def __init__(self, json_obj):
        for name in ('timestamp', 'trialJobId', 'parameterId', 'type', 'sequence', 'data'):
            setattr(self, name, None)
        # Unlike the sibling record classes, no key is filtered out here.
        for name, item in json_obj.items():
            setattr(self, name, item)
        # The payload is JSON-decoded twice: the first pass yields a string
        # that is itself a JSON document.
        inner_document = json.loads(self.data)
        self.data = json.loads(inner_document)

    def __repr__(self):
        template = "TrialMetricData(timestamp: {} trialJobId: {} parameterId: {} type: {} sequence: {} data: {})"
        fields = (self.timestamp, self.trialJobId, self.parameterId, self.type, self.sequence, self.data)
        return template.format(*fields)
class TrialHyperParameters:
    """
    Hyper parameters handed to a trial job.

    Parameters
    ----------
    json_obj: dict
        Json object that stores the hyper parameters. Keys that match no
        attribute listed below are ignored.

    Attributes
    ----------
    parameter_id: int
        Parameter id.
    parameter_source: str
        Parameter source.
    parameters: dict
        Hyper parameters.
    parameter_index: int
        Parameter index.
    """
    def __init__(self, json_obj):
        self.parameter_id = None
        self.parameter_source = None
        self.parameters = None
        self.parameter_index = None
        for field, item in json_obj.items():
            if not hasattr(self, field):
                continue
            setattr(self, field, item)

    def __repr__(self):
        template = "TrialHyperParameters(parameter_id: {} parameter_source: {} parameters: {} parameter_index: {})"
        return template.format(self.parameter_id, self.parameter_source, self.parameters, self.parameter_index)
class TrialJob:
    """
    Information about a single trial job.

    Parameters
    ----------
    json_obj: dict
        Json object that stores the trial job information. The key ``id`` is
        stored as ``trialJobId``; keys that match no attribute are ignored.

    Attributes
    ----------
    trialJobId: str
        Trial job id.
    status: str
        Job status.
    hyperParameters: list of `nnicli.TrialHyperParameters`
        See `nnicli.TrialHyperParameters`.
    logPath: str
        Log path.
    startTime: int
        Job start time (timestamp).
    endTime: int
        Job end time (timestamp).
    finalMetricData: list of `nnicli.TrialMetricData`
        See `nnicli.TrialMetricData`.
    stderrPath: str
        Value of the ``stderrPath`` field of the json object, if present.
    """
    def __init__(self, json_obj):
        self.trialJobId = None
        self.status = None
        self.hyperParameters = None
        self.logPath = None
        self.startTime = None
        self.endTime = None
        self.finalMetricData = None
        self.stderrPath = None
        for field, item in json_obj.items():
            target = 'trialJobId' if field == 'id' else field
            if field == 'id' or hasattr(self, field):
                setattr(self, target, item)
        if self.hyperParameters:
            # Each entry is a JSON string describing one set of hyper parameters.
            decoded = []
            for raw in self.hyperParameters:
                decoded.append(TrialHyperParameters(json.loads(raw)))
            self.hyperParameters = decoded
        if self.finalMetricData:
            self.finalMetricData = list(map(TrialMetricData, self.finalMetricData))

    def __repr__(self):
        template = ("TrialJob(trialJobId: {} status: {} hyperParameters: {} logPath: {} startTime: {} "
                    "endTime: {} finalMetricData: {} stderrPath: {})")
        return template.format(self.trialJobId, self.status, self.hyperParameters, self.logPath,
                               self.startTime, self.endTime, self.finalMetricData, self.stderrPath)
class Experiment:
def __init__(self):
    # A new instance is not attached to any experiment: all three fields
    # stay None until one of the start/resume/view/connect methods fills
    # them in, and stop_experiment resets them to None again.
    self._endpoint = None
    self._exp_id = None
    self._port = None

@property
def endpoint(self):
    """Endpoint of the attached experiment, or None when not connected."""
    return self._endpoint

@property
def exp_id(self):
    """Id of the attached experiment, or None when not connected."""
    return self._exp_id

@property
def port(self):
    """Port of the attached experiment, or None when not connected."""
    return self._port
def _exec_command(self, cmd, port=None):
    """
    Run an `nnictl` command and attach this instance to the resulting experiment.

    Parameters
    ----------
    cmd: list of str
        Command line to execute, one argument per element.
    port: int
        Port the experiment was asked to use; 8080 is assumed when omitted.

    Raises
    ------
    RuntimeError
        If the instance is already connected, or the command exits with a
        non-zero return code.
    """
    already_connected = self._endpoint is not None
    if already_connected:
        raise RuntimeError('This instance has been connected to an experiment.')
    exit_code = _create_process(cmd)
    if exit_code != 0:
        raise RuntimeError('Failed to establish experiment, please check your config.')
    # Fall back to 8080 when no port (or a falsy one) was supplied.
    self._port = port or 8080
    self._endpoint = 'http://localhost:{}'.format(self._port)
    profile = self.get_experiment_profile()
    self._exp_id = profile['id']
def start_experiment(self, config_file, port=None, debug=False):
    """
    Start an experiment with specified configuration file and connect to it.

    Parameters
    ----------
    config_file: str
        Path to the config file.
    port: int
        The port of restful server, bigger than 1024.
    debug: boolean
        Set debug mode.
    """
    # Build the argument list directly rather than splitting a formatted
    # string, so a config path containing spaces remains one argument.
    cmd = ['nnictl', 'create', '--config', str(config_file)]
    if port:
        cmd += ['--port', str(port)]
    if debug:
        cmd.append('--debug')
    self._exec_command(cmd, port)
def resume_experiment(self, exp_id, port=None, debug=False):
    """
    Resume a stopped experiment with specified experiment id.

    Parameters
    ----------
    exp_id: str
        Experiment id.
    port: int
        The port of restful server, bigger than 1024.
    debug: boolean
        Set debug mode.
    """
    # One list element per argument: nothing is re-split on whitespace.
    cmd = ['nnictl', 'resume', str(exp_id)]
    if port:
        cmd += ['--port', str(port)]
    if debug:
        cmd.append('--debug')
    self._exec_command(cmd, port)
def view_experiment(self, exp_id, port=None):
    """
    View a stopped experiment with specified experiment id.

    Parameters
    ----------
    exp_id: str
        Experiment id.
    port: int
        The port of restful server, bigger than 1024.
    """
    # One list element per argument: nothing is re-split on whitespace.
    cmd = ['nnictl', 'view', str(exp_id)]
    if port:
        cmd += ['--port', str(port)]
    self._exec_command(cmd, port)
def connect_experiment(self, endpoint):
    """
    Connect to an existing experiment.

    Parameters
    ----------
    endpoint: str
        The endpoint of nni rest server, i.e, the url of Web UI. Should be a format like `http://ip:port`.
        When the url carries no explicit port, `port` is left as None.

    Raises
    ------
    RuntimeError
        If this instance is already connected, or no experiment profile
        could be read from `endpoint`.
    """
    if self._endpoint is not None:
        raise RuntimeError('This instance has been connected to an experiment.')
    # get_experiment_profile reads self._endpoint, so it has to be set
    # before the probe; it is rolled back if the probe fails so that the
    # instance can still be connected to another endpoint afterwards.
    self._endpoint = endpoint
    try:
        self._exp_id = self.get_experiment_profile()['id']
    except TypeError:
        # No profile came back (None is not subscriptable).
        self._endpoint = None
        raise RuntimeError('Invalid experiment endpoint.')
    except Exception:
        self._endpoint = None
        raise
    port_match = re.search(r':([0-9]+)', self._endpoint)
    self._port = int(port_match.group(1)) if port_match else None
def stop_experiment(self):
    """
    Stop the attached experiment and detach this instance from it.

    Raises
    ------
    RuntimeError
        If no experiment is attached, or the stop command exits with a
        non-zero return code.
    """
    _check_endpoint(self._endpoint)
    command_line = 'nnictl stop {}'.format(self._exp_id)
    exit_code = _create_process(command_line.split(' '))
    if exit_code != 0:
        raise RuntimeError('Failed to stop experiment.')
    # Back to the unconnected state.
    self._endpoint = self._exp_id = self._port = None
def update_searchspace(self, filename):
    """
    Update the experiment's search space.

    Parameters
    ----------
    filename: str
        Path to the searchspace file.

    Raises
    ------
    RuntimeError
        If no experiment is attached, or the update command exits with a
        non-zero return code.
    """
    _check_endpoint(self._endpoint)
    # One list element per argument, so a file name containing spaces is
    # passed through intact.
    cmd = ['nnictl', 'update', 'searchspace', str(self._exp_id), '--filename', str(filename)]
    if _create_process(cmd) != 0:
        raise RuntimeError('Failed to update searchspace.')
def update_concurrency(self, value):
    """
    Change the concurrency of the attached experiment.

    Parameters
    ----------
    value: int
        New concurrency value.

    Raises
    ------
    RuntimeError
        If no experiment is attached, or the update command exits with a
        non-zero return code.
    """
    _check_endpoint(self._endpoint)
    command_line = 'nnictl update concurrency {} --value {}'.format(self._exp_id, value)
    exit_code = _create_process(command_line.split(' '))
    if exit_code != 0:
        raise RuntimeError('Failed to update concurrency.')
def update_duration(self, value):
    """
    Change the maximum duration of the attached experiment.

    Parameters
    ----------
    value: str
        Strings like '1m' for one minute or '2h' for two hours.
        SUFFIX may be 's' for seconds, 'm' for minutes, 'h' for hours or 'd' for days.

    Raises
    ------
    RuntimeError
        If no experiment is attached, or the update command exits with a
        non-zero return code.
    """
    _check_endpoint(self._endpoint)
    command_line = 'nnictl update duration {} --value {}'.format(self._exp_id, value)
    exit_code = _create_process(command_line.split(' '))
    if exit_code != 0:
        raise RuntimeError('Failed to update duration.')
def update_trailnum(self, value):
"""
Update an experiment's maxtrialnum
def stop_nni():
"""stop nni experiment"""
cmd = 'nnictl stop'.split(' ')
Parameters
----------
value: int
New trailnum value.
"""
_check_endpoint(self._endpoint)
cmd = 'nnictl update trialnum {} --value {}'.format(self._exp_id, value).split(' ')
if _create_process(cmd) != 0:
raise RuntimeError('Failed to stop nni.')
raise RuntimeError('Failed to update trailnum.')
def get_experiment_status(self):
"""
Return experiment status as a dict.
def version():
"""return version of nni"""
return _nni_rest_get(VERSION_PATH, 'text')
Returns
----------
dict
Experiment status.
"""
_check_endpoint(self._endpoint)
return _nni_rest_get(self._endpoint, STATUS_PATH)
def get_experiment_status():
"""return experiment status as a dict"""
return _nni_rest_get(STATUS_PATH)
def get_trial_job(self, trial_job_id):
"""
Return a trial job.
def get_experiment_profile():
"""return experiment profile as a dict"""
return _nni_rest_get(EXPERIMENT_PATH)
Parameters
----------
trial_job_id: str
Trial job id.
def get_trial_job(trial_job_id):
"""return trial job information as a dict"""
Returns
----------
nnicli.TrialJob
A `nnicli.TrialJob` instance corresponding to `trial_job_id`.
"""
_check_endpoint(self._endpoint)
assert trial_job_id is not None
return _nni_rest_get(os.path.join(TRIAL_JOBS_PATH, trial_job_id))
trial_job = _nni_rest_get(self._endpoint, os.path.join(TRIAL_JOBS_PATH, trial_job_id))
return TrialJob(trial_job)
def list_trial_jobs(self):
    """
    Fetch every trial job of the attached experiment.

    Returns
    ----------
    list
        List of `nnicli.TrialJob`.
    """
    _check_endpoint(self._endpoint)
    raw_jobs = _nni_rest_get(self._endpoint, TRIAL_JOBS_PATH)
    jobs = []
    for entry in raw_jobs:
        jobs.append(TrialJob(entry))
    return jobs
def get_job_statistics(self):
"""
Return trial job statistics information as a dict.
def list_trial_jobs():
"""return information for all trial jobs as a list"""
return _nni_rest_get(TRIAL_JOBS_PATH)
Returns
----------
list
Job statistics information.
"""
_check_endpoint(self._endpoint)
return _nni_rest_get(self._endpoint, JOB_STATISTICS_PATH)
def get_job_metrics(self, trial_job_id=None):
"""
Return trial job metrics.
def get_job_statistics():
"""return trial job statistics information as a dict"""
return _nni_rest_get(JOB_STATISTICS_PATH)
Parameters
----------
trial_job_id: str
trial job id. if this parameter is None, all trail jobs' metrics will be returned.
def get_job_metrics(trial_job_id=None):
"""return trial job metrics"""
Returns
----------
dict
Each key is a trialJobId, the corresponding value is a list of `nnicli.TrialMetricData`.
"""
_check_endpoint(self._endpoint)
api_path = METRICS_PATH if trial_job_id is None else os.path.join(METRICS_PATH, trial_job_id)
return _nni_rest_get(api_path)
output = {}
trail_metrics = _nni_rest_get(self._endpoint, api_path)
for metric in trail_metrics:
trial_id = metric["trialJobId"]
if trial_id not in output:
output[trial_id] = [TrialMetricData(metric)]
else:
output[trial_id].append(TrialMetricData(metric))
return output
def export_data(self):
    """
    Fetch the exported results of all trial jobs.

    Returns
    ----------
    list
        List of `nnicli.TrialResult`.
    """
    _check_endpoint(self._endpoint)
    raw_results = _nni_rest_get(self._endpoint, EXPORT_DATA_PATH)
    results = []
    for entry in raw_results:
        results.append(TrialResult(entry))
    return results
def export_data():
"""return exported information for all trial jobs"""
return _nni_rest_get(EXPORT_DATA_PATH)
def get_experiment_profile(self):
    """
    Fetch the profile of the attached experiment.

    Returns
    ----------
    dict
        The profile of the experiment.
    """
    _check_endpoint(self._endpoint)
    profile = _nni_rest_get(self._endpoint, EXPERIMENT_PATH)
    return profile
......@@ -140,8 +140,8 @@ testCases:
config:
maxTrialNum: 4
trialConcurrency: 4
launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")'
stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()'
launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator:
class: NnicliValidator
platform: linux darwin
......
......@@ -110,8 +110,8 @@ testCases:
config:
maxTrialNum: 4
trialConcurrency: 4
launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")'
stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()'
launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator:
class: NnicliValidator
platform: linux darwin
......
......@@ -45,10 +45,10 @@ testCases:
- name: nnicli
configFile: test/config/examples/sklearn-regression.yml
config:
maxTrialNum: 2
trialConcurrency: 2
launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")'
stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()'
maxTrialNum: 4
trialConcurrency: 4
launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator:
class: NnicliValidator
platform: linux darwin
......
......@@ -6,7 +6,7 @@ from os import remove
import subprocess
import json
import requests
import nnicli as nc
from nnicli import Experiment
from utils import METRICS_URL
......@@ -80,8 +80,8 @@ class MetricsValidator(ITValidator):
class NnicliValidator(ITValidator):
def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
print(rest_endpoint)
nc.set_endpoint(rest_endpoint)
#print(nc.version())
print(nc.get_job_statistics())
print(nc.get_experiment_status())
print(nc.list_trial_jobs())
exp = Experiment()
exp.connect_experiment(rest_endpoint)
print(exp.get_job_statistics())
print(exp.get_experiment_status())
print(exp.list_trial_jobs())
......@@ -14,7 +14,7 @@ from .constants import REST_TIME_OUT, TUNERS_SUPPORTING_IMPORT_DATA, TUNERS_NO_N
def validate_digit(value, start, end):
    '''Validate that `value` is a non-negative integer within [start, end].

    `value` may be an int or its string form. Raises ValueError when it is
    not made of digits only or lies outside the inclusive range.
    '''
    text = str(value)
    if not text.isdigit() or not start <= int(text) <= end:
        raise ValueError('value (%s) must be a digit from %s to %s' % (value, start, end))
def validate_file(path):
'''validate if a file exist'''
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment