update nnicli (#2713)

f82ef623 · Junwei Sun · GitHub · 44954e0c · f82ef623 · f82ef623
Unverified Commit f82ef623 authored Aug 12, 2020 by Junwei Sun Committed by GitHub Aug 12, 2020
10 changed files
--- a/docs/en_US/Tutorial/Nnictl.md
+++ b/docs/en_US/Tutorial/Nnictl.md
@@ -262,7 +262,7 @@ Debug mode will disable version check function in Trialkeeper.
  |Name, shorthand|Required|Default|Description|
  |------|------|------ |------|
  |id|  False| |ID of the experiment you want to set|
-  |--value, -v|  True| |the experiment duration will be NUMBER seconds. SUFFIX may be 's' for seconds (the default), 'm' for minutes, 'h' for hours or 'd' for days.|
+  |--value, -v|  True| | Strings like '1m' for one minute or '2h' for two hours. SUFFIX may be 's' for seconds, 'm' for minutes, 'h' for hours or 'd' for days.|

  * Example


--- a/docs/en_US/conf.py
+++ b/docs/en_US/conf.py
@@ -17,6 +17,7 @@ from recommonmark.parser import CommonMarkParser
 import os
 import sys
 sys.path.insert(0, os.path.abspath('../../src/sdk/pynni'))
+sys.path.insert(1, os.path.abspath('../../src/sdk/pycli'))


 # -- Project information ---------------------------------------------------

--- a/docs/en_US/nnicli_ref.md
+++ b/docs/en_US/nnicli_ref.md
+# NNI Client
+
+NNI client is a Python API for `nnictl` that implements its most commonly used commands. With this API, users can control their experiments, collect experiment results, and conduct advanced analyses based on those results directly in Python code instead of using the command line. Here is an example:
+
+```
+from nnicli import Experiment
+
+# create an experiment instance
+exp = Experiment() 
+
+# start an experiment, then connect the instance to this experiment
+# you can also use `resume_experiment`, `view_experiment` or `connect_experiment`
+# only one of them should be called in one instance
+exp.start_experiment('nni/examples/trials/mnist-pytorch/config.yml', port=9090)
+
+# update the experiment's concurrency
+exp.update_concurrency(3)
+
+# get some information about the experiment
+print(exp.get_experiment_status())
+print(exp.get_job_statistics())
+print(exp.list_trial_jobs())
+
+# stop the experiment, then disconnect the instance from the experiment.
+exp.stop_experiment()
+```
+
+## References
+
+```eval_rst
+..  autoclass:: nnicli.Experiment
+    :members:
+..  autoclass:: nnicli.TrialJob
+    :members:
+..  autoclass:: nnicli.TrialHyperParameters
+    :members:
+..  autoclass:: nnicli.TrialMetricData
+    :members:
+..  autoclass:: nnicli.TrialResult
+    :members:
+```
--- a/docs/en_US/sdk_reference.rst
+++ b/docs/en_US/sdk_reference.rst
@@ -9,3 +9,4 @@ Python API Reference
    Auto Tune <autotune_ref>
    NAS <NAS/NasReference>
    Compression Utilities <Compressor/CompressionReference>
+    NNI Client <nnicli_ref>
\ No newline at end of file
--- a/src/sdk/pycli/nnicli/nni_client.py
+++ b/src/sdk/pycli/nnicli/nni_client.py
@@ -5,67 +5,47 @@

 Example:

-import nnicli as nc
+from nnicli import Experiment

-nc.start_nni('../../../../examples/trials/mnist/config.yml')
+exp = Experiment()
+exp.start_experiment('../../../../examples/trials/mnist-pytorch/config.yml')

-nc.set_endpoint('http://localhost:8080')
+exp.update_concurrency(3)

-print(nc.version())
-print(nc.get_experiment_status())
+print(exp.get_experiment_status())
+print(exp.get_job_statistics())
+print(exp.list_trial_jobs())

-print(nc.get_job_statistics())
-print(nc.list_trial_jobs())
-
-nc.stop_nni()
+exp.stop_experiment()

 """

 import sys
 import os
 import subprocess
+import re
+import json
 import requests

 __all__ = [
-    'start_nni',
-    'stop_nni',
-    'set_endpoint',
-    'version',
-    'get_experiment_status',
-    'get_experiment_profile',
-    'get_trial_job',
-    'list_trial_jobs',
-    'get_job_statistics',
-    'get_job_metrics',
-    'export_data'
+    'Experiment',
+    'TrialResult',
+    'TrialMetricData',
+    'TrialHyperParameters',
+    'TrialJob'
 ]

 EXPERIMENT_PATH = 'experiment'
-VERSION_PATH = 'version'
 STATUS_PATH = 'check-status'
 JOB_STATISTICS_PATH = 'job-statistics'
 TRIAL_JOBS_PATH = 'trial-jobs'
 METRICS_PATH = 'metric-data'
 EXPORT_DATA_PATH = 'export-data'
-
 API_ROOT_PATH = 'api/v1/nni'

-_api_endpoint = None
-
-def set_endpoint(endpoint):
-    """set endpoint of nni rest server for nnicli, for example:
-    http://localhost:8080
-    """
-    global _api_endpoint
-    _api_endpoint = endpoint
-
-def _check_endpoint():
-    if _api_endpoint is None:
-        raise AssertionError("Please call set_endpoint to specify nni endpoint")
-
-def _nni_rest_get(api_path, response_type='json'):
-    _check_endpoint()
-    uri = '{}/{}/{}'.format(_api_endpoint, API_ROOT_PATH, api_path)
+def _nni_rest_get(endpoint, api_path, response_type='json'):
+    _check_endpoint(endpoint)
+    uri = '{}/{}/{}'.format(endpoint.strip('/'), API_ROOT_PATH, api_path)
    res = requests.get(uri)
    if _http_succeed(res.status_code):
        if response_type == 'json':
@@ -73,7 +53,7 @@ def _nni_rest_get(api_path, response_type='json'):
        elif response_type == 'text':
            return res.text
        else:
-            raise AssertionError('Incorrect response_type')
+            raise RuntimeError('Incorrect response_type')
    else:
        return None

@@ -92,48 +72,444 @@ def _create_process(cmd):
            print(output.decode('utf-8').strip())
    return process.returncode

-def start_nni(config_file):
-    """start nni experiment with specified configuration file"""
+def _check_endpoint(endpoint):
+    if endpoint is None:
+        raise RuntimeError("This instance hasn't been connect to an experiment.")
+
+class TrialResult:
+    """
+    TrialResult stores the result information of a trial job.
+
+    Parameters
+    ----------
+    json_obj: dict
+        Json object that stores the result information.
+
+    Attributes
+    ----------
+    parameter: dict
+        Hyper parameters for this trial.
+    value: serializable object, usually a number, or a dict with key "default" and other extra keys
+        Final result.
+    trialJobId: str
+        Trial job id.
+    """
+    def __init__(self, json_obj):
+        self.parameter = None
+        self.value = None
+        self.trialJobId = None
+        for key in json_obj.keys():
+            if key == 'id':
+                setattr(self, 'trialJobId', json_obj[key])
+            elif hasattr(self, key):
+                setattr(self, key, json_obj[key])
+        self.value = json.loads(self.value)
+
+    def __repr__(self):
+        return "TrialResult(parameter: {} value: {} trialJobId: {})".format(self.parameter, self.value, self.trialJobId)
+
+class TrialMetricData:
+    """
+    TrialMetricData stores the metric data of a trial job.
+    A trial job may have both intermediate metric and final metric.
+
+    Parameters
+    ----------
+    json_obj: dict
+        Json object that stores the metric data.
+
+    Attributes
+    ----------
+    timestamp: int
+        Time stamp.
+    trialJobId: str
+        Trial job id.
+    parameterId: int
+        Parameter id.
+    type: str
+        Metric type, `PERIODICAL` for intermediate result and `FINAL` for final result.
+    sequence: int
+        Sequence number in this trial.
+    data: serializable object, usually a number, or a dict with key "default" and other extra keys
+        Metric data.
+    """
+    def __init__(self, json_obj):
+        self.timestamp = None
+        self.trialJobId = None
+        self.parameterId = None
+        self.type = None
+        self.sequence = None
+        self.data = None
+        for key in json_obj.keys():
+            setattr(self, key, json_obj[key])
+        self.data = json.loads(json.loads(self.data))
+
+    def __repr__(self):
+        return "TrialMetricData(timestamp: {} trialJobId: {} parameterId: {} type: {} sequence: {} data: {})" \
+            .format(self.timestamp, self.trialJobId, self.parameterId, self.type, self.sequence, self.data)
+
+class TrialHyperParameters:
+    """
+    TrialHyperParameters stores the hyper parameters of a trial job.
+
+    Parameters
+    ----------
+    json_obj: dict
+        Json object that stores the hyper parameters.
+
+    Attributes
+    ----------
+    parameter_id: int
+        Parameter id.
+    parameter_source: str
+        Parameter source.
+    parameters: dict
+        Hyper parameters.
+    parameter_index: int
+        Parameter index.
+    """
+    def __init__(self, json_obj):
+        self.parameter_id = None
+        self.parameter_source = None
+        self.parameters = None
+        self.parameter_index = None
+        for key in json_obj.keys():
+            if hasattr(self, key):
+                setattr(self, key, json_obj[key])
+
+    def __repr__(self):
+        return "TrialHyperParameters(parameter_id: {} parameter_source: {} parameters: {} parameter_index: {})" \
+            .format(self.parameter_id, self.parameter_source, self.parameters, self.parameter_index)
+
+class TrialJob:
+    """
+    TrialJob stores the information of a trial job.
+
+    Parameters
+    ----------
+    json_obj: dict
+        Json object that stores the trial job information.
+
+    Attributes
+    ----------
+    trialJobId: str
+        Trial job id.
+    status: str
+        Job status.
+    hyperParameters: list of `nnicli.TrialHyperParameters`
+        See `nnicli.TrialHyperParameters`.
+    logPath: str
+        Log path.
+    startTime: int
+        Job start time (timestamp).
+    endTime: int
+        Job end time (timestamp).
+    finalMetricData: list of `nnicli.TrialMetricData`
+        See `nnicli.TrialMetricData`.
+    stderrPath: str
+        Stderr log path.
+    """
+    def __init__(self, json_obj):
+        self.trialJobId = None
+        self.status = None
+        self.hyperParameters = None
+        self.logPath = None
+        self.startTime = None
+        self.endTime = None
+        self.finalMetricData = None
+        self.stderrPath = None
+        for key in json_obj.keys():
+            if key == 'id':
+                setattr(self, 'trialJobId', json_obj[key])
+            elif hasattr(self, key):
+                setattr(self, key, json_obj[key])
+        if self.hyperParameters:
+            self.hyperParameters = [TrialHyperParameters(json.loads(e)) for e in self.hyperParameters]
+        if self.finalMetricData:
+            self.finalMetricData = [TrialMetricData(e) for e in self.finalMetricData]
+
+    def __repr__(self):
+        return ("TrialJob(trialJobId: {} status: {} hyperParameters: {} logPath: {} startTime: {} "
+                "endTime: {} finalMetricData: {} stderrPath: {})") \
+                    .format(self.trialJobId, self.status, self.hyperParameters, self.logPath,
+                            self.startTime, self.endTime, self.finalMetricData, self.stderrPath)
+
+class Experiment:
+    def __init__(self):
+        self._endpoint = None
+        self._exp_id = None
+        self._port = None
+
+    @property
+    def endpoint(self):
+        return self._endpoint
+
+    @property
+    def exp_id(self):
+        return self._exp_id
+
+    @property
+    def port(self):
+        return self._port
+
+    def _exec_command(self, cmd, port=None):
+        if self._endpoint is not None:
+            raise RuntimeError('This instance has been connected to an experiment.')
+        if _create_process(cmd) != 0:
+            raise RuntimeError('Failed to establish experiment, please check your config.')
+        else:
+            if port:
+                self._port = port
+            else:
+                self._port = 8080
+            self._endpoint = 'http://localhost:{}'.format(self._port)
+            self._exp_id = self.get_experiment_profile()['id']
+
+    def start_experiment(self, config_file, port=None, debug=False):
+        """
+        Start an experiment with specified configuration file and connect to it.
+
+        Parameters
+        ----------
+        config_file: str
+            Path to the config file.
+        port: int
+            The port of restful server, bigger than 1024.
+        debug: boolean
+            Set debug mode.
+        """
        cmd = 'nnictl create --config {}'.format(config_file).split(' ')
+        if port:
+            cmd += '--port {}'.format(port).split(' ')
+        if debug:
+            cmd += ['--debug']
+        self._exec_command(cmd, port)
+
+    def resume_experiment(self, exp_id, port=None, debug=False):
+        """
+        Resume a stopped experiment with specified experiment id
+
+        Parameters
+        ----------
+        exp_id: str
+            Experiment id.
+        port: int
+            The port of restful server, bigger than 1024.
+        debug: boolean
+            Set debug mode.
+        """
+        cmd = 'nnictl resume {}'.format(exp_id).split(' ')
+        if port:
+            cmd += '--port {}'.format(port).split(' ')
+        if debug:
+            cmd += ['--debug']
+        self._exec_command(cmd, port)
+
+    def view_experiment(self, exp_id, port=None):
+        """
+        View a stopped experiment with specified experiment id.
+
+        Parameters
+        ----------
+        exp_id: str
+            Experiment id.
+        port: int
+            The port of restful server, bigger than 1024.
+        """
+        cmd = 'nnictl view {}'.format(exp_id).split(' ')
+        if port:
+            cmd += '--port {}'.format(port).split(' ')
+        self._exec_command(cmd, port)
+
+    def connect_experiment(self, endpoint):
+        """
+        Connect to an existing experiment.
+
+        Parameters
+        ----------
+        endpoint: str
+            The endpoint of the nni rest server, i.e., the URL of the Web UI. Should be in a format like `http://ip:port`.
+        """
+        if self._endpoint is not None:
+            raise RuntimeError('This instance has been connected to an experiment.')
+        self._endpoint = endpoint
+        try:
+            self._exp_id = self.get_experiment_profile()['id']
+        except TypeError:
+            raise RuntimeError('Invalid experiment endpoint.')
+        self._port = int(re.search(r':[0-9]+', self._endpoint).group().replace(':', ''))
+
+    def stop_experiment(self):
+        """Stop the experiment.
+        """
+        _check_endpoint(self._endpoint)
+        cmd = 'nnictl stop {}'.format(self._exp_id).split(' ')
+        if _create_process(cmd) != 0:
+            raise RuntimeError('Failed to stop experiment.')
+        self._endpoint = None
+        self._exp_id = None
+        self._port = None
+
+    def update_searchspace(self, filename):
+        """
+        Update the experiment's search space.
+
+        Parameters
+        ----------
+        filename: str
+            Path to the searchspace file.
+        """
+        _check_endpoint(self._endpoint)
+        cmd = 'nnictl update searchspace {} --filename {}'.format(self._exp_id, filename).split(' ')
        if _create_process(cmd) != 0:
-        raise RuntimeError('Failed to start nni.')
+            raise RuntimeError('Failed to update searchspace.')
+
+    def update_concurrency(self, value):
+        """
+        Update an experiment's concurrency
+
+        Parameters
+        ----------
+        value: int
+            New concurrency value.
+        """
+        _check_endpoint(self._endpoint)
+        cmd = 'nnictl update concurrency {} --value {}'.format(self._exp_id, value).split(' ')
+        if _create_process(cmd) != 0:
+            raise RuntimeError('Failed to update concurrency.')
+
+    def update_duration(self, value):
+        """
+        Update an experiment's duration
+
+        Parameters
+        ----------
+        value: str
+            Strings like '1m' for one minute or '2h' for two hours.
+            SUFFIX may be 's' for seconds, 'm' for minutes, 'h' for hours or 'd' for days.
+        """
+        _check_endpoint(self._endpoint)
+        cmd = 'nnictl update duration {} --value {}'.format(self._exp_id, value).split(' ')
+        if _create_process(cmd) != 0:
+            raise RuntimeError('Failed to update duration.')
+
+    def update_trailnum(self, value):
+        """
+        Update an experiment's maxtrialnum

-def stop_nni():
-    """stop nni experiment"""
-    cmd = 'nnictl stop'.split(' ')
+        Parameters
+        ----------
+        value: int
+            New trialnum value.
+        """
+        _check_endpoint(self._endpoint)
+        cmd = 'nnictl update trialnum {} --value {}'.format(self._exp_id, value).split(' ')
        if _create_process(cmd) != 0:
-        raise RuntimeError('Failed to stop nni.')
+            raise RuntimeError('Failed to update trailnum.')
+
+    def get_experiment_status(self):
+        """
+        Return experiment status as a dict.

-def version():
-    """return version of nni"""
-    return _nni_rest_get(VERSION_PATH, 'text')
+        Returns
+        ----------
+        dict
+            Experiment status.
+        """
+        _check_endpoint(self._endpoint)
+        return _nni_rest_get(self._endpoint, STATUS_PATH)

-def get_experiment_status():
-    """return experiment status as a dict"""
-    return _nni_rest_get(STATUS_PATH)
+    def get_trial_job(self, trial_job_id):
+        """
+        Return a trial job.

-def get_experiment_profile():
-    """return experiment profile as a dict"""
-    return _nni_rest_get(EXPERIMENT_PATH)
+        Parameters
+        ----------
+        trial_job_id: str
+            Trial job id.

-def get_trial_job(trial_job_id):
-    """return trial job information as a dict"""
+        Returns
+        ----------
+        nnicli.TrialJob
+            A `nnicli.TrialJob` instance corresponding to `trial_job_id`.
+        """
+        _check_endpoint(self._endpoint)
        assert trial_job_id is not None
-    return _nni_rest_get(os.path.join(TRIAL_JOBS_PATH, trial_job_id))
+        trial_job = _nni_rest_get(self._endpoint, os.path.join(TRIAL_JOBS_PATH, trial_job_id))
+        return TrialJob(trial_job)
+
+    def list_trial_jobs(self):
+        """
+        Return information for all trial jobs as a list.
+
+        Returns
+        ----------
+        list
+            List of `nnicli.TrialJob`.
+        """
+        _check_endpoint(self._endpoint)
+        trial_jobs = _nni_rest_get(self._endpoint, TRIAL_JOBS_PATH)
+        return [TrialJob(e) for e in trial_jobs]
+
+    def get_job_statistics(self):
+        """
+        Return trial job statistics information as a dict.

-def list_trial_jobs():
-    """return information for all trial jobs as a list"""
-    return _nni_rest_get(TRIAL_JOBS_PATH)
+        Returns
+        ----------
+        list
+            Job statistics information.
+        """
+        _check_endpoint(self._endpoint)
+        return _nni_rest_get(self._endpoint, JOB_STATISTICS_PATH)
+
+    def get_job_metrics(self, trial_job_id=None):
+        """
+        Return trial job metrics.

-def get_job_statistics():
-    """return trial job statistics information as a dict"""
-    return _nni_rest_get(JOB_STATISTICS_PATH)
+        Parameters
+        ----------
+        trial_job_id: str
+            Trial job id. If this parameter is None, all trial jobs' metrics will be returned.

-def get_job_metrics(trial_job_id=None):
-    """return trial job metrics"""
+        Returns
+        ----------
+        dict
+            Each key is a trialJobId, the corresponding value is a list of `nnicli.TrialMetricData`.
+        """
+        _check_endpoint(self._endpoint)
        api_path = METRICS_PATH if trial_job_id is None else os.path.join(METRICS_PATH, trial_job_id)
-    return _nni_rest_get(api_path)
+        output = {}
+        trail_metrics = _nni_rest_get(self._endpoint, api_path)
+        for metric in trail_metrics:
+            trial_id = metric["trialJobId"]
+            if trial_id not in output:
+                output[trial_id] = [TrialMetricData(metric)]
+            else:
+                output[trial_id].append(TrialMetricData(metric))
+        return output
+
+    def export_data(self):
+        """
+        Return exported information for all trial jobs.
+
+        Returns
+        ----------
+        list
+            List of `nnicli.TrialResult`.
+        """
+        _check_endpoint(self._endpoint)
+        trial_results = _nni_rest_get(self._endpoint, EXPORT_DATA_PATH)
+        return [TrialResult(e) for e in trial_results]

-def export_data():
-    """return exported information for all trial jobs"""
-    return _nni_rest_get(EXPORT_DATA_PATH)
+    def get_experiment_profile(self):
+        """
+        Return experiment profile as a dict.
+
+        Returns
+        ----------
+        dict
+            The profile of the experiment.
+        """
+        _check_endpoint(self._endpoint)
+        return _nni_rest_get(self._endpoint, EXPERIMENT_PATH)
--- a/test/config/integration_tests.yml
+++ b/test/config/integration_tests.yml
@@ -140,8 +140,8 @@ testCases:
  config:
    maxTrialNum: 4
    trialConcurrency: 4
-  launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")'
-  stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()'
+  launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
+  stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
  validator:
    class: NnicliValidator
  platform: linux darwin

--- a/test/config/integration_tests_tf2.yml
+++ b/test/config/integration_tests_tf2.yml
@@ -110,8 +110,8 @@ testCases:
  config:
    maxTrialNum: 4
    trialConcurrency: 4
-  launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")'
-  stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()'
+  launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
+  stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
  validator:
    class: NnicliValidator
  platform: linux darwin

--- a/test/config/pr_tests.yml
+++ b/test/config/pr_tests.yml
@@ -45,10 +45,10 @@ testCases:
 - name: nnicli
  configFile: test/config/examples/sklearn-regression.yml
  config:
-    maxTrialNum: 2
-    trialConcurrency: 2
-  launchCommand: python3 -c 'import nnicli as nc; nc.start_nni("$configFile")'
-  stopCommand: python3 -c 'import nnicli as nc; nc.stop_nni()'
+    maxTrialNum: 4
+    trialConcurrency: 4
+  launchCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
+  stopCommand: python3 -c 'from nnicli import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
  validator:
    class: NnicliValidator
  platform: linux darwin

--- a/test/nni_test/nnitest/validators.py
+++ b/test/nni_test/nnitest/validators.py
@@ -6,7 +6,7 @@ from os import remove
 import subprocess
 import json
 import requests
-import nnicli as nc
+from nnicli import Experiment
 from utils import METRICS_URL


@@ -80,8 +80,8 @@ class MetricsValidator(ITValidator):
 class NnicliValidator(ITValidator):
    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
        print(rest_endpoint)
-        nc.set_endpoint(rest_endpoint)
-        #print(nc.version())
-        print(nc.get_job_statistics())
-        print(nc.get_experiment_status())
-        print(nc.list_trial_jobs())
+        exp = Experiment()
+        exp.connect_experiment(rest_endpoint)
+        print(exp.get_job_statistics())
+        print(exp.get_experiment_status())
+        print(exp.list_trial_jobs())
--- a/tools/nni_cmd/updater.py
+++ b/tools/nni_cmd/updater.py
@@ -14,7 +14,7 @@ from .constants import REST_TIME_OUT, TUNERS_SUPPORTING_IMPORT_DATA, TUNERS_NO_N
 def validate_digit(value, start, end):
    '''validate if a digit is valid'''
    if not str(value).isdigit() or int(value) < start or int(value) > end:
-        raise ValueError('%s must be a digit from %s to %s' % (value, start, end))
+        raise ValueError('value (%s) must be a digit from %s to %s' % (value, start, end))

 def validate_file(path):
    '''validate if a file exist'''