"src/sdk/vscode:/vscode.git/clone" did not exist on "41e587038a32db002178329a64224ea6a5b2a5f9"
Unverified commit bee8f84e authored by SparkSnail, committed by GitHub

Merge pull request #174 from microsoft/master

merge master from Microsoft
parents c5acd8c2 252d35e0
# Metis Tuner

## Metis Tuner

Most tuning tools only predict the optimal configuration, while [Metis](https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/) has the advantage of giving you two outputs: (a) the current prediction of the optimal configuration, and (b) a suggestion for the next trial. No more random guessing!

Most tools assume the training data contain no noise, but Metis actually tells you whether a particular hyper-parameter needs to be re-sampled.

Most tools have the problem of being exploitation-heavy, whereas Metis's search strategy balances exploration, exploitation, and (optional) re-sampling.

Metis belongs to the class of sequential model-based optimization (SMBO) and is built on the Bayesian optimization framework. To model the hyper-parameter vs. performance space, Metis uses both a Gaussian Process and a Gaussian Mixture Model (GMM). Since every trial can carry a high time cost, Metis relies heavily on the already-trained models for its inference. At each iteration, Metis performs two tasks:

- It finds the global optimal point in the Gaussian Process space; this point represents the best configuration.
- It identifies the next hyper-parameter candidate, by weighing the potential information gain of exploration, exploitation, and re-sampling.

Note that the search space only supports `choice`, `quniform`, `uniform`, and `randint`; a minimal example follows.
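A sketch of a search space restricted to those four types (parameter names and ranges are illustrative; see the NNI search-space documentation for the exact semantics of each type):

```json
{
    "batch_size":   {"_type": "choice",   "_value": [16, 32, 64]},
    "hidden_size":  {"_type": "quniform", "_value": [64, 512, 64]},
    "dropout_rate": {"_type": "uniform",  "_value": [0.1, 0.5]},
    "num_layers":   {"_type": "randint",  "_value": [1, 5]}
}
```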
For more details, please refer to the paper: https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/
@@ -21,21 +21,21 @@
 smac_tuner.py
 """
-from nni.tuner import Tuner
-from nni.utils import OptimizeMode, extract_scalar_reward
 import sys
 import logging
 import numpy as np
-import json_tricks
-from enum import Enum, unique
-from .convert_ss_to_scenario import generate_scenario
+from nni.tuner import Tuner
+from nni.utils import OptimizeMode, extract_scalar_reward
 from smac.utils.io.cmd_reader import CMDReader
 from smac.scenario.scenario import Scenario
 from smac.facade.smac_facade import SMAC
 from smac.facade.roar_facade import ROAR
 from smac.facade.epils_facade import EPILS
+from ConfigSpaceNNI import Configuration
+from .convert_ss_to_scenario import generate_scenario

 class SMACTuner(Tuner):
@@ -57,6 +57,7 @@ class SMACTuner(Tuner):
         self.update_ss_done = False
         self.loguniform_key = set()
         self.categorical_dict = {}
+        self.cs = None

     def _main_cli(self):
         """Main function of SMAC for CLI interface
@@ -66,7 +67,7 @@ class SMACTuner(Tuner):
         instance
             optimizer
         """
-        self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))
+        self.logger.info("SMAC call: %s", " ".join(sys.argv))

         cmd_reader = CMDReader()
         args, _ = cmd_reader.read_cmd()
@@ -95,6 +96,7 @@ class SMACTuner(Tuner):
         # Create scenario-object
         scen = Scenario(args.scenario_file, [])
+        self.cs = scen.cs

         if args.mode == "SMAC":
             optimizer = SMAC(
@@ -258,4 +260,45 @@ class SMACTuner(Tuner):
         return params

     def import_data(self, data):
-        pass
+        """Import additional data for tuning
+
+        Parameters
+        ----------
+        data:
+            a list of dictionaries, each of which has at least two keys, 'parameter' and 'value'
+        """
+        _completed_num = 0
+        for trial_info in data:
+            self.logger.info("Importing data, current processing progress %s / %s", _completed_num, len(data))
+            # simply validate data format
+            assert "parameter" in trial_info
+            _params = trial_info["parameter"]
+            assert "value" in trial_info
+            _value = trial_info['value']
+            if not _value:
+                self.logger.info("Useless trial data, value is %s, skip this trial data.", _value)
+                continue
+            # convert the keys in loguniform and categorical types
+            valid_entry = True
+            for key, value in _params.items():
+                if key in self.loguniform_key:
+                    _params[key] = np.log(value)
+                elif key in self.categorical_dict:
+                    if value in self.categorical_dict[key]:
+                        _params[key] = self.categorical_dict[key].index(value)
+                    else:
+                        self.logger.info("The value %s of key %s is not in search space.", str(value), key)
+                        valid_entry = False
+                        break
+            if not valid_entry:
+                continue
+            # start import this data entry
+            _completed_num += 1
+            config = Configuration(self.cs, values=_params)
+            if self.optimize_mode is OptimizeMode.Maximize:
+                _value = -_value
+            if self.first_one:
+                self.smbo_solver.nni_smac_receive_first_run(config, _value)
+                self.first_one = False
+            else:
+                self.smbo_solver.nni_smac_receive_runs(config, _value)
+        self.logger.info("Successfully import data to smac tuner, total data: %d, imported data: %d.", len(data), _completed_num)
@@ -28,12 +28,11 @@ class IntermediateVal extends React.Component<IntermediateValProps, {}> {
                 if (wei > 6) {
                     result = `${lastVal.toFixed(6)}`;
                 }
-                if (status === 'SUCCEEDED') {
-                    result = `${lastVal.toFixed(6)} (FINAL)`;
-                } else {
-                    result = `${lastVal.toFixed(6)} (LATEST)`;
-                }
             }
+            if (status === 'SUCCEEDED') {
+                result = `${result} (FINAL)`;
+            } else {
+                result = `${result} (LATEST)`;
+            }
         } else {
             result = '--';
@@ -3,7 +3,7 @@ experimentName: default_test
 maxExecDuration: 5m
 maxTrialNum: 4
 trialConcurrency: 2
-searchSpacePath: ../../../examples/trials/mnist-cascading-search-space/search_space.json
+searchSpacePath: ../../../examples/trials/mnist-nested-search-space/search_space.json

 tuner:
   #choice: TPE, Random, Anneal, Evolution
@@ -13,7 +13,7 @@ assessor:
   classArgs:
     optimize_mode: maximize
 trial:
-  codeDir: ../../../examples/trials/mnist-cascading-search-space
+  codeDir: ../../../examples/trials/mnist-nested-search-space
   command: python3 mnist.py --batch_num 100
   gpuNum: 0
@@ -63,7 +63,9 @@ common_schema = {
     Optional('advisor'): dict,
     Optional('assessor'): dict,
     Optional('localConfig'): {
-        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!')
+        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
+        Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
+        Optional('useActiveGpu'): setType('useActiveGpu', bool)
     }
 }

 tuner_schema_dict = {
@@ -310,26 +312,30 @@ frameworkcontroller_config_schema = {
     })
 }

-machine_list_schima = {
+machine_list_schema = {
     Optional('machineList'):[Or({
         'ip': setType('ip', str),
         Optional('port'): setNumberRange('port', int, 1, 65535),
         'username': setType('username', str),
         'passwd': setType('passwd', str),
-        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!')
+        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
+        Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
+        Optional('useActiveGpu'): setType('useActiveGpu', bool)
     },{
         'ip': setType('ip', str),
         Optional('port'): setNumberRange('port', int, 1, 65535),
         'username': setType('username', str),
         'sshKeyPath': setPathCheck('sshKeyPath'),
         Optional('passphrase'): setType('passphrase', str),
-        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!')
+        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
+        Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
+        Optional('useActiveGpu'): setType('useActiveGpu', bool)
     })]
 }

 LOCAL_CONFIG_SCHEMA = Schema({**common_schema, **common_trial_schema})
-REMOTE_CONFIG_SCHEMA = Schema({**common_schema, **common_trial_schema, **machine_list_schima})
+REMOTE_CONFIG_SCHEMA = Schema({**common_schema, **common_trial_schema, **machine_list_schema})
 PAI_CONFIG_SCHEMA = Schema({**common_schema, **pai_trial_schema, **pai_config_schema})
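For illustration, the two new optional fields would be written in an experiment config roughly like this (a hedged sketch: values are examples, and the field semantics are inferred from their names):

```yaml
localConfig:
  gpuIndices: "0,1"        # int or comma-separated string, as the schema above allows
  maxTrialNumPerGpu: 2     # presumably the number of concurrent trials allowed per GPU
  useActiveGpu: false      # presumably whether GPUs that already run processes may be used
```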
@@ -23,6 +23,7 @@ import os
 import json
 import shutil
 from .constants import NNICTL_HOME_DIR
+from .common_utils import print_error

 class Config:
     '''a util class to load and save config'''
@@ -119,4 +120,26 @@ class Experiments:
                 return json.load(file)
             except ValueError:
                 return {}
-        return {}
\ No newline at end of file
+        return {}
+
+class HDFSConfig:
+    '''manage hdfs configuration'''
+    def __init__(self):
+        os.makedirs(NNICTL_HOME_DIR, exist_ok=True)
+        self.hdfs_config_file = os.path.join(NNICTL_HOME_DIR, '.hdfs')
+
+    def get_config(self):
+        if os.path.exists(self.hdfs_config_file):
+            try:
+                with open(self.hdfs_config_file, 'r') as file:
+                    return json.load(file)
+            except Exception as exception:
+                print_error(exception)
+                return None
+        else:
+            return None
+
+    def set_config(self, host, user_name):
+        with open(self.hdfs_config_file, 'w') as file:
+            json.dump({'host': host, 'userName': user_name}, file)
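A minimal usage sketch for the new class (host and user name are placeholders; the file is persisted under NNICTL_HOME_DIR):

```python
hdfs_config = HDFSConfig()
hdfs_config.set_config('10.1.2.3', 'nni_user')  # writes {'host': ..., 'userName': ...} to the .hdfs file
print(hdfs_config.get_config())                 # -> {'host': '10.1.2.3', 'userName': 'nni_user'}, or None
```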
@@ -86,12 +86,13 @@ TUNERS_SUPPORTING_IMPORT_DATA = {
     'Anneal',
     'GridSearch',
     'MetisTuner',
-    'BOHB'
+    'BOHB',
+    'SMAC',
+    'BatchTuner'
 }

 TUNERS_NO_NEED_TO_IMPORT_DATA = {
     'Random',
-    'Batch_tuner',
     'Hyperband'
 }
@@ -160,9 +160,13 @@ def set_local_config(experiment_config, port, config_file_name):
     request_data = dict()
     if experiment_config.get('localConfig'):
         request_data['local_config'] = experiment_config['localConfig']
-        if request_data['local_config'] and request_data['local_config'].get('gpuIndices') \
-        and isinstance(request_data['local_config'].get('gpuIndices'), int):
-            request_data['local_config']['gpuIndices'] = str(request_data['local_config'].get('gpuIndices'))
+        if request_data['local_config']:
+            if request_data['local_config'].get('gpuIndices') and isinstance(request_data['local_config'].get('gpuIndices'), int):
+                request_data['local_config']['gpuIndices'] = str(request_data['local_config'].get('gpuIndices'))
+            if request_data['local_config'].get('maxTrialNumOnEachGpu'):
+                request_data['local_config']['maxTrialNumOnEachGpu'] = request_data['local_config'].get('maxTrialNumOnEachGpu')
+            if request_data['local_config'].get('useActiveGpu'):
+                request_data['local_config']['useActiveGpu'] = request_data['local_config'].get('useActiveGpu')
     response = rest_put(cluster_metadata_url(port), json.dumps(request_data), REST_TIME_OUT)
     err_message = ''
     if not response or not check_response(response):
@@ -343,6 +347,13 @@ def set_experiment(experiment_config, mode, port, config_file_name):
 def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None):
     '''follow steps to start rest server and start experiment'''
     nni_config = Config(config_file_name)
+    # check execution policy in powershell
+    if sys.platform == 'win32':
+        execution_policy = check_output(['powershell.exe', 'Get-ExecutionPolicy']).decode('ascii').strip()
+        if execution_policy == 'Restricted':
+            print_error('PowerShell execution policy error, please run PowerShell as administrator with this command first:\r\n'\
+                + '\'Set-ExecutionPolicy -ExecutionPolicy Unrestricted\'')
+            exit(1)
     # check packages for tuner
     package_name, module_name = None, None
     if experiment_config.get('tuner') and experiment_config['tuner'].get('builtinTunerName'):
@@ -194,6 +194,15 @@ def parse_args():
                             'the unit is second')
     parser_top.set_defaults(func=monitor_experiment)

+    parser_hdfs = subparsers.add_parser('hdfs', help='monitor hdfs files')
+    parser_hdfs_subparsers = parser_hdfs.add_subparsers()
+    parser_hdfs_set = parser_hdfs_subparsers.add_parser('set', help='set the host and userName of hdfs')
+    parser_hdfs_set.add_argument('--host', required=True, dest='host', help='the host of hdfs')
+    parser_hdfs_set.add_argument('--user_name', required=True, dest='user_name', help='the userName of hdfs')
+    parser_hdfs_set.set_defaults(func=hdfs_set)
+    parser_hdfs_list = parser_hdfs_subparsers.add_parser('clean', help='clean hdfs files')
+    parser_hdfs_list.set_defaults(func=hdfs_clean)
+
     args = parser.parse_args()
     args.func(args)
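Given these definitions, wired to the `hdfs_set` and `hdfs_clean` handlers later in this commit, an illustrative session would be (host and user name are placeholders):

```
nnictl hdfs set --host 10.1.2.3 --user_name nni_user
nnictl hdfs clean
```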
@@ -26,8 +26,9 @@ import datetime
 import time
 from subprocess import call, check_output
 from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
-from .config_utils import Config, Experiments
-from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url
+from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url
+from pyhdfs import HdfsClient, HdfsFileNotFoundException
+from .config_utils import Config, Experiments, HDFSConfig
 from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \
     EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT
 from .common_utils import print_normal, print_error, print_warning, detect_process
@@ -450,30 +451,9 @@ def monitor_experiment(args):
         print_error(exception)
         exit(1)

-def parse_trial_data(content):
-    """output: List[Dict]"""
-    trial_records = []
-    for trial_data in content:
-        for phase_i in range(len(trial_data['hyperParameters'])):
-            hparam = json.loads(trial_data['hyperParameters'][phase_i])['parameters']
-            hparam['id'] = trial_data['id']
-            if 'finalMetricData' in trial_data.keys() and phase_i < len(trial_data['finalMetricData']):
-                reward = json.loads(trial_data['finalMetricData'][phase_i]['data'])
-                if isinstance(reward, (float, int)):
-                    dict_tmp = {**hparam, **{'reward': reward}}
-                elif isinstance(reward, dict):
-                    dict_tmp = {**hparam, **reward}
-                else:
-                    raise ValueError("Invalid finalMetricsData format: {}/{}".format(type(reward), reward))
-            else:
-                dict_tmp = hparam
-            trial_records.append(dict_tmp)
-    return trial_records
-
 def export_trials_data(args):
-    """export experiment metadata to csv
-    """
+    '''export experiment metadata to csv
+    '''
     nni_config = Config(get_config_filename(args))
     rest_port = nni_config.get_config('restServerPort')
     rest_pid = nni_config.get_config('restServerPid')
@@ -482,26 +462,60 @@ def export_trials_data(args):
         return
     running, response = check_rest_server_quick(rest_port)
     if running:
-        response = rest_get(trial_jobs_url(rest_port), 20)
+        response = rest_get(export_data_url(rest_port), 20)
         if response is not None and check_response(response):
-            content = json.loads(response.text)
-            # dframe = pd.DataFrame.from_records([parse_trial_data(t_data) for t_data in content])
-            # dframe.to_csv(args.csv_path, sep='\t')
-            records = parse_trial_data(content)
             if args.type == 'json':
-                json_records = []
-                for trial in records:
-                    value = trial.pop('reward', None)
-                    trial_id = trial.pop('id', None)
-                    json_records.append({'parameter': trial, 'value': value, 'id': trial_id})
-            with open(args.path, 'w') as file:
-                if args.type == 'csv':
-                    writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in records]))
+                with open(args.path, 'w') as file:
+                    file.write(response.text)
+            elif args.type == 'csv':
+                content = json.loads(response.text)
+                trial_records = []
+                for record in content:
+                    if not isinstance(record['value'], (float, int)):
+                        formated_record = {**record['parameter'], **record['value'], **{'id': record['id']}}
+                    else:
+                        formated_record = {**record['parameter'], **{'reward': record['value'], 'id': record['id']}}
+                    trial_records.append(formated_record)
+                with open(args.path, 'w') as file:
+                    writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in trial_records]))
                     writer.writeheader()
-                    writer.writerows(records)
-                else:
-                    json.dump(json_records, file)
+                    writer.writerows(trial_records)
+            else:
+                print_error('Unknown type: %s' % args.type)
+                exit(1)
         else:
             print_error('Export failed...')
     else:
         print_error('Restful server is not Running')
+
+def hdfs_set(args):
+    hdfsConfig = HDFSConfig()
+    hdfsConfig.set_config(args.host, args.user_name)
+    print_normal('HDFS account update success!')
+
+def hdfs_clean(args):
+    hdfsConfig = HDFSConfig()
+    if not hdfsConfig.get_config():
+        print_error('Please use \'nnictl hdfs set\' command to set hdfs account first!')
+        exit(1)
+    host = hdfsConfig.get_config().get('host')
+    user_name = hdfsConfig.get_config().get('userName')
+    hdfs_client = HdfsClient(hosts='{0}:80'.format(host), user_name=user_name, webhdfs_path='/webhdfs/api/v1', timeout=5)
+    root_path = os.path.join('/', user_name, 'nni', 'experiments')
+    while True:
+        inputs = input('INFO: clean up all files in {0}, do you want to continue?[Y/N]:'.format(root_path))
+        if inputs.lower() not in ['y', 'n', 'yes', 'no']:
+            print_warning('please input Y or N!')
+        elif inputs.lower() in ['n', 'no']:
+            exit(0)
+        else:
+            break
+    path_list = hdfs_client.listdir(root_path)
+    for path in path_list:
+        full_path = os.path.join(root_path, path)
+        print_normal('deleting {0}'.format(full_path))
+        if hdfs_client.delete(full_path, recursive=True):
+            print_normal('delete success!')
+        else:
+            print_normal('delete failed!')
+    print_normal('DONE')
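The JSON written by `export_trials_data` in json mode pairs naturally with the tuners' `import_data`; a minimal sketch of reading it back (the file name is illustrative):

```python
import json

# Each exported record carries at least 'parameter', 'value' and 'id',
# matching the keys the CSV branch above relies on.
with open('experiment_data.json') as file:
    records = json.load(file)
for record in records:
    print(record['id'], record['value'], record['parameter'])
```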
@@ -35,6 +35,8 @@ CHECK_STATUS_API = '/check-status'
 TRIAL_JOBS_API = '/trial-jobs'

+EXPORT_DATA_API = '/export-data'
+
 TENSORBOARD_API = '/tensorboard'
@@ -68,6 +70,11 @@ def trial_job_id_url(port, job_id):
     return '{0}:{1}{2}{3}/:{4}'.format(BASE_URL, port, API_ROOT_URL, TRIAL_JOBS_API, job_id)

+def export_data_url(port):
+    '''get export_data url'''
+    return '{0}:{1}{2}{3}'.format(BASE_URL, port, API_ROOT_URL, EXPORT_DATA_API)
+
 def tensorboard_url(port):
     '''get tensorboard url'''
     return '{0}:{1}{2}{3}'.format(BASE_URL, port, API_ROOT_URL, TENSORBOARD_API)
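Assuming `BASE_URL` is `http://localhost` and `API_ROOT_URL` is `/api/v1/nni` (neither constant appears in this hunk, so these values are an assumption), `export_data_url(8080)` would evaluate to `http://localhost:8080/api/v1/nni/export-data`.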