Merge branch 'master' into quniform-tuners

05913424 · suiguoxin · e3c8552f · 1dab3118 · 05913424 · 05913424
Commit 05913424 authored Aug 05, 2019 by suiguoxin
6 changed files
--- a/tools/nni_annotation/testcase/mutable_layer_usercode/simple.py
+++ b/tools/nni_annotation/testcase/mutable_layer_usercode/simple.py
--- a/tools/nni_cmd/config_schema.py
+++ b/tools/nni_cmd/config_schema.py
@@ -92,6 +92,16 @@ tuner_schema_dict = {
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },
+    'TPE': {
+        'builtinTunerName': 'TPE',
+        'classArgs': {
+            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
+            Optional('parallel_optimize'): setType('parallel_optimize', bool),
+            Optional('constant_liar_type'): setChoice('constant_liar_type', 'min', 'max', 'mean')
+        },
+        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
+        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
+    },
    'NetworkMorphism': {
        'builtinTunerName': 'NetworkMorphism',
        'classArgs': {
@@ -210,7 +220,7 @@ common_trial_schema = {
 'trial':{
    'command': setType('command', str),
    'codeDir': setPathCheck('codeDir'),
-    'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
+    Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    Optional('nasMode'): setChoice('classic_mode', 'enas_mode', 'oneshot_mode')
    }
 }
@@ -223,6 +233,8 @@ pai_trial_schema = {
    'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
    'memoryMB': setType('memoryMB', int),
    'image': setType('image', str),
+    Optional('authFile'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
+                         error='ERROR: authFile format error, authFile format is hdfs://xxx.xxx.xxx.xxx:xxx'),
    Optional('shmMB'): setType('shmMB', int),
    Optional('dataDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
                         error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'),
@@ -251,7 +263,8 @@ kubeflow_trial_schema = {
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
-            'image': setType('image', str)
+            'image': setType('image', str),
+            Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath')
        },
        Optional('master'): {
            'replicas': setType('replicas', int),
@@ -259,7 +272,8 @@ kubeflow_trial_schema = {
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
-            'image': setType('image', str)
+            'image': setType('image', str),
+            Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath')
        },
        Optional('worker'):{
            'replicas': setType('replicas', int),
@@ -267,7 +281,8 @@ kubeflow_trial_schema = {
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
-            'image': setType('image', str)
+            'image': setType('image', str),
+            Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath')
        }
    }
 }
@@ -314,7 +329,8 @@ frameworkcontroller_trial_schema = {
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
-            'image': setType('image', str)
+            'image': setType('image', str),
+            Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath')
        }]
    }
 }

--- a/tools/nni_cmd/launcher.py
+++ b/tools/nni_cmd/launcher.py
@@ -360,9 +360,14 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
        module_name = AdvisorModuleName.get(package_name)
    if package_name and module_name:
        try:
-            check_call([sys.executable, '-c', 'import %s'%(module_name)], stdout=PIPE, stderr=PIPE)
+            stdout_full_path, stderr_full_path = get_log_path(config_file_name)
+            with open(stdout_full_path, 'a+') as stdout_file, open(stderr_full_path, 'a+') as stderr_file:
+                check_call([sys.executable, '-c', 'import %s'%(module_name)], stdout=stdout_file, stderr=stderr_file)
        except CalledProcessError as e:
-            print_error('%s should be installed through \'nnictl package install --name %s\''%(package_name, package_name))
+            print_error('some errors happen when import package %s.' %(package_name))
+            print_log_content(config_file_name)
+            if package_name in PACKAGE_REQUIREMENTS:
+                print_error('If %s is not installed, it should be installed through \'nnictl package install --name %s\''%(package_name, package_name))
            exit(1)
    log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else None
    log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None

--- a/tools/nni_cmd/launcher_utils.py
+++ b/tools/nni_cmd/launcher_utils.py
@@ -253,6 +253,14 @@ def validate_pai_trial_conifg(experiment_config):
        experiment_config['trial']['shmMB'] > experiment_config['trial']['memoryMB']:
            print_error('shmMB should be no more than memoryMB!')
            exit(1)
+        #backward compatibility
+        warning_information = '{0} is not supported in NNI anymore, please remove the field in config file!\
+        please refer https://github.com/microsoft/nni/blob/master/docs/en_US/TrainingService/PaiMode.md#run-an-experiment\
+        for the practices of how to get data and output model in trial code'
+        if experiment_config.get('trial').get('dataDir'):
+            print_warning(warning_information.format('dataDir'))
+        if experiment_config.get('trial').get('outputDir'):
+            print_warning(warning_information.format('outputDir'))

 def validate_all_content(experiment_config, config_path):
    '''Validate whether experiment_config is valid'''

--- a/tools/nni_cmd/nnictl.py
+++ b/tools/nni_cmd/nnictl.py
@@ -90,6 +90,7 @@ def parse_args():
    #parse stop command
    parser_stop = subparsers.add_parser('stop', help='stop the experiment')
    parser_stop.add_argument('id', nargs='?', help='the id of experiment, use \'all\' to stop all running experiments')
+    parser_stop.add_argument('--port', '-p', dest='port', help='the port of restful server')
    parser_stop.set_defaults(func=stop_experiment)

    #parse trial command

--- a/tools/nni_cmd/nnictl_utils.py
+++ b/tools/nni_cmd/nnictl_utils.py
@@ -118,12 +118,14 @@ def check_experiment_id(args, update=True):

 def parse_ids(args):
    '''Parse the arguments for nnictl stop
-    1.If there is an id specified, return the corresponding id
-    2.If there is no id specified, and there is an experiment running, return the id, or return Error
-    3.If the id matches an experiment, nnictl will return the id.
-    4.If the id ends with *, nnictl will match all ids matchs the regular
-    5.If the id does not exist but match the prefix of an experiment id, nnictl will return the matched id
-    6.If the id does not exist but match multiple prefix of the experiment ids, nnictl will give id information
+    1.If port is provided and id is not specified, return the id of the experiment that owns the port
+    2.If both port and id are provided, return the id if it owns the port, otherwise fail
+    3.If there is an id specified, return the corresponding id
+    4.If there is no id specified, and there is an experiment running, return the id, or return Error
+    5.If the id matches an experiment, nnictl will return the id.
+    6.If the id ends with *, nnictl will match all ids that match the regular expression
+    7.If the id does not exist but matches the prefix of an experiment id, nnictl will return the matched id
+    8.If the id does not exist but matches the prefix of multiple experiment ids, nnictl will give id information
    '''
    update_experiment()
    experiment_config = Experiments()
@@ -140,7 +142,14 @@ def parse_ids(args):
        elif isinstance(experiment_dict[key], list):
            # if the config file is old version, remove the configuration from file
            experiment_config.remove_experiment(key)
-    if not args.id:
+    if args.port is not None:
+        for key in running_experiment_list:
+            if str(experiment_dict[key]['port']) == args.port:
+                result_list.append(key)
+        if args.id and result_list and args.id != result_list[0]:
+            print_error('Experiment id and resful server port not match')
+            exit(1)
+    elif not args.id:
        if len(running_experiment_list) > 1:
            print_error('There are multiple experiments, please set the experiment id...')
            experiment_information = ""
@@ -166,8 +175,8 @@ def parse_ids(args):
        if len(result_list) > 1:
            print_error(args.id + ' is ambiguous, please choose ' + ' '.join(result_list) )
            return None
-    if not result_list and args.id and args.id != 'all':
-        print_error('There are no experiments matched, please set correct experiment id...')
+    if not result_list and ((args.id and args.id != 'all') or args.port):
+        print_error('There are no experiments matched, please set correct experiment id or restful server port')
    elif not result_list:
        print_error('There is no experiment running...')
    return result_list
@@ -511,9 +520,9 @@ def platform_clean(args):
    if platform not in ['remote', 'pai']:
        print_normal('platform {0} not supported.'.format(platform))
        exit(0)
+    update_experiment()
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
-    update_experiment()
    id_list = list(experiment_dict.keys())
    dir_list = get_platform_dir(config_content)
    if not dir_list:
@@ -544,12 +553,12 @@ def platform_clean(args):

 def experiment_list(args):
    '''get the information of all experiments'''
+    update_experiment()
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    if not experiment_dict:
        print_normal('Cannot find experiments.')
        exit(1)
-    update_experiment()
    experiment_id_list = []
    if args.all:
        for key in experiment_dict.keys():
@@ -586,12 +595,12 @@ def get_time_interval(time1, time2):

 def show_experiment_info():
    '''show experiment information in monitor'''
+    update_experiment()
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    if not experiment_dict:
        print('There is no experiment running...')
        exit(1)
-    update_experiment()
    experiment_id_list = []
    for key in experiment_dict.keys():
        if experiment_dict[key]['status'] != 'STOPPED':