Rename heterogeneous to hybrid (#3252)

c702241e · SparkSnail · GitHub · b4f0d321 · c702241e · c702241e
Unverified Commit c702241e authored Jan 05, 2021 by SparkSnail Committed by GitHub Jan 05, 2021
10 changed files
--- a/docs/en_US/TrainingService/HeterogeneousMode.rst
+++ b/docs/en_US/TrainingService/HeterogeneousMode.rst
-**Run an Experiment on Heterogeneous Mode**
+**Run an Experiment on Hybrid Mode**
 ===========================================
-Run NNI on heterogeneous mode means that NNI will run trials jobs in multiple kinds of training platforms. For example, NNI could submit trial jobs to remote machine and AML simultaneously.
+Running NNI in hybrid mode means that NNI will run trial jobs on multiple kinds of training platforms. For example, NNI could submit trial jobs to a remote machine and AML simultaneously.
 Setup environment
 -----------------
-NNI has supported `local <./LocalMode.rst>`__\ , `remote <./RemoteMachineMode.rst>`__\ , `PAI <./PaiMode.rst>`__\ , and `AML <./AMLMode.rst>`__ for heterogeneous training service. Before starting an experiment using these mode, users should setup the corresponding environment for the platforms. More details about the environment setup could be found in the corresponding docs.
+NNI supports `local <./LocalMode.rst>`__\ , `remote <./RemoteMachineMode.rst>`__\ , `PAI <./PaiMode.rst>`__\ , and `AML <./AMLMode.rst>`__ for the hybrid training service. Before starting an experiment using these modes, users should set up the corresponding environment for each platform. More details about the environment setup can be found in the corresponding docs.
 Run an experiment
 -----------------
@@ -20,7 +20,7 @@ Use ``examples/trials/mnist-tfv1`` as an example. The NNI config YAML file's con
    trialConcurrency: 2
    maxExecDuration: 1h
    maxTrialNum: 10
-    trainingServicePlatform: heterogeneous
+    trainingServicePlatform: hybrid
    searchSpacePath: search_space.json
    #choice: true, false
    useAnnotation: false
@@ -33,7 +33,7 @@ Use ``examples/trials/mnist-tfv1`` as an example. The NNI config YAML file's con
      command: python3 mnist.py
      codeDir: .
      gpuNum: 1
-    heterogeneousConfig:
+    hybridConfig:
      trainingServicePlatforms:
        - local
        - remote
@@ -44,11 +44,11 @@ Use ``examples/trials/mnist-tfv1`` as an example. The NNI config YAML file's con
        username: bob
        passwd: bob123
-Configurations for heterogeneous mode:
+Configurations for hybrid mode:
-heterogeneousConfig:
+hybridConfig:
-* trainingServicePlatforms. required key. This field specify the platforms used in heterogeneous mode, the values using yaml list format. NNI support setting ``local``, ``remote``, ``aml``, ``pai`` in this field.
+* trainingServicePlatforms. required key. This field specifies the platforms used in hybrid mode; the values use YAML list format. NNI supports setting ``local``, ``remote``, ``aml``, ``pai`` in this field.
-.. Note:: If setting a platform in trainingServicePlatforms mode, users should also set the corresponding configuration for the platform. For example, if set ``remote`` as one of the platform, should also set ``machineList`` and ``remoteConfig`` configuration.
+.. Note:: If a platform is listed in trainingServicePlatforms, users should also set the corresponding configuration for that platform. For example, if ``remote`` is one of the platforms, ``machineList`` and ``remoteConfig`` should also be set. The local platform in hybrid mode does not support Windows for now.
--- a/docs/en_US/training_services.rst
+++ b/docs/en_US/training_services.rst
@@ -11,4 +11,4 @@ Introduction to NNI Training Services
    FrameworkController<./TrainingService/FrameworkControllerMode>
    DLTS<./TrainingService/DLTSMode>
    AML<./TrainingService/AMLMode>
-    Heterogeneous<./TrainingService/HeterogeneousMode>
+    Hybrid<./TrainingService/HybridMode>
--- a/examples/trials/mnist-tfv1/config_heterogeneous.yml
+++ b/examples/trials/mnist-tfv1/config_heterogeneous.yml
@@ -3,7 +3,7 @@ experimentName: example_mnist
 trialConcurrency: 3
 maxExecDuration: 1h
 maxTrialNum: 10
-trainingServicePlatform: heterogeneous
+trainingServicePlatform: hybrid
 searchSpacePath: search_space.json
 #choice: true, false
 useAnnotation: false
@@ -18,7 +18,7 @@ trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0
-heterogeneousConfig:
+hybridConfig:
  trainingServicePlatforms:
    - local
    - remote

--- a/nni/runtime/platform/__init__.py
+++ b/nni/runtime/platform/__init__.py
@@ -9,7 +9,7 @@ if trial_env_vars.NNI_PLATFORM is None:
    from .standalone import *
 elif trial_env_vars.NNI_PLATFORM == 'unittest':
    from .test import *
-elif trial_env_vars.NNI_PLATFORM in ('local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'heterogeneous'):
+elif trial_env_vars.NNI_PLATFORM in ('local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'hybrid'):
    from .local import *
 else:
    raise RuntimeError('Unknown platform %s' % trial_env_vars.NNI_PLATFORM)
--- a/nni/tools/nnictl/config_schema.py
+++ b/nni/tools/nnictl/config_schema.py
@@ -124,7 +124,7 @@ common_schema = {
    Optional('maxExecDuration'): And(Regex(r'^[1-9][0-9]*[s|m|h|d]$', error='ERROR: maxExecDuration format is [digit]{s,m,h,d}')),
    Optional('maxTrialNum'): setNumberRange('maxTrialNum', int, 1, 99999),
    'trainingServicePlatform': setChoice(
-        'trainingServicePlatform', 'remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'heterogeneous'),
+        'trainingServicePlatform', 'remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'hybrid'),
    Optional('searchSpacePath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'searchSpacePath'),
    Optional('multiPhase'): setType('multiPhase', bool),
    Optional('multiThread'): setType('multiThread', bool),
@@ -262,7 +262,7 @@ aml_config_schema = {
    }
 }
-heterogeneous_trial_schema = {
+hybrid_trial_schema = {
    'trial': {
        'codeDir': setPathCheck('codeDir'),
        Optional('nniManagerNFSMountPath'): setPathCheck('nniManagerNFSMountPath'),
@@ -279,8 +279,8 @@ heterogeneous_trial_schema = {
    }
 }
-heterogeneous_config_schema = {
+hybrid_config_schema = {
-    'heterogeneousConfig': {
+    'hybridConfig': {
        'trainingServicePlatforms': ['local', 'remote', 'pai', 'aml']
    }
 }
@@ -461,7 +461,7 @@ training_service_schema_dict = {
    'frameworkcontroller': Schema({**common_schema, **frameworkcontroller_trial_schema, **frameworkcontroller_config_schema}),
    'aml': Schema({**common_schema, **aml_trial_schema, **aml_config_schema}),
    'dlts': Schema({**common_schema, **dlts_trial_schema, **dlts_config_schema}),
-    'heterogeneous': Schema({**common_schema, **heterogeneous_trial_schema, **heterogeneous_config_schema, **machine_list_schema,
+    'hybrid': Schema({**common_schema, **hybrid_trial_schema, **hybrid_config_schema, **machine_list_schema,
                             **pai_config_schema, **aml_config_schema, **remote_config_schema}),
 }
@@ -479,7 +479,7 @@ class NNIConfigSchema:
        self.validate_pai_trial_conifg(experiment_config)
        self.validate_kubeflow_operators(experiment_config)
        self.validate_eth0_device(experiment_config)
-        self.validate_heterogeneous_platforms(experiment_config)
+        self.validate_hybrid_platforms(experiment_config)
    def validate_tuner_adivosr_assessor(self, experiment_config):
        if experiment_config.get('advisor'):
@@ -590,15 +590,15 @@ class NNIConfigSchema:
                and 'eth0' not in netifaces.interfaces():
            raise SchemaError('This machine does not contain eth0 network device, please set nniManagerIp in config file!')
-    def validate_heterogeneous_platforms(self, experiment_config):
+    def validate_hybrid_platforms(self, experiment_config):
        required_config_name_map = {
            'remote': 'machineList',
            'aml': 'amlConfig',
            'pai': 'paiConfig'
        }
-        if experiment_config.get('trainingServicePlatform') == 'heterogeneous':
+        if experiment_config.get('trainingServicePlatform') == 'hybrid':
-            for platform in experiment_config['heterogeneousConfig']['trainingServicePlatforms']:
+            for platform in experiment_config['hybridConfig']['trainingServicePlatforms']:
                config_name = required_config_name_map.get(platform)
                if config_name and not experiment_config.get(config_name):
-                    raise SchemaError('Need to set {0} for {1} in heterogeneous mode!'.format(config_name, platform))
+                    raise SchemaError('Need to set {0} for {1} in hybrid mode!'.format(config_name, platform))
\ No newline at end of file
--- a/nni/tools/nnictl/launcher.py
+++ b/nni/tools/nnictl/launcher.py
@@ -300,23 +300,23 @@ def set_aml_config(experiment_config, port, config_file_name):
    #set trial_config
    return set_trial_config(experiment_config, port, config_file_name), err_message
-def set_heterogeneous_config(experiment_config, port, config_file_name):
+def set_hybrid_config(experiment_config, port, config_file_name):
-    '''set heterogeneous configuration'''
+    '''set hybrid configuration'''
-    heterogeneous_config_data = dict()
+    hybrid_config_data = dict()
-    heterogeneous_config_data['heterogeneous_config'] = experiment_config['heterogeneousConfig']
+    hybrid_config_data['hybrid_config'] = experiment_config['hybridConfig']
-    platform_list = experiment_config['heterogeneousConfig']['trainingServicePlatforms']
+    platform_list = experiment_config['hybridConfig']['trainingServicePlatforms']
    for platform in platform_list:
        if platform == 'aml':
-            heterogeneous_config_data['aml_config'] = experiment_config['amlConfig']
+            hybrid_config_data['aml_config'] = experiment_config['amlConfig']
        elif platform ==  'remote':
            if experiment_config.get('remoteConfig'):
-                heterogeneous_config_data['remote_config'] = experiment_config['remoteConfig']
+                hybrid_config_data['remote_config'] = experiment_config['remoteConfig']
-            heterogeneous_config_data['machine_list'] = experiment_config['machineList']
+            hybrid_config_data['machine_list'] = experiment_config['machineList']
        elif platform == 'local' and experiment_config.get('localConfig'):
-            heterogeneous_config_data['local_config'] = experiment_config['localConfig']
+            hybrid_config_data['local_config'] = experiment_config['localConfig']
        elif platform == 'pai':
-            heterogeneous_config_data['pai_config'] = experiment_config['paiConfig']
+            hybrid_config_data['pai_config'] = experiment_config['paiConfig']
-    response = rest_put(cluster_metadata_url(port), json.dumps(heterogeneous_config_data), REST_TIME_OUT)
+    response = rest_put(cluster_metadata_url(port), json.dumps(hybrid_config_data), REST_TIME_OUT)
    err_message = None
    if not response or not response.status_code == 200:
        if response is not None:
@@ -412,10 +412,10 @@ def set_experiment(experiment_config, mode, port, config_file_name):
            {'key': 'aml_config', 'value': experiment_config['amlConfig']})
        request_data['clusterMetaData'].append(
            {'key': 'trial_config', 'value': experiment_config['trial']})
-    elif experiment_config['trainingServicePlatform'] == 'heterogeneous':
+    elif experiment_config['trainingServicePlatform'] == 'hybrid':
        request_data['clusterMetaData'].append(
-            {'key': 'heterogeneous_config', 'value': experiment_config['heterogeneousConfig']})
+            {'key': 'hybrid_config', 'value': experiment_config['hybridConfig']})
-        platform_list = experiment_config['heterogeneousConfig']['trainingServicePlatforms']
+        platform_list = experiment_config['hybridConfig']['trainingServicePlatforms']
        request_dict = {
            'aml': {'key': 'aml_config', 'value': experiment_config.get('amlConfig')},
            'remote': {'key': 'machine_list', 'value': experiment_config.get('machineList')},
@@ -460,8 +460,8 @@ def set_platform_config(platform, experiment_config, port, config_file_name, res
        config_result, err_msg = set_dlts_config(experiment_config, port, config_file_name)
    elif platform == 'aml':
        config_result, err_msg = set_aml_config(experiment_config, port, config_file_name)
-    elif platform == 'heterogeneous':
+    elif platform == 'hybrid':
-        config_result, err_msg = set_heterogeneous_config(experiment_config, port, config_file_name)
+        config_result, err_msg = set_hybrid_config(experiment_config, port, config_file_name)
    else:
        raise Exception(ERROR_INFO % 'Unsupported platform!')
        exit(1)

--- a/ts/nni_manager/main.ts
+++ b/ts/nni_manager/main.ts
@@ -37,7 +37,7 @@ function initStartupInfo(
 }
 async function initContainer(foreground: boolean, platformMode: string, logFileName?: string): Promise<void> {
-    const routerPlatformMode = ['remote', 'pai', 'aml', 'heterogeneous'];
+    const routerPlatformMode = ['remote', 'pai', 'aml', 'hybrid'];
    if (routerPlatformMode.includes(platformMode)) {
        Container.bind(TrainingService)
            .to(RouterTrainingService)
@@ -97,7 +97,7 @@ async function initContainer(foreground: boolean, platformMode: string, logFileN
 function usage(): void {
    console.info('usage: node main.js --port <port> --mode \
-    <local/remote/pai/kubeflow/frameworkcontroller/paiYarn/aml/adl/heterogeneous> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
+    <local/remote/pai/kubeflow/frameworkcontroller/paiYarn/aml/adl/hybrid> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
 }
 const strPort: string = parseArg(['--port', '-p']);
@@ -117,7 +117,7 @@ const foreground: boolean = foregroundArg.toLowerCase() === 'true' ? true : fals
 const port: number = parseInt(strPort, 10);
 const mode: string = parseArg(['--mode', '-m']);
-if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'heterogeneous'].includes(mode)) {
+if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'hybrid'].includes(mode)) {
    console.log(`FATAL: unknown mode: ${mode}`);
    usage();
    process.exit(1);

--- a/ts/nni_manager/rest_server/restValidationSchemas.ts
+++ b/ts/nni_manager/rest_server/restValidationSchemas.ts
@@ -183,7 +183,7 @@ export namespace ValidationSchemas {
                maxTrialNumPerGpu: joi.number(),
                useActiveGpu: joi.boolean()
            }),
-            heterogeneous_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
+            hybrid_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
                trainingServicePlatforms: joi.array(),
            }),
            nni_manager_ip: joi.object({ // eslint-disable-line @typescript-eslint/camelcase

--- a/ts/nni_manager/training_service/common/trialConfigMetadataKey.ts
+++ b/ts/nni_manager/training_service/common/trialConfigMetadataKey.ts
@@ -11,7 +11,7 @@ export enum TrialConfigMetadataKey {
    LOCAL_CONFIG = 'local_config',
    TRIAL_CONFIG = 'trial_config',
    REMOTE_CONFIG = 'remote_config',
-    HETEROGENEOUS_CONFIG = 'heterogeneous_config',
+    HYBRID_CONFIG = 'hybrid_config',
    EXPERIMENT_ID = 'experimentId',
    MULTI_PHASE = 'multiPhase',
    RANDOM_SCHEDULER = 'random_scheduler',
@@ -24,7 +24,7 @@ export enum TrialConfigMetadataKey {
    AML_CLUSTER_CONFIG = 'aml_config',
    VERSION_CHECK = 'version_check',
    LOG_COLLECTION = 'log_collection',
-    // Used to set platform for heterogeneous in reuse mode, 
+    // Used to set platform for hybrid in reuse mode, 
    // temporarily change and will refactor config schema in the future
    PLATFORM_LIST = 'platform_list'
 }
--- a/ts/nni_manager/training_service/reusable/routerTrainingService.ts
+++ b/ts/nni_manager/training_service/reusable/routerTrainingService.ts
@@ -95,8 +95,8 @@ class RouterTrainingService implements TrainingService {
    public async setClusterMetadata(key: string, value: string): Promise<void> {
        if (this.internalTrainingService === undefined) {
-            // Need to refactor configuration, remove heterogeneous_config field in the future
+            // Need to refactor configuration, remove hybrid_config field in the future
-            if (key === TrialConfigMetadataKey.HETEROGENEOUS_CONFIG){
+            if (key === TrialConfigMetadataKey.HYBRID_CONFIG){
                this.internalTrainingService = component.get(TrialDispatcher);
                const heterogenousConfig: HeterogenousConfig = <HeterogenousConfig>JSON.parse(value);
                if (this.internalTrainingService === undefined) {