"...testcase/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "0cea39c59857ed8e34c29374c069b595edd4dc60"
Unverified commit 134368fa, authored by George Cheng, committed by GitHub
Browse files

DLTS integration (#1945)



* skeleton of dlts training service (#1844)

* Hello, DLTS!

* Revert version

* Remove fs-extra

* Add some default cluster config

* schema

* fix

* Optional cluster (default to `.default`)

Depends on DLWorkspace#837

* fix

* fix

* optimize gpu type

* No more copy

* Format

* Code clean up

* Issue fix

* Add optional fields in config

* Issue fix

* Lint

* Lint

* Validate email, password and team

* Doc

* Doc fix

* Set TMPDIR

* Use metadata instead of gpu_capacity

* Cancel paused DLTS job

* workaround lint rules

* pylint

* doc
Co-authored-by: QuanluZhang <z.quanluzhang@gmail.com>
parent 03cea2b4
...@@ -5,8 +5,9 @@ import os ...@@ -5,8 +5,9 @@ import os
import json import json
from schema import SchemaError from schema import SchemaError
from schema import Schema from schema import Schema
from .config_schema import LOCAL_CONFIG_SCHEMA, REMOTE_CONFIG_SCHEMA, PAI_CONFIG_SCHEMA, PAI_YARN_CONFIG_SCHEMA, KUBEFLOW_CONFIG_SCHEMA,\ from .config_schema import LOCAL_CONFIG_SCHEMA, REMOTE_CONFIG_SCHEMA, PAI_CONFIG_SCHEMA, PAI_YARN_CONFIG_SCHEMA, \
FRAMEWORKCONTROLLER_CONFIG_SCHEMA, tuner_schema_dict, advisor_schema_dict, assessor_schema_dict DLTS_CONFIG_SCHEMA, KUBEFLOW_CONFIG_SCHEMA, FRAMEWORKCONTROLLER_CONFIG_SCHEMA, \
tuner_schema_dict, advisor_schema_dict, assessor_schema_dict
from .common_utils import print_error, print_warning, print_normal, get_yml_content from .common_utils import print_error, print_warning, print_normal, get_yml_content
def expand_path(experiment_config, key): def expand_path(experiment_config, key):
...@@ -147,7 +148,9 @@ def validate_kubeflow_operators(experiment_config): ...@@ -147,7 +148,9 @@ def validate_kubeflow_operators(experiment_config):
def validate_common_content(experiment_config): def validate_common_content(experiment_config):
'''Validate whether the common values in experiment_config is valid''' '''Validate whether the common values in experiment_config is valid'''
if not experiment_config.get('trainingServicePlatform') or \ if not experiment_config.get('trainingServicePlatform') or \
experiment_config.get('trainingServicePlatform') not in ['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn']: experiment_config.get('trainingServicePlatform') not in [
'local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts'
]:
print_error('Please set correct trainingServicePlatform!') print_error('Please set correct trainingServicePlatform!')
exit(1) exit(1)
schema_dict = { schema_dict = {
...@@ -156,7 +159,8 @@ def validate_common_content(experiment_config): ...@@ -156,7 +159,8 @@ def validate_common_content(experiment_config):
'pai': PAI_CONFIG_SCHEMA, 'pai': PAI_CONFIG_SCHEMA,
'paiYarn': PAI_YARN_CONFIG_SCHEMA, 'paiYarn': PAI_YARN_CONFIG_SCHEMA,
'kubeflow': KUBEFLOW_CONFIG_SCHEMA, 'kubeflow': KUBEFLOW_CONFIG_SCHEMA,
'frameworkcontroller': FRAMEWORKCONTROLLER_CONFIG_SCHEMA 'frameworkcontroller': FRAMEWORKCONTROLLER_CONFIG_SCHEMA,
'dlts': DLTS_CONFIG_SCHEMA,
} }
separate_schema_dict = { separate_schema_dict = {
'tuner': tuner_schema_dict, 'tuner': tuner_schema_dict,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment