Unverified commit e19f5d26, authored by liuzhe-lz, committed by GitHub
Browse files

Fix k8s bugs (#3572)

parent 055d7552
...@@ -19,7 +19,6 @@ from . import management ...@@ -19,7 +19,6 @@ from . import management
from . import rest from . import rest
from ..tools.nnictl.command_utils import kill_command from ..tools.nnictl.command_utils import kill_command
nni.runtime.log.init_logger_experiment()
_logger = logging.getLogger('nni.experiment') _logger = logging.getLogger('nni.experiment')
...@@ -71,6 +70,8 @@ class Experiment: ...@@ -71,6 +70,8 @@ class Experiment:
... ...
def __init__(self, config=None, training_service=None): def __init__(self, config=None, training_service=None):
nni.runtime.log.init_logger_experiment()
self.config: Optional[ExperimentConfig] = None self.config: Optional[ExperimentConfig] = None
self.id: Optional[str] = None self.id: Optional[str] = None
self.port: Optional[int] = None self.port: Optional[int] = None
......
...@@ -46,6 +46,7 @@ def init_logger() -> None: ...@@ -46,6 +46,7 @@ def init_logger() -> None:
logging.getLogger('filelock').setLevel(logging.WARNING) logging.getLogger('filelock').setLevel(logging.WARNING)
_exp_log_initialized = False
def init_logger_experiment() -> None: def init_logger_experiment() -> None:
""" """
...@@ -53,9 +54,12 @@ def init_logger_experiment() -> None: ...@@ -53,9 +54,12 @@ def init_logger_experiment() -> None:
This function will get invoked after `init_logger()`. This function will get invoked after `init_logger()`.
""" """
colorful_formatter = Formatter(log_format, time_format) global _exp_log_initialized
colorful_formatter.format = _colorful_format if not _exp_log_initialized:
handlers['_default_'].setFormatter(colorful_formatter) _exp_log_initialized = True
colorful_formatter = Formatter(log_format, time_format)
colorful_formatter.format = _colorful_format
handlers['_default_'].setFormatter(colorful_formatter)
def start_experiment_log(experiment_id: str, log_directory: Path, debug: bool) -> None: def start_experiment_log(experiment_id: str, log_directory: Path, debug: bool) -> None:
log_path = _prepare_log_dir(log_directory) / 'dispatcher.log' log_path = _prepare_log_dir(log_directory) / 'dispatcher.log'
......
...@@ -212,7 +212,9 @@ def set_experiment_v1(experiment_config, mode, port, config_file_name): ...@@ -212,7 +212,9 @@ def set_experiment_v1(experiment_config, mode, port, config_file_name):
request_data['experimentName'] = experiment_config['experimentName'] request_data['experimentName'] = experiment_config['experimentName']
request_data['trialConcurrency'] = experiment_config['trialConcurrency'] request_data['trialConcurrency'] = experiment_config['trialConcurrency']
request_data['maxExecDuration'] = experiment_config['maxExecDuration'] request_data['maxExecDuration'] = experiment_config['maxExecDuration']
request_data['maxExperimentDuration'] = str(experiment_config['maxExecDuration']) + 's'
request_data['maxTrialNum'] = experiment_config['maxTrialNum'] request_data['maxTrialNum'] = experiment_config['maxTrialNum']
request_data['maxTrialNumber'] = experiment_config['maxTrialNum']
request_data['searchSpace'] = experiment_config.get('searchSpace') request_data['searchSpace'] = experiment_config.get('searchSpace')
request_data['trainingServicePlatform'] = experiment_config.get('trainingServicePlatform') request_data['trainingServicePlatform'] = experiment_config.get('trainingServicePlatform')
# hack for hotfix, fix config.trainingService undefined error, need refactor # hack for hotfix, fix config.trainingService undefined error, need refactor
...@@ -368,14 +370,14 @@ def launch_experiment(args, experiment_config, mode, experiment_id, config_versi ...@@ -368,14 +370,14 @@ def launch_experiment(args, experiment_config, mode, experiment_id, config_versi
code_dir = expand_annotations(experiment_config['trial']['codeDir'], path, nas_mode=nas_mode) code_dir = expand_annotations(experiment_config['trial']['codeDir'], path, nas_mode=nas_mode)
experiment_config['trial']['codeDir'] = code_dir experiment_config['trial']['codeDir'] = code_dir
search_space = generate_search_space(code_dir) search_space = generate_search_space(code_dir)
experiment_config['searchSpace'] = json.dumps(search_space) experiment_config['searchSpace'] = search_space
assert search_space, ERROR_INFO % 'Generated search space is empty' assert search_space, ERROR_INFO % 'Generated search space is empty'
elif config_version == 1: elif config_version == 1:
if experiment_config.get('searchSpacePath'): if experiment_config.get('searchSpacePath'):
search_space = get_json_content(experiment_config.get('searchSpacePath')) search_space = get_json_content(experiment_config.get('searchSpacePath'))
experiment_config['searchSpace'] = json.dumps(search_space) experiment_config['searchSpace'] = search_space
else: else:
experiment_config['searchSpace'] = json.dumps('') experiment_config['searchSpace'] = ''
# check rest server # check rest server
running, _ = check_rest_server(args.port) running, _ = check_rest_server(args.port)
......
...@@ -140,10 +140,11 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple ...@@ -140,10 +140,11 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
const trialLocalTempFolder: string = path.join(getExperimentRootDir(), 'trials-local', trialJobId); const trialLocalTempFolder: string = path.join(getExperimentRootDir(), 'trials-local', trialJobId);
let frameworkcontrollerJobName: string = `nniexp${this.experimentId}trial${trialJobId}`.toLowerCase(); let frameworkcontrollerJobName: string = `nniexp${this.experimentId}trial${trialJobId}`.toLowerCase();
// Create frameworkcontroller job based on generated frameworkcontroller job resource config let frameworkcontrollerJobConfig: any;
let frameworkcontrollerJobConfig = JSON.parse(JSON.stringify(this.fcTemplate));
if (this.fcTemplate !== undefined) { if (this.fcTemplate !== undefined) {
// Create frameworkcontroller job based on generated frameworkcontroller job resource config
frameworkcontrollerJobConfig = JSON.parse(JSON.stringify(this.fcTemplate));
// add a custom name extension to the job name and apply it to the custom template // add a custom name extension to the job name and apply it to the custom template
frameworkcontrollerJobName += "xx" + this.fcTemplate.metadata.name; frameworkcontrollerJobName += "xx" + this.fcTemplate.metadata.name;
// Process custom task roles commands // Process custom task roles commands
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment