"vscode:/vscode.git/clone" did not exist on "d072790fe27a5915d12b9172020b6b74e485aadf"
Unverified commit a5efb4e6, authored by SparkSnail, committed by GitHub
Browse files

Fix v2 config version_check and log_collection (#3575)

parent e19f5d26
......@@ -131,12 +131,39 @@ def set_adl_config(experiment_config, port, config_file_name):
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message
set_V1_common_config(experiment_config, port, config_file_name)
result, message = setNNIManagerIp(experiment_config, port, config_file_name)
if not result:
return result, message
#set trial_config
return set_trial_config(experiment_config, port, config_file_name), None
def validate_response(response, config_file_name):
    '''Terminate the process with exit code 1 unless `response` is truthy and has status code 200.

    response: object exposing `status_code` and `text`, or None when no response was obtained.
    config_file_name: passed to get_log_path() to locate the stderr log file.

    On failure with a response present, its body is appended to the stderr log
    (pretty-printed when it parses as JSON, verbatim otherwise). The error is then
    printed through print_error() and exit(1) is called. Returns None on success.
    '''
    if response and response.status_code == 200:
        return
    if response is None:
        # There is no body to log; use a placeholder so the concatenation below
        # does not fail with TypeError on None.
        err_message = 'No response from the REST server'
    else:
        err_message = response.text
        _, stderr_full_path = get_log_path(config_file_name)
        try:
            log_text = json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':'))
        except ValueError:
            # The body is not valid JSON: log it unchanged rather than crashing
            # before the error has been reported.
            log_text = err_message
        with open(stderr_full_path, 'a+') as fout:
            fout.write(log_text)
    print_error('Error:' + err_message)
    exit(1)
# hack to fix v1 version_check and log_collection bug, need refactor
def set_V1_common_config(experiment_config, port, config_file_name):
    '''Send the version_check and (when set) log_collection settings to the cluster metadata URL.

    version_check defaults to True, becomes the negation of 'debug' when that key is
    present, and is overridden by 'versionCheck' when that key is present. Every PUT
    response is passed to validate_response().
    '''
    debug_flag = experiment_config.get('debug')
    explicit_check = experiment_config.get('versionCheck')
    if explicit_check is not None:
        # an explicit versionCheck wins over the debug-derived value
        enable_check = explicit_check
    elif debug_flag is not None:
        # debug mode should disable version check
        enable_check = not debug_flag
    else:
        enable_check = True

    settings = [('version_check', enable_check)]
    log_collection = experiment_config.get('logCollection')
    if log_collection:
        settings.append(('log_collection', log_collection))

    # one PUT per setting, each validated before the next is sent
    for key, value in settings:
        response = rest_put(cluster_metadata_url(port), json.dumps({key: value}), REST_TIME_OUT)
        validate_response(response, config_file_name)
def setNNIManagerIp(experiment_config, port, config_file_name):
'''set nniManagerIp'''
if experiment_config.get('nniManagerIp') is None:
......@@ -167,6 +194,7 @@ def set_kubeflow_config(experiment_config, port, config_file_name):
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message
set_V1_common_config(experiment_config, port, config_file_name)
result, message = setNNIManagerIp(experiment_config, port, config_file_name)
if not result:
return result, message
......@@ -186,6 +214,7 @@ def set_frameworkcontroller_config(experiment_config, port, config_file_name):
with open(stderr_full_path, 'a+') as fout:
fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':')))
return False, err_message
set_V1_common_config(experiment_config, port, config_file_name)
result, message = setNNIManagerIp(experiment_config, port, config_file_name)
if not result:
return result, message
......
......@@ -197,6 +197,8 @@ export namespace ValidationSchemas {
nni_manager_ip: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
nniManagerIp: joi.string().min(1)
}),
version_check: joi.boolean(), // eslint-disable-line @typescript-eslint/camelcase
log_collection: joi.string(), // eslint-disable-line @typescript-eslint/camelcase
remote_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
reuse: joi.boolean()
}),
......
......@@ -19,6 +19,7 @@ import {validateCodeDir} from '../../common/util';
import {NFSConfig} from '../kubernetesConfig';
import {KubernetesTrialJobDetail} from '../kubernetesData';
import {KubernetesTrainingService} from '../kubernetesTrainingService';
import {KubernetesJobRestServer} from '../kubernetesJobRestServer';
import {FrameworkControllerClientFactory} from './frameworkcontrollerApiClient';
import {
FrameworkControllerClusterConfig,
......@@ -52,7 +53,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
public async run(): Promise<void> {
this.kubernetesJobRestServer = component.get(FrameworkControllerJobRestServer);
this.kubernetesJobRestServer = new KubernetesJobRestServer(this);
if (this.kubernetesJobRestServer === undefined) {
throw new Error('kubernetesJobRestServer not initialized!');
}
......
......@@ -19,6 +19,7 @@ import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey';
import { validateCodeDir } from '../../common/util';
import { NFSConfig } from '../kubernetesConfig';
import { KubernetesTrialJobDetail } from '../kubernetesData';
import { KubernetesJobRestServer } from '../kubernetesJobRestServer';
import { KubernetesTrainingService } from '../kubernetesTrainingService';
import { KubeflowOperatorClientFactory } from './kubeflowApiClient';
import { KubeflowClusterConfig, KubeflowClusterConfigAzure, KubeflowClusterConfigFactory, KubeflowClusterConfigNFS,
......@@ -46,7 +47,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
public async run(): Promise<void> {
this.log.info('Run Kubeflow training service.');
this.kubernetesJobRestServer = component.get(KubeflowJobRestServer);
this.kubernetesJobRestServer = new KubernetesJobRestServer(this);
if (this.kubernetesJobRestServer === undefined) {
throw new Error('kubernetesJobRestServer not initialized!');
}
......
......@@ -16,7 +16,6 @@ import { KubernetesTrainingService } from './kubernetesTrainingService';
export class KubernetesJobRestServer extends ClusterJobRestServer {
@Inject
private readonly kubernetesTrainingService? : KubernetesTrainingService;
/**
* constructor to provide NNIRestServer's own rest property, e.g. port
*/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment