Commit 76152d40 (unverified), authored by liuzhe-lz, committed by GitHub
Browse files

Fix OpenPAI IT (#4057)

parent 56da3c39
...@@ -64,10 +64,11 @@ jobs: ...@@ -64,10 +64,11 @@ jobs:
--nni_docker_image nnidev/nni-nightly \ --nni_docker_image nnidev/nni-nightly \
--pai_storage_config_name confignfs-data \ --pai_storage_config_name confignfs-data \
--pai_token $(pai_token) \ --pai_token $(pai_token) \
--nni_manager_nfs_mount_path /home/quzha/mnt-pai-ne/shinyang3 \ --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) \
--container_nfs_mount_path /mnt/confignfs-data/shinyang3 \ --container_nfs_mount_path $(container_nfs_mount_path) \
--nni_manager_ip $(manager_ip) \ --nni_manager_ip $(manager_ip) \
--vc nni --vc nni \
--debug true
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
displayName: Integration test displayName: Integration test
...@@ -82,8 +83,8 @@ jobs: ...@@ -82,8 +83,8 @@ jobs:
--nni_docker_image nnidev/nni-nightly \ --nni_docker_image nnidev/nni-nightly \
--pai_storage_config_name confignfs-data \ --pai_storage_config_name confignfs-data \
--pai_token $(pai_token) \ --pai_token $(pai_token) \
--nni_manager_nfs_mount_path /home/quzha/mnt-pai-ne/shinyang3 \ --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) \
--container_nfs_mount_path /mnt/confignfs-data/shinyang3 \ --container_nfs_mount_path $(container_nfs_mount_path) \
--nni_manager_ip $(manager_ip) \ --nni_manager_ip $(manager_ip) \
--vc nni --vc nni
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
......
...@@ -33,6 +33,8 @@ def update_training_service_config(args): ...@@ -33,6 +33,8 @@ def update_training_service_config(args):
config[args.ts]['trial']['paiStorageConfigName'] = args.pai_storage_config_name config[args.ts]['trial']['paiStorageConfigName'] = args.pai_storage_config_name
if args.vc is not None: if args.vc is not None:
config[args.ts]['trial']['virtualCluster'] = args.vc config[args.ts]['trial']['virtualCluster'] = args.vc
if args.debug is not None:
config[args.ts]['debug'] = args.debug.lower() == 'true'
elif args.ts == 'kubeflow': elif args.ts == 'kubeflow':
if args.nfs_server is not None: if args.nfs_server is not None:
config[args.ts]['kubeflowConfig']['nfs']['server'] = args.nfs_server config[args.ts]['kubeflowConfig']['nfs']['server'] = args.nfs_server
...@@ -146,6 +148,7 @@ if __name__ == '__main__': ...@@ -146,6 +148,7 @@ if __name__ == '__main__':
parser.add_argument("--pai_storage_config_name", type=str) parser.add_argument("--pai_storage_config_name", type=str)
parser.add_argument("--nni_manager_nfs_mount_path", type=str) parser.add_argument("--nni_manager_nfs_mount_path", type=str)
parser.add_argument("--container_nfs_mount_path", type=str) parser.add_argument("--container_nfs_mount_path", type=str)
parser.add_argument("--debug", type=str)
# args for kubeflow and frameworkController # args for kubeflow and frameworkController
parser.add_argument("--nfs_path", type=str) parser.add_argument("--nfs_path", type=str)
parser.add_argument("--keyvault_vaultname", type=str) parser.add_argument("--keyvault_vaultname", type=str)
......
...@@ -228,4 +228,4 @@ export function flattenConfig<T>(config: ExperimentConfig, platform: string): T ...@@ -228,4 +228,4 @@ export function flattenConfig<T>(config: ExperimentConfig, platform: string): T
Object.assign(flattened, config.trainingService); Object.assign(flattened, config.trainingService);
} }
return <T>flattened; return <T>flattened;
} }
\ No newline at end of file
...@@ -70,6 +70,7 @@ class PAITrainingService implements TrainingService { ...@@ -70,6 +70,7 @@ class PAITrainingService implements TrainingService {
this.paiTokenUpdateInterval = 7200000; //2hours this.paiTokenUpdateInterval = 7200000; //2hours
this.log.info('Construct paiBase training service.'); this.log.info('Construct paiBase training service.');
this.config = flattenConfig(config, 'openpai'); this.config = flattenConfig(config, 'openpai');
this.versionCheck = !this.config.debug;
this.paiJobRestServer = new PAIJobRestServer(this); this.paiJobRestServer = new PAIJobRestServer(this);
this.paiToken = this.config.token; this.paiToken = this.config.token;
this.protocol = this.config.host.toLowerCase().startsWith('https://') ? 'https' : 'http'; this.protocol = this.config.host.toLowerCase().startsWith('https://') ? 'https' : 'http';
...@@ -78,7 +79,7 @@ class PAITrainingService implements TrainingService { ...@@ -78,7 +79,7 @@ class PAITrainingService implements TrainingService {
private async copyTrialCode(): Promise<void> { private async copyTrialCode(): Promise<void> {
await validateCodeDir(this.config.trialCodeDirectory); await validateCodeDir(this.config.trialCodeDirectory);
const nniManagerNFSExpCodeDir = path.join(this.config.trialCodeDirectory, this.experimentId, 'nni-code'); const nniManagerNFSExpCodeDir = path.join(this.config.localStorageMountPoint, this.experimentId, 'nni-code');
await execMkdir(nniManagerNFSExpCodeDir); await execMkdir(nniManagerNFSExpCodeDir);
this.log.info(`Starting copy codeDir data from ${this.config.trialCodeDirectory} to ${nniManagerNFSExpCodeDir}`); this.log.info(`Starting copy codeDir data from ${this.config.trialCodeDirectory} to ${nniManagerNFSExpCodeDir}`);
await execCopydir(this.config.trialCodeDirectory, nniManagerNFSExpCodeDir); await execCopydir(this.config.trialCodeDirectory, nniManagerNFSExpCodeDir);
......
...@@ -26,11 +26,11 @@ class RouterTrainingService implements TrainingService { ...@@ -26,11 +26,11 @@ class RouterTrainingService implements TrainingService {
const instance = new RouterTrainingService(); const instance = new RouterTrainingService();
instance.log = getLogger('RouterTrainingService'); instance.log = getLogger('RouterTrainingService');
const platform = Array.isArray(config.trainingService) ? 'hybrid' : config.trainingService.platform; const platform = Array.isArray(config.trainingService) ? 'hybrid' : config.trainingService.platform;
if (platform === 'remote' && !(<RemoteConfig>config.trainingService).reuseMode) { if (platform === 'remote' && (<RemoteConfig>config.trainingService).reuseMode === false) {
instance.internalTrainingService = new RemoteMachineTrainingService(config); instance.internalTrainingService = new RemoteMachineTrainingService(config);
} else if (platform === 'openpai' && !(<OpenpaiConfig>config.trainingService).reuseMode) { } else if (platform === 'openpai' && (<OpenpaiConfig>config.trainingService).reuseMode === false) {
instance.internalTrainingService = new PAITrainingService(config); instance.internalTrainingService = new PAITrainingService(config);
} else if (platform === 'kubeflow' && !(<KubeflowConfig>config.trainingService).reuseMode) { } else if (platform === 'kubeflow' && (<KubeflowConfig>config.trainingService).reuseMode === false) {
instance.internalTrainingService = new KubeflowTrainingService(); instance.internalTrainingService = new KubeflowTrainingService();
} else { } else {
instance.internalTrainingService = await TrialDispatcher.construct(config); instance.internalTrainingService = await TrialDispatcher.construct(config);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment