Unverified commit 76152d40, authored by liuzhe-lz and committed by GitHub
Browse files

Fix OpenPAI IT (#4057)

parent 56da3c39
......@@ -64,10 +64,11 @@ jobs:
--nni_docker_image nnidev/nni-nightly \
--pai_storage_config_name confignfs-data \
--pai_token $(pai_token) \
--nni_manager_nfs_mount_path /home/quzha/mnt-pai-ne/shinyang3 \
--container_nfs_mount_path /mnt/confignfs-data/shinyang3 \
--nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) \
--container_nfs_mount_path $(container_nfs_mount_path) \
--nni_manager_ip $(manager_ip) \
--vc nni
--vc nni \
--debug true
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
displayName: Integration test
......@@ -82,8 +83,8 @@ jobs:
--nni_docker_image nnidev/nni-nightly \
--pai_storage_config_name confignfs-data \
--pai_token $(pai_token) \
--nni_manager_nfs_mount_path /home/quzha/mnt-pai-ne/shinyang3 \
--container_nfs_mount_path /mnt/confignfs-data/shinyang3 \
--nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) \
--container_nfs_mount_path $(container_nfs_mount_path) \
--nni_manager_ip $(manager_ip) \
--vc nni
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
......
......@@ -33,6 +33,8 @@ def update_training_service_config(args):
config[args.ts]['trial']['paiStorageConfigName'] = args.pai_storage_config_name
if args.vc is not None:
config[args.ts]['trial']['virtualCluster'] = args.vc
if args.debug is not None:
config[args.ts]['debug'] = args.debug.lower() == 'true'
elif args.ts == 'kubeflow':
if args.nfs_server is not None:
config[args.ts]['kubeflowConfig']['nfs']['server'] = args.nfs_server
......@@ -146,6 +148,7 @@ if __name__ == '__main__':
parser.add_argument("--pai_storage_config_name", type=str)
parser.add_argument("--nni_manager_nfs_mount_path", type=str)
parser.add_argument("--container_nfs_mount_path", type=str)
parser.add_argument("--debug", type=str)
# args for kubeflow and frameworkController
parser.add_argument("--nfs_path", type=str)
parser.add_argument("--keyvault_vaultname", type=str)
......
......@@ -228,4 +228,4 @@ export function flattenConfig<T>(config: ExperimentConfig, platform: string): T
Object.assign(flattened, config.trainingService);
}
return <T>flattened;
}
\ No newline at end of file
}
......@@ -70,6 +70,7 @@ class PAITrainingService implements TrainingService {
this.paiTokenUpdateInterval = 7200000; //2hours
this.log.info('Construct paiBase training service.');
this.config = flattenConfig(config, 'openpai');
this.versionCheck = !this.config.debug;
this.paiJobRestServer = new PAIJobRestServer(this);
this.paiToken = this.config.token;
this.protocol = this.config.host.toLowerCase().startsWith('https://') ? 'https' : 'http';
......@@ -78,7 +79,7 @@ class PAITrainingService implements TrainingService {
private async copyTrialCode(): Promise<void> {
await validateCodeDir(this.config.trialCodeDirectory);
const nniManagerNFSExpCodeDir = path.join(this.config.trialCodeDirectory, this.experimentId, 'nni-code');
const nniManagerNFSExpCodeDir = path.join(this.config.localStorageMountPoint, this.experimentId, 'nni-code');
await execMkdir(nniManagerNFSExpCodeDir);
this.log.info(`Starting copy codeDir data from ${this.config.trialCodeDirectory} to ${nniManagerNFSExpCodeDir}`);
await execCopydir(this.config.trialCodeDirectory, nniManagerNFSExpCodeDir);
......
......@@ -26,11 +26,11 @@ class RouterTrainingService implements TrainingService {
const instance = new RouterTrainingService();
instance.log = getLogger('RouterTrainingService');
const platform = Array.isArray(config.trainingService) ? 'hybrid' : config.trainingService.platform;
if (platform === 'remote' && !(<RemoteConfig>config.trainingService).reuseMode) {
if (platform === 'remote' && (<RemoteConfig>config.trainingService).reuseMode === false) {
instance.internalTrainingService = new RemoteMachineTrainingService(config);
} else if (platform === 'openpai' && !(<OpenpaiConfig>config.trainingService).reuseMode) {
} else if (platform === 'openpai' && (<OpenpaiConfig>config.trainingService).reuseMode === false) {
instance.internalTrainingService = new PAITrainingService(config);
} else if (platform === 'kubeflow' && !(<KubeflowConfig>config.trainingService).reuseMode) {
} else if (platform === 'kubeflow' && (<KubeflowConfig>config.trainingService).reuseMode === false) {
instance.internalTrainingService = new KubeflowTrainingService();
} else {
instance.internalTrainingService = await TrialDispatcher.construct(config);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment