"...composable_kernel.git" did not exist on "675aa69e45381bf8b179f9faf7db8cf726c0e004"
Unverified Commit fadac07d authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Add pai pipeline (#2092)

parent ba5d18c4
...@@ -112,7 +112,7 @@ if __name__ == '__main__': ...@@ -112,7 +112,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, default=None) parser.add_argument("--config", type=str, default=None)
parser.add_argument("--exclude", type=str, default=None) parser.add_argument("--exclude", type=str, default=None)
parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'], default='local') parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', 'paiYarn', 'kubeflow', 'frameworkcontroller'], default='local')
parser.add_argument("--local_gpu", action='store_true') parser.add_argument("--local_gpu", action='store_true')
parser.add_argument("--preinstall", action='store_true') parser.add_argument("--preinstall", action='store_true')
args = parser.parse_args() args = parser.parse_args()
......
...@@ -12,7 +12,7 @@ def update_training_service_config(args): ...@@ -12,7 +12,7 @@ def update_training_service_config(args):
config = get_yml_content(TRAINING_SERVICE_FILE) config = get_yml_content(TRAINING_SERVICE_FILE)
if args.nni_manager_ip is not None: if args.nni_manager_ip is not None:
config[args.ts]['nniManagerIp'] = args.nni_manager_ip config[args.ts]['nniManagerIp'] = args.nni_manager_ip
if args.ts == 'pai': if args.ts == 'paiYarn':
if args.pai_user is not None: if args.pai_user is not None:
config[args.ts]['paiYarnConfig']['userName'] = args.pai_user config[args.ts]['paiYarnConfig']['userName'] = args.pai_user
if args.pai_pwd is not None: if args.pai_pwd is not None:
...@@ -27,6 +27,23 @@ def update_training_service_config(args): ...@@ -27,6 +27,23 @@ def update_training_service_config(args):
config[args.ts]['trial']['outputDir'] = args.output_dir config[args.ts]['trial']['outputDir'] = args.output_dir
if args.vc is not None: if args.vc is not None:
config[args.ts]['trial']['virtualCluster'] = args.vc config[args.ts]['trial']['virtualCluster'] = args.vc
if args.ts == 'pai':
    # Populate the 'pai' section of the training-service config from the
    # command-line arguments. Any option left as None is skipped, so the
    # value already present in the loaded config is kept.
    if args.pai_user is not None:
        config[args.ts]['paiConfig']['userName'] = args.pai_user
    if args.pai_host is not None:
        config[args.ts]['paiConfig']['host'] = args.pai_host
    if args.pai_token is not None:
        config[args.ts]['paiConfig']['token'] = args.pai_token
    if args.nni_docker_image is not None:
        config[args.ts]['trial']['image'] = args.nni_docker_image
    # argparse stores "--nni_manager_nfs_mount_path" and friends under their
    # snake_case names. The guards must test those same attributes: the
    # camelCase spellings (args.nniManagerNFSMountPath, ...) are never
    # defined by the parser and raise AttributeError.
    if args.nni_manager_nfs_mount_path is not None:
        config[args.ts]['trial']['nniManagerNFSMountPath'] = args.nni_manager_nfs_mount_path
    if args.container_nfs_mount_path is not None:
        config[args.ts]['trial']['containerNFSMountPath'] = args.container_nfs_mount_path
    if args.pai_storage_plugin is not None:
        config[args.ts]['trial']['paiStoragePlugin'] = args.pai_storage_plugin
    if args.vc is not None:
        config[args.ts]['trial']['virtualCluster'] = args.vc
elif args.ts == 'kubeflow': elif args.ts == 'kubeflow':
if args.nfs_server is not None: if args.nfs_server is not None:
config[args.ts]['kubeflowConfig']['nfs']['server'] = args.nfs_server config[args.ts]['kubeflowConfig']['nfs']['server'] = args.nfs_server
...@@ -94,6 +111,10 @@ if __name__ == '__main__': ...@@ -94,6 +111,10 @@ if __name__ == '__main__':
parser.add_argument("--data_dir", type=str) parser.add_argument("--data_dir", type=str)
parser.add_argument("--output_dir", type=str) parser.add_argument("--output_dir", type=str)
parser.add_argument("--vc", type=str) parser.add_argument("--vc", type=str)
parser.add_argument("--pai_token", type=str)
parser.add_argument("--pai_storage_plugin", type=str)
parser.add_argument("--nni_manager_nfs_mount_path", type=str)
parser.add_argument("--container_nfs_mount_path", type=str)
# args for kubeflow and frameworkController # args for kubeflow and frameworkController
parser.add_argument("--nfs_server", type=str) parser.add_argument("--nfs_server", type=str)
parser.add_argument("--nfs_path", type=str) parser.add_argument("--nfs_path", type=str)
......
...@@ -51,9 +51,9 @@ jobs: ...@@ -51,9 +51,9 @@ jobs:
echo "TEST_IMG:$TEST_IMG" echo "TEST_IMG:$TEST_IMG"
cd test cd test
python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) \ python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\
--nni_docker_image $TEST_IMG --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip) --pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts pai --exclude multi_phase
PATH=$HOME/.local/bin:$PATH python3 metrics_test.py PATH=$HOME/.local/bin:$PATH python3 metrics_test.py
displayName: 'integration test' displayName: 'integration test'
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
jobs:
- job: 'integration_test_paiYarn'
timeoutInMinutes: 0
steps:
- script: python3 -m pip install --upgrade pip setuptools --user
displayName: 'Install python tools'
- script: |
cd deployment/pypi
echo 'building prerelease package...'
make build
ls $(Build.SourcesDirectory)/deployment/pypi/dist/
condition: eq( variables['build_docker_img'], 'true' )
displayName: 'build nni bdist_wheel'
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
- script: |
sudo apt-get install swig -y
PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC
PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB
displayName: 'Install dependencies for integration tests in PAI mode'
- script: |
set -e
if [ $(build_docker_img) = 'true' ]
then
cd deployment/pypi
docker login -u $(docker_hub_user) -p $(docker_hub_pwd)
echo 'updating docker file for installing nni from local...'
# update Dockerfile to install NNI in docker image from whl file built in last step
sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
cat ../docker/Dockerfile
export IMG_TAG=`date -u +%y%m%d%H%M`
echo 'build and upload docker image'
docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
docker push $(test_docker_img_name):$IMG_TAG
export TEST_IMG=$(test_docker_img_name):$IMG_TAG
cd ../../
else
export TEST_IMG=$(existing_docker_img)
fi
echo "TEST_IMG:$TEST_IMG"
cd test
python3 generate_ts_config.py --ts paiYarn --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) \
--nni_docker_image $TEST_IMG --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip)
PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts paiYarn
PATH=$HOME/.local/bin:$PATH python3 metrics_test.py
displayName: 'integration test'
...@@ -52,7 +52,7 @@ frameworkcontroller: ...@@ -52,7 +52,7 @@ frameworkcontroller:
local: local:
trainingServicePlatform: local trainingServicePlatform: local
pai: paiYarn:
nniManagerIp: nniManagerIp:
maxExecDuration: 15m maxExecDuration: 15m
paiYarnConfig: paiYarnConfig:
...@@ -68,6 +68,21 @@ pai: ...@@ -68,6 +68,21 @@ pai:
memoryMB: 8192 memoryMB: 8192
outputDir: outputDir:
virtualCluster: virtualCluster:
pai:
nniManagerIp:
maxExecDuration: 15m
paiConfig:
host:
userName:
trainingServicePlatform: pai
trial:
gpuNum: 1
cpuNum: 1
image:
memoryMB: 8192
nniManagerNFSMountPath:
containerNFSMountPath:
paiStoragePlugin:
remote: remote:
machineList: machineList:
- ip: - ip:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment