Unverified Commit 60983149 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Support aml pipeline (#3477)

parent 0513330a
# Nightly integration test of the AML training service.
# CI and PR triggers are disabled; the pipeline only runs on the schedule below.
trigger: none
pr: none

# Azure Pipelines evaluates cron expressions in UTC: every day at 16:00 UTC.
schedules:
  - cron: "0 16 * * *"
    branches:
      include:
        - master

jobs:
  - job: aml
    pool: NNI CI REMOTE CLI
    timeoutInMinutes: 120

    steps:
      # Derive a unique pseudo-release version from the current UTC time and
      # publish it (plus the user-local bin directory on PATH) to later steps.
      - script: |
          set -e
          export NNI_RELEASE=999.$(date -u +%Y%m%d%H%M%S)
          echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
          echo "##vso[task.setvariable variable=NNI_RELEASE]${NNI_RELEASE}"
          echo "Working directory: ${PWD}"
          echo "NNI version: ${NNI_RELEASE}"
          echo "Build docker image: $(build_docker_image)"
          python3 -m pip install --upgrade pip setuptools
        displayName: Prepare

      # Build the TypeScript part and the wheel, then install the wheel with
      # the SMAC and BOHB extras.
      - script: |
          set -e
          python3 setup.py build_ts
          python3 setup.py bdist_wheel -p manylinux1_x86_64
          python3 -m pip install dist/nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl[SMAC,BOHB]
        displayName: Build and install NNI

      - script: |
          set -e
          cd examples/tuners/customized_tuner
          python3 setup.py develop --user
          nnictl algo register --meta meta_file.yml
        displayName: Install customized tuner

      # Only runs when the pipeline variable build_docker_image is 'true'.
      # The Docker Hub password is passed through the environment and piped to
      # `docker login --password-stdin` so it never appears on a command line.
      - script: |
          set -e
          printf '%s' "${DOCKER_HUB_PASSWORD}" | docker login -u nnidev --password-stdin
          echo '## Build docker image ##'
          docker build --build-arg NNI_RELEASE=${NNI_RELEASE} -t nnidev/nni-nightly .
          echo '## Upload docker image ##'
          docker push nnidev/nni-nightly
        env:
          DOCKER_HUB_PASSWORD: $(docker_hub_password)
        condition: eq(variables['build_docker_image'], 'true')
        displayName: Build and upload docker image

      # Fill the AML section of the training-service config from pipeline
      # variables, then run the integration test suite against AML.
      - script: |
          set -e
          cd test
          python3 nni_test/nnitest/generate_ts_config.py \
              --ts aml \
              --subscription_id $(subscriptionId) \
              --resource_group $(resourceGroup) \
              --workspace_name $(workspaceName) \
              --compute_target $(computeTarget) \
              --nni_manager_ip $(manager_ip) \
              --nni_docker_image nnidev/nni-nightly
          python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts aml
        displayName: Integration test
...@@ -105,4 +105,18 @@ adl: ...@@ -105,4 +105,18 @@ adl:
storageClass: storageClass:
storageSize: storageSize:
trainingServicePlatform: adl trainingServicePlatform: adl
# Template for the AML training service used by the integration tests.
# Empty values are placeholders; generate_ts_config.py fills trial.image and
# the amlConfig.* fields from its command-line arguments.
aml:
  nniManagerIp:
  maxExecDuration: 15m
  # NOTE(review): the previous comment here was copied from the PAI section
  # ("set maxTrialNum=1 ... for PAI") and did not match the value below;
  # confirm whether AML needs a specific trial limit.
  maxTrialNum: 2
  trialConcurrency: 2
  trainingServicePlatform: aml
  trial:
    gpuNum: 1
    image:
  amlConfig:
    subscriptionId:
    resourceGroup:
    workspaceName:
    computeTarget:
...@@ -88,13 +88,24 @@ def update_training_service_config(args): ...@@ -88,13 +88,24 @@ def update_training_service_config(args):
config[args.ts]['trial']['nfs']['server'] = args.adl_nfs_server config[args.ts]['trial']['nfs']['server'] = args.adl_nfs_server
config[args.ts]['trial']['nfs']['path'] = args.adl_nfs_path config[args.ts]['trial']['nfs']['path'] = args.adl_nfs_path
config[args.ts]['trial']['nfs']['container_mount_path'] = args.nadl_fs_container_mount_path config[args.ts]['trial']['nfs']['container_mount_path'] = args.nadl_fs_container_mount_path
elif args.ts == 'aml':
if args.nni_docker_image is not None:
config[args.ts]['trial']['image'] = args.nni_docker_image
if args.subscription_id is not None:
config[args.ts]['amlConfig']['subscriptionId'] = args.subscription_id
if args.resource_group is not None:
config[args.ts]['amlConfig']['resourceGroup'] = args.resource_group
if args.workspace_name is not None:
config[args.ts]['amlConfig']['workspaceName'] = args.workspace_name
if args.compute_target is not None:
config[args.ts]['amlConfig']['computeTarget'] = args.compute_target
dump_yml_content(TRAINING_SERVICE_FILE, config) dump_yml_content(TRAINING_SERVICE_FILE, config)
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--ts", type=str, choices=['pai', 'kubeflow', 'remote', 'local', 'frameworkcontroller', 'adl'], default='pai') parser.add_argument("--ts", type=str, choices=['pai', 'kubeflow', 'remote', 'local', 'frameworkcontroller', 'adl', 'aml'], default='pai')
parser.add_argument("--nni_docker_image", type=str) parser.add_argument("--nni_docker_image", type=str)
parser.add_argument("--nni_manager_ip", type=str) parser.add_argument("--nni_manager_ip", type=str)
# args for PAI # args for PAI
...@@ -129,6 +140,11 @@ if __name__ == '__main__': ...@@ -129,6 +140,11 @@ if __name__ == '__main__':
parser.add_argument("--adl_nfs_server", type=str) parser.add_argument("--adl_nfs_server", type=str)
parser.add_argument("--adl_nfs_path", type=str) parser.add_argument("--adl_nfs_path", type=str)
parser.add_argument("--adl_nfs_container_mount_path", type=str) parser.add_argument("--adl_nfs_container_mount_path", type=str)
# args for aml
parser.add_argument("--subscription_id", type=str)
parser.add_argument("--resource_group", type=str)
parser.add_argument("--workspace_name", type=str)
parser.add_argument("--compute_target", type=str)
args = parser.parse_args() args = parser.parse_args()
update_training_service_config(args) update_training_service_config(args)
...@@ -281,7 +281,7 @@ if __name__ == '__main__': ...@@ -281,7 +281,7 @@ if __name__ == '__main__':
parser.add_argument("--cases", type=str, default=None) parser.add_argument("--cases", type=str, default=None)
parser.add_argument("--exclude", type=str, default=None) parser.add_argument("--exclude", type=str, default=None)
parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai', parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai',
'kubeflow', 'frameworkcontroller', 'adl'], default='local') 'kubeflow', 'frameworkcontroller', 'adl', 'aml'], default='local')
args = parser.parse_args() args = parser.parse_args()
run(args) run(args)
...@@ -71,7 +71,7 @@ class NNIDataStore implements DataStore { ...@@ -71,7 +71,7 @@ class NNIDataStore implements DataStore {
public storeTrialJobEvent( public storeTrialJobEvent(
event: TrialJobEvent, trialJobId: string, hyperParameter?: string, jobDetail?: TrialJobDetail): Promise<void> { event: TrialJobEvent, trialJobId: string, hyperParameter?: string, jobDetail?: TrialJobDetail): Promise<void> {
this.log.debug(`storeTrialJobEvent: event: ${event}, data: ${hyperParameter}, jobDetail: ${JSON.stringify(jobDetail)}`); //this.log.debug(`storeTrialJobEvent: event: ${event}, data: ${hyperParameter}, jobDetail: ${JSON.stringify(jobDetail)}`);
// Use the timestamp in jobDetail as TrialJobEvent timestamp for different events // Use the timestamp in jobDetail as TrialJobEvent timestamp for different events
let timestamp: number | undefined; let timestamp: number | undefined;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment