Unverified Commit f04d423a authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Support hybrid and V2 config pipeline (#3648)

parent 35c3d169
...@@ -348,6 +348,8 @@ Join IM discussion groups: ...@@ -348,6 +348,8 @@ Join IM discussion groups:
| OpenPAI | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20openpai%20-%20linux?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=65&branchName=master) | | OpenPAI | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20openpai%20-%20linux?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=65&branchName=master) |
| Frameworkcontroller | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20frameworkcontroller?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=70&branchName=master) | | Frameworkcontroller | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20frameworkcontroller?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=70&branchName=master) |
| Kubeflow | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20kubeflow?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=69&branchName=master) | | Kubeflow | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20kubeflow?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=69&branchName=master) |
| Hybrid | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20hybrid?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=79&branchName=master) |
| AzureML | [![Build Status](https://msrasrg.visualstudio.com/NNIOpenSource/_apis/build/status/integration%20test%20-%20aml?branchName=master)](https://msrasrg.visualstudio.com/NNIOpenSource/_build/latest?definitionId=78&branchName=master) |
## Related Projects ## Related Projects
......
# Scheduled integration-test pipeline for the "hybrid" training service.
# Push and pull-request triggers are disabled; only the cron schedule starts it.
trigger: none
pr: none
schedules:
- cron: "0 16 * * *"
  branches:
    include: [ master ]

variables:
  # Name of the SSH endpoint used by every CopyFilesOverSSH / SSH task below.
  worker: remote_nni-ci-gpu-03

jobs:
- job: hybrid
  pool: NNI CI REMOTE CLI
  timeoutInMinutes: 120

  steps:
  # Build a timestamped release number and export it, together with an
  # extended PATH, to the following steps via ##vso[task.setvariable].
  - script: |
      export NNI_RELEASE=999.$(date -u +%Y%m%d%H%M%S)
      echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
      echo "##vso[task.setvariable variable=NNI_RELEASE]${NNI_RELEASE}"
      echo "Working directory: ${PWD}"
      echo "NNI version: ${NNI_RELEASE}"
      python3 -m pip install --upgrade pip setuptools
    displayName: Prepare

  # Install NNI on the agent and register the customized tuner example.
  - script: |
      set -e
      python3 test/vso_tools/install_nni.py $(NNI_RELEASE) SMAC,BOHB
      cd examples/tuners/customized_tuner
      python3 setup.py develop --user
      nnictl algo register --meta meta_file.yml
    displayName: Install NNI

  # Everything the remote worker needs is staged under
  # /tmp/nnitest/$(Build.BuildId) by the copy steps below.
  - task: CopyFilesOverSSH@0
    inputs:
      sshEndpoint: $(worker)
      sourceFolder: dist
      targetFolder: /tmp/nnitest/$(Build.BuildId)/dist
      overwrite: true
    displayName: Copy wheel to remote machine
    timeoutInMinutes: 10

  - task: CopyFilesOverSSH@0
    inputs:
      sshEndpoint: $(worker)
      contents: Dockerfile
      targetFolder: /tmp/nnitest/$(Build.BuildId)
      overwrite: true
    displayName: Copy dockerfile to remote machine
    timeoutInMinutes: 10

  - task: CopyFilesOverSSH@0
    inputs:
      sshEndpoint: $(worker)
      sourceFolder: test
      targetFolder: /tmp/nnitest/$(Build.BuildId)/test
      overwrite: true
    displayName: Copy test scripts to remote machine
    timeoutInMinutes: 10

  # Temporary step: needs to be deleted later.
  - task: CopyFilesOverSSH@0
    inputs:
      sshEndpoint: $(worker)
      contents: interim_vision_patch.py
      targetFolder: /tmp/nnitest/$(Build.BuildId)
      overwrite: true
    displayName: Copy vision patch to remote machine
    timeoutInMinutes: 10

  # NOTE(review): password_in_docker, worker_ip, docker_port and manager_ip are
  # not declared in this file; presumably they are pipeline variables/secrets.
  - task: SSH@0
    inputs:
      sshEndpoint: $(worker)
      runOptions: commands
      commands: python3 /tmp/nnitest/$(Build.BuildId)/test/vso_tools/start_docker.py $(NNI_RELEASE) $(Build.BuildId) $(password_in_docker)
    displayName: Install NNI and run docker on Linux worker

  # Generate the hybrid training-service settings, then run the V2 test cases.
  - script: |
      cd test
      python3 nni_test/nnitest/generate_ts_config.py \
          --ts hybrid \
          --remote_reuse true \
          --remote_user nni \
          --remote_host $(worker_ip) \
          --remote_pwd $(password_in_docker) \
          --remote_port $(docker_port) \
          --nni_manager_ip $(manager_ip) \
          --config_version v2
      python3 nni_test/nnitest/run_tests.py --config config/integration_tests_config_v2.yml --ts hybrid
    displayName: Integration test

  # Runs even when an earlier step failed, so the container is always stopped.
  - task: SSH@0
    inputs:
      sshEndpoint: $(worker)
      runOptions: commands
      commands: python3 /tmp/nnitest/$(Build.BuildId)/test/vso_tools/stop_docker.py $(Build.BuildId)
    condition: always()
    displayName: Stop docker
# Experiment config: TPE tuner plus the Curvefitting assessor, running the
# naive_trial example on the local platform.
experimentName: default_test
searchSpaceFile: ../naive_trial/search_space.json
trialCommand: python3 trial.py
trialCodeDirectory: ../naive_trial
trialGpuNumber: 0
trialConcurrency: 8
maxExperimentDuration: 15m
maxTrialNumber: 8

tuner:
  name: TPE
  classArgs:
    optimize_mode: maximize

trainingService:
  platform: local

assessor:
  name: Curvefitting
  classArgs:
    epoch_num: 20
    start_step: 6
    threshold: 0.95
# Experiment config: TPE tuner plus the Medianstop assessor, running the
# naive_trial example on the local platform.
experimentName: default_test
searchSpaceFile: ../naive_trial/search_space.json
trialCommand: python3 trial.py
trialCodeDirectory: ../naive_trial
trialGpuNumber: 0
trialConcurrency: 8
maxExperimentDuration: 15m
maxTrialNumber: 8

tuner:
  name: TPE
  classArgs:
    optimize_mode: maximize

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
\ No newline at end of file
# Experiment config: sklearn classification example driven by the tuner named
# "demotuner", on the local platform.
experimentName: default_test
searchSpaceFile: ../../../examples/trials/sklearn/classification/search_space.json
trialCommand: python3 main.py
trialCodeDirectory: ../../../examples/trials/sklearn/classification
trialGpuNumber: 0
trialConcurrency: 4
maxExperimentDuration: 15m
maxTrialNumber: 2

tuner:
  name: demotuner

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: cifar10_pytorch example, a single trial limited to one
# epoch and one batch, with the Random tuner on the local platform.
experimentName: default_test
searchSpaceFile: cifar10_search_space.json
trialCommand: python3 main.py --epochs 1 --batches 1
trialCodeDirectory: ../../../examples/trials/cifar10_pytorch
trialGpuNumber: 0
trialConcurrency: 1
maxExperimentDuration: 15m
maxTrialNumber: 1

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: classic NAS example with the PPOTuner on the local
# platform.
experimentName: default_test
# The search space file is produced by an `nnictl ss_gen ... --file=.../nni-nas-search-space.json`
# run and later removed under the same name, so the name here must match it
# exactly (it previously read "ni-nas-search-space.json").
searchSpaceFile: nni-nas-search-space.json
# NOTE(review): the ss_gen command that generates the search space for this
# directory runs "python3 mnist.py --epochs 1"; confirm main.py is intended.
trialCommand: python3 main.py --epochs 1 --batches 1
trialCodeDirectory: ../../../examples/nas/legacy/classic_nas
trialGpuNumber: 0
trialConcurrency: 1
maxExperimentDuration: 15m
maxTrialNumber: 1

tuner:
  name: PPOTuner
  classArgs:
    optimize_mode: maximize

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: mnist-annotation example with the Random tuner on the
# local platform.
experimentName: default_test
# NOTE(review): the search space is taken from the mnist-keras example while
# the trial code lives in mnist-annotation; confirm this is intentional.
searchSpaceFile: ../../../examples/trials/mnist-keras/search_space.json
trialCommand: python3 mnist.py --batch_num 10
trialCodeDirectory: ../../../examples/trials/mnist-annotation
trialGpuNumber: 0
trialConcurrency: 2
maxExperimentDuration: 15m
maxTrialNumber: 2

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: mnist-keras example (200 training samples, one epoch)
# with the Random tuner on the local platform.
experimentName: default_test
searchSpaceFile: ../../../examples/trials/mnist-keras/search_space.json
trialCommand: python3 mnist-keras.py --num_train 200 --epochs 1
trialCodeDirectory: ../../../examples/trials/mnist-keras
trialGpuNumber: 0
trialConcurrency: 2
maxExperimentDuration: 15m
maxTrialNumber: 2

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: mnist-nested-search-space example with the TPE tuner on
# the local platform.
experimentName: default_test
searchSpaceFile: ../../../examples/trials/mnist-nested-search-space/search_space.json
trialCommand: python3 mnist.py --batch_num 10
trialCodeDirectory: ../../../examples/trials/mnist-nested-search-space
trialGpuNumber: 0
trialConcurrency: 2
maxExperimentDuration: 15m
maxTrialNumber: 2

tuner:
  name: TPE

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: mnist-pytorch example, a single trial limited to one
# epoch and ten batches, with the Random tuner on the local platform.
experimentName: default_test
searchSpaceFile: ./mnist_pytorch_search_space.json
trialCommand: python3 mnist.py --epochs 1 --batch_num 10
trialCodeDirectory: ../../../examples/trials/mnist-pytorch
trialGpuNumber: 0
trialConcurrency: 1
maxExperimentDuration: 15m
maxTrialNumber: 1

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: mnist-tfv1 example, a single trial with the Random tuner
# on the local platform.
experimentName: default_test
searchSpaceFile: ./mnist_search_space.json
trialCommand: python3 mnist.py --batch_num 10
trialCodeDirectory: ../../../examples/trials/mnist-tfv1
trialGpuNumber: 0
trialConcurrency: 1
maxExperimentDuration: 15m
maxTrialNumber: 1

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: mnist-tfv2 example, four trials run two at a time, with
# the Random tuner on the local platform.
experimentName: default_test
searchSpaceFile: ./mnist_search_space.json
trialCommand: python3 mnist.py
trialCodeDirectory: ../../../examples/trials/mnist-tfv2
trialGpuNumber: 0
trialConcurrency: 2
maxExperimentDuration: 15m
maxTrialNumber: 4

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: sklearn classification example, four trials run two at a
# time, with the Random tuner on the local platform.
experimentName: default_test
searchSpaceFile: ../../../examples/trials/sklearn/classification/search_space.json
trialCommand: python3 main.py
trialCodeDirectory: ../../../examples/trials/sklearn/classification
trialGpuNumber: 0
trialConcurrency: 2
maxExperimentDuration: 15m
maxTrialNumber: 4

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: sklearn regression example, four trials run two at a
# time, with the Random tuner on the local platform.
experimentName: default_test
searchSpaceFile: ../../../examples/trials/sklearn/regression/search_space.json
trialCommand: python3 main.py
trialCodeDirectory: ../../../examples/trials/sklearn/regression
trialGpuNumber: 0
trialConcurrency: 2
maxExperimentDuration: 15m
maxTrialNumber: 4

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Integration test case list for the V2 experiment configs.
# Values under defaultTestCaseConfig apply to every test case unless the case
# sets the same key itself.
defaultTestCaseConfig:
  launchCommand: nnictl create --config $configFile --debug
  stopCommand: nnictl stop
  experimentStatusCheck: True
  platform: linux darwin win32
  trainingService: all

testCases:
#######################################################################
# nni examples test
#######################################################################
- name: sklearn-classification
  # test case config yml file relative to nni source code directory
  configFile: test/config/examples/sklearn-classification-v2.yml

- name: sklearn-regression
  configFile: test/config/examples/sklearn-regression-v2.yml

- name: mnist-tensorflow
  configFile: test/config/examples/mnist-tfv2-v2.yml
  trainingService: local remote hybrid

- name: mnist-pytorch-local
  configFile: test/config/examples/mnist-pytorch-v2.yml
  # download data first, to prevent concurrent issue.
  launchCommand: python3 ../examples/trials/mnist-pytorch/mnist.py --epochs 1 --batch_num 0 --data_dir ../examples/trials/mnist-pytorch/data && nnictl create --config $configFile --debug
  trainingService: local

- name: mnist-pytorch-non-local
  configFile: test/config/examples/mnist-pytorch-v2.yml
  trainingService: remote pai kubeflow frameworkcontroller dlts hybrid

- name: cifar10-pytorch
  configFile: test/config/examples/cifar10-pytorch-v2.yml

- name: cifar10-pytorch-adl
  configFile: test/config/examples/cifar10-pytorch-adl.yml
  trainingService: adl

# Generates the search space file consumed by the classic-nas-pytorch case.
- name: classic-nas-gen-ss
  configFile: test/config/examples/classic-nas-pytorch-v2.yml
  launchCommand: nnictl ss_gen --trial_command="python3 mnist.py --epochs 1" --trial_dir=../examples/nas/legacy/classic_nas --file=config/examples/nni-nas-search-space.json
  stopCommand:
  experimentStatusCheck: False
  trainingService: local

- name: classic-nas-pytorch
  configFile: test/config/examples/classic-nas-pytorch-v2.yml
  stopCommand: nnictl stop
  # remove search space file
  onExitCommand: python3 -c "import os; os.remove('config/examples/nni-nas-search-space.json')"
  trainingService: local

#########################################################################
# nni features test
#########################################################################
- name: metrics-float
  configFile: test/config/metrics_test/config-v2.yml
  validator:
    class: MetricsValidator
    kwargs:
      expected_result_file: expected_metrics.json

- name: export-float
  configFile: test/config/metrics_test/config-v2.yml
  validator:
    class: ExportValidator

- name: metrics-dict
  configFile: test/config/metrics_test/config_dict_metrics-v2.yml
  validator:
    class: MetricsValidator
    kwargs:
      expected_result_file: expected_metrics_dict.json

- name: export-dict
  configFile: test/config/metrics_test/config_dict_metrics-v2.yml
  validator:
    class: ExportValidator

- name: experiment-import
  configFile: test/config/nnictl_experiment/sklearn-classification-v2.yml
  validator:
    class: ImportValidator
    kwargs:
      import_data_file_path: config/nnictl_experiment/test_import.json

- name: foreground
  configFile: test/config/examples/sklearn-regression-v2.yml
  launchCommand: python3 nni_test/nnitest/foreground.py --config $configFile --timeout 45
  stopCommand:
  experimentStatusCheck: False
  platform: linux darwin

# Experiment resume test part 1
- name: nnictl-resume-1
  configFile: test/config/examples/sklearn-regression-v2.yml
  setExperimentIdtoVar: $resumeExpId
  # for subfolder in codedir test
  launchCommand: python3 -c "import os; os.makedirs('../examples/trials/sklearn/regression/subfolder', exist_ok=True); open('../examples/trials/sklearn/regression/subfolder/subfile', 'a').close()" && nnictl create --config $configFile --debug

# Experiment resume test part 2
- name: nnictl-resume-2
  configFile: test/config/examples/sklearn-regression-v2.yml
  launchCommand: nnictl resume $resumeExpId

# Experiment view test
- name: nnictl-view
  configFile: test/config/examples/sklearn-regression-v2.yml
  launchCommand: nnictl view $resumeExpId
  experimentStatusCheck: False

#########################################################################
# nni assessor test
#########################################################################
- name: assessor-curvefitting
  configFile: test/config/assessors/curvefitting-v2.yml

- name: assessor-medianstop
  configFile: test/config/assessors/medianstop-v2.yml

#########################################################################
# nni tuners test
#########################################################################
- name: tuner-regularized_evolution
  configFile: test/config/tuners/regularized_evolution_tuner-v2.yml

#########################################################################
# nni customized-tuners test
#########################################################################
- name: customized-tuners-demotuner
  configFile: test/config/customized_tuners/demotuner-sklearn-classification-v2.yml
# Experiment config: one run of trial.py from the config's own directory, with
# the Random tuner on the local platform.
experimentName: default_test
searchSpaceFile: ./search_space.json
trialCommand: python3 trial.py
trialCodeDirectory: .
trialGpuNumber: 0
trialConcurrency: 1
maxExperimentDuration: 15m
maxTrialNumber: 1

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: one run of trial.py with the --dict_metrics flag from the
# config's own directory, with the Random tuner on the local platform.
experimentName: default_test
searchSpaceFile: ./search_space.json
trialCommand: python3 trial.py --dict_metrics
trialCodeDirectory: .
trialGpuNumber: 0
trialConcurrency: 1
maxExperimentDuration: 15m
maxTrialNumber: 1

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config whose trial command runs "not_exist.py".
# NOTE(review): presumably a deliberate failing-trial fixture; confirm.
experimentName: default_test
searchSpaceFile: ./search_space.json
trialCommand: python3 not_exist.py
trialCodeDirectory: .
trialGpuNumber: 0
trialConcurrency: 1
maxExperimentDuration: 15m
maxTrialNumber: 1

tuner:
  name: Random

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
# Experiment config: a single sklearn classification trial with the TPE tuner
# on the local platform.
experimentName: default_test
searchSpaceFile: ../../../examples/trials/sklearn/classification/search_space.json
trialCommand: python3 main.py
trialCodeDirectory: ../../../examples/trials/sklearn/classification
trialGpuNumber: 0
trialConcurrency: 1
maxExperimentDuration: 15m
maxTrialNumber: 1

tuner:
  name: TPE

trainingService:
  platform: local

assessor:
  name: Medianstop
  classArgs:
    optimize_mode: maximize
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment