Unverified commit 496f653b, authored by Yuge Zhang and committed by GitHub
Browse files

Refactor integration test (step 7) - pipeline updates (linux part) (#4898)

parent 86b3b11d
...@@ -10,7 +10,7 @@ pylint ...@@ -10,7 +10,7 @@ pylint
pyright == 1.1.250 pyright == 1.1.250
pytest pytest
pytest-cov pytest-cov
rstcheck rstcheck >= 6.0
sphinx >= 4.5 sphinx >= 4.5
sphinx-argparse-nni >= 0.4.0 sphinx-argparse-nni >= 0.4.0
sphinx-copybutton sphinx-copybutton
......
...@@ -172,19 +172,19 @@ To run the tutorial, follow the steps below: ...@@ -172,19 +172,19 @@ To run the tutorial, follow the steps below:
2. **Search**: Based on the architecture of the simplified PFLD, the multi-stage search space and the search hyper-parameters must first be configured in order to construct the supernet. For example, 2. **Search**: Based on the architecture of the simplified PFLD, the multi-stage search space and the search hyper-parameters must first be configured in order to construct the supernet. For example,
.. code-block:: .. code-block:: python
from lib.builder import search_space from lib.builder import search_space
from lib.ops import PRIMITIVES from lib.ops import PRIMITIVES
from lib.supernet import PFLDInference, AuxiliaryNet from lib.supernet import PFLDInference, AuxiliaryNet
from nni.algorithms.nas.pytorch.fbnet import LookUpTable, NASConfig, from nni.algorithms.nas.pytorch.fbnet import LookUpTable, NASConfig
# configuration of hyper-parameters # configuration of hyper-parameters
# search_space defines the multi-stage search space # search_space defines the multi-stage search space
nas_config = NASConfig( nas_config = NASConfig(
model_dir=^./ckpt_save^, model_dir="./ckpt_save",
nas_lr=0.01, nas_lr=0.01,
mode=^mul^, mode="mul",
alpha=0.25, alpha=0.25,
beta=0.6, beta=0.6,
search_space=search_space, search_space=search_space,
......
...@@ -24,7 +24,7 @@ stages: ...@@ -24,7 +24,7 @@ stages:
rstcheck -r source \ rstcheck -r source \
--ignore-directives automodule,autoclass,autofunction,cardlinkitem,codesnippetcard,argparse,tabs \ --ignore-directives automodule,autoclass,autofunction,cardlinkitem,codesnippetcard,argparse,tabs \
--ignore-roles githublink --ignore-substitutions release \ --ignore-roles githublink --ignore-substitutions release \
--report warning --report-level warning
displayName: rstcheck displayName: rstcheck
- script: | - script: |
......
...@@ -11,15 +11,10 @@ jobs: ...@@ -11,15 +11,10 @@ jobs:
timeoutInMinutes: 120 timeoutInMinutes: 120
steps: steps:
- template: templates/setup-non-ms-hosted-agent.yml
- template: templates/config-version.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-dependencies.yml - template: templates/install-dependencies.yml
parameters: parameters:
platform: ubuntu-latest-gpu platform: ubuntu-latest-gpu
python_env: venv
- template: templates/install-nni.yml - template: templates/install-nni.yml
......
...@@ -11,15 +11,10 @@ jobs: ...@@ -11,15 +11,10 @@ jobs:
timeoutInMinutes: 120 timeoutInMinutes: 120
steps: steps:
- template: templates/setup-non-ms-hosted-agent.yml
- template: templates/config-version.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-dependencies.yml - template: templates/install-dependencies.yml
parameters: parameters:
platform: ubuntu-latest-gpu platform: ubuntu-latest-gpu
python_env: venv
- template: templates/install-nni.yml - template: templates/install-nni.yml
...@@ -42,7 +37,7 @@ jobs: ...@@ -42,7 +37,7 @@ jobs:
- script: | - script: |
cd test cd test
python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local python training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts local
displayName: Integration test displayName: Integration test
# TODO: should add a test on platforms other than linux # TODO: should add a test on platforms other than linux
...@@ -11,15 +11,10 @@ jobs: ...@@ -11,15 +11,10 @@ jobs:
timeoutInMinutes: 120 timeoutInMinutes: 120
steps: steps:
- template: templates/setup-non-ms-hosted-agent.yml
- template: templates/config-version.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-dependencies.yml - template: templates/install-dependencies.yml
parameters: parameters:
platform: ubuntu-latest-gpu platform: ubuntu-latest-gpu
python_env: venv
- template: templates/install-nni.yml - template: templates/install-nni.yml
......
...@@ -53,11 +53,11 @@ jobs: ...@@ -53,11 +53,11 @@ jobs:
- script: | - script: |
set -e set -e
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py \ python3 training_service/nnitest/generate_ts_config.py \
--ts adl \ --ts adl \
--nni_docker_image nnidev/nni-nightly \ --nni_docker_image nnidev/nni-nightly \
--checkpoint_storage_class $(checkpoint_storage_class) \ --checkpoint_storage_class $(checkpoint_storage_class) \
--checkpoint_storage_size $(checkpoint_storage_size) \ --checkpoint_storage_size $(checkpoint_storage_size) \
--nni_manager_ip $(nni_manager_ip) --nni_manager_ip $(nni_manager_ip)
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts adl --exclude multi-phase,multi-thread python3 training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts adl --exclude multi-phase,multi-thread
displayName: Integration test displayName: Integration test
...@@ -7,35 +7,23 @@ schedules: ...@@ -7,35 +7,23 @@ schedules:
jobs: jobs:
- job: aml - job: aml
pool: NNI CI REMOTE CLI pool:
vmImage: ubuntu-latest
timeoutInMinutes: 120 timeoutInMinutes: 120
steps: steps:
- script: | - template: templates/install-dependencies.yml
export NNI_RELEASE=999.$(date -u +%Y%m%d%H%M%S) parameters:
echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin" platform: ubuntu-latest
echo "##vso[task.setvariable variable=NNI_RELEASE]${NNI_RELEASE}"
echo "Working directory: ${PWD}"
echo "NNI version: ${NNI_RELEASE}"
echo "Build docker image: $(build_docker_image)"
python3 -m pip install --upgrade pip setuptools - template: templates/install-dependencies-aml.yml
displayName: Prepare
- script: | - template: templates/install-nni.yml
set -e parameters:
python3 setup.py build_ts wheel: true
python3 setup.py bdist_wheel -p manylinux1_x86_64 extra_dep: SMAC,BOHB
python3 -m pip install dist/nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl[SMAC,BOHB]
displayName: Build and install NNI
- script: | - template: templates/install-customized-tuner.yml
set -e
cd examples/tuners/customized_tuner
python3 setup.py develop --user
nnictl algo register --meta meta_file.yml
displayName: Install customized tuner
- script: | - script: |
set -e set -e
...@@ -50,8 +38,7 @@ jobs: ...@@ -50,8 +38,7 @@ jobs:
- script: | - script: |
set -e set -e
cd test cd test
az login --service-principal -u $(client_id) -p $(client_secret) --tenant $(tenant_id) python training_service/nnitest/generate_ts_config.py \
python3 nni_test/nnitest/generate_ts_config.py \
--ts aml \ --ts aml \
--subscription_id $(subscriptionId) \ --subscription_id $(subscriptionId) \
--resource_group $(resourceGroup) \ --resource_group $(resourceGroup) \
...@@ -59,6 +46,7 @@ jobs: ...@@ -59,6 +46,7 @@ jobs:
--compute_target $(computeTarget) \ --compute_target $(computeTarget) \
--nni_manager_ip $(manager_ip) \ --nni_manager_ip $(manager_ip) \
--nni_docker_image nnidev/nni-nightly --nni_docker_image nnidev/nni-nightly
python training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts aml
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts aml
displayName: Integration test displayName: Integration test
- template: templates/save-crashed-info.yml
...@@ -40,7 +40,7 @@ jobs: ...@@ -40,7 +40,7 @@ jobs:
- script: | - script: |
set -e set -e
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py \ python3 training_service/nnitest/generate_ts_config.py \
--ts frameworkcontroller \ --ts frameworkcontroller \
--keyvault_vaultname $(keyvault_vaultname) \ --keyvault_vaultname $(keyvault_vaultname) \
--keyvault_name $(keyvault_name) \ --keyvault_name $(keyvault_name) \
...@@ -48,13 +48,13 @@ jobs: ...@@ -48,13 +48,13 @@ jobs:
--azs_share $(azs_share) \ --azs_share $(azs_share) \
--nni_docker_image nnidev/nni-nightly \ --nni_docker_image nnidev/nni-nightly \
--nni_manager_ip $(manager_ip) --nni_manager_ip $(manager_ip)
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts frameworkcontroller --exclude multi-phase,multi-thread python3 training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts frameworkcontroller --exclude multi-phase,multi-thread
displayName: Integration test displayName: Integration test
- script: | - script: |
set -e set -e
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py \ python3 training_service/nnitest/generate_ts_config.py \
--ts frameworkcontroller \ --ts frameworkcontroller \
--keyvault_vaultname $(keyvault_vaultname) \ --keyvault_vaultname $(keyvault_vaultname) \
--keyvault_name $(keyvault_name) \ --keyvault_name $(keyvault_name) \
...@@ -64,5 +64,5 @@ jobs: ...@@ -64,5 +64,5 @@ jobs:
--nni_manager_ip $(manager_ip) \ --nni_manager_ip $(manager_ip) \
--reuse_mode True \ --reuse_mode True \
--config_version v2 --config_version v2
python3 nni_test/nnitest/run_tests.py --config config/integration_tests_config_v2.yml --ts frameworkcontroller --reuse_mode True --exclude multi-phase,multi-thread python3 training_service/nnitest/run_tests.py --config training_service/config/integration_tests_config_v2.yml --ts frameworkcontroller --reuse_mode True --exclude multi-phase,multi-thread
displayName: Integration test (reuse mode) displayName: Integration test (reuse mode)
...@@ -7,77 +7,56 @@ schedules: ...@@ -7,77 +7,56 @@ schedules:
jobs: jobs:
- job: hybrid - job: hybrid
pool: NNI CI REMOTE CLI pool: nni-it
timeoutInMinutes: 120 timeoutInMinutes: 120
steps: steps:
- script: | # FIXME: should use GPU here
export NNI_RELEASE=999.$(date -u +%Y%m%d%H%M%S)
echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
echo "##vso[task.setvariable variable=NNI_RELEASE]${NNI_RELEASE}"
echo "Working directory: ${PWD}"
echo "NNI version: ${NNI_RELEASE}"
python3 -m pip install --upgrade pip setuptools
displayName: Prepare
- script: | - template: templates/install-dependencies.yml
set -e parameters:
python3 test/vso_tools/install_nni.py $(NNI_RELEASE) SMAC,BOHB platform: ubuntu-latest
python_env: venv
cd examples/tuners/customized_tuner - template: templates/install-dependencies-aml.yml
python3 setup.py develop --user
nnictl algo register --meta meta_file.yml
displayName: Install NNI
- task: CopyFilesOverSSH@0 - template: templates/install-nni.yml
inputs: parameters:
sshEndpoint: $(worker) wheel: true
sourceFolder: dist extra_dep: SMAC,BOHB
targetFolder: /tmp/nnitest/$(Build.BuildId)/dist
overwrite: true
displayName: Copy wheel to remote machine
timeoutInMinutes: 10
- task: CopyFilesOverSSH@0 - template: templates/install-customized-tuner.yml
inputs:
sshEndpoint: $(worker)
contents: Dockerfile
targetFolder: /tmp/nnitest/$(Build.BuildId)
overwrite: true
displayName: Copy dockerfile to remote machine
timeoutInMinutes: 10
- task: CopyFilesOverSSH@0 # These parts are exactly the same as in the remote pipeline.
inputs: # Refactor these as templates when the refactor is done.
sshEndpoint: $(worker) - script: |
sourceFolder: test set -e
targetFolder: /tmp/nnitest/$(Build.BuildId)/test mkdir -p /tmp/nnitest/$(Build.BuildId)
overwrite: true cp Dockerfile /tmp/nnitest/$(Build.BuildId)
displayName: Copy test scripts to remote machine cp -r dist /tmp/nnitest/$(Build.BuildId)
timeoutInMinutes: 10 cp -r test /tmp/nnitest/$(Build.BuildId)
cp -r test/vso_tools/interim_patch.py /tmp/nnitest/$(Build.BuildId)
displayName: Prepare for docker
# Need del later - script: |
- task: CopyFilesOverSSH@0 set -e
inputs: export worker_ip=localhost
sshEndpoint: $(worker) export manager_ip=host.docker.internal
contents: test/vso_tools/interim_patch.py export password_in_docker=123456
targetFolder: /tmp/nnitest/$(Build.BuildId) echo "##vso[task.setvariable variable=worker_ip]${worker_ip}"
overwrite: true echo "##vso[task.setvariable variable=manager_ip]${manager_ip}"
displayName: Copy torch patch to remote machine echo "##vso[task.setvariable variable=password_in_docker]${password_in_docker}"
timeoutInMinutes: 10 displayName: Override environment variables
- task: SSH@0 - script: |
inputs: set -e
sshEndpoint: $(worker) python /tmp/nnitest/$(Build.BuildId)/test/vso_tools/start_docker.py --sudo $(NNI_RELEASE) $(Build.BuildId) $(password_in_docker)
runOptions: commands displayName: Run a worker in docker
commands: python3 /tmp/nnitest/$(Build.BuildId)/test/vso_tools/start_docker.py $(NNI_RELEASE) $(Build.BuildId) $(password_in_docker) # End of duplicated region
displayName: Install NNI and run docker on Linux worker
- script: | - script: |
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py \ python training_service/nnitest/generate_ts_config.py \
--ts hybrid \ --ts hybrid \
--remote_reuse true \ --remote_reuse true \
--remote_user nni \ --remote_user nni \
...@@ -90,13 +69,7 @@ jobs: ...@@ -90,13 +69,7 @@ jobs:
--workspace_name $(workspace_name) \ --workspace_name $(workspace_name) \
--compute_target $(compute_target) \ --compute_target $(compute_target) \
--config_version v2 --config_version v2
python3 nni_test/nnitest/run_tests.py --config config/integration_tests_config_v2.yml --ts hybrid python training_service/nnitest/run_tests.py --config training_service/config/integration_tests_config_v2.yml --ts hybrid
displayName: Integration test displayName: Integration test
- task: SSH@0 - template: templates/save-crashed-info.yml
inputs:
sshEndpoint: $(worker)
runOptions: commands
commands: python3 /tmp/nnitest/$(Build.BuildId)/test/vso_tools/stop_docker.py $(Build.BuildId)
condition: always()
displayName: Stop docker
...@@ -51,7 +51,7 @@ jobs: ...@@ -51,7 +51,7 @@ jobs:
set -e set -e
cd test cd test
az login --service-principal -u $(client_id) -p $(client_secret) --tenant $(tenant_id) az login --service-principal -u $(client_id) -p $(client_secret) --tenant $(tenant_id)
python3 nni_test/nnitest/generate_ts_config.py \ python3 training_service/nnitest/generate_ts_config.py \
--ts kubeflow \ --ts kubeflow \
--keyvault_vaultname $(keyvault_vaultname) \ --keyvault_vaultname $(keyvault_vaultname) \
--keyvault_name $(keyvault_name) \ --keyvault_name $(keyvault_name) \
...@@ -59,14 +59,14 @@ jobs: ...@@ -59,14 +59,14 @@ jobs:
--azs_share $(azs_share) \ --azs_share $(azs_share) \
--nni_docker_image nnidev/nni-nightly \ --nni_docker_image nnidev/nni-nightly \
--nni_manager_ip $(manager_ip) --nni_manager_ip $(manager_ip)
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts kubeflow --exclude multi-phase,multi-thread python3 training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts kubeflow --exclude multi-phase,multi-thread
displayName: Integration test displayName: Integration test
- script: | - script: |
set -e set -e
cd test cd test
az login --service-principal -u $(client_id) -p $(client_secret) --tenant $(tenant_id) az login --service-principal -u $(client_id) -p $(client_secret) --tenant $(tenant_id)
python3 nni_test/nnitest/generate_ts_config.py \ python3 training_service/nnitest/generate_ts_config.py \
--ts kubeflow \ --ts kubeflow \
--keyvault_vaultname $(keyvault_vaultname) \ --keyvault_vaultname $(keyvault_vaultname) \
--keyvault_name $(keyvault_name) \ --keyvault_name $(keyvault_name) \
...@@ -76,5 +76,5 @@ jobs: ...@@ -76,5 +76,5 @@ jobs:
--nni_manager_ip $(manager_ip) \ --nni_manager_ip $(manager_ip) \
--reuse_mode True \ --reuse_mode True \
--config_version v2 --config_version v2
python3 nni_test/nnitest/run_tests.py --config config/integration_tests_config_v2.yml --ts kubeflow --reuse_mode True --exclude multi-phase,multi-thread python3 training_service/nnitest/run_tests.py --config training_service/config/integration_tests_config_v2.yml --ts kubeflow --reuse_mode True --exclude multi-phase,multi-thread
displayName: Integration test (reuse mode) displayName: Integration test (reuse mode)
# Scheduled pipeline that runs the integration tests with the "local" training service on Linux.
# Push (CI) and pull-request triggers are both disabled; only the schedule below starts a run.
trigger: none
pr: none
# Cron fields: minute 0, hour 16, every day of every month; limited to the master branch.
schedules:
- cron: 0 16 * * *
branches:
include: [ master ]
jobs:
# Single job, executed on the "nni-it" agent pool and cancelled after 120 minutes.
- job: linux
pool: nni-it
timeoutInMinutes: 120
steps:
# Shared dependency-installation template, parameterized for the GPU Ubuntu platform and a venv.
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest-gpu
python_env: venv
# Shared templates; NOTE(review): presumably install NNI and register the example customized tuner -- confirm in the template files.
- template: templates/install-nni.yml
- template: templates/install-customized-tuner.yml
# Run the integration test runner from the test/ directory against the local training service.
- script: |
cd test
python training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts local
displayName: Integration test
# NOTE(review): template name suggests it collects diagnostics after the run -- confirm in the template file.
- template: templates/save-crashed-info.yml
# Scheduled pipeline that runs the "local" training service integration tests on Linux and Windows.
# Push (CI) and pull-request triggers are both disabled; only the schedule below starts a run.
trigger: none
pr: none
# Cron fields: minute 0, hour 16, every day of every month; limited to the master branch.
schedules:
- cron: 0 16 * * *
branches:
include: [ master ]
jobs:
# Linux job, executed on the "nni-it" agent pool with a 120-minute timeout.
- job: linux
pool: nni-it
timeoutInMinutes: 120
steps:
# Shared setup templates; config-version and install-dependencies both receive the GPU Ubuntu platform.
- template: templates/setup-non-ms-hosted-agent.yml
- template: templates/config-version.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest-gpu
- template: templates/install-nni.yml
# Install the example customized tuner in develop mode and register it with nnictl.
- script: |
set -e
cd examples/tuners/customized_tuner
python setup.py develop
nnictl algo register --meta meta_file.yml
displayName: Install customized tuner
# Run the integration test runner from the test/ directory against the local training service.
- script: |
cd test
python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local
displayName: Integration test
# Windows job, executed on the "NNI CI WINDOWS FULL TEST" agent pool with a 120-minute timeout.
- job: windows
pool: NNI CI WINDOWS FULL TEST
timeoutInMinutes: 120
steps:
# Install Python packages from the setup, develop and recommended requirement files.
- script: |
python -m pip install -U -r dependencies/setup.txt
python -m pip install -r dependencies/develop.txt
python -m pip install -r dependencies/recommended.txt
displayName: Install Python tools
# Remove any installed nni, build a win_amd64 wheel as version 999.0, and install it with the PPOTuner and DNGO extras.
- script: |
python -m pip uninstall nni --yes
set NNI_RELEASE=999.0
python setup.py build_ts
python setup.py bdist_wheel -p win_amd64
python -m pip install dist/nni-999.0-py3-none-win_amd64.whl[PPOTuner,DNGO]
displayName: Install NNI
# Install the example customized tuner in develop mode (per-user) and register it with nnictl.
- script: |
cd examples/tuners/customized_tuner
python setup.py develop --user
nnictl algo register --meta meta_file.yml
displayName: Install example customized tuner
# TODO: remove unit test here
# Unit tests run via pytest; continueOnError lets the job proceed to the integration test even if this step fails.
- script: |
cd test
python -m pytest ut
echo "TODO: TypeScript UT"
displayName: Unit test
continueOnError: true
# Run the integration test runner from the test/ directory against the local training service.
- script: |
cd test
python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local
displayName: Integration test
...@@ -56,7 +56,7 @@ jobs: ...@@ -56,7 +56,7 @@ jobs:
- script: | - script: |
set -e set -e
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py \ python3 training_service/nnitest/generate_ts_config.py \
--ts pai \ --ts pai \
--pai_reuse false \ --pai_reuse false \
--pai_host https://ne.openpai.org \ --pai_host https://ne.openpai.org \
...@@ -69,13 +69,13 @@ jobs: ...@@ -69,13 +69,13 @@ jobs:
--nni_manager_ip $(manager_ip) \ --nni_manager_ip $(manager_ip) \
--vc nni \ --vc nni \
--debug true --debug true
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai python3 training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts pai
displayName: Integration test displayName: Integration test
- script: | - script: |
set -e set -e
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py \ python3 training_service/nnitest/generate_ts_config.py \
--ts pai \ --ts pai \
--pai_reuse true \ --pai_reuse true \
--pai_host https://ne.openpai.org \ --pai_host https://ne.openpai.org \
...@@ -87,5 +87,5 @@ jobs: ...@@ -87,5 +87,5 @@ jobs:
--container_nfs_mount_path $(container_nfs_mount_path) \ --container_nfs_mount_path $(container_nfs_mount_path) \
--nni_manager_ip $(manager_ip) \ --nni_manager_ip $(manager_ip) \
--vc nni --vc nni
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai python3 training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts pai
displayName: Integration test (reuse mode) displayName: Integration test (reuse mode)
trigger: none trigger: none
pr: none pr: none
schedules: # schedules:
- cron: 0 16 * * * # - cron: 0 16 * * *
branches: # branches:
include: [ master ] # include: [ master ]
# variables set on VSO: (for security concern) # variables set on VSO: (for security concern)
# manager_ip # manager_ip
...@@ -12,77 +12,61 @@ schedules: ...@@ -12,77 +12,61 @@ schedules:
jobs: jobs:
- job: remote_linux2linux - job: remote_linux2linux
pool: NNI CI REMOTE CLI pool: nni-it
timeoutInMinutes: 140 timeoutInMinutes: 140
steps: steps:
- script: | # FIXME: GPU is not supported yet.
export NNI_RELEASE=999.$(date -u +%Y%m%d%H%M%S) # Change to ubuntu-latest-gpu when it's done.
echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
echo "##vso[task.setvariable variable=NNI_RELEASE]${NNI_RELEASE}"
echo "Working directory: ${PWD}"
echo "NNI version: ${NNI_RELEASE}"
python3 -m pip install --upgrade pip setuptools - template: templates/install-dependencies.yml
displayName: Prepare parameters:
platform: ubuntu-latest
python_env: venv
- script: | - template: templates/install-nni.yml
set -e parameters:
python3 test/vso_tools/install_nni.py $(NNI_RELEASE) SMAC,BOHB wheel: true
extra_dep: SMAC,BOHB
cd examples/tuners/customized_tuner - template: templates/install-customized-tuner.yml
python3 setup.py develop --user
nnictl algo register --meta meta_file.yml
displayName: Install NNI
- task: CopyFilesOverSSH@0 - script: |
inputs: sudo systemctl enable nfs-kernel-server
sshEndpoint: $(worker) displayName: Enable NFS service
sourceFolder: dist
targetFolder: /tmp/nnitest/$(Build.BuildId)/dist
overwrite: true
displayName: Copy wheel to remote machine
timeoutInMinutes: 10
- task: CopyFilesOverSSH@0
inputs:
sshEndpoint: $(worker)
contents: Dockerfile
targetFolder: /tmp/nnitest/$(Build.BuildId)
overwrite: true
displayName: Copy dockerfile to remote machine
timeoutInMinutes: 10
- task: CopyFilesOverSSH@0
inputs:
sshEndpoint: $(worker)
sourceFolder: test
targetFolder: /tmp/nnitest/$(Build.BuildId)/test
overwrite: true
displayName: Copy test scripts to remote machine
timeoutInMinutes: 10
# Need del later - script: |
- task: CopyFilesOverSSH@0 set -e
inputs: mkdir -p /tmp/nnitest/$(Build.BuildId)
sshEndpoint: $(worker) cp Dockerfile /tmp/nnitest/$(Build.BuildId)
contents: test/vso_tools/interim_patch.py cp -r dist /tmp/nnitest/$(Build.BuildId)
targetFolder: /tmp/nnitest/$(Build.BuildId) cp -r test /tmp/nnitest/$(Build.BuildId)
overwrite: true cp -r test/vso_tools/interim_patch.py /tmp/nnitest/$(Build.BuildId)
displayName: Copy torch patch to remote machine displayName: Prepare for docker
timeoutInMinutes: 10
# TODO: Delete variables on pipeline when the PR that introduced this is merged.
- script: |
set -e
export worker_ip=localhost
export manager_ip=host.docker.internal
export password_in_docker=123456
export NFS_IP=host.docker.internal
export Exported_Directory=/var/nfs/general
echo "##vso[task.setvariable variable=worker_ip]${worker_ip}"
echo "##vso[task.setvariable variable=manager_ip]${manager_ip}"
echo "##vso[task.setvariable variable=password_in_docker]${password_in_docker}"
echo "##vso[task.setvariable variable=NFS_IP]${NFS_IP}"
echo "##vso[task.setvariable variable=Exported_Directory]${Exported_Directory}"
displayName: Override environment variables
- task: SSH@0 - script: |
inputs: set -e
sshEndpoint: $(worker) python /tmp/nnitest/$(Build.BuildId)/test/vso_tools/start_docker.py --sudo $(NNI_RELEASE) $(Build.BuildId) $(password_in_docker)
runOptions: commands displayName: Run a worker in docker
commands: python3 /tmp/nnitest/$(Build.BuildId)/test/vso_tools/start_docker.py $(NNI_RELEASE) $(Build.BuildId) $(password_in_docker)
displayName: Install NNI and run docker on Linux worker
- script: | - script: |
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py \ python training_service/nnitest/generate_ts_config.py \
--ts remote \ --ts remote \
--remote_reuse true \ --remote_reuse true \
--remote_user nni \ --remote_user nni \
...@@ -95,12 +79,12 @@ jobs: ...@@ -95,12 +79,12 @@ jobs:
--local_mount_point $(LOCAL_MOUNT_POINT) \ --local_mount_point $(LOCAL_MOUNT_POINT) \
--remote_mount_point $(REMOTE_MOUNT_POINT) \ --remote_mount_point $(REMOTE_MOUNT_POINT) \
--exported_directory $(Exported_Directory) --exported_directory $(Exported_Directory)
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote python training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts remote
displayName: Integration test (reuse mode) displayName: Integration test (reuse mode)
- script: | - script: |
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py \ python training_service/nnitest/generate_ts_config.py \
--ts remote \ --ts remote \
--remote_reuse false \ --remote_reuse false \
--remote_user nni \ --remote_user nni \
...@@ -108,14 +92,9 @@ jobs: ...@@ -108,14 +92,9 @@ jobs:
--remote_port $(docker_port) \ --remote_port $(docker_port) \
--remote_pwd $(password_in_docker) \ --remote_pwd $(password_in_docker) \
--nni_manager_ip $(manager_ip) --nni_manager_ip $(manager_ip)
python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote python training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts remote
displayName: Integration test displayName: Integration test
- template: templates/save-crashed-info.yml
- task: SSH@0 parameters:
inputs: remote: true
sshEndpoint: $(worker)
runOptions: commands
commands: python3 /tmp/nnitest/$(Build.BuildId)/test/vso_tools/stop_docker.py $(Build.BuildId)
condition: always()
displayName: Stop docker
...@@ -33,5 +33,5 @@ jobs: ...@@ -33,5 +33,5 @@ jobs:
- script: | - script: |
set -e set -e
cd test cd test
python3 nni_test/nnitest/test_quantize_model_speedup.py python3 training_service/nnitest/test_quantize_model_speedup.py
displayName: Quantize model speedup test displayName: Quantize model speedup test
...@@ -5,9 +5,10 @@ Usage: ...@@ -5,9 +5,10 @@ Usage:
python start_docker.py <nni-version> <container-name> <password-in-docker> python start_docker.py <nni-version> <container-name> <password-in-docker>
""" """
import argparse
import os
import random import random
import socket import socket
import sys
from _common import build_wheel, run_command, set_variable from _common import build_wheel, run_command, set_variable
...@@ -20,14 +21,28 @@ while True: ...@@ -20,14 +21,28 @@ while True:
sock.close() sock.close()
port = random.randint(10000, 20000) port = random.randint(10000, 20000)
version = sys.argv[1] def main():
container = sys.argv[2] parser = argparse.ArgumentParser()
password = sys.argv[3] parser.add_argument('version', type=str)
parser.add_argument('container', type=str)
run_command(f'docker build --build-arg NNI_RELEASE={version} -t nnidev/nni-nightly .') parser.add_argument('password', type=str)
run_command(f'docker run --privileged -d -t -p {port}:22 --name {container} nnidev/nni-nightly') parser.add_argument('--sudo', default=False, action='store_true')
run_command(f'docker exec {container} useradd --create-home --password {password} nni')
run_command(['docker', 'exec', container, 'bash', '-c', f'echo "nni:{password}" | chpasswd']) args = parser.parse_args()
run_command(['docker', 'exec', container, 'bash', '-c', 'echo "nni ALL=(ALL:ALL) NOPASSWD:ALL" >> /etc/sudoers']) docker = 'sudo docker' if args.sudo else 'docker'
run_command(f'docker exec {container} service ssh start') version, container, password = args.version, args.container, args.password
set_variable('docker_port', port) uid, gid = os.getuid(), os.getgid()
run_command(f'{docker} build --build-arg NNI_RELEASE={version} -t nnidev/nni-nightly .')
run_command(f'{docker} run --privileged -d -t -p {port}:22 --add-host=host.docker.internal:host-gateway --name {container} nnidev/nni-nightly')
# The user inside docker must have the same uid and gid as outside.
# Otherwise NFS will have permission errors.
run_command(f'{docker} exec {container} groupadd -g {gid} nni')
run_command(f'{docker} exec {container} useradd --create-home --password {password} -u {uid} -g {gid} nni')
run_command(docker.split() + ['exec', container, 'bash', '-c', f'echo "nni:{password}" | chpasswd'])
run_command(docker.split() + ['exec', container, 'bash', '-c', 'echo "nni ALL=(ALL:ALL) NOPASSWD:ALL" >> /etc/sudoers'])
run_command(f'{docker} exec {container} service ssh start')
set_variable('docker_port', port)
if __name__ == '__main__':
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment