"vscode:/vscode.git/clone" did not exist on "88ef6c0492047a0fa5bdb7493d3bfd43b64605f6"
Unverified Commit d6e49c5f authored by chicm-ms's avatar chicm-ms Committed by GitHub
Browse files

Integration tests refactoring (#625)

* Integration test refactoring (#21) (#616)

* Integration test refactoring (#21)

* Refactoring integration tests

* test metrics

* update azure pipeline

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* update trigger

* Integration test refactoring (#618)

* updates

* updates

* update pipeline (#619)

* update pipeline

* updates

* updates

* updates

* updates

* updates

* test pipeline (#623)

* test pipeline

* updates

* updates

* updates

* Update integration test (#624)

* Update integration test

* updates

* updates

* updates

* updates

* updates

* updates
parent 37074e33
trigger: trigger:
- dev-it
- master - master
- dev-remote-ci - dev-remote-ci
jobs: jobs:
- job: 'Ubuntu_16_04' - job: 'Ubuntu_16_04'
pool: pool: 'NNI CI GPU'
vmImage: 'Ubuntu 16.04'
strategy:
matrix:
Python36:
PYTHON_VERSION: '3.6'
steps: steps:
- script: python3 -m pip install --upgrade pip setuptools - script: python3 -m pip install --upgrade pip setuptools --user
displayName: 'Install python tools' displayName: 'Install python tools'
- script: | - script: |
source install.sh source install.sh
displayName: 'Install nni toolkit via source code' displayName: 'Install nni toolkit via source code'
- script: |
python3 -m pip install scikit-learn==0.20.0 --user
python3 -m pip install torch==0.4.1 --user
python3 -m pip install torchvision==0.2.1 --user
python3 -m pip install keras==2.1.6 --user
python3 -m pip install tensorflow-gpu==1.10.0 --user
displayName: 'Install dependencies for integration tests'
- script: | - script: |
cd test cd test
source unittest.sh source unittest.sh
...@@ -25,11 +27,19 @@ jobs: ...@@ -25,11 +27,19 @@ jobs:
- script: | - script: |
cd test cd test
PATH=$HOME/.local/bin:$PATH python3 naive_test.py PATH=$HOME/.local/bin:$PATH python3 naive_test.py
displayName: 'Integration tests' displayName: 'Naive test'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 tuner_test.py
displayName: 'Built-in tuners / assessors tests'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts local
displayName: 'Examples and advanced features tests on local machine'
- script: | - script: |
cd test cd test
PATH=$HOME/.local/bin:$PATH python3 sdk_test.py PATH=$HOME/.local/bin:$PATH python3 metrics_test.py
displayName: 'Built-in dispatcher tests' displayName: 'Trial job metrics test'
- job: 'macOS_10_13' - job: 'macOS_10_13'
pool: pool:
...@@ -52,8 +62,8 @@ jobs: ...@@ -52,8 +62,8 @@ jobs:
- script: | - script: |
cd test cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 naive_test.py PATH=$HOME/Library/Python/3.7/bin:$PATH python3 naive_test.py
displayName: 'Integration tests' displayName: 'Naive test'
- script: | - script: |
cd test cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 sdk_test.py PATH=$HOME/Library/Python/3.7/bin:$PATH python3 tuner_test.py
displayName: 'Built-in dispatcher tests' displayName: 'Built-in tuners / assessors tests'
\ No newline at end of file
'''Train CIFAR10 with PyTorch.''' '''Train CIFAR10 with PyTorch.'''
from __future__ import print_function from __future__ import print_function
import argparse
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
...@@ -174,6 +174,10 @@ def test(epoch): ...@@ -174,6 +174,10 @@ def test(epoch):
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--epochs", type=int, default=200)
args, _ = parser.parse_known_args()
try: try:
RCV_CONFIG = nni.get_next_parameter() RCV_CONFIG = nni.get_next_parameter()
#RCV_CONFIG = {'lr': 0.1, 'optimizer': 'Adam', 'model':'senet18'} #RCV_CONFIG = {'lr': 0.1, 'optimizer': 'Adam', 'model':'senet18'}
...@@ -182,7 +186,7 @@ if __name__ == '__main__': ...@@ -182,7 +186,7 @@ if __name__ == '__main__':
prepare(RCV_CONFIG) prepare(RCV_CONFIG)
acc = 0.0 acc = 0.0
best_acc = 0.0 best_acc = 0.0
for epoch in range(start_epoch, start_epoch+200): for epoch in range(start_epoch, start_epoch+args.epochs):
train(epoch) train(epoch)
acc, best_acc = test(epoch) acc, best_acc = test(epoch)
nni.report_intermediate_result(acc) nni.report_intermediate_result(acc)
......
"""A deep MNIST classifier using convolutional layers.""" """A deep MNIST classifier using convolutional layers."""
import argparse
import logging import logging
import math import math
import tempfile import tempfile
...@@ -180,7 +181,7 @@ def main(params): ...@@ -180,7 +181,7 @@ def main(params):
test_acc = 0.0 test_acc = 0.0
with tf.Session() as sess: with tf.Session() as sess:
sess.run(tf.global_variables_initializer()) sess.run(tf.global_variables_initializer())
"""@nni.variable(nni.choice(1, 4, 8, 16, 32), name=batch_size)""" """@nni.variable(nni.choice(16, 32), name=batch_size)"""
batch_size = params['batch_size'] batch_size = params['batch_size']
for i in range(params['batch_num']): for i in range(params['batch_num']):
batch = mnist.train.next_batch(batch_size) batch = mnist.train.next_batch(batch_size)
...@@ -210,29 +211,27 @@ def main(params): ...@@ -210,29 +211,27 @@ def main(params):
logger.debug('Final result is %g', test_acc) logger.debug('Final result is %g', test_acc)
logger.debug('Send final result done.') logger.debug('Send final result done.')
def get_params():
def generate_default_params(): ''' Get parameters from command line '''
''' parser = argparse.ArgumentParser()
Generate default parameters for mnist network. parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
''' parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")
params = { parser.add_argument("--channel_1_num", type=int, default=32)
'data_dir': '/tmp/tensorflow/mnist/input_data', parser.add_argument("--channel_2_num", type=int, default=64)
'dropout_rate': 0.5, parser.add_argument("--conv_size", type=int, default=5)
'channel_1_num': 32, parser.add_argument("--pool_size", type=int, default=2)
'channel_2_num': 64, parser.add_argument("--hidden_size", type=int, default=1024)
'conv_size': 5, parser.add_argument("--learning_rate", type=float, default=1e-4)
'pool_size': 2, parser.add_argument("--batch_num", type=int, default=2000)
'hidden_size': 1024, parser.add_argument("--batch_size", type=int, default=32)
'learning_rate': 1e-4,
'batch_num': 2000, args, _ = parser.parse_known_args()
'batch_size': 32} return args
return params
if __name__ == '__main__': if __name__ == '__main__':
'''@nni.get_next_parameter()''' '''@nni.get_next_parameter()'''
try: try:
main(generate_default_params()) main(vars(get_params()))
except Exception as exception: except Exception as exception:
logger.exception(exception) logger.exception(exception)
raise raise
"""A deep MNIST classifier using convolutional layers.""" """A deep MNIST classifier using convolutional layers."""
import argparse
import logging import logging
import math import math
import tempfile import tempfile
...@@ -198,33 +199,30 @@ def main(params): ...@@ -198,33 +199,30 @@ def main(params):
logger.debug('Final result is %g', test_acc) logger.debug('Final result is %g', test_acc)
logger.debug('Send final result done.') logger.debug('Send final result done.')
def get_params():
def generate_default_params(): ''' Get parameters from command line '''
''' parser = argparse.ArgumentParser()
Generate default parameters for mnist network. parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
''' parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")
params = { parser.add_argument("--channel_1_num", type=int, default=32)
'data_dir': '/tmp/tensorflow/mnist/input_data', parser.add_argument("--channel_2_num", type=int, default=64)
'dropout_rate': 0.5, parser.add_argument("--conv_size", type=int, default=5)
'channel_1_num': 32, parser.add_argument("--pool_size", type=int, default=2)
'channel_2_num': 64, parser.add_argument("--hidden_size", type=int, default=1024)
'conv_size': 5, parser.add_argument("--learning_rate", type=float, default=1e-4)
'pool_size': 2, parser.add_argument("--batch_num", type=int, default=2000)
'hidden_size': 1024, parser.add_argument("--batch_size", type=int, default=32)
'learning_rate': 1e-4,
'batch_num': 2000, args, _ = parser.parse_known_args()
'batch_size': 32} return args
return params
if __name__ == '__main__': if __name__ == '__main__':
try: try:
# get parameters form tuner # get parameters form tuner
RCV_PARAMS = nni.get_next_parameter() tuner_params = nni.get_next_parameter()
logger.debug(RCV_PARAMS) logger.debug(tuner_params)
# run params = vars(get_params())
params = generate_default_params() params.update(tuner_params)
params.update(RCV_PARAMS)
main(params) main(params)
except Exception as exception: except Exception as exception:
logger.exception(exception) logger.exception(exception)
......
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import argparse
import glob
import subprocess
import time
import traceback
from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, \
parse_max_duration_time, get_succeeded_trial_num, print_stderr
from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL
def gen_new_config(config_file, training_service='local'):
    '''Build a temporary experiment config for an integration test.

    Merges the training-service section from training_service.yml into the
    given experiment config and writes the result next to the original with
    a ``.tmp`` suffix. The caller is responsible for deleting that file.

    Returns (path_to_temp_file, merged_config_dict).
    '''
    merged = get_yml_content(config_file)
    tmp_path = config_file + '.tmp'

    service_section = get_yml_content('training_service.yml')[training_service]
    # Echo inputs and the merged result so CI logs show exactly what ran.
    print(merged)
    print(service_section)
    merged.update(service_section)
    print(merged)

    dump_yml_content(tmp_path, merged)
    return tmp_path, merged
def run_test(config_file, training_service, local_gpu=False):
    '''Run one integration test described by a configuration file.

    Starts an experiment with ``nnictl create``, polls its status until DONE
    (or maxExecDuration + 30s grace), and asserts that every trial succeeded.
    GPU-only configs are skipped on a local machine without a GPU.

    Fix: the temporary config file produced by gen_new_config() is now
    removed on every exit path — previously it leaked when the test was
    skipped, because the cleanup ``finally`` began only after the early
    return. (Also corrects the "skiping" typo in the skip message.)
    '''
    new_config_file, config = gen_new_config(config_file, training_service)
    try:
        if training_service == 'local' and not local_gpu and config['trial']['gpuNum'] > 0:
            print('no gpu, skipping: ', config_file)
            return
        print('Testing %s...' % config_file)
        proc = subprocess.run(['nnictl', 'create', '--config', new_config_file])
        assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

        max_duration, max_trial_num = get_max_values(config_file)
        sleep_interval = 3

        # Poll until the experiment reports DONE, allowing a 30s grace period
        # beyond the configured maxExecDuration.
        for _ in range(0, max_duration + 30, sleep_interval):
            time.sleep(sleep_interval)
            status = get_experiment_status(STATUS_URL)
            if status == 'DONE':
                num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
                if training_service == 'local':
                    print_stderr(TRIAL_JOBS_URL)
                assert num_succeeded == max_trial_num, \
                    'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
                break
        assert status == 'DONE', 'Failed to finish in maxExecDuration'
    finally:
        if os.path.exists(new_config_file):
            os.remove(new_config_file)
def get_max_values(config_file):
    '''Return (maxExecDuration in seconds, maxTrialNum) from an experiment config.'''
    cfg = get_yml_content(config_file)
    duration_seconds = parse_max_duration_time(cfg['maxExecDuration'])
    return duration_seconds, cfg['maxTrialNum']
def run(args):
    '''Run every selected configuration file through run_test.

    Without --config, all ``*.test.yml`` files under ./config_test are run;
    otherwise the comma-separated list given on the command line is used.
    The experiment is always stopped after each test, pass or fail.
    '''
    config_files = (glob.glob('./config_test/**/*.test.yml')
                    if args.config is None
                    else args.config.split(','))
    print(config_files)

    for config_file in config_files:
        try:
            # Give the previously stopped experiment time to fully exit,
            # so its port is free for the next one.
            time.sleep(5)
            run_test(config_file, args.ts, args.local_gpu)
            print(GREEN + 'Test %s: TEST PASS' % (config_file) + CLEAR)
        except Exception as error:
            print(RED + 'Test %s: TEST FAIL' % (config_file) + CLEAR)
            print('%r' % error)
            traceback.print_exc()
            raise error
        finally:
            subprocess.run(['nnictl', 'stop'])
if __name__ == '__main__':
    # Print the TF version up front so CI logs record the environment.
    import tensorflow as tf
    print('TF VERSION:', tf.__version__)

    cli = argparse.ArgumentParser()
    cli.add_argument("--config", type=str, default=None)
    cli.add_argument("--ts", type=str, choices=['local', 'remote', 'pai'], default='local')
    cli.add_argument("--local_gpu", action='store_true')
    cli.add_argument("--preinstall", action='store_true')
    args = cli.parse_args()

    setup_experiment(args.preinstall)
    run(args)
authorName: nni
experimentName: default_test
maxExecDuration: 15m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ./cifar10_search_space.json
tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
assessor:
builtinAssessorName: Medianstop
classArgs:
optimize_mode: maximize
trial:
codeDir: ../../../examples/trials/cifar10_pytorch
command: python3 main.py --epochs 2
gpuNum: 1
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
{
"lr":{"_type":"choice", "_value":[0.1, 0.01, 0.001, 0.0001]},
"optimizer":{"_type":"choice", "_value":["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"]},
"model":{"_type":"choice", "_value":["vgg", "resnet18"]}
}
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 2
trialConcurrency: 1
tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
assessor:
builtinAssessorName: Medianstop
classArgs:
optimize_mode: maximize
trial:
codeDir: ../../../examples/trials/mnist-annotation
command: python3 mnist.py --batch_num 100
gpuNum: 0
useAnnotation: true
multiPhase: false
multiThread: false
trainingServicePlatform: local
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ../../../examples/trials/mnist-keras/search_space.json
tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
assessor:
builtinAssessorName: Medianstop
classArgs:
optimize_mode: maximize
trial:
codeDir: ../../../examples/trials/mnist-keras
command: python3 mnist-keras.py --num_train 200 --epochs 1
gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 2
trialConcurrency: 1
searchSpacePath: ./mnist_search_space.json
tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
assessor:
builtinAssessorName: Medianstop
classArgs:
optimize_mode: maximize
trial:
codeDir: ../../../examples/trials/mnist
command: python3 mnist.py --batch_num 100
gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
{
"dropout_rate":{"_type":"uniform","_value":[0.5, 0.9]},
"conv_size":{"_type":"choice","_value":[2,3,5,7]},
"hidden_size":{"_type":"choice","_value":[124, 512, 1024]},
"batch_size": {"_type":"choice", "_value": [16, 32]},
"learning_rate":{"_type":"choice","_value":[0.0001, 0.001, 0.01, 0.1]}
}
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ../../../examples/trials/sklearn/classification/search_space.json
tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
assessor:
builtinAssessorName: Medianstop
classArgs:
optimize_mode: maximize
trial:
codeDir: ../../../examples/trials/sklearn/classification
command: python3 main.py
gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ../../../examples/trials/sklearn/regression/search_space.json
tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
assessor:
builtinAssessorName: Medianstop
classArgs:
optimize_mode: maximize
trial:
codeDir: ../../../examples/trials/sklearn/regression
command: python3 main.py
gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
import time

import nni

if __name__ == '__main__':
    # Minimal multi-phase trial: request parameters and report a
    # deterministic metric five times in a row.
    for phase in range(5):
        nni.get_next_parameter()
        nni.report_final_result(0.1 * phase)
        time.sleep(3)
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 2
trialConcurrency: 2
searchSpacePath: ./search_space.json
tuner:
codeDir: ../../../src/sdk/pynni/tests
classFileName: test_multi_phase_tuner.py
className: NaiveMultiPhaseTuner
trial:
codeDir: .
command: python3 multi_phase.py
gpuNum: 0
useAnnotation: false
multiPhase: true
multiThread: false
trainingServicePlatform: local
{
"test":
{
"_type" : "choice",
"_value" : [1, 100]
}
}
\ No newline at end of file
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ./search_space.json
tuner:
codeDir: .
classFileName: multi_thread_tuner.py
className: MultiThreadTuner
trial:
codeDir: .
command: python3 multi_thread_trial.py
gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: true
trainingServicePlatform: local
import time

import nni

if __name__ == '__main__':
    # Minimal trial for the multiThread test: fetch parameters, idle
    # briefly, then report a fixed final metric.
    nni.get_next_parameter()
    time.sleep(3)
    nni.report_final_result(0.5)
import time
from nni.tuner import Tuner
class MultiThreadTuner(Tuner):
    '''Tuner used by the multiThread integration test.

    The first parameter request (id 0) is answered immediately; every later
    request blocks until the result for id 0 arrives. This only terminates
    when the dispatcher services requests on multiple threads.
    '''

    def __init__(self):
        # Set to True once the parent trial (parameter id 0) reports back.
        self.parent_done = False

    def generate_parameters(self, parameter_id):
        if parameter_id != 0:
            # Poll until the parent trial has finished.
            while not self.parent_done:
                time.sleep(2)
            return {'x': 1}
        return {'x': 0}

    def receive_trial_result(self, parameter_id, parameters, value):
        if parameter_id == 0:
            self.parent_done = True

    def update_search_space(self, search_space):
        pass
{
"test":
{
"_type" : "choice",
"_value" : [1, 100]
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment