Unverified Commit d6e49c5f authored by chicm-ms, committed by GitHub

Integration tests refactoring (#625)

* Integration test refactoring (#21) (#616)

* Integration test refactoring (#21)

* Refactoring integration tests

* test metrics

* update azure pipeline

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* update trigger

* Integration test refactoring (#618)

* updates

* updates

* update pipeline (#619)

* update pipeline

* updates

* updates

* updates

* updates

* updates

* test pipeline (#623)

* test pipeline

* updates

* updates

* updates

* Update integration test (#624)

* Update integration test

* updates

* updates

* updates

* updates

* updates

* updates
parent 37074e33
trigger:
- dev-it
- master
- dev-remote-ci

jobs:
- job: 'Ubuntu_16_04'
  pool:
    vmImage: 'Ubuntu 16.04'
  strategy:
    matrix:
      Python36:
        PYTHON_VERSION: '3.6'
  pool: 'NNI CI GPU'
  steps:
  - script: python3 -m pip install --upgrade pip setuptools
  - script: python3 -m pip install --upgrade pip setuptools --user
    displayName: 'Install python tools'
  - script: |
      source install.sh
    displayName: 'Install nni toolkit via source code'
  - script: |
      python3 -m pip install scikit-learn==0.20.0 --user
      python3 -m pip install torch==0.4.1 --user
      python3 -m pip install torchvision==0.2.1 --user
      python3 -m pip install keras==2.1.6 --user
      python3 -m pip install tensorflow-gpu==1.10.0 --user
    displayName: 'Install dependencies for integration tests'
  - script: |
      cd test
      source unittest.sh
@@ -25,11 +27,19 @@ jobs:
  - script: |
      cd test
      PATH=$HOME/.local/bin:$PATH python3 naive_test.py
    displayName: 'Integration tests'
    displayName: 'Naive test'
  - script: |
      cd test
      PATH=$HOME/.local/bin:$PATH python3 tuner_test.py
    displayName: 'Built-in tuners / assessors tests'
  - script: |
      cd test
      PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts local
    displayName: 'Examples and advanced features tests on local machine'
  - script: |
      cd test
      PATH=$HOME/.local/bin:$PATH python3 sdk_test.py
    displayName: 'Built-in dispatcher tests'
      PATH=$HOME/.local/bin:$PATH python3 metrics_test.py
    displayName: 'Trial job metrics test'
- job: 'macOS_10_13'
  pool:
@@ -52,8 +62,8 @@ jobs:
  - script: |
      cd test
      PATH=$HOME/Library/Python/3.7/bin:$PATH python3 naive_test.py
    displayName: 'Integration tests'
    displayName: 'Naive test'
  - script: |
      cd test
      PATH=$HOME/Library/Python/3.7/bin:$PATH python3 sdk_test.py
    displayName: 'Built-in dispatcher tests'
\ No newline at end of file
      PATH=$HOME/Library/Python/3.7/bin:$PATH python3 tuner_test.py
    displayName: 'Built-in tuners / assessors tests'
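The new 'Trial job metrics test' step runs test/metrics_test.py. The snippet below is a rough, hypothetical sketch of the kind of check such a step performs; it is not the actual metrics_test.py. The REST endpoint mirrors the TRIAL_JOBS_URL constant imported from utils in config_test.py further down, and the 'status' and 'finalMetricData' field names are assumptions.

# Hypothetical sketch of a trial-metrics check (not the actual metrics_test.py).
import requests

TRIAL_JOBS_URL = 'http://localhost:8080/api/v1/nni/trial-jobs'   # assumed default NNI REST URL

def all_succeeded_trials_reported_metrics():
    trial_jobs = requests.get(TRIAL_JOBS_URL).json()
    succeeded = [t for t in trial_jobs if t.get('status') == 'SUCCEEDED']
    # Every succeeded trial should have reported a final metric.
    return bool(succeeded) and all('finalMetricData' in t for t in succeeded)

if __name__ == '__main__':
    assert all_succeeded_trials_reported_metrics(), 'some trials did not report metrics'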
'''Train CIFAR10 with PyTorch.'''
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
@@ -174,6 +174,10 @@ def test(epoch):
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=200)
    args, _ = parser.parse_known_args()
    try:
        RCV_CONFIG = nni.get_next_parameter()
        #RCV_CONFIG = {'lr': 0.1, 'optimizer': 'Adam', 'model':'senet18'}
@@ -182,7 +186,7 @@ if __name__ == '__main__':
        prepare(RCV_CONFIG)
        acc = 0.0
        best_acc = 0.0
        for epoch in range(start_epoch, start_epoch+200):
        for epoch in range(start_epoch, start_epoch+args.epochs):
            train(epoch)
            acc, best_acc = test(epoch)
            nni.report_intermediate_result(acc)
......
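The CIFAR10 change above switches to parser.parse_known_args() and threads --epochs into the training loop, so the test config further below can run a short experiment with "python3 main.py --epochs 2". A small illustration of the parse_known_args behaviour being relied on, namely that unrecognized flags are collected instead of raising an error (the extra flag name below is made up):

# Illustration of parse_known_args(): unknown flags are tolerated, which matters
# when the trial command is assembled with extra arguments the script does not define.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--epochs", type=int, default=200)
args, unknown = parser.parse_known_args(['--epochs', '2', '--some_platform_flag', 'x'])
assert args.epochs == 2
assert unknown == ['--some_platform_flag', 'x']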
"""A deep MNIST classifier using convolutional layers."""
import argparse
import logging
import math
import tempfile
@@ -180,7 +181,7 @@ def main(params):
    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        """@nni.variable(nni.choice(1, 4, 8, 16, 32), name=batch_size)"""
        """@nni.variable(nni.choice(16, 32), name=batch_size)"""
        batch_size = params['batch_size']
        for i in range(params['batch_num']):
            batch = mnist.train.next_batch(batch_size)
@@ -210,29 +211,27 @@ def main(params):
    logger.debug('Final result is %g', test_acc)
    logger.debug('Send final result done.')


def generate_default_params():
    '''
    Generate default parameters for mnist network.
    '''
    params = {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'dropout_rate': 0.5,
        'channel_1_num': 32,
        'channel_2_num': 64,
        'conv_size': 5,
        'pool_size': 2,
        'hidden_size': 1024,
        'learning_rate': 1e-4,
        'batch_num': 2000,
        'batch_size': 32}
    return params


def get_params():
    ''' Get parameters from command line '''
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
    parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")
    parser.add_argument("--channel_1_num", type=int, default=32)
    parser.add_argument("--channel_2_num", type=int, default=64)
    parser.add_argument("--conv_size", type=int, default=5)
    parser.add_argument("--pool_size", type=int, default=2)
    parser.add_argument("--hidden_size", type=int, default=1024)
    parser.add_argument("--learning_rate", type=float, default=1e-4)
    parser.add_argument("--batch_num", type=int, default=2000)
    parser.add_argument("--batch_size", type=int, default=32)
    args, _ = parser.parse_known_args()
    return args


if __name__ == '__main__':
    '''@nni.get_next_parameter()'''
    try:
        main(generate_default_params())
        main(vars(get_params()))
    except Exception as exception:
        logger.exception(exception)
        raise
"""A deep MNIST classifier using convolutional layers."""
import argparse
import logging
import math
import tempfile
@@ -198,33 +199,30 @@ def main(params):
    logger.debug('Final result is %g', test_acc)
    logger.debug('Send final result done.')


def generate_default_params():
    '''
    Generate default parameters for mnist network.
    '''
    params = {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'dropout_rate': 0.5,
        'channel_1_num': 32,
        'channel_2_num': 64,
        'conv_size': 5,
        'pool_size': 2,
        'hidden_size': 1024,
        'learning_rate': 1e-4,
        'batch_num': 2000,
        'batch_size': 32}
    return params


def get_params():
    ''' Get parameters from command line '''
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
    parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")
    parser.add_argument("--channel_1_num", type=int, default=32)
    parser.add_argument("--channel_2_num", type=int, default=64)
    parser.add_argument("--conv_size", type=int, default=5)
    parser.add_argument("--pool_size", type=int, default=2)
    parser.add_argument("--hidden_size", type=int, default=1024)
    parser.add_argument("--learning_rate", type=float, default=1e-4)
    parser.add_argument("--batch_num", type=int, default=2000)
    parser.add_argument("--batch_size", type=int, default=32)
    args, _ = parser.parse_known_args()
    return args


if __name__ == '__main__':
    try:
        # get parameters from tuner
        RCV_PARAMS = nni.get_next_parameter()
        logger.debug(RCV_PARAMS)
        # run
        params = generate_default_params()
        params.update(RCV_PARAMS)
        tuner_params = nni.get_next_parameter()
        logger.debug(tuner_params)
        params = vars(get_params())
        params.update(tuner_params)
        main(params)
    except Exception as exception:
        logger.exception(exception)
......
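The diff above replaces the hard-coded generate_default_params() with argparse defaults from get_params(), which the tuner's output then overrides via params.update(tuner_params). A tiny self-contained illustration of that merge pattern; the tuner_params values are made up, and the defaults are a subset of the ones defined in get_params():

# Argparse defaults are overridden by whatever the tuner sends; untouched keys survive.
params = {'batch_size': 32, 'learning_rate': 1e-4, 'batch_num': 2000}
tuner_params = {'batch_size': 16, 'learning_rate': 0.01}   # made-up example of nni.get_next_parameter() output
params.update(tuner_params)
assert params == {'batch_size': 16, 'learning_rate': 0.01, 'batch_num': 2000}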
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import argparse
import glob
import subprocess
import time
import traceback

from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, \
    parse_max_duration_time, get_succeeded_trial_num, print_stderr
from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL


def gen_new_config(config_file, training_service='local'):
    '''
    Generate a temporary config file for the integration test;
    the file should be deleted after testing.
    '''
    config = get_yml_content(config_file)
    new_config_file = config_file + '.tmp'

    ts = get_yml_content('training_service.yml')[training_service]
    print(config)
    print(ts)
    config.update(ts)
    print(config)
    dump_yml_content(new_config_file, config)

    return new_config_file, config


def run_test(config_file, training_service, local_gpu=False):
    '''Run the test for one configuration file.'''
    new_config_file, config = gen_new_config(config_file, training_service)

    if training_service == 'local' and not local_gpu and config['trial']['gpuNum'] > 0:
        print('no gpu, skipping: ', config_file)
        return

    try:
        print('Testing %s...' % config_file)
        proc = subprocess.run(['nnictl', 'create', '--config', new_config_file])
        assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

        max_duration, max_trial_num = get_max_values(config_file)
        sleep_interval = 3

        for _ in range(0, max_duration + 30, sleep_interval):
            time.sleep(sleep_interval)
            status = get_experiment_status(STATUS_URL)
            if status == 'DONE':
                num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
                if training_service == 'local':
                    print_stderr(TRIAL_JOBS_URL)
                assert num_succeeded == max_trial_num, 'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
                break

        assert status == 'DONE', 'Failed to finish in maxExecDuration'
    finally:
        if os.path.exists(new_config_file):
            os.remove(new_config_file)


def get_max_values(config_file):
    '''Get maxExecDuration and maxTrialNum of the experiment.'''
    experiment_config = get_yml_content(config_file)
    return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum']


def run(args):
    '''Test all configuration files.'''
    if args.config is None:
        config_files = glob.glob('./config_test/**/*.test.yml')
    else:
        config_files = args.config.split(',')
    print(config_files)

    for config_file in config_files:
        try:
            # sleep 5 seconds here, to give the previously stopped experiment
            # enough time to exit and avoid port conflicts
            time.sleep(5)
            run_test(config_file, args.ts, args.local_gpu)
            print(GREEN + 'Test %s: TEST PASS' % (config_file) + CLEAR)
        except Exception as error:
            print(RED + 'Test %s: TEST FAIL' % (config_file) + CLEAR)
            print('%r' % error)
            traceback.print_exc()
            raise error
        finally:
            subprocess.run(['nnictl', 'stop'])


if __name__ == '__main__':
    import tensorflow as tf
    print('TF VERSION:', tf.__version__)

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default=None)
    parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai'], default='local')
    parser.add_argument("--local_gpu", action='store_true')
    parser.add_argument("--preinstall", action='store_true')
    args = parser.parse_args()

    setup_experiment(args.preinstall)

    run(args)
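For reference, a usage sketch of gen_new_config() from the script above. The config path is hypothetical (anything matched by ./config_test/**/*.test.yml would do), and it assumes the working directory is test/ so that training_service.yml resolves:

# Hypothetical usage of gen_new_config(); the path below is an example, not a file
# guaranteed to exist in the repository.
new_config_file, merged_config = gen_new_config('config_test/examples/mnist.test.yml', 'local')
print(new_config_file)        # the original path with a '.tmp' suffix
print(merged_config.keys())   # experiment config merged with the 'local' section of training_service.yml
os.remove(new_config_file)    # run_test() performs this cleanup in its finally block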
authorName: nni
experimentName: default_test
maxExecDuration: 15m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ./cifar10_search_space.json
tuner:
  builtinTunerName: Random
  classArgs:
    optimize_mode: maximize
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
trial:
  codeDir: ../../../examples/trials/cifar10_pytorch
  command: python3 main.py --epochs 2
  gpuNum: 1
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
{
    "lr": {"_type": "choice", "_value": [0.1, 0.01, 0.001, 0.0001]},
    "optimizer": {"_type": "choice", "_value": ["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"]},
    "model": {"_type": "choice", "_value": ["vgg", "resnet18"]}
}
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 2
trialConcurrency: 1
tuner:
  builtinTunerName: Random
  classArgs:
    optimize_mode: maximize
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
trial:
  codeDir: ../../../examples/trials/mnist-annotation
  command: python3 mnist.py --batch_num 100
  gpuNum: 0
useAnnotation: true
multiPhase: false
multiThread: false
trainingServicePlatform: local
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ../../../examples/trials/mnist-keras/search_space.json
tuner:
  builtinTunerName: Random
  classArgs:
    optimize_mode: maximize
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
trial:
  codeDir: ../../../examples/trials/mnist-keras
  command: python3 mnist-keras.py --num_train 200 --epochs 1
  gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 2
trialConcurrency: 1
searchSpacePath: ./mnist_search_space.json
tuner:
  builtinTunerName: Random
  classArgs:
    optimize_mode: maximize
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
trial:
  codeDir: ../../../examples/trials/mnist
  command: python3 mnist.py --batch_num 100
  gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
{
    "dropout_rate": {"_type": "uniform", "_value": [0.5, 0.9]},
    "conv_size": {"_type": "choice", "_value": [2, 3, 5, 7]},
    "hidden_size": {"_type": "choice", "_value": [124, 512, 1024]},
    "batch_size": {"_type": "choice", "_value": [16, 32]},
    "learning_rate": {"_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1]}
}
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ../../../examples/trials/sklearn/classification/search_space.json
tuner:
  builtinTunerName: Random
  classArgs:
    optimize_mode: maximize
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
trial:
  codeDir: ../../../examples/trials/sklearn/classification
  command: python3 main.py
  gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ../../../examples/trials/sklearn/regression/search_space.json
tuner:
  builtinTunerName: Random
  classArgs:
    optimize_mode: maximize
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
trial:
  codeDir: ../../../examples/trials/sklearn/regression
  command: python3 main.py
  gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
import time

import nni

if __name__ == '__main__':
    # One trial job runs several phases: each iteration asks the tuner for a new
    # configuration and reports a final result (requires multiPhase: true in the config).
    for i in range(5):
        hyper_params = nni.get_next_parameter()
        nni.report_final_result(0.1 * i)
        time.sleep(3)
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 2
trialConcurrency: 2
searchSpacePath: ./search_space.json
tuner:
  codeDir: ../../../src/sdk/pynni/tests
  classFileName: test_multi_phase_tuner.py
  className: NaiveMultiPhaseTuner
trial:
  codeDir: .
  command: python3 multi_phase.py
  gpuNum: 0
useAnnotation: false
multiPhase: true
multiThread: false
trainingServicePlatform: local
{
    "test": {
        "_type": "choice",
        "_value": [1, 100]
    }
}
\ No newline at end of file
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ./search_space.json
tuner:
  codeDir: .
  classFileName: multi_thread_tuner.py
  className: MultiThreadTuner
trial:
  codeDir: .
  command: python3 multi_thread_trial.py
  gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: true
trainingServicePlatform: local
import nni
import time

if __name__ == '__main__':
    nni.get_next_parameter()
    time.sleep(3)
    nni.report_final_result(0.5)
import time

from nni.tuner import Tuner


class MultiThreadTuner(Tuner):
    def __init__(self):
        self.parent_done = False

    def generate_parameters(self, parameter_id):
        if parameter_id == 0:
            return {'x': 0}
        else:
            # Block until the first trial (parameter_id 0) has reported its result;
            # this only completes when the dispatcher calls the tuner from multiple
            # threads, i.e. when multiThread: true is set in the experiment config.
            while not self.parent_done:
                time.sleep(2)
            return {'x': 1}

    def receive_trial_result(self, parameter_id, parameters, value):
        if parameter_id == 0:
            self.parent_done = True

    def update_search_space(self, search_space):
        pass
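A quick illustration (not part of the test files) of the behaviour MultiThreadTuner relies on: generate_parameters for any later parameter_id blocks until the first trial's result has arrived, which is why the experiment config sets multiThread: true so the dispatcher can call the tuner from more than one thread.

# Standalone usage sketch of the class defined above.
tuner = MultiThreadTuner()
print(tuner.generate_parameters(0))            # {'x': 0} is returned immediately
tuner.receive_trial_result(0, {'x': 0}, 0.5)   # the first trial reports its result
print(tuner.generate_parameters(1))            # now returns {'x': 1} without blocking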
{
    "test": {
        "_type": "choice",
        "_value": [1, 100]
    }
}
\ No newline at end of file