Unverified Commit d6e49c5f authored by chicm-ms's avatar chicm-ms Committed by GitHub
Browse files

Integration tests refactoring (#625)

* Integration test refactoring (#21) (#616)

* Integration test refactoring (#21)

* Refactoring integration tests

* test metrics

* update azure pipeline

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* update trigger

* Integration test refactoring (#618)

* updates

* updates

* update pipeline (#619)

* update pipeline

* updates

* updates

* updates

* updates

* updates

* test pipeline (#623)

* test pipeline

* updates

* updates

* updates

* Update integration test (#624)

* Update integration test

* updates

* updates

* updates

* updates

* updates

* updates
parent 37074e33
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import subprocess
import time
import traceback
import json
import requests
from utils import get_experiment_status, get_yml_content, parse_max_duration_time, get_succeeded_trial_num
from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, METRICS_URL
def run_test():
    '''Run the metrics integration test.

    Starts an experiment via `nnictl create`, polls the REST API until the
    experiment reports DONE (or maxExecDuration elapses), then verifies that
    all trials succeeded and that the reported metrics match the expected ones.

    Raises:
        AssertionError: if `nnictl create` fails, the experiment does not
            finish in time, or the succeeded-trial count is wrong.
    '''
    config_file = 'metrics_test/metrics.test.yml'
    print('Testing %s...' % config_file)
    proc = subprocess.run(['nnictl', 'create', '--config', config_file])
    assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

    max_duration, max_trial_num = get_max_values(config_file)
    sleep_interval = 3

    # Initialize status so the final assert cannot raise NameError when
    # max_duration < sleep_interval leaves the polling loop body unexecuted.
    status = ''
    for _ in range(0, max_duration, sleep_interval):
        time.sleep(sleep_interval)
        status = get_experiment_status(STATUS_URL)
        if status == 'DONE':
            num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
            assert num_succeeded == max_trial_num, 'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
            check_metrics()
            break
    assert status == 'DONE', 'Failed to finish in maxExecDuration'
def check_metrics():
    '''Fetch reported metrics from the REST API and compare them against
    metrics_test/expected_metrics.json.

    Raises AssertionError on any mismatch.
    '''
    with open('metrics_test/expected_metrics.json') as fp:
        expected = json.load(fp)
    print(expected)

    reported = requests.get(METRICS_URL).json()
    intermediate, final = get_metric_results(reported)

    assert len(final) == 1, 'there should be 1 final result'
    assert final[0] == expected['final_result']
    # Order of intermediate results is not guaranteed, so compare as sets.
    assert set(intermediate) == set(expected['intermediate_result'])
def get_metric_results(metrics):
    '''Split raw metric records into intermediate and final results.

    Args:
        metrics: list of dicts, each with a 'type' ('PERIODICAL' or 'FINAL')
            and a 'data' value convertible to float.

    Returns:
        A pair (intermediate, final) of lists of floats rounded to 6 places.
    '''
    periodical = [record['data'] for record in metrics if record['type'] == 'PERIODICAL']
    final = [record['data'] for record in metrics if record['type'] == 'FINAL']
    print(periodical, final)
    return [round(float(v), 6) for v in periodical], [round(float(v), 6) for v in final]
def get_max_values(config_file):
    '''Read maxExecDuration (in seconds) and maxTrialNum from an experiment YAML file.

    Returns a pair (max_duration_seconds, max_trial_num).
    '''
    content = get_yml_content(config_file)
    duration_seconds = parse_max_duration_time(content['maxExecDuration'])
    return duration_seconds, content['maxTrialNum']
if __name__ == '__main__':
    try:
        # Wait for any previously stopped experiment to fully exit,
        # avoiding a port conflict on the REST endpoint.
        time.sleep(5)
        run_test()
        print(GREEN + 'TEST PASS' + CLEAR)
    except Exception as err:
        print(RED + 'TEST FAIL' + CLEAR)
        print('%r' % err)
        traceback.print_exc()
        raise err
    finally:
        # Always stop the experiment, even on failure, so later tests start clean.
        subprocess.run(['nnictl', 'stop'])
{
"intermediate_result": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
"final_result": 1.0
}
\ No newline at end of file
authorName: nni
experimentName: default_test
maxExecDuration: 3m
maxTrialNum: 1
trialConcurrency: 1
searchSpacePath: ./search_space.json
tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
trial:
codeDir: .
command: python3 trial.py
gpuNum: 0
useAnnotation: false
multiPhase: false
multiThread: false
trainingServicePlatform: local
{
"test":
{
"_type" : "choice",
"_value" : [1, 100]
}
}
\ No newline at end of file
import time
import nni
if __name__ == '__main__':
    # The parameter value itself is unused; the call registers this
    # trial with the NNI manager and pulls a configuration from the tuner.
    nni.get_next_parameter()
    for step in range(1, 11):
        # Report 0.1, 0.2, ..., 1.0 as intermediate results.
        nni.report_intermediate_result(0.1 * step)
        time.sleep(2)
    nni.report_final_result(1.0)
...@@ -24,13 +24,8 @@ import sys ...@@ -24,13 +24,8 @@ import sys
import time import time
import traceback import traceback
from utils import check_experiment_status, fetch_nni_log_path, read_last_line, remove_files, setup_experiment from utils import is_experiment_done, fetch_nni_log_path, read_last_line, remove_files, setup_experiment
from utils import GREEN, RED, CLEAR, EXPERIMENT_URL
GREEN = '\33[32m'
RED = '\33[31m'
CLEAR = '\33[0m'
EXPERIMENT_URL = 'http://localhost:8080/api/v1/nni/experiment'
def run(): def run():
'''run naive integration test''' '''run naive integration test'''
...@@ -51,7 +46,7 @@ def run(): ...@@ -51,7 +46,7 @@ def run():
tuner_status = read_last_line('naive_test/tuner_result.txt') tuner_status = read_last_line('naive_test/tuner_result.txt')
assessor_status = read_last_line('naive_test/assessor_result.txt') assessor_status = read_last_line('naive_test/assessor_result.txt')
experiment_status = check_experiment_status(nnimanager_log_path) experiment_status = is_experiment_done(nnimanager_log_path)
assert tuner_status != 'ERROR', 'Tuner exited with error' assert tuner_status != 'ERROR', 'Tuner exited with error'
assert assessor_status != 'ERROR', 'Assessor exited with error' assert assessor_status != 'ERROR', 'Assessor exited with error'
......
#!/bin/sh
# Test wrapper that forwards all arguments to the nnictl CLI module.
# "$@" (quoted) preserves each argument as a single word; the unquoted
# form would word-split arguments containing spaces.
python3 -m nni_cmd.nnictl "$@"
#!/bin/sh
# Test wrapper that launches the locally built NNI manager.
# "$@" (quoted) preserves each argument as a single word; the unquoted
# form would word-split arguments containing spaces.
cd ../../src/nni_manager && node dist/main.js "$@"
local:
trainingServicePlatform: local
remote:
trainingServicePlatform: remote
machineList:
- ip:
port:
username:
passwd:
pai:
trainingServicePlatform: pai
paiConfig:
userName:
passWord:
host:
trial:
gpuNum:
cpuNum:
memoryMB:
image: msranni/latest
dataDir:
outputDir:
...@@ -23,7 +23,7 @@ import sys ...@@ -23,7 +23,7 @@ import sys
import time import time
import traceback import traceback
from utils import get_yml_content, dump_yml_content, setup_experiment, fetch_nni_log_path, check_experiment_status from utils import get_yml_content, dump_yml_content, setup_experiment, fetch_nni_log_path, is_experiment_done
GREEN = '\33[32m' GREEN = '\33[32m'
RED = '\33[31m' RED = '\33[31m'
...@@ -36,7 +36,7 @@ EXPERIMENT_URL = 'http://localhost:8080/api/v1/nni/experiment' ...@@ -36,7 +36,7 @@ EXPERIMENT_URL = 'http://localhost:8080/api/v1/nni/experiment'
def switch(dispatch_type, dispatch_name): def switch(dispatch_type, dispatch_name):
'''Change dispatch in config.yml''' '''Change dispatch in config.yml'''
config_path = 'sdk_test/local.yml' config_path = 'tuner_test/local.yml'
experiment_config = get_yml_content(config_path) experiment_config = get_yml_content(config_path)
if dispatch_name in ['GridSearch', 'BatchTuner']: if dispatch_name in ['GridSearch', 'BatchTuner']:
experiment_config[dispatch_type.lower()] = { experiment_config[dispatch_type.lower()] = {
...@@ -56,7 +56,7 @@ def test_builtin_dispatcher(dispatch_type, dispatch_name): ...@@ -56,7 +56,7 @@ def test_builtin_dispatcher(dispatch_type, dispatch_name):
switch(dispatch_type, dispatch_name) switch(dispatch_type, dispatch_name)
print('Testing %s...' % dispatch_name) print('Testing %s...' % dispatch_name)
proc = subprocess.run(['nnictl', 'create', '--config', 'sdk_test/local.yml']) proc = subprocess.run(['nnictl', 'create', '--config', 'tuner_test/local.yml'])
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode
nnimanager_log_path = fetch_nni_log_path(EXPERIMENT_URL) nnimanager_log_path = fetch_nni_log_path(EXPERIMENT_URL)
...@@ -64,7 +64,7 @@ def test_builtin_dispatcher(dispatch_type, dispatch_name): ...@@ -64,7 +64,7 @@ def test_builtin_dispatcher(dispatch_type, dispatch_name):
for _ in range(20): for _ in range(20):
time.sleep(3) time.sleep(3)
# check if experiment is done # check if experiment is done
experiment_status = check_experiment_status(nnimanager_log_path) experiment_status = is_experiment_done(nnimanager_log_path)
if experiment_status: if experiment_status:
break break
......
...@@ -27,6 +27,16 @@ import yaml ...@@ -27,6 +27,16 @@ import yaml
EXPERIMENT_DONE_SIGNAL = '"Experiment done"' EXPERIMENT_DONE_SIGNAL = '"Experiment done"'
GREEN = '\33[32m'
RED = '\33[31m'
CLEAR = '\33[0m'
REST_ENDPOINT = 'http://localhost:8080/api/v1/nni'
EXPERIMENT_URL = REST_ENDPOINT + '/experiment'
STATUS_URL = REST_ENDPOINT + '/check-status'
TRIAL_JOBS_URL = REST_ENDPOINT + '/trial-jobs'
METRICS_URL = REST_ENDPOINT + '/metric-data'
def read_last_line(file_name): def read_last_line(file_name):
'''read last line of a file and return None if file not found''' '''read last line of a file and return None if file not found'''
try: try:
...@@ -73,10 +83,38 @@ def fetch_nni_log_path(experiment_url): ...@@ -73,10 +83,38 @@ def fetch_nni_log_path(experiment_url):
return nnimanager_log_path return nnimanager_log_path
def check_experiment_status(nnimanager_log_path): def is_experiment_done(nnimanager_log_path):
'''check if the experiment is done successfully''' '''check if the experiment is done successfully'''
assert os.path.exists(nnimanager_log_path), 'Experiment starts failed' assert os.path.exists(nnimanager_log_path), 'Experiment starts failed'
cmds = ['cat', nnimanager_log_path, '|', 'grep', EXPERIMENT_DONE_SIGNAL] cmds = ['cat', nnimanager_log_path, '|', 'grep', EXPERIMENT_DONE_SIGNAL]
completed_process = subprocess.run(' '.join(cmds), shell=True) completed_process = subprocess.run(' '.join(cmds), shell=True)
return completed_process.returncode == 0 return completed_process.returncode == 0
def get_experiment_status(status_url):
    '''Query the NNI REST API check-status endpoint and return the
    experiment status string (e.g. 'RUNNING', 'DONE').
    '''
    response = requests.get(status_url)
    return response.json()['status']
def get_succeeded_trial_num(trial_jobs_url):
    '''Return how many trial jobs finished successfully.

    EARLY_STOPPED counts as success: the trial completed, it was just
    cut short by the assessor.
    '''
    trial_jobs = requests.get(trial_jobs_url).json()
    print(trial_jobs)
    num_succeed = sum(1 for job in trial_jobs
                      if job['status'] in ('SUCCEEDED', 'EARLY_STOPPED'))
    print('num_succeed:', num_succeed)
    return num_succeed
def print_stderr(trial_jobs_url):
    '''Dump the stderr file of every FAILED trial job to stdout for debugging.'''
    for job in requests.get(trial_jobs_url).json():
        if job['status'] != 'FAILED':
            continue
        # stderrPath presumably looks like "<host>:<path>"; keep only the
        # path portion — TODO(review): confirm against the REST API schema.
        stderr_path = job['stderrPath'].split(':')[-1]
        subprocess.run(['cat', stderr_path])
def parse_max_duration_time(max_exec_duration):
    '''Convert a duration string such as '3m' or '2h' into seconds.

    The last character is the unit (s/m/h/d); the rest is an integer amount.
    Raises KeyError for an unknown unit and ValueError for a non-integer amount.
    '''
    seconds_per_unit = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}
    amount, unit = max_exec_duration[:-1], max_exec_duration[-1]
    return int(amount) * seconds_per_unit[unit]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment