Unverified Commit 21165b53 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Merge pull request #138 from Microsoft/master

merge master
parents 41a9a598 f10c3311
......@@ -54,16 +54,19 @@ import nni
flags = tf.app.flags
flags.DEFINE_string("data_dir", "/tmp/mnist-data",
"Directory for storing mnist data")
flags.DEFINE_boolean("download_only", False,
flags.DEFINE_boolean(
"download_only", False,
"Only perform downloading of data; Do not proceed to "
"session preparation, model definition or training")
flags.DEFINE_integer("task_index", None,
"Worker task index, should be >= 0. task_index=0 is "
flags.DEFINE_integer(
"task_index", None, "Worker task index, should be >= 0. task_index=0 is "
"the master worker task the performs the variable "
"initialization ")
flags.DEFINE_integer("num_gpus", 1, "Total number of gpus for each machine."
flags.DEFINE_integer(
"num_gpus", 1, "Total number of gpus for each machine."
"If you don't use GPU, please set it to '0'")
flags.DEFINE_integer("replicas_to_aggregate", None,
flags.DEFINE_integer(
"replicas_to_aggregate", None,
"Number of replicas to aggregate before parameter update"
"is applied (For sync_replicas mode only; default: "
"num_workers)")
......@@ -96,6 +99,7 @@ IMAGE_PIXELS = 28
# {'cluster': cluster,
# 'task': {'type': 'worker', 'index': 1}})
def generate_default_params():
'''
Generate default hyper parameters
......@@ -106,6 +110,15 @@ def generate_default_params():
'hidden_units': 100,
}
def download_mnist_retry(data_dir, max_num_retries=20):
    """Load the MNIST dataset from ``data_dir``, retrying on AlreadyExistsError.

    ``input_data.read_data_sets`` is called with ``one_hot=True``. Whenever it
    raises ``tf.errors.AlreadyExistsError`` the call is repeated after a
    one-second pause, for at most ``max_num_retries`` attempts in total.

    Args:
        data_dir: Directory passed to ``input_data.read_data_sets``.
        max_num_retries: Maximum number of attempts before giving up.

    Returns:
        The object returned by ``input_data.read_data_sets``.

    Raises:
        RuntimeError: If every attempt raised ``AlreadyExistsError``, or if
            ``max_num_retries`` allowed no attempt at all.
    """
    last_error = None
    for attempt in range(max_num_retries):
        try:
            return input_data.read_data_sets(data_dir, one_hot=True)
        except tf.errors.AlreadyExistsError as err:
            last_error = err
            # Waiting is pointless once the retry budget is exhausted.
            if attempt + 1 < max_num_retries:
                time.sleep(1)
    # RuntimeError is still an Exception, so existing handlers keep working;
    # chaining keeps the underlying TensorFlow error visible in the traceback.
    raise RuntimeError("Failed to download MNIST.") from last_error
def main(unused_argv):
# Receive NNI hyper parameter and update it onto default params
RECEIVED_PARAMS = nni.get_next_parameter()
......@@ -124,7 +137,7 @@ def main(unused_argv):
FLAGS.job_name = task_type
FLAGS.task_index = task_index
mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
mnist = download_mnist_retry(FLAGS.data_dir)
if FLAGS.download_only:
sys.exit(0)
......@@ -206,7 +219,8 @@ def main(unused_argv):
hid = tf.nn.relu(hid_lin)
y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
cross_entropy = -tf.reduce_sum(
y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
opt = tf.train.AdamOptimizer(PARAMS['learning_rate'])
......@@ -258,8 +272,9 @@ def main(unused_argv):
sess_config = tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=False,
device_filters=["/job:ps",
"/job:worker/task:%d" % FLAGS.task_index])
device_filters=[
"/job:ps", "/job:worker/task:%d" % FLAGS.task_index
])
# The chief worker (task_index==0) session will prepare the session,
# while the remaining workers will wait for the preparation to complete.
......@@ -273,9 +288,11 @@ def main(unused_argv):
server_grpc_url = "grpc://" + worker_spec[FLAGS.task_index]
print("Using existing server at: %s" % server_grpc_url)
sess = sv.prepare_or_wait_for_session(server_grpc_url, config=sess_config)
sess = sv.prepare_or_wait_for_session(
server_grpc_url, config=sess_config)
else:
sess = sv.prepare_or_wait_for_session(server.target, config=sess_config)
sess = sv.prepare_or_wait_for_session(
server.target, config=sess_config)
print("Worker %d: Session initialization complete." % FLAGS.task_index)
......@@ -302,9 +319,14 @@ def main(unused_argv):
(now, FLAGS.task_index, local_step, step))
if step > 0 and step % 5000 == 0 and is_chief:
val_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
val_feed = {
x: mnist.validation.images,
y_: mnist.validation.labels
}
interim_val_xent = sess.run(cross_entropy, feed_dict=val_feed)
print("After %d training step(s), validation cross entropy = %g" % (step, interim_val_xent))
print(
"After %d training step(s), validation cross entropy = %g"
% (step, interim_val_xent))
# Only chief worker can report intermediate metrics
nni.report_intermediate_result(interim_val_xent)
......
......@@ -3,8 +3,9 @@
import logging
import math
import tempfile
import tensorflow as tf
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import nni
......@@ -142,13 +143,21 @@ def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def download_mnist_retry(data_dir, max_num_retries=20):
    """Load the MNIST dataset from ``data_dir``, retrying on AlreadyExistsError.

    ``input_data.read_data_sets`` is called with ``one_hot=True``. Whenever it
    raises ``tf.errors.AlreadyExistsError`` the call is repeated after a
    one-second pause, for at most ``max_num_retries`` attempts in total.

    Args:
        data_dir: Directory passed to ``input_data.read_data_sets``.
        max_num_retries: Maximum number of attempts before giving up.

    Returns:
        The object returned by ``input_data.read_data_sets``.

    Raises:
        RuntimeError: If every attempt raised ``AlreadyExistsError``, or if
            ``max_num_retries`` allowed no attempt at all.
    """
    last_error = None
    for attempt in range(max_num_retries):
        try:
            return input_data.read_data_sets(data_dir, one_hot=True)
        except tf.errors.AlreadyExistsError as err:
            last_error = err
            # Waiting is pointless once the retry budget is exhausted.
            if attempt + 1 < max_num_retries:
                time.sleep(1)
    # RuntimeError is still an Exception, so existing handlers keep working;
    # chaining keeps the underlying TensorFlow error visible in the traceback.
    raise RuntimeError("Failed to download MNIST.") from last_error
def main(params):
'''
Main function, build mnist network, run and send result to NNI.
'''
# Import data
mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
mnist = download_mnist_retry(params['data_dir'])
print('Mnist download data done.')
logger.debug('Mnist download data done.')
......
......@@ -4,8 +4,9 @@ import argparse
import logging
import math
import tempfile
import tensorflow as tf
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import nni
......@@ -143,13 +144,21 @@ def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def download_mnist_retry(data_dir, max_num_retries=20):
    """Load the MNIST dataset from ``data_dir``, retrying on AlreadyExistsError.

    ``input_data.read_data_sets`` is called with ``one_hot=True``. Whenever it
    raises ``tf.errors.AlreadyExistsError`` the call is repeated after a
    one-second pause, for at most ``max_num_retries`` attempts in total.

    Args:
        data_dir: Directory passed to ``input_data.read_data_sets``.
        max_num_retries: Maximum number of attempts before giving up.

    Returns:
        The object returned by ``input_data.read_data_sets``.

    Raises:
        RuntimeError: If every attempt raised ``AlreadyExistsError``, or if
            ``max_num_retries`` allowed no attempt at all.
    """
    last_error = None
    for attempt in range(max_num_retries):
        try:
            return input_data.read_data_sets(data_dir, one_hot=True)
        except tf.errors.AlreadyExistsError as err:
            last_error = err
            # Waiting is pointless once the retry budget is exhausted.
            if attempt + 1 < max_num_retries:
                time.sleep(1)
    # RuntimeError is still an Exception, so existing handlers keep working;
    # chaining keeps the underlying TensorFlow error visible in the traceback.
    raise RuntimeError("Failed to download MNIST.") from last_error
def main(params):
'''
Main function, build mnist network, run and send result to NNI.
'''
# Import data
mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
mnist = download_mnist_retry(params['data_dir'])
print('Mnist download data done.')
logger.debug('Mnist download data done.')
......
......@@ -3,8 +3,9 @@ import argparse
import logging
import math
import tempfile
import tensorflow as tf
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
FLAGS = None
......@@ -143,13 +144,21 @@ def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def download_mnist_retry(data_dir, max_num_retries=20):
    """Load the MNIST dataset from ``data_dir``, retrying on AlreadyExistsError.

    ``input_data.read_data_sets`` is called with ``one_hot=True``. Whenever it
    raises ``tf.errors.AlreadyExistsError`` the call is repeated after a
    one-second pause, for at most ``max_num_retries`` attempts in total.

    Args:
        data_dir: Directory passed to ``input_data.read_data_sets``.
        max_num_retries: Maximum number of attempts before giving up.

    Returns:
        The object returned by ``input_data.read_data_sets``.

    Raises:
        RuntimeError: If every attempt raised ``AlreadyExistsError``, or if
            ``max_num_retries`` allowed no attempt at all.
    """
    last_error = None
    for attempt in range(max_num_retries):
        try:
            return input_data.read_data_sets(data_dir, one_hot=True)
        except tf.errors.AlreadyExistsError as err:
            last_error = err
            # Waiting is pointless once the retry budget is exhausted.
            if attempt + 1 < max_num_retries:
                time.sleep(1)
    # RuntimeError is still an Exception, so existing handlers keep working;
    # chaining keeps the underlying TensorFlow error visible in the traceback.
    raise RuntimeError("Failed to download MNIST.") from last_error
def main(params):
'''
Main function, build mnist network, run and send result to NNI.
'''
# Import data
mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
mnist = download_mnist_retry(params['data_dir'])
print('Mnist download data done.')
logger.debug('Mnist download data done.')
......
......@@ -57,8 +57,8 @@ class CurvefittingAssessor(Assessor):
self.threshold = threshold
# Record the number of gap
self.gap = gap
# Record the number of times of judgments
self.judgment_num = 0
# Record, per trial, the number of intermediate results at its latest judgment
self.last_judgment_num = dict()
# Record the best performance
self.set_best_performance = False
self.completed_best_performance = None
......@@ -112,9 +112,10 @@ class CurvefittingAssessor(Assessor):
curr_step = len(trial_history)
if curr_step < self.start_step:
return AssessResult.Good
if (curr_step - self.start_step) // self.gap <= self.judgment_num:
if trial_job_id in self.last_judgment_num.keys() and curr_step - self.last_judgment_num[trial_job_id] < self.gap:
return AssessResult.Good
self.judgment_num = (curr_step - self.start_step) // self.gap
self.last_judgment_num[trial_job_id] = curr_step
try:
start_time = datetime.datetime.now()
......
......@@ -39,7 +39,7 @@ def _outlierDetection_threaded(inputs):
outlier = None
# Create a diagnostic regression model which removes the sample that we want to evaluate
diagnostic_regressor_gp = gp_create_model.createModel(\
diagnostic_regressor_gp = gp_create_model.create_model(\
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(samples_x[samples_idx], diagnostic_regressor_gp['model'])
......@@ -81,7 +81,7 @@ def outlierDetection(samples_x, samples_y_aggregation):
for samples_idx in range(0, len(samples_x)):
#sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n"
# \ % (os.path.basename(__file__), samples_idx + 1, len(samples_x)))
diagnostic_regressor_gp = gp_create_model.createModel(\
diagnostic_regressor_gp = gp_create_model.create_model(\
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(samples_x[samples_idx],
......@@ -95,5 +95,3 @@ def outlierDetection(samples_x, samples_y_aggregation):
outliers = None if len(outliers) == 0 else outliers
return outliers
\ No newline at end of file
......@@ -24,22 +24,20 @@ import os
import random
import statistics
import sys
import numpy as np
from enum import Enum, unique
from multiprocessing.dummy import Pool as ThreadPool
from nni.tuner import Tuner
import numpy as np
import nni.metis_tuner.lib_data as lib_data
import nni.metis_tuner.lib_constraint_summation as lib_constraint_summation
import nni.metis_tuner.Regression_GP.CreateModel as gp_create_model
import nni.metis_tuner.Regression_GP.Selection as gp_selection
import nni.metis_tuner.Regression_GP.Prediction as gp_prediction
import nni.metis_tuner.Regression_GP.OutlierDetection as gp_outlier_detection
import nni.metis_tuner.lib_data as lib_data
import nni.metis_tuner.Regression_GMM.CreateModel as gmm_create_model
import nni.metis_tuner.Regression_GMM.Selection as gmm_selection
import nni.metis_tuner.Regression_GP.CreateModel as gp_create_model
import nni.metis_tuner.Regression_GP.OutlierDetection as gp_outlier_detection
import nni.metis_tuner.Regression_GP.Prediction as gp_prediction
import nni.metis_tuner.Regression_GP.Selection as gp_selection
from nni.tuner import Tuner
logger = logging.getLogger("Metis_Tuner_AutoML")
......@@ -67,7 +65,7 @@ class MetisTuner(Tuner):
"""
def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=True,
selection_num_starting_points=10, cold_start_num=10):
selection_num_starting_points=600, cold_start_num=10, exploration_probability=0.1):
"""
Parameters
----------
......@@ -89,11 +87,15 @@ class MetisTuner(Tuner):
cold_start_num: int
Metis needs some trial results to get a cold start. When the number of trial results is less than
cold_start_num, Metis will randomly sample hyper-parameters for the trial.
exploration_probability: float
The probability that Metis selects parameters from exploration instead of exploitation.
"""
self.samples_x = []
self.samples_y = []
self.samples_y_aggregation = []
self.history_parameters = []
self.space = None
self.no_resampling = no_resampling
self.no_candidates = no_candidates
......@@ -101,6 +103,7 @@ class MetisTuner(Tuner):
self.key_order = []
self.cold_start_num = cold_start_num
self.selection_num_starting_points = selection_num_starting_points
self.exploration_probability = exploration_probability
self.minimize_constraints_fun = None
self.minimize_starting_points = None
......@@ -128,7 +131,7 @@ class MetisTuner(Tuner):
except Exception as ex:
logger.exception(ex)
raise RuntimeError("The format search space contains \
some key that didn't define in key_order.")
some key that didn't define in key_order." )
if key_type == 'quniform':
if key_range[2] == 1:
......@@ -200,6 +203,8 @@ class MetisTuner(Tuner):
init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
results = self._pack_output(init_parameter)
else:
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
self.selection_num_starting_points)
results = self._selection(self.samples_x, self.samples_y_aggregation, self.samples_y,
self.x_bounds, self.x_types,
threshold_samplessize_resampling=(None if self.no_resampling is True else 50),
......@@ -245,7 +250,7 @@ class MetisTuner(Tuner):
# calculate y aggregation
median = get_median(temp_y)
self.samples_y_aggregation[idx] = median
self.samples_y_aggregation[idx] = [median]
else:
self.samples_x.append(sample_x)
self.samples_y.append([value])
......@@ -268,8 +273,12 @@ class MetisTuner(Tuner):
# ===== STEP 1: Compute the current optimum =====
#sys.stderr.write("[%s] Predicting the optimal configuration from the current training dataset...\n" % (os.path.basename(__file__)))
gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation)
lm_current = gp_selection.selection("lm", samples_y_aggregation, x_bounds,
x_types, gp_model['model'],
lm_current = gp_selection.selection(
"lm",
samples_y_aggregation,
x_bounds,
x_types,
gp_model['model'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
if not lm_current:
......@@ -284,8 +293,12 @@ class MetisTuner(Tuner):
# ===== STEP 2: Get recommended configurations for exploration =====
#sys.stderr.write("[%s] Getting candidates for exploration...\n"
#% \(os.path.basename(__file__)))
results_exploration = gp_selection.selection("lc", samples_y_aggregation,
x_bounds, x_types, gp_model['model'],
results_exploration = gp_selection.selection(
"lc",
samples_y_aggregation,
x_bounds,
x_types,
gp_model['model'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
......@@ -308,7 +321,8 @@ class MetisTuner(Tuner):
print("Getting candidates for exploitation...\n")
try:
gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation)
results_exploitation = gmm_selection.selection(x_bounds,
results_exploitation = gmm_selection.selection(
x_bounds,
x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
......@@ -340,8 +354,6 @@ class MetisTuner(Tuner):
results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation)
if results_outliers is not None:
#temp = len(candidates)
for results_outlier in results_outliers:
if _num_past_samples(samples_x[results_outlier['samples_idx']], samples_x, samples_y) < max_resampling_per_x:
candidates.append({'hyperparameter': samples_x[results_outlier['samples_idx']],\
......@@ -357,7 +369,10 @@ class MetisTuner(Tuner):
# The message has a %d placeholder, so supply the candidate count for it.
logger.info("Evaluating information gain of %d candidates...\n", len(candidates))
next_improvement = 0
threads_inputs = [[candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points] for candidate in candidates]
threads_inputs = [[
candidate, samples_x, samples_y, x_bounds, x_types,
minimize_constraints_fun, minimize_starting_points
] for candidate in candidates]
threads_pool = ThreadPool(4)
# Evaluate what would happen if we actually sample each candidate
threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs)
......@@ -370,19 +385,21 @@ class MetisTuner(Tuner):
temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu']
if next_improvement > temp_improvement:
# logger.info("DEBUG: \"next_candidate\" changed: \
# lowest mu might reduce from %f (%s) to %f (%s), %s\n" %\
# lm_current['expected_mu'], str(lm_current['hyperparameter']),\
# threads_result['expected_lowest_mu'],\
# str(threads_result['candidate']['hyperparameter']),\
# threads_result['candidate']['reason'])
# Hand the values to the logger as arguments. Applying "%" directly bound only
# the first value to the five-placeholder format string, which raises
# "TypeError: not enough arguments for format string" when this branch runs.
logger.info("DEBUG: \"next_candidate\" changed: \
lowest mu might reduce from %f (%s) to %f (%s), %s\n",
lm_current['expected_mu'], str(lm_current['hyperparameter']),
threads_result['expected_lowest_mu'],
str(threads_result['candidate']['hyperparameter']),
threads_result['candidate']['reason'])
next_improvement = temp_improvement
next_candidate = threads_result['candidate']
else:
# ===== STEP 6: If we have no candidates, randomly pick one =====
logger.info("DEBUG: No candidates from exploration, exploitation,\
and resampling. We will random a candidate for next_candidate\n")
logger.info(
"DEBUG: No candidates from exploration, exploitation,\
and resampling. We will random a candidate for next_candidate\n"
)
next_candidate = _rand_with_constraints(x_bounds, x_types) \
if minimize_starting_points is None else minimize_starting_points[0]
......@@ -391,7 +408,16 @@ class MetisTuner(Tuner):
next_candidate = {'hyperparameter': next_candidate, 'reason': "random",
'expected_mu': expected_mu, 'expected_sigma': expected_sigma}
# ===== STEP 7: If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold, take next config as exploration step =====
outputs = self._pack_output(lm_current['hyperparameter'])
ap = random.uniform(0, 1)
if outputs in self.history_parameters or ap<=self.exploration_probability:
if next_candidate is not None:
outputs = self._pack_output(next_candidate['hyperparameter'])
else:
random_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
outputs = self._pack_output(random_parameter)
self.history_parameters.append(outputs)
return outputs
......@@ -437,8 +463,12 @@ def _calculate_lowest_mu_threaded(inputs):
# Aggregates multiple observation of the sample sampling points
temp_y_aggregation = [statistics.median(temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(temp_samples_x, temp_y_aggregation)
temp_results = gp_selection.selection("lm", temp_y_aggregation,
x_bounds, x_types, temp_gp['model'],
temp_results = gp_selection.selection(
"lm",
temp_y_aggregation,
x_bounds,
x_types,
temp_gp['model'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
......
......@@ -92,7 +92,7 @@ class TrialsDetail extends React.Component<{}, TrialDetailState> {
formatter: function (data: TooltipForAccuracy) {
const result = '<div class="tooldetailAccuracy">' +
'<div>Trial No: ' + data.data[0] + '</div>' +
'<div>Default Metrc: ' + data.data[1] + '</div>' +
'<div>Default Metric: ' + data.data[1] + '</div>' +
'<div>Parameters: ' +
'<pre>' + JSON.stringify(data.data[2], null, 4) + '</pre>' +
'</div>' +
......
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 2
trialConcurrency: 1
maxTrialNum: 4
trialConcurrency: 2
tuner:
builtinTunerName: Random
......
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 2
trialConcurrency: 1
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ./mnist_search_space.json
tuner:
......
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 16
trialConcurrency: 8
maxTrialNum: 8
trialConcurrency: 4
searchSpacePath: ./search_space.json
tuner:
......
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
jobs:
- job: 'integration_test_kubeflow'
timeoutInMinutes: 0
pool: 'NNI CI KUBE CLI'
variables:
new_docker_img: msranni/nni.it.kb:latest
steps:
- script: python3 -m pip install --upgrade pip setuptools --user
......@@ -19,6 +35,12 @@ jobs:
displayName: 'build nni bdsit_wheel'
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
- script: |
if [ $(build_docker_img) = 'true' ]
then
cd deployment/pypi
docker login -u $(docker_hub_user) -p $(docker_hub_pwd)
......@@ -26,20 +48,11 @@ jobs:
# update Dockerfile to install NNI in docker image from whl file built in last step
sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
cat ../docker/Dockerfile
echo $IMG_TAG
docker build -f ../docker/Dockerfile -t $(new_docker_img) .
docker push $(new_docker_img)
condition: eq( variables['build_docker_img'], 'true' )
displayName: 'build and upload nni docker image'
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
- script: |
if [ $(build_docker_img) = 'true' ]
then
export TEST_IMG=$(new_docker_img)
export IMG_TAG=`date -u +%y%m%d%H%M`
docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
docker push $(test_docker_img_name):$IMG_TAG
export TEST_IMG=$(test_docker_img_name):$IMG_TAG
cd ../../
else
export TEST_IMG=$(existing_docker_img)
fi
......
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
jobs:
- job: 'integration_test_pai'
timeoutInMinutes: 0
pool: 'NNI CI PAI CLI'
variables:
new_docker_img: msranni/nni.it.pai:latest
steps:
- script: python3 -m pip install --upgrade pip setuptools --user
......@@ -19,30 +35,30 @@ jobs:
displayName: 'build nni bdsit_wheel'
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
- script: |
if [ $(build_docker_img) = 'true' ]
then
cd deployment/pypi
docker login -u $(docker_hub_user) -p $(docker_hub_pwd)
echo 'updating docker file for installing nni from local...'
# update Dockerfile to install NNI in docker image from whl file built in last step
sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
cat ../docker/Dockerfile
echo $IMG_TAG
docker build -f ../docker/Dockerfile -t $(new_docker_img) .
docker push $(new_docker_img)
condition: eq( variables['build_docker_img'], 'true' )
displayName: 'build and upload nni docker image'
export IMG_TAG=`date -u +%y%m%d%H%M`
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
echo 'build and upload docker image'
docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
docker push $(test_docker_img_name):$IMG_TAG
- script: |
if [ $(build_docker_img) = 'true' ]
then
export TEST_IMG=$(new_docker_img)
export TEST_IMG=$(test_docker_img_name):$IMG_TAG
cd ../../
else
export TEST_IMG=$(existing_docker_img)
fi
echo "TEST_IMG:$TEST_IMG"
cd test
python3 generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) \
......
# NNI Annotation 介绍
# NNI Annotation
为了获得良好的用户体验并减少用户负担,NNI 设计了通过注释来使用的语法。
## 概述
使用 NNI 时,只需要:
为了获得良好的用户体验并减少对以后代码的影响,NNI 设计了通过 Annotation(标记)来使用的语法。 通过 Annotation,只需要在代码中加入一些注释字符串,就能启用 NNI,完全不影响代码原先的执行逻辑。
1. 在超参变量前加上如下标记
样例如下
'''@nni.variable(nni.choice(2,3,5,7),name=self.conv_size)'''
```python
'''@nni.variable(nni.choice(0.1, 0.01, 0.001), name=learning_rate)'''
learning_rate = 0.1
```
2. 在中间结果前加上:
此样例中,NNI 会从 (0.1, 0.01, 0.001) 中选择一个值赋给 learning_rate 变量。 第一行就是 NNI 的 Annotation,是 Python 中的一个字符串。 接下来的一行需要是赋值语句。 NNI 会根据 Annotation 行的信息,来给这一行的变量赋上相应的值。
'''@nni.report_intermediate_result(test_acc)'''
通过这种方式,不需要修改任何代码,代码既可以直接运行,又可以使用 NNI 来调参。
3. 在输出结果前加上
## Annotation 的类型
'''@nni.report_final_result(test_acc)'''
NNI 中,有 4 种类型的 Annotation;
4. 在代码中使用函数 `function_choice`
### 1. 变量
'''@nni.function_choice(max_pool(h_conv1, self.pool_size),avg_pool(h_conv1, self.pool_size),name=max_pool)'''
`'''@nni.variable(sampling_algo, name)'''`
通过这种方法,能够轻松的在 NNI 中实现自动调参
`@nni.variable` 用来标记变量
`@nni.variable`, `nni.choice` 为搜索空间的类型,通过以下 10 种方法来定义搜索空间:
**参数**
1. `@nni.variable(nni.choice(option1,option2,...,optionN),name=variable)`
变量值是选项中的一种,这些变量可以是任意的表达式
- **sampling_algo**: 指定搜索空间的采样算法。 可将其换成 NNI 支持的其它采样函数,函数要以 `nni.` 开头。例如,`choice` 或 `uniform`,详见 [SearchSpaceSpec](https://nni.readthedocs.io/zh/latest/SearchSpaceSpec.html)。
- **name**: 将被赋值的变量名称。 注意,此参数应该与下面一行等号左边的值相同
2. `@nni.variable(nni.randint(upper),name=variable)`
变量可以是范围 [0, upper) 中的任意整数。
NNI 支持如下 10 种类型来表示搜索空间:
3. `@nni.variable(nni.uniform(low, high),name=variable)`
变量值会是 low 和 high 之间均匀分布的某个值。
- `@nni.variable(nni.choice(option1,option2,...,optionN),name=variable)` 变量值是选项中的一种,这些变量可以是任意的表达式。
- `@nni.variable(nni.randint(upper),name=variable)` 变量可以是范围 [0, upper) 中的任意整数。
- `@nni.variable(nni.uniform(low, high),name=variable)` 变量值会是 low 和 high 之间均匀分布的某个值。
- `@nni.variable(nni.quniform(low, high, q),name=variable)` 变量值会是 low 和 high 之间均匀分布的某个值,公式为:round(uniform(low, high) / q) * q
- `@nni.variable(nni.loguniform(low, high),name=variable)` 变量值是 exp(uniform(low, high)) 的点,数值以对数均匀分布。
- `@nni.variable(nni.qloguniform(low, high, q),name=variable)` 变量值会是 low 和 high 之间均匀分布的某个值,公式为:round(exp(uniform(low, high)) / q) * q
- `@nni.variable(nni.normal(mu, sigma),name=variable)` 变量值为正态分布的实数值,平均值为 mu,标准方差为 sigma。
- `@nni.variable(nni.qnormal(mu, sigma, q),name=variable)` 变量值分布的公式为: round(normal(mu, sigma) / q) * q
- `@nni.variable(nni.lognormal(mu, sigma),name=variable)` 变量值分布的公式为: exp(normal(mu, sigma))
- `@nni.variable(nni.qlognormal(mu, sigma, q),name=variable)` 变量值分布的公式为: round(exp(normal(mu, sigma)) / q) * q
4. `@nni.variable(nni.quniform(low, high, q),name=variable)`
变量值会是 low 和 high 之间均匀分布的某个值,公式为:round(uniform(low, high) / q) * q
样例如下:
5. `@nni.variable(nni.loguniform(low, high),name=variable)`
变量值是 exp(uniform(low, high)) 的点,数值以对数均匀分布。
```python
'''@nni.variable(nni.choice(0.1, 0.01, 0.001), name=learning_rate)'''
learning_rate = 0.1
```
6. `@nni.variable(nni.qloguniform(low, high, q),name=variable)`
变量值会是 low 和 high 之间均匀分布的某个值,公式为:round(exp(uniform(low, high)) / q) * q
### 2. 函数
7. `@nni.variable(nni.normal(label, mu, sigma),name=variable)`
变量值为正态分布的实数值,平均值为 mu,标准方差为 sigma。
`'''@nni.function_choice(*functions, name)'''`
8. `@nni.variable(nni.qnormal(label, mu, sigma, q),name=variable)`
变量值分布的公式为: round(normal(mu, sigma) / q) * q
`@nni.function_choice` 可以从几个函数中选择一个来执行。
9. `@nni.variable(nni.lognormal(label, mu, sigma),name=variable)`
变量值分布的公式为: exp(normal(mu, sigma))
**参数**
10. `@nni.variable(nni.qlognormal(label, mu, sigma, q),name=variable)`
变量值分布的公式为: round(exp(normal(mu, sigma)) / q) * q
\ No newline at end of file
- **functions**: 可选择的函数。 注意,必须是包括参数的完整函数调用。 例如 `max_pool(hidden_layer, pool_size)`。
- **name**: 将被替换的函数名称。
例如:
```python
"""@nni.function_choice(max_pool(hidden_layer, pool_size), avg_pool(hidden_layer, pool_size), name=max_pool)"""
h_pooling = max_pool(hidden_layer, pool_size)
```
### 3. 中间结果
`'''@nni.report_intermediate_result(metrics)'''`
`@nni.report_intermediate_result` 用来返回中间结果,这和 [Trials.md](https://nni.readthedocs.io/zh/latest/Trials.html) 中的 `nni.report_intermediate_result` 用法一样。
### 4. 最终结果
`'''@nni.report_final_result(metrics)'''`
`@nni.report_final_result` 用来返回当前 Trial 的最终结果,这和 [Trials.md](https://nni.readthedocs.io/zh/latest/Trials.html) 中的 `nni.report_final_result` 用法一样。
\ No newline at end of file
import nni
"""A deep MNIST classifier using convolutional layers."""
import logging
import math
import tempfile
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import nni
FLAGS = None
logger = logging.getLogger('mnist_AutoML')
......@@ -123,12 +127,23 @@ def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def download_mnist_retry(data_dir, max_num_retries=20):
    """Load the MNIST dataset from ``data_dir``, retrying on AlreadyExistsError.

    ``input_data.read_data_sets`` is called with ``one_hot=True``. Whenever it
    raises ``tf.errors.AlreadyExistsError`` the call is repeated after a
    one-second pause, for at most ``max_num_retries`` attempts in total.

    Args:
        data_dir: Directory passed to ``input_data.read_data_sets``.
        max_num_retries: Maximum number of attempts before giving up.

    Returns:
        The object returned by ``input_data.read_data_sets``.

    Raises:
        RuntimeError: If every attempt raised ``AlreadyExistsError``, or if
            ``max_num_retries`` allowed no attempt at all.
    """
    last_error = None
    for attempt in range(max_num_retries):
        try:
            return input_data.read_data_sets(data_dir, one_hot=True)
        except tf.errors.AlreadyExistsError as err:
            last_error = err
            # Waiting is pointless once the retry budget is exhausted.
            if attempt + 1 < max_num_retries:
                time.sleep(1)
    # RuntimeError is still an Exception, so existing handlers keep working;
    # chaining keeps the underlying TensorFlow error visible in the traceback.
    raise RuntimeError("Failed to download MNIST.") from last_error
def main(params):
"""
Main function, build mnist network, run and send result to NNI.
"""
mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
def main(params):
# Import data
mnist = download_mnist_retry(params['data_dir'])
print('Mnist download data done.')
logger.debug('Mnist download data done.')
mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
......
......@@ -21,8 +21,9 @@
import logging
import math
import tempfile
import tensorflow as tf
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
FLAGS = None
......@@ -168,13 +169,21 @@ def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def download_mnist_retry(data_dir, max_num_retries=20):
    """Load the MNIST dataset from ``data_dir``, retrying on AlreadyExistsError.

    ``input_data.read_data_sets`` is called with ``one_hot=True``. Whenever it
    raises ``tf.errors.AlreadyExistsError`` the call is repeated after a
    one-second pause, for at most ``max_num_retries`` attempts in total.

    Args:
        data_dir: Directory passed to ``input_data.read_data_sets``.
        max_num_retries: Maximum number of attempts before giving up.

    Returns:
        The object returned by ``input_data.read_data_sets``.

    Raises:
        RuntimeError: If every attempt raised ``AlreadyExistsError``, or if
            ``max_num_retries`` allowed no attempt at all.
    """
    last_error = None
    for attempt in range(max_num_retries):
        try:
            return input_data.read_data_sets(data_dir, one_hot=True)
        except tf.errors.AlreadyExistsError as err:
            last_error = err
            # Waiting is pointless once the retry budget is exhausted.
            if attempt + 1 < max_num_retries:
                time.sleep(1)
    # RuntimeError is still an Exception, so existing handlers keep working;
    # chaining keeps the underlying TensorFlow error visible in the traceback.
    raise RuntimeError("Failed to download MNIST.") from last_error
def main(params):
'''
Main function, build mnist network, run and send result to NNI.
'''
# Import data
mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
mnist = download_mnist_retry(params['data_dir'])
print('Mnist download data done.')
logger.debug('Mnist download data done.')
......
......@@ -21,8 +21,9 @@
import logging
import math
import tempfile
import tensorflow as tf
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import nni
......@@ -172,13 +173,21 @@ def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def download_mnist_retry(data_dir, max_num_retries=20):
    """Load the MNIST dataset from ``data_dir``, retrying on AlreadyExistsError.

    ``input_data.read_data_sets`` is called with ``one_hot=True``. Whenever it
    raises ``tf.errors.AlreadyExistsError`` the call is repeated after a
    one-second pause, for at most ``max_num_retries`` attempts in total.

    Args:
        data_dir: Directory passed to ``input_data.read_data_sets``.
        max_num_retries: Maximum number of attempts before giving up.

    Returns:
        The object returned by ``input_data.read_data_sets``.

    Raises:
        RuntimeError: If every attempt raised ``AlreadyExistsError``, or if
            ``max_num_retries`` allowed no attempt at all.
    """
    last_error = None
    for attempt in range(max_num_retries):
        try:
            return input_data.read_data_sets(data_dir, one_hot=True)
        except tf.errors.AlreadyExistsError as err:
            last_error = err
            # Waiting is pointless once the retry budget is exhausted.
            if attempt + 1 < max_num_retries:
                time.sleep(1)
    # RuntimeError is still an Exception, so existing handlers keep working;
    # chaining keeps the underlying TensorFlow error visible in the traceback.
    raise RuntimeError("Failed to download MNIST.") from last_error
def main(params):
'''
Main function, build mnist network, run and send result to NNI.
'''
# Import data
mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
mnist = download_mnist_retry(params['data_dir'])
print('Mnist download data done.')
logger.debug('Mnist download data done.')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment