Unverified Commit 7620e7c5 authored by SparkSnail, committed by GitHub

Merge pull request #214 from microsoft/master

merge master
parents c037a7c1 187494aa
......@@ -17,10 +17,10 @@
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
"""
gridsearch_tuner.py including:
class GridSearchTuner
'''
"""
import copy
import logging
......@@ -37,29 +37,40 @@ VALUE = '_value'
logger = logging.getLogger('grid_search_AutoML')
class GridSearchTuner(Tuner):
'''
"""
GridSearchTuner will search all the possible configurations that the user defines in the search space.
The only acceptable types of search space are 'choice', 'quniform', 'randint'
The only acceptable types of search space are ``choice``, ``quniform``, ``randint``
Type 'choice' will select one of the options. Note that it can also be nested.
Type ``choice`` will select one of the options. Note that it can also be nested.
Type 'quniform' will receive three values [low, high, q], where [low, high] specifies a range and 'q' specifies the interval
It will be sampled in a way that the first sampled value is 'low',
Type ``quniform`` will receive three values [``low``, ``high``, ``q``],
where [``low``, ``high``] specifies a range and ``q`` specifies the interval.
It will be sampled in a way that the first sampled value is ``low``,
and each of the following values is 'interval' larger than the value in front of it.
Type 'randint' gives all possible intergers in range[low, high). Note that 'high' is not included.
'''
Type ``randint`` gives all possible integers in range [``low``, ``high``). Note that ``high`` is not included.
"""
def __init__(self):
self.count = -1
self.expanded_search_space = []
self.supplement_data = dict()
def json2parameter(self, ss_spec):
'''
generate all possible configs for hyperparameters from hyperparameter space.
ss_spec: hyperparameter space
'''
def _json2parameter(self, ss_spec):
"""
Generate all possible configs for hyperparameters from hyperparameter space.
Parameters
----------
ss_spec : dict or list
Hyperparameter space or the ``_value`` of a hyperparameter
Returns
-------
list or dict
All the candidate choices of hyperparameters. For a single hyperparameter, chosen_params
is a list. For multiple hyperparameters (e.g., a search space), chosen_params is a dict.
"""
if isinstance(ss_spec, dict):
if '_type' in ss_spec.keys():
_type = ss_spec['_type']
......@@ -67,7 +78,7 @@ class GridSearchTuner(Tuner):
chosen_params = list()
if _type == 'choice':
for value in _value:
choice = self.json2parameter(value)
choice = self._json2parameter(value)
if isinstance(choice, list):
chosen_params.extend(choice)
else:
......@@ -81,12 +92,12 @@ class GridSearchTuner(Tuner):
else:
chosen_params = dict()
for key in ss_spec.keys():
chosen_params[key] = self.json2parameter(ss_spec[key])
return self.expand_parameters(chosen_params)
chosen_params[key] = self._json2parameter(ss_spec[key])
return self._expand_parameters(chosen_params)
elif isinstance(ss_spec, list):
chosen_params = list()
for subspec in ss_spec[1:]:
choice = self.json2parameter(subspec)
choice = self._json2parameter(subspec)
if isinstance(choice, list):
chosen_params.extend(choice)
else:
......@@ -97,27 +108,39 @@ class GridSearchTuner(Tuner):
return chosen_params
def _parse_quniform(self, param_value):
'''parse type of quniform parameter and return a list'''
"""
Parse type of quniform parameter and return a list
"""
low, high, q = param_value[0], param_value[1], param_value[2]
return np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
def _parse_randint(self, param_value):
'''parse type of randint parameter and return a list'''
"""
Parse type of randint parameter and return a list
"""
return np.arange(param_value[0], param_value[1]).tolist()
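A hedged, standalone sketch (the helper names below are re-declared outside the class purely for illustration) of the values these two parsers produce, including the clipping that pulls the first ``quniform`` grid point back to ``low``:

import numpy as np

def parse_quniform(param_value):
    # Same expression as _parse_quniform above: multiples of q, clipped to [low, high].
    low, high, q = param_value
    return np.clip(np.arange(np.round(low / q), np.round(high / q) + 1) * q, low, high)

def parse_randint(param_value):
    # Same as _parse_randint above: every integer in [low, high), high excluded.
    return np.arange(param_value[0], param_value[1]).tolist()

print(parse_quniform([1, 10, 3]))   # [1. 3. 6. 9.] -- the 0 candidate is clipped up to low=1
print(parse_randint([3, 7]))        # [3, 4, 5, 6]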
def expand_parameters(self, para):
'''
def _expand_parameters(self, para):
"""
Enumerate all possible combinations of all parameters
para: {key1: [v11, v12, ...], key2: [v21, v22, ...], ...}
return: {{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...}
'''
Parameters
----------
para : dict
{key1: [v11, v12, ...], key2: [v21, v22, ...], ...}
Returns
-------
list of dict
{{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...}
"""
if len(para) == 1:
for key, values in para.items():
return list(map(lambda v: {key: v}, values))
key = list(para)[0]
values = para.pop(key)
rest_para = self.expand_parameters(para)
rest_para = self._expand_parameters(para)
ret_para = list()
for val in values:
for config in rest_para:
......@@ -126,12 +149,37 @@ class GridSearchTuner(Tuner):
return ret_para
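A minimal standalone sketch (a hypothetical ``expand_grid`` helper, equivalent in effect to ``_expand_parameters`` above) of the Cartesian expansion described in the docstring:

import itertools

def expand_grid(para):
    # Cartesian product over the per-key candidate lists.
    keys = list(para)
    return [dict(zip(keys, combo))
            for combo in itertools.product(*(para[k] for k in keys))]

print(expand_grid({"lr": [0.01, 0.1], "momentum": [0.5, 0.9]}))
# [{'lr': 0.01, 'momentum': 0.5}, {'lr': 0.01, 'momentum': 0.9},
#  {'lr': 0.1, 'momentum': 0.5}, {'lr': 0.1, 'momentum': 0.9}]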
def update_search_space(self, search_space):
'''
Check if the search space is valid and expand it: support only 'choice', 'quniform', randint'
'''
self.expanded_search_space = self.json2parameter(search_space)
"""
Check if the search space is valid and expand it: support only ``choice``, ``quniform``, ``randint``.
Parameters
----------
search_space : dict
The format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
"""
self.expanded_search_space = self._json2parameter(search_space)
def generate_parameters(self, parameter_id, **kwargs):
"""
Generate parameters for one trial.
Parameters
----------
parameter_id : int
The id for the generated hyperparameter
**kwargs
Not used
Returns
-------
dict
One configuration from the expanded search space.
Raises
------
NoMoreTrialError
If all the configurations have been sent, raise :class:`~nni.NoMoreTrialError`.
"""
self.count += 1
while self.count <= len(self.expanded_search_space) - 1:
_params_tuple = convert_dict2tuple(self.expanded_search_space[self.count])
......@@ -142,15 +190,20 @@ class GridSearchTuner(Tuner):
raise nni.NoMoreTrialError('no more parameters now.')
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
"""
Receive a trial's final performance result reported through :func:`~nni.report_final_result` by the trial.
GridSearchTuner does not need trial's results.
"""
pass
def import_data(self, data):
"""Import additional data for tuning
"""
Import additional data for tuning
Parameters
----------
data:
a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
list
A list of dictionaries, each of which has at least two keys, ``parameter`` and ``value``
"""
_completed_num = 0
for trial_info in data:
......
......@@ -422,7 +422,8 @@ class HyperoptTuner(Tuner):
misc_by_id[tid]['vals'][key] = [val]
def get_suggestion(self, random_search=False):
"""get suggestion from hyperopt
"""
Get a suggestion from hyperopt
Parameters
----------
......@@ -473,7 +474,8 @@ class HyperoptTuner(Tuner):
return total_params
def import_data(self, data):
"""Import additional data for tuning
"""
Import additional data for tuning
Parameters
----------
......
......@@ -27,21 +27,21 @@ class MedianstopAssessor(Assessor):
Parameters
----------
optimize_mode: str
optimize_mode : str
optimize mode, 'maximize' or 'minimize'
start_step: int
start_step : int
only after receiving start_step number of reported intermediate results
"""
def __init__(self, optimize_mode='maximize', start_step=0):
self.start_step = start_step
self.running_history = dict()
self.completed_avg_history = dict()
self._start_step = start_step
self._running_history = dict()
self._completed_avg_history = dict()
if optimize_mode == 'maximize':
self.high_better = True
self._high_better = True
elif optimize_mode == 'minimize':
self.high_better = False
self._high_better = False
else:
self.high_better = True
self._high_better = True
logger.warning('unrecognized optimize_mode %s', optimize_mode)
def _update_data(self, trial_job_id, trial_history):
......@@ -49,35 +49,35 @@ class MedianstopAssessor(Assessor):
Parameters
----------
trial_job_id: int
trial_job_id : int
trial job id
trial_history: list
trial_history : list
The history performance matrix of each trial
"""
if trial_job_id not in self.running_history:
self.running_history[trial_job_id] = []
self.running_history[trial_job_id].extend(trial_history[len(self.running_history[trial_job_id]):])
if trial_job_id not in self._running_history:
self._running_history[trial_job_id] = []
self._running_history[trial_job_id].extend(trial_history[len(self._running_history[trial_job_id]):])
def trial_end(self, trial_job_id, success):
"""trial_end
Parameters
----------
trial_job_id: int
trial_job_id : int
trial job id
success: bool
success : bool
True if the trial successfully finished, False otherwise
"""
if trial_job_id in self.running_history:
if trial_job_id in self._running_history:
if success:
cnt = 0
history_sum = 0
self.completed_avg_history[trial_job_id] = []
for each in self.running_history[trial_job_id]:
self._completed_avg_history[trial_job_id] = []
for each in self._running_history[trial_job_id]:
cnt += 1
history_sum += each
self.completed_avg_history[trial_job_id].append(history_sum / cnt)
self.running_history.pop(trial_job_id)
self._completed_avg_history[trial_job_id].append(history_sum / cnt)
self._running_history.pop(trial_job_id)
else:
logger.warning('trial_end: trial_job_id does not exist in running_history')
......@@ -86,9 +86,9 @@ class MedianstopAssessor(Assessor):
Parameters
----------
trial_job_id: int
trial_job_id : int
trial job id
trial_history: list
trial_history : list
The history performance matrix of each trial
Returns
......@@ -102,7 +102,7 @@ class MedianstopAssessor(Assessor):
unrecognized exception in medianstop_assessor
"""
curr_step = len(trial_history)
if curr_step < self.start_step:
if curr_step < self._start_step:
return AssessResult.Good
try:
......@@ -115,18 +115,18 @@ class MedianstopAssessor(Assessor):
logger.exception(error)
self._update_data(trial_job_id, num_trial_history)
if self.high_better:
if self._high_better:
best_history = max(trial_history)
else:
best_history = min(trial_history)
avg_array = []
for id_ in self.completed_avg_history:
if len(self.completed_avg_history[id_]) >= curr_step:
avg_array.append(self.completed_avg_history[id_][curr_step - 1])
for id_ in self._completed_avg_history:
if len(self._completed_avg_history[id_]) >= curr_step:
avg_array.append(self._completed_avg_history[id_][curr_step - 1])
if avg_array:
avg_array.sort()
if self.high_better:
if self._high_better:
median = avg_array[(len(avg_array)-1) // 2]
return AssessResult.Bad if best_history < median else AssessResult.Good
else:
......
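As a hedged sketch of the median-stop decision implemented above (all numbers are made up): the trial's best intermediate result so far is compared against the median of the completed trials' running averages at the same step, assuming higher is better.

# Hypothetical running averages of completed trials at the current step,
# and the current trial's best intermediate result so far.
completed_avg_at_step = [0.62, 0.70, 0.55, 0.74]
best_history = 0.58

avg_array = sorted(completed_avg_at_step)
median = avg_array[(len(avg_array) - 1) // 2]        # same index rule as above
print("Bad" if best_history < median else "Good")    # Bad, since 0.58 < 0.62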
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os
import sys
......@@ -31,7 +32,8 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
'''
Create the Gaussian Mixture Model
'''
samples = [samples_x[i] + [samples_y_aggregation[i]] for i in range(0, len(samples_x))]
samples = [samples_x[i] + [samples_y_aggregation[i]]
for i in range(0, len(samples_x))]
# Sorts so that we can get the top samples
samples = sorted(samples, key=itemgetter(-1))
......@@ -39,13 +41,16 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
samples_goodbatch = samples[0:samples_goodbatch_size]
samples_badbatch = samples[samples_goodbatch_size:]
samples_x_goodbatch = [sample_goodbatch[0:-1] for sample_goodbatch in samples_goodbatch]
samples_x_goodbatch = [sample_goodbatch[0:-1]
for sample_goodbatch in samples_goodbatch]
#samples_y_goodbatch = [sample_goodbatch[-1] for sample_goodbatch in samples_goodbatch]
samples_x_badbatch = [sample_badbatch[0:-1] for sample_badbatch in samples_badbatch]
samples_x_badbatch = [sample_badbatch[0:-1]
for sample_badbatch in samples_badbatch]
# === Trains GMM clustering models === #
#sys.stderr.write("[%s] Train GMM's GMM model\n" % (os.path.basename(__file__)))
bgmm_goodbatch = mm.BayesianGaussianMixture(n_components=max(1, samples_goodbatch_size - 1))
bgmm_goodbatch = mm.BayesianGaussianMixture(
n_components=max(1, samples_goodbatch_size - 1))
bad_n_components = max(1, len(samples_x) - samples_goodbatch_size - 1)
bgmm_badbatch = mm.BayesianGaussianMixture(n_components=bad_n_components)
bgmm_goodbatch.fit(samples_x_goodbatch)
......@@ -55,4 +60,3 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
model['clusteringmodel_good'] = bgmm_goodbatch
model['clusteringmodel_bad'] = bgmm_badbatch
return model
\ No newline at end of file
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os
import random
......@@ -33,14 +34,17 @@ CONSTRAINT_UPPERBOUND = None
CONSTRAINT_PARAMS_IDX = []
def _ratio_scores(parameters_value, clusteringmodel_gmm_good, clusteringmodel_gmm_bad):
def _ratio_scores(parameters_value, clusteringmodel_gmm_good,
clusteringmodel_gmm_bad):
'''
The smaller the ratio, the better
'''
ratio = clusteringmodel_gmm_good.score([parameters_value]) / clusteringmodel_gmm_bad.score([parameters_value])
ratio = clusteringmodel_gmm_good.score(
[parameters_value]) / clusteringmodel_gmm_bad.score([parameters_value])
sigma = 0
return ratio, sigma
def selection_r(x_bounds,
x_types,
clusteringmodel_gmm_good,
......@@ -60,6 +64,7 @@ def selection_r(x_bounds,
return outputs
def selection(x_bounds,
x_types,
clusteringmodel_gmm_good,
......@@ -69,13 +74,14 @@ def selection(x_bounds,
'''
Select the lowest mu value
'''
results = lib_acquisition_function.next_hyperparameter_lowest_mu(\
_ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],\
x_bounds, x_types, minimize_starting_points, \
results = lib_acquisition_function.next_hyperparameter_lowest_mu(
_ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],
x_bounds, x_types, minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
return results
def _rand_with_constraints(x_bounds, x_types):
'''
Random generate the variable with constraints
......@@ -96,6 +102,7 @@ def _rand_with_constraints(x_bounds, x_types):
outputs[i] = random.randint(x_bounds[i][0], x_bounds[i][1])
return outputs
def _minimize_constraints_fun_summation(x):
'''
Minimize constraints fun summation
......
......@@ -17,7 +17,9 @@
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
OutlierDectection.py
"""
import os
import sys
......@@ -30,19 +32,21 @@ sys.path.insert(1, os.path.join(sys.path[0], '..'))
def _outlierDetection_threaded(inputs):
'''
"""
Detect the outlier
'''
"""
[samples_idx, samples_x, samples_y_aggregation] = inputs
sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"\
sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"
% (os.path.basename(__file__), samples_idx + 1, len(samples_x)))
outlier = None
# Create a diagnostic regression model which removes the sample that we want to evaluate
diagnostic_regressor_gp = gp_create_model.create_model(\
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
# Create a diagnostic regression model which removes the sample that we
# want to evaluate
diagnostic_regressor_gp = gp_create_model.create_model(
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(samples_x[samples_idx], diagnostic_regressor_gp['model'])
mu, sigma = gp_prediction.predict(
samples_x[samples_idx], diagnostic_regressor_gp['model'])
# 2.33 is the z-score for 98% confidence level
if abs(samples_y_aggregation[samples_idx] - mu) > (2.33 * sigma):
......@@ -52,16 +56,18 @@ def _outlierDetection_threaded(inputs):
"difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)}
return outlier
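A brief standalone sketch of the outlier test used above, with made-up numbers: a sample is flagged when its observed value deviates from the leave-one-out GP prediction by more than 2.33 standard deviations (per the comment above, the z-score for a 98% confidence level).

# Hypothetical leave-one-out prediction for one sample.
observed = 0.91
mu, sigma = 0.75, 0.05

is_outlier = abs(observed - mu) > 2.33 * sigma
difference = abs(observed - mu) - 2.33 * sigma
print(is_outlier, round(difference, 4))   # True 0.0435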
def outlierDetection_threaded(samples_x, samples_y_aggregation):
'''
"""
Use Multi-thread to detect the outlier
'''
"""
outliers = []
threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]\
threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]
for samples_idx in range(0, len(samples_x))]
threads_pool = ThreadPool(min(4, len(threads_inputs)))
threads_results = threads_pool.map(_outlierDetection_threaded, threads_inputs)
threads_results = threads_pool.map(
_outlierDetection_threaded, threads_inputs)
threads_pool.close()
threads_pool.join()
......@@ -69,15 +75,13 @@ def outlierDetection_threaded(samples_x, samples_y_aggregation):
if threads_result is not None:
outliers.append(threads_result)
else:
print("error here.")
print("Error: threads_result is None.")
outliers = outliers if outliers else None
return outliers
def outlierDetection(samples_x, samples_y_aggregation):
'''
TODO
'''
outliers = []
for samples_idx, _ in enumerate(samples_x):
#sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n"
......@@ -92,7 +96,8 @@ def outlierDetection(samples_x, samples_y_aggregation):
outliers.append({"samples_idx": samples_idx,
"expected_mu": mu,
"expected_sigma": sigma,
"difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)})
"difference": \
abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)})
outliers = outliers if outliers else None
return outliers
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
lib_acquisition_function.py
"""
import sys
import numpy
......@@ -33,9 +37,9 @@ def next_hyperparameter_expected_improvement(fun_prediction,
samples_y_aggregation,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Expected Improvement" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -70,6 +74,7 @@ def next_hyperparameter_expected_improvement(fun_prediction,
return outputs
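The exact expression evaluated by ``_expected_improvement`` below falls outside the visible hunk, so as a hedged reference only, the standard Expected Improvement formula for minimization looks like this:

from scipy.stats import norm

def expected_improvement(mu, sigma, loss_optimum):
    # Expected amount by which a point with prediction (mu, sigma) improves
    # on the incumbent best observation loss_optimum.
    if sigma == 0:
        return 0.0
    z = (loss_optimum - mu) / sigma
    return (loss_optimum - mu) * norm.cdf(z) + sigma * norm.pdf(z)

print(expected_improvement(mu=0.4, sigma=0.1, loss_optimum=0.5))   # ~0.108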
def _expected_improvement(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, samples_y_aggregation,
minimize_constraints_fun):
......@@ -77,7 +82,8 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args,
x = lib_data.match_val_type(x, x_bounds, x_types)
expected_improvement = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, sigma = fun_prediction(x, *fun_prediction_args)
loss_optimum = min(samples_y_aggregation)
......@@ -101,9 +107,9 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
x_bounds, x_types,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Lowest Confidence" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -120,10 +126,12 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
x_types,
minimize_constraints_fun))
if (best_acquisition_value) is None or (res.fun < best_acquisition_value):
if (best_acquisition_value) is None or (
res.fun < best_acquisition_value):
res.x = numpy.ndarray.tolist(res.x)
res.x = lib_data.match_val_type(res.x, x_bounds, x_types)
if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(res.x) is True):
best_acquisition_value = res.fun
best_x = res.x
......@@ -134,13 +142,15 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
'expected_sigma': sigma, 'acquisition_func': "lc"}
return outputs
def _lowest_confidence(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun):
# This is only for step-wise optimization
x = lib_data.match_val_type(x, x_bounds, x_types)
ci = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, sigma = fun_prediction(x, *fun_prediction_args)
ci = (sigma * 1.96 * 2) / mu
# We want ci to be as large as possible
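A short standalone sketch (made-up mu and sigma) of the confidence-interval width that ``_lowest_confidence`` scores, matching the expression above; 1.96 is the two-sided 95% z-value.

# Hypothetical GP prediction at a candidate point.
mu, sigma = 2.0, 0.3

ci = (sigma * 1.96 * 2) / mu    # width of the 95% interval, normalized by the mean
print(ci)                       # ~0.588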
......@@ -156,9 +166,9 @@ def next_hyperparameter_lowest_mu(fun_prediction,
x_bounds, x_types,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Lowest Mu" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -169,13 +179,15 @@ def next_hyperparameter_lowest_mu(fun_prediction,
x0=starting_point.reshape(1, -1),
bounds=x_bounds_minmax,
method="L-BFGS-B",
args=(fun_prediction, fun_prediction_args, \
args=(fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun))
if (best_acquisition_value is None) or (res.fun < best_acquisition_value):
if (best_acquisition_value is None) or (
res.fun < best_acquisition_value):
res.x = numpy.ndarray.tolist(res.x)
res.x = lib_data.match_val_type(res.x, x_bounds, x_types)
if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(res.x) is True):
best_acquisition_value = res.fun
best_x = res.x
......@@ -189,14 +201,14 @@ def next_hyperparameter_lowest_mu(fun_prediction,
def _lowest_mu(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun):
'''
"""
Calculate the lowest mu
'''
"""
# This is only for step-wise optimization
x = lib_data.match_val_type(x, x_bounds, x_types)
mu = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, _ = fun_prediction(x, *fun_prediction_args)
return mu
\ No newline at end of file
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
lib_constraint_summation.py
"""
import math
import random
......@@ -39,6 +43,7 @@ def check_feasibility(x_bounds, lowerbound, upperbound):
return (x_bounds_lowerbound <= lowerbound <= x_bounds_upperbound) or \
(x_bounds_lowerbound <= upperbound <= x_bounds_upperbound)
def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
'''
The key idea is that we try to move towards the upperbound by randomly choosing one
......@@ -55,7 +60,8 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
if x_types[i] == "discrete_int":
x_idx_sorted.append([i, len(x_bounds[i])])
elif (x_types[i] == "range_int") or (x_types[i] == "range_continuous"):
x_idx_sorted.append([i, math.floor(x_bounds[i][1] - x_bounds[i][0])])
x_idx_sorted.append(
[i, math.floor(x_bounds[i][1] - x_bounds[i][0])])
x_idx_sorted = sorted(x_idx_sorted, key=itemgetter(1))
for _ in range(max_retries):
......@@ -77,12 +83,13 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
temp.append(j)
# Randomly pick a number from the integer array
if temp:
outputs[x_idx] = temp[random.randint(0, len(temp) - 1)]
outputs[x_idx] = temp[random.randint(
0, len(temp) - 1)]
elif (x_types[x_idx] == "range_int") or \
(x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(x_bounds[x_idx][0],
min(x_bounds[x_idx][-1], budget_max))
outputs[x_idx] = random.randint(
x_bounds[x_idx][0], min(x_bounds[x_idx][-1], budget_max))
else:
# The last x that we need to assign a random number
......@@ -91,26 +98,28 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
# This check:
# is our smallest possible value going to overflow the available budget space,
# and is our largest possible value going to underflow the lower bound
# and is our largest possible value going to underflow the
# lower bound
if (x_bounds[x_idx][0] <= budget_max) and \
(x_bounds[x_idx][-1] >= randint_lowerbound):
if x_types[x_idx] == "discrete_int":
temp = []
for j in x_bounds[x_idx]:
# if (j <= budget_max) and (j >= randint_lowerbound):
# if (j <= budget_max) and (j >=
# randint_lowerbound):
if randint_lowerbound <= j <= budget_max:
temp.append(j)
if temp:
outputs[x_idx] = temp[random.randint(0, len(temp) - 1)]
outputs[x_idx] = temp[random.randint(
0, len(temp) - 1)]
elif (x_types[x_idx] == "range_int") or \
(x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(randint_lowerbound,
min(x_bounds[x_idx][1], budget_max))
outputs[x_idx] = random.randint(
randint_lowerbound, min(
x_bounds[x_idx][1], budget_max))
if outputs[x_idx] is None:
break
else:
budget_allocated += outputs[x_idx]
if None not in outputs:
break
return outputs
\ No newline at end of file
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import math
import random
......@@ -56,7 +57,7 @@ def rand(x_bounds, x_types):
temp = x_bounds[i][random.randint(0, len(x_bounds[i]) - 1)]
outputs.append(temp)
elif x_types[i] == "range_int":
temp = random.randint(x_bounds[i][0], x_bounds[i][1] -1)
temp = random.randint(x_bounds[i][0], x_bounds[i][1] - 1)
outputs.append(temp)
elif x_types[i] == "range_continuous":
temp = random.uniform(x_bounds[i][0], x_bounds[i][1])
......
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
metis_tuner.py
"""
import copy
import logging
......@@ -51,10 +55,45 @@ class MetisTuner(Tuner):
More information about the algorithm can be found here:
https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/
Attributes
----------
optimize_mode : str
optimize_mode is a string that includes two modes: "maximize" and "minimize"
no_resampling : bool
True or False.
Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free,
then you do not need re-sampling.
no_candidates : bool
True or False.
Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks,
Metis can skip this step.
selection_num_starting_points : int
How many times Metis should try to find the global optimum in the search space.
The higher the number, the longer it takes to output the solution.
cold_start_num : int
Metis needs some trial results to warm up (cold start).
When the number of trial results is less than
cold_start_num, Metis will randomly sample hyper-parameters for trials.
exploration_probability : float
The probability that Metis selects parameters by exploration instead of exploitation.
"""
def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=False,
selection_num_starting_points=600, cold_start_num=10, exploration_probability=0.9):
def __init__(
self,
optimize_mode="maximize",
no_resampling=True,
no_candidates=False,
selection_num_starting_points=600,
cold_start_num=10,
exploration_probability=0.9):
"""
Parameters
----------
......@@ -62,23 +101,34 @@ class MetisTuner(Tuner):
optimize_mode is a string that includes two modes: "maximize" and "minimize"
no_resampling : bool
True or False. Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free, then you do not need re-sampling.
no_candidates: bool
True or False. Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks, Metis can skip this step.
selection_num_starting_points: int
how many times Metis should try to find the global optimal in the search space?
True or False.
Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free,
then you do not need re-sampling.
no_candidates : bool
True or False.
Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks,
Metis can skip this step.
selection_num_starting_points : int
How many times Metis should try to find the global optimum in the search space.
The higher the number, the longer it takes to output the solution.
cold_start_num: int
Metis need some trial result to get cold start. when the number of trial result is less than
cold_start_num : int
Metis needs some trial results to warm up (cold start).
When the number of trial results is less than
cold_start_num, Metis will randomly sample hyper-parameters for trials.
exploration_probability: float
exploration_probability : float
The probability that Metis selects parameters by exploration instead of exploitation.
x_bounds : list
The constraints of the parameters.
x_types : list
The type of parameters.
"""
self.samples_x = []
......@@ -101,7 +151,8 @@ class MetisTuner(Tuner):
def update_search_space(self, search_space):
"""Update the self.x_bounds and self.x_types by the search_space.json
"""
Update the self.x_bounds and self.x_types by the search_space.json
Parameters
----------
......@@ -120,12 +171,20 @@ class MetisTuner(Tuner):
key_range = search_space[key]['_value']
idx = self.key_order.index(key)
if key_type == 'quniform':
if key_range[2] == 1 and key_range[0].is_integer() and key_range[1].is_integer():
self.x_bounds[idx] = [key_range[0], key_range[1]+1]
if key_range[2] == 1 and key_range[0].is_integer(
) and key_range[1].is_integer():
self.x_bounds[idx] = [key_range[0], key_range[1] + 1]
self.x_types[idx] = 'range_int'
else:
low, high, q = key_range
bounds = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
bounds = np.clip(
np.arange(
np.round(
low / q),
np.round(
high / q) + 1) * q,
low,
high)
self.x_bounds[idx] = bounds
self.x_types[idx] = 'discrete_int'
elif key_type == 'randint':
......@@ -139,22 +198,28 @@ class MetisTuner(Tuner):
for key_value in key_range:
if not isinstance(key_value, (int, float)):
raise RuntimeError("Metis Tuner only support numerical choice.")
raise RuntimeError(
"Metis Tuner only support numerical choice.")
self.x_types[idx] = 'discrete_int'
else:
logger.info("Metis Tuner doesn't support this kind of variable: %s", key_type)
raise RuntimeError("Metis Tuner doesn't support this kind of variable: " + str(key_type))
logger.info(
"Metis Tuner doesn't support this kind of variable: %s",
str(key_type))
raise RuntimeError(
"Metis Tuner doesn't support this kind of variable: %s" %
str(key_type))
else:
logger.info("The format of search space is not a dict.")
raise RuntimeError("The format of search space is not a dict.")
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
self.selection_num_starting_points)
self.minimize_starting_points = _rand_init(
self.x_bounds, self.x_types, self.selection_num_starting_points)
def _pack_output(self, init_parameter):
"""Pack the output
"""
Pack the output
Parameters
----------
......@@ -167,14 +232,18 @@ class MetisTuner(Tuner):
output = {}
for i, param in enumerate(init_parameter):
output[self.key_order[i]] = param
return output
def generate_parameters(self, parameter_id, **kwargs):
"""Generate next parameter for trial
"""
Generate next parameter for trial
If the number of trial result is lower than cold start number,
metis will first random generate some parameters.
Otherwise, metis will choose the parameters by the Gussian Process Model and the Gussian Mixture Model.
Otherwise, Metis will choose the parameters by
the Gaussian Process Model and the Gaussian Mixture Model.
Parameters
----------
......@@ -188,26 +257,34 @@ class MetisTuner(Tuner):
init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
results = self._pack_output(init_parameter)
else:
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
self.selection_num_starting_points)
results = self._selection(self.samples_x, self.samples_y_aggregation, self.samples_y,
self.x_bounds, self.x_types,
threshold_samplessize_resampling=(None if self.no_resampling is True else 50),
self.minimize_starting_points = _rand_init(
self.x_bounds, self.x_types, self.selection_num_starting_points)
results = self._selection(
self.samples_x,
self.samples_y_aggregation,
self.samples_y,
self.x_bounds,
self.x_types,
threshold_samplessize_resampling=(
None if self.no_resampling is True else 50),
no_candidates=self.no_candidates,
minimize_starting_points=self.minimize_starting_points,
minimize_constraints_fun=self.minimize_constraints_fun)
logger.info("Generate paramageters:\n%s", results)
logger.info("Generate paramageters: \n%s", str(results))
return results
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
"""Tuner receive result from trial.
"""
Tuner receive result from trial.
Parameters
----------
parameter_id : int
The id of parameters, generated by nni manager.
parameters : dict
A group of parameters that trial has tried.
value : dict/float
If value is a dict, it should have a "default" key.
"""
......@@ -216,8 +293,8 @@ class MetisTuner(Tuner):
value = -value
logger.info("Received trial result.")
logger.info("value is :%s", value)
logger.info("parameter is : %s", parameters)
logger.info("value is : %s", str(value))
logger.info("parameter is : %s", str(parameters))
# parse parameter to sample_x
sample_x = [0 for i in range(len(self.key_order))]
......@@ -244,11 +321,19 @@ class MetisTuner(Tuner):
self.samples_y_aggregation.append([value])
def _selection(self, samples_x, samples_y_aggregation, samples_y,
x_bounds, x_types, max_resampling_per_x=3,
def _selection(
self,
samples_x,
samples_y_aggregation,
samples_y,
x_bounds,
x_types,
max_resampling_per_x=3,
threshold_samplessize_exploitation=12,
threshold_samplessize_resampling=50, no_candidates=False,
minimize_starting_points=None, minimize_constraints_fun=None):
threshold_samplessize_resampling=50,
no_candidates=False,
minimize_starting_points=None,
minimize_constraints_fun=None):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
......@@ -259,7 +344,8 @@ class MetisTuner(Tuner):
samples_size_unique = len(samples_y)
# ===== STEP 1: Compute the current optimum =====
gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation)
gp_model = gp_create_model.create_model(
samples_x, samples_y_aggregation)
lm_current = gp_selection.selection(
"lm",
samples_y_aggregation,
......@@ -278,7 +364,7 @@ class MetisTuner(Tuner):
})
if no_candidates is False:
# ===== STEP 2: Get recommended configurations for exploration =====
# ===== STEP 2: Get recommended configurations for exploration ====
results_exploration = gp_selection.selection(
"lc",
samples_y_aggregation,
......@@ -303,21 +389,27 @@ class MetisTuner(Tuner):
else:
logger.info("DEBUG: No suitable exploration candidates were")
# ===== STEP 3: Get recommended configurations for exploitation =====
# ===== STEP 3: Get recommended configurations for exploitation ===
if samples_size_all >= threshold_samplessize_exploitation:
logger.info("Getting candidates for exploitation...\n")
try:
gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation)
gmm = gmm_create_model.create_model(
samples_x, samples_y_aggregation)
if ("discrete_int" in x_types) or ("range_int" in x_types):
results_exploitation = gmm_selection.selection(x_bounds, x_types,
results_exploitation = gmm_selection.selection(
x_bounds,
x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
else:
# If all parameters are of "range_continuous", let's use GMM to generate random starting points
results_exploitation = gmm_selection.selection_r(x_bounds, x_types,
# If all parameters are of "range_continuous",
# let's use GMM to generate random starting points
results_exploitation = gmm_selection.selection_r(
x_bounds,
x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
num_starting_points=self.selection_num_starting_points,
......@@ -335,24 +427,30 @@ class MetisTuner(Tuner):
}
candidates.append(temp_candidate)
logger.info("DEBUG: 1 exploitation_gmm candidate selected\n")
logger.info(
"DEBUG: 1 exploitation_gmm candidate selected\n")
logger.info(temp_candidate)
else:
logger.info("DEBUG: No suitable exploitation_gmm candidates were found\n")
logger.info(
"DEBUG: No suitable exploitation_gmm candidates were found\n")
except ValueError as exception:
# The exception: ValueError: Fitting the mixture model failed
# because some components have ill-defined empirical covariance
# (for instance caused by singleton or collapsed samples).
# Try to decrease the number of components, or increase reg_covar.
logger.info("DEBUG: No suitable exploitation_gmm candidates were found due to exception.")
# Try to decrease the number of components, or increase
# reg_covar.
logger.info(
"DEBUG: No suitable exploitation_gmm \
candidates were found due to exception.")
logger.info(exception)
# ===== STEP 4: Get a list of outliers =====
if (threshold_samplessize_resampling is not None) and \
(samples_size_unique >= threshold_samplessize_resampling):
logger.info("Getting candidates for re-sampling...\n")
results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation)
results_outliers = gp_outlier_detection.outlierDetection_threaded(
samples_x, samples_y_aggregation)
if results_outliers is not None:
for results_outlier in results_outliers: # pylint: disable=not-an-iterable
......@@ -365,11 +463,13 @@ class MetisTuner(Tuner):
logger.info("DEBUG: %d re-sampling candidates selected\n")
logger.info(temp_candidate)
else:
logger.info("DEBUG: No suitable resampling candidates were found\n")
logger.info(
"DEBUG: No suitable resampling candidates were found\n")
if candidates:
# ===== STEP 5: Compute the information gain of each candidate towards the optimum =====
logger.info("Evaluating information gain of %d candidates...\n")
# ===== STEP 5: Compute the information gain of each candidate
logger.info(
"Evaluating information gain of %d candidates...\n")
next_improvement = 0
threads_inputs = [[
......@@ -377,36 +477,45 @@ class MetisTuner(Tuner):
minimize_constraints_fun, minimize_starting_points
] for candidate in candidates]
threads_pool = ThreadPool(4)
# Evaluate what would happen if we actually sample each candidate
threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs)
# Evaluate what would happen if we actually sample each
# candidate
threads_results = threads_pool.map(
_calculate_lowest_mu_threaded, threads_inputs)
threads_pool.close()
threads_pool.join()
for threads_result in threads_results:
if threads_result['expected_lowest_mu'] < lm_current['expected_mu']:
# Information gain
temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu']
temp_improvement = threads_result['expected_lowest_mu'] - \
lm_current['expected_mu']
if next_improvement > temp_improvement:
next_improvement = temp_improvement
next_candidate = threads_result['candidate']
else:
# ===== STEP 6: If we have no candidates, randomly pick one =====
# ===== STEP 6: If we have no candidates, randomly pick one ===
logger.info(
"DEBUG: No candidates from exploration, exploitation,\
and resampling. We will pick a random candidate for next_candidate\n"
)
next_candidate = _rand_with_constraints(x_bounds, x_types) \
if minimize_starting_points is None else minimize_starting_points[0]
next_candidate = lib_data.match_val_type(next_candidate, x_bounds, x_types)
expected_mu, expected_sigma = gp_prediction.predict(next_candidate, gp_model['model'])
next_candidate = {'hyperparameter': next_candidate, 'reason': "random",
'expected_mu': expected_mu, 'expected_sigma': expected_sigma}
# ===== STEP 7 =====
# If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold,
# take next config as exploration step
next_candidate = _rand_with_constraints(
x_bounds,
x_types) if minimize_starting_points is None else minimize_starting_points[0]
next_candidate = lib_data.match_val_type(
next_candidate, x_bounds, x_types)
expected_mu, expected_sigma = gp_prediction.predict(
next_candidate, gp_model['model'])
next_candidate = {
'hyperparameter': next_candidate,
'reason': "random",
'expected_mu': expected_mu,
'expected_sigma': expected_sigma}
# STEP 7: If current optimal hyperparameter occurs in the history
# or exploration probability is less than the threshold, take next
# config as exploration step
outputs = self._pack_output(lm_current['hyperparameter'])
ap = random.uniform(0, 1)
if outputs in self.total_data or ap <= self.exploration_probability:
......@@ -419,11 +528,13 @@ class MetisTuner(Tuner):
return outputs
def import_data(self, data):
"""Import additional data for tuning
"""
Import additional data for tuning
Parameters
----------
data:
a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
data : a list of dict
each of which has at least two keys: 'parameter' and 'value'.
"""
_completed_num = 0
for trial_info in data:
......@@ -437,18 +548,26 @@ class MetisTuner(Tuner):
logger.info("Useless trial data, value is %s, skip this trial data.", _value)
continue
self.supplement_data_num += 1
_parameter_id = '_'.join(["ImportData", str(self.supplement_data_num)])
_parameter_id = '_'.join(
["ImportData", str(self.supplement_data_num)])
self.total_data.append(_params)
self.receive_trial_result(parameter_id=_parameter_id, parameters=_params, value=_value)
self.receive_trial_result(
parameter_id=_parameter_id,
parameters=_params,
value=_value)
logger.info("Successfully import data to metis tuner.")
def _rand_with_constraints(x_bounds, x_types):
outputs = None
x_bounds_withconstraints = [x_bounds[i] for i in CONSTRAINT_PARAMS_IDX]
x_types_withconstraints = [x_types[i] for i in CONSTRAINT_PARAMS_IDX]
x_val_withconstraints = lib_constraint_summation.rand(x_bounds_withconstraints,\
x_types_withconstraints, CONSTRAINT_LOWERBOUND, CONSTRAINT_UPPERBOUND)
x_val_withconstraints = lib_constraint_summation.rand(
x_bounds_withconstraints,
x_types_withconstraints,
CONSTRAINT_LOWERBOUND,
CONSTRAINT_UPPERBOUND)
if not x_val_withconstraints:
outputs = [None] * len(x_bounds)
......@@ -462,12 +581,18 @@ def _rand_with_constraints(x_bounds, x_types):
def _calculate_lowest_mu_threaded(inputs):
[candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points] = inputs
[candidate, samples_x, samples_y, x_bounds, x_types,
minimize_constraints_fun, minimize_starting_points] = inputs
outputs = {"candidate": candidate, "expected_lowest_mu": None}
for expected_mu in [candidate['expected_mu'] + 1.96 * candidate['expected_sigma'],
candidate['expected_mu'] - 1.96 * candidate['expected_sigma']]:
for expected_mu in [
candidate['expected_mu'] +
1.96 *
candidate['expected_sigma'],
candidate['expected_mu'] -
1.96 *
candidate['expected_sigma']]:
temp_samples_x = copy.deepcopy(samples_x)
temp_samples_y = copy.deepcopy(samples_y)
......@@ -480,8 +605,10 @@ def _calculate_lowest_mu_threaded(inputs):
temp_samples_y.append([expected_mu])
# Aggregates multiple observation of the sample sampling points
temp_y_aggregation = [statistics.median(temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(temp_samples_x, temp_y_aggregation)
temp_y_aggregation = [statistics.median(
temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(
temp_samples_x, temp_y_aggregation)
temp_results = gp_selection.selection(
"lm",
temp_y_aggregation,
......@@ -491,7 +618,8 @@ def _calculate_lowest_mu_threaded(inputs):
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
if outputs["expected_lowest_mu"] is None or outputs["expected_lowest_mu"] > temp_results['expected_mu']:
if outputs["expected_lowest_mu"] is None \
or outputs["expected_lowest_mu"] > temp_results['expected_mu']:
outputs["expected_lowest_mu"] = temp_results['expected_mu']
return outputs
......@@ -510,18 +638,19 @@ def _rand_init(x_bounds, x_types, selection_num_starting_points):
'''
Random sample some init seed within bounds.
'''
return [lib_data.rand(x_bounds, x_types) for i \
return [lib_data.rand(x_bounds, x_types) for i
in range(0, selection_num_starting_points)]
def get_median(temp_list):
"""Return median
"""
Return median
"""
num = len(temp_list)
temp_list.sort()
print(temp_list)
if num % 2 == 0:
median = (temp_list[int(num/2)] + temp_list[int(num/2) - 1]) / 2
median = (temp_list[int(num / 2)] + temp_list[int(num / 2) - 1]) / 2
else:
median = temp_list[int(num/2)]
median = temp_list[int(num / 2)]
return median
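For reference, two hedged usage examples of ``get_median`` as defined above (note that it sorts its argument in place and also prints the sorted list via the debug print):

assert get_median([3, 1, 2]) == 2
assert get_median([4.0, 1.0, 3.0, 2.0]) == 2.5   # mean of the two middle values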
......@@ -38,7 +38,7 @@ from nni.networkmorphism_tuner.layers import is_layer
def layer_distance(a, b):
"""The distance between two layers."""
# pylint: disable=unidiomatic-typecheck
if type(a) != type(b):
if not isinstance(a, type(b)):
return 1.0
if is_layer(a, "Conv"):
att_diff = [
......@@ -96,7 +96,8 @@ def skip_connection_distance(a, b):
return 1.0
len_a = abs(a[1] - a[0])
len_b = abs(b[1] - b[0])
return (abs(a[0] - b[0]) + abs(len_a - len_b)) / (max(a[0], b[0]) + max(len_a, len_b))
return (abs(a[0] - b[0]) + abs(len_a - len_b)) / \
(max(a[0], b[0]) + max(len_a, len_b))
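A small worked example (made-up skip connections, described only by their start/end layer indices) of the distance formula above:

# Hypothetical skip connections a and b, as (start, end) index pairs.
a, b = (2, 5), (3, 5)
len_a, len_b = abs(a[1] - a[0]), abs(b[1] - b[0])    # lengths 3 and 2
d = (abs(a[0] - b[0]) + abs(len_a - len_b)) / (max(a[0], b[0]) + max(len_a, len_b))
print(d)   # (1 + 1) / (3 + 3) = 0.333...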
def skip_connections_distance(list_a, list_b):
......@@ -161,7 +162,8 @@ class IncrementalGaussianProcess:
def incremental_fit(self, train_x, train_y):
""" Incrementally fit the regressor. """
if not self._first_fitted:
raise ValueError("The first_fit function needs to be called first.")
raise ValueError(
"The first_fit function needs to be called first.")
train_x, train_y = np.array(train_x), np.array(train_y)
......@@ -174,7 +176,7 @@ class IncrementalGaussianProcess:
temp_distance_matrix = np.concatenate((up_k, down_k), axis=0)
k_matrix = bourgain_embedding_matrix(temp_distance_matrix)
diagonal = np.diag_indices_from(k_matrix)
diagonal = (diagonal[0][-len(train_x) :], diagonal[1][-len(train_x) :])
diagonal = (diagonal[0][-len(train_x):], diagonal[1][-len(train_x):])
k_matrix[diagonal] += self.alpha
try:
......@@ -186,7 +188,8 @@ class IncrementalGaussianProcess:
self._y = np.concatenate((self._y, train_y), axis=0)
self._distance_matrix = temp_distance_matrix
self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3
self._alpha_vector = cho_solve(
(self._l_matrix, True), self._y) # Line 3
return self
......@@ -209,7 +212,8 @@ class IncrementalGaussianProcess:
self._l_matrix = cholesky(k_matrix, lower=True) # Line 2
self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3
self._alpha_vector = cho_solve(
(self._l_matrix, True), self._y) # Line 3
self._first_fitted = True
return self
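For context on the ``cholesky``/``cho_solve`` pair above, a hedged standalone sketch with a toy kernel matrix: the Cholesky factor is used to solve K * alpha = y without explicitly inverting K.

import numpy as np
from scipy.linalg import cho_solve, cholesky

k_matrix = np.array([[2.0, 0.5],
                     [0.5, 1.0]])               # toy positive-definite kernel matrix
y = np.array([1.0, 2.0])

l_matrix = cholesky(k_matrix, lower=True)        # mirrors "Line 2" above
alpha_vector = cho_solve((l_matrix, True), y)    # mirrors "Line 3": solves k_matrix @ alpha = y
print(np.allclose(k_matrix @ alpha_vector, y))   # True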
......@@ -227,7 +231,9 @@ class IncrementalGaussianProcess:
# compute inverse K_inv of K based on its Cholesky
# decomposition L and its inverse L_inv
l_inv = solve_triangular(self._l_matrix.T, np.eye(self._l_matrix.shape[0]))
l_inv = solve_triangular(
self._l_matrix.T, np.eye(
self._l_matrix.shape[0]))
k_inv = l_inv.dot(l_inv.T)
# Compute variance of predictive distribution
y_var = np.ones(len(train_x), dtype=np.float)
......@@ -378,7 +384,11 @@ class BayesianOptimizer:
continue
temp_acq_value = self.acq(temp_graph)
pq.put(elem_class(temp_acq_value, elem.father_id, temp_graph))
pq.put(
elem_class(
temp_acq_value,
elem.father_id,
temp_graph))
descriptors.append(temp_graph.extract_descriptor())
if self._accept_new_acq_value(opt_acq, temp_acq_value):
opt_acq = temp_acq_value
......
......@@ -249,7 +249,8 @@ class Graph:
self.reverse_adj_list[v_id].remove(edge_tuple)
break
self.reverse_adj_list[new_v_id].append((u_id, layer_id))
for index, value in enumerate(self.layer_id_to_output_node_ids[layer_id]):
for index, value in enumerate(
self.layer_id_to_output_node_ids[layer_id]):
if value == v_id:
self.layer_id_to_output_node_ids[layer_id][index] = new_v_id
break
......@@ -350,7 +351,8 @@ class Graph:
self._replace_layer(layer_id, new_layer)
elif is_layer(layer, "BatchNormalization"):
new_layer = wider_bn(layer, start_dim, total_dim, n_add, self.weighted)
new_layer = wider_bn(
layer, start_dim, total_dim, n_add, self.weighted)
self._replace_layer(layer_id, new_layer)
self._search(v, start_dim, total_dim, n_add)
......@@ -405,7 +407,8 @@ class Graph:
target_id: A convolutional layer ID. The new block should be inserted after the block.
new_layer: An instance of StubLayer subclasses.
"""
self.operation_history.append(("to_deeper_model", target_id, new_layer))
self.operation_history.append(
("to_deeper_model", target_id, new_layer))
input_id = self.layer_id_to_input_node_ids[target_id][0]
output_id = self.layer_id_to_output_node_ids[target_id][0]
if self.weighted:
......@@ -478,14 +481,20 @@ class Graph:
pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0]
end_node_id = self.layer_id_to_output_node_ids[end_id][0]
skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id)
skip_output_id = self._insert_pooling_layer_chain(
start_node_id, end_node_id)
# Add the conv layer
new_conv_layer = get_conv_class(self.n_dim)(filters_start, filters_end, 1)
new_conv_layer = get_conv_class(
self.n_dim)(
filters_start,
filters_end,
1)
skip_output_id = self.add_layer(new_conv_layer, skip_output_id)
# Add the add layer.
add_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id]))
add_input_node_id = self._add_node(
deepcopy(self.node_list[end_node_id]))
add_layer = StubAdd()
self._redirect_edge(pre_end_node_id, end_node_id, add_input_node_id)
......@@ -504,7 +513,8 @@ class Graph:
weights = np.zeros((filters_end, filters_start) + filter_shape)
bias = np.zeros(filters_end)
new_conv_layer.set_weights(
(add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weights, np.array([0, 1])), add_noise(
bias, np.array([0, 1])))
)
def to_concat_skip_model(self, start_id, end_id):
......@@ -513,7 +523,8 @@ class Graph:
start_id: The convolutional layer ID, after which to start the skip-connection.
end_id: The convolutional layer ID, after which to end the skip-connection.
"""
self.operation_history.append(("to_concat_skip_model", start_id, end_id))
self.operation_history.append(
("to_concat_skip_model", start_id, end_id))
filters_end = self.layer_list[end_id].output.shape[-1]
filters_start = self.layer_list[start_id].output.shape[-1]
start_node_id = self.layer_id_to_output_node_ids[start_id][0]
......@@ -521,9 +532,11 @@ class Graph:
pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0]
end_node_id = self.layer_id_to_output_node_ids[end_id][0]
skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id)
skip_output_id = self._insert_pooling_layer_chain(
start_node_id, end_node_id)
concat_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id]))
concat_input_node_id = self._add_node(
deepcopy(self.node_list[end_node_id]))
self._redirect_edge(pre_end_node_id, end_node_id, concat_input_node_id)
concat_layer = StubConcatenate()
......@@ -532,7 +545,10 @@ class Graph:
self.node_list[skip_output_id],
]
concat_output_node_id = self._add_node(Node(concat_layer.output_shape))
self._add_edge(concat_layer, concat_input_node_id, concat_output_node_id)
self._add_edge(
concat_layer,
concat_input_node_id,
concat_output_node_id)
self._add_edge(concat_layer, skip_output_id, concat_output_node_id)
concat_layer.output = self.node_list[concat_output_node_id]
self.node_list[concat_output_node_id].shape = concat_layer.output_shape
......@@ -559,7 +575,8 @@ class Graph:
)
bias = np.zeros(filters_end)
new_conv_layer.set_weights(
(add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weights, np.array([0, 1])), add_noise(
bias, np.array([0, 1])))
)
def _insert_pooling_layer_chain(self, start_node_id, end_node_id):
......@@ -568,7 +585,8 @@ class Graph:
new_layer = deepcopy(layer)
if is_layer(new_layer, "Conv"):
filters = self.node_list[start_node_id].shape[-1]
new_layer = get_conv_class(self.n_dim)(filters, filters, 1, layer.stride)
new_layer = get_conv_class(self.n_dim)(
filters, filters, 1, layer.stride)
if self.weighted:
init_conv_weight(new_layer)
else:
......@@ -601,8 +619,10 @@ class Graph:
temp_v = v
temp_layer_id = layer_id
skip_type = None
while not (temp_v in index_in_main_chain and temp_u in index_in_main_chain):
if is_layer(self.layer_list[temp_layer_id], "Concatenate"):
while not (
temp_v in index_in_main_chain and temp_u in index_in_main_chain):
if is_layer(
self.layer_list[temp_layer_id], "Concatenate"):
skip_type = NetworkDescriptor.CONCAT_CONNECT
if is_layer(self.layer_list[temp_layer_id], "Add"):
skip_type = NetworkDescriptor.ADD_CONNECT
......@@ -711,7 +731,8 @@ class Graph:
def wide_layer_ids(self):
return (
self._conv_layer_ids_in_order()[:-1] + self._dense_layer_ids_in_order()[:-1]
self._conv_layer_ids_in_order(
)[:-1] + self._dense_layer_ids_in_order()[:-1]
)
def skip_connection_layer_ids(self):
......@@ -810,7 +831,8 @@ class KerasModel:
topo_node_list = self.graph.topological_order
output_id = topo_node_list[-1]
input_id = topo_node_list[0]
input_tensor = keras.layers.Input(shape=graph.node_list[input_id].shape)
input_tensor = keras.layers.Input(
shape=graph.node_list[input_id].shape)
node_list = deepcopy(self.graph.node_list)
node_list[input_id] = input_tensor
......@@ -838,7 +860,8 @@ class KerasModel:
output_tensor = keras.layers.Activation("softmax", name="activation_add")(
output_tensor
)
self.model = keras.models.Model(inputs=input_tensor, outputs=output_tensor)
self.model = keras.models.Model(
inputs=input_tensor, outputs=output_tensor)
if graph.weighted:
for index, layer in enumerate(self.layers):
......@@ -892,7 +915,8 @@ class JSONModel:
for layer_id, item in enumerate(graph.layer_list):
layer = graph.layer_list[layer_id]
layer_information = layer_description_extractor(layer, graph.node_to_id)
layer_information = layer_description_extractor(
layer, graph.node_to_id)
layer_list.append((layer_id, layer_information))
data["node_list"] = node_list
......@@ -938,7 +962,8 @@ def json_to_graph(json_model: str):
graph.input_shape = input_shape
vis = json_model["vis"]
graph.vis = {tuple(item): True for item in vis} if vis is not None else None
graph.vis = {
tuple(item): True for item in vis} if vis is not None else None
graph.weighted = json_model["weighted"]
layer_id_to_input_node_ids = json_model["layer_id_to_input_node_ids"]
graph.layer_id_to_input_node_ids = {
......
......@@ -40,7 +40,8 @@ def to_wider_graph(graph):
'''
weighted_layer_ids = graph.wide_layer_ids()
weighted_layer_ids = list(
filter(lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids)
filter(
lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids)
)
wider_layers = sample(weighted_layer_ids, 1)
......@@ -58,12 +59,14 @@ def to_wider_graph(graph):
def to_skip_connection_graph(graph):
''' skip connection graph
'''
# The last conv layer cannot be widen since wider operator cannot be done over the two sides of flatten.
# The last conv layer cannot be widened since the wider operator cannot be
# applied across the two sides of flatten.
weighted_layer_ids = graph.skip_connection_layer_ids()
valid_connection = []
for skip_type in sorted([NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]):
for skip_type in sorted(
[NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]):
for index_a in range(len(weighted_layer_ids)):
for index_b in range(len(weighted_layer_ids))[index_a + 1 :]:
for index_b in range(len(weighted_layer_ids))[index_a + 1:]:
valid_connection.append((index_a, index_b, skip_type))
if not valid_connection:
......@@ -84,9 +87,14 @@ def create_new_layer(layer, n_dim):
input_shape = layer.output.shape
dense_deeper_classes = [StubDense, get_dropout_class(n_dim), StubReLU]
conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim), StubReLU]
conv_deeper_classes = [
get_conv_class(n_dim),
get_batch_norm_class(n_dim),
StubReLU]
if is_layer(layer, "ReLU"):
conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim)]
conv_deeper_classes = [
get_conv_class(n_dim),
get_batch_norm_class(n_dim)]
dense_deeper_classes = [StubDense, get_dropout_class(n_dim)]
elif is_layer(layer, "Dropout"):
dense_deeper_classes = [StubDense, StubReLU]
......
......@@ -52,7 +52,8 @@ def deeper_conv_block(conv_layer, kernel_size, weighted=True):
if weighted:
new_conv_layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
new_weights = [
add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])),
......@@ -74,7 +75,8 @@ def dense_to_deeper_block(dense_layer, weighted=True):
new_dense_layer = StubDense(units, units)
if weighted:
new_dense_layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
return [StubReLU(), new_dense_layer]
......@@ -97,8 +99,11 @@ def wider_pre_dense(layer, n_add, weighted=True):
teacher_index = rand[i]
new_weight = teacher_w[teacher_index, :]
new_weight = new_weight[np.newaxis, :]
student_w = np.concatenate((student_w, add_noise(new_weight, student_w)), axis=0)
student_b = np.append(student_b, add_noise(teacher_b[teacher_index], student_b))
student_w = np.concatenate(
(student_w, add_noise(new_weight, student_w)), axis=0)
student_b = np.append(
student_b, add_noise(
teacher_b[teacher_index], student_b))
new_pre_layer = StubDense(layer.input_units, n_units2 + n_add)
new_pre_layer.set_weights((student_w, student_b))
......@@ -209,7 +214,7 @@ def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True):
student_w[:, : start_dim * n_units_each_channel],
add_noise(new_weight, student_w),
student_w[
:, start_dim * n_units_each_channel : total_dim * n_units_each_channel
:, start_dim * n_units_each_channel: total_dim * n_units_each_channel
],
),
axis=1,
......@@ -225,7 +230,8 @@ def add_noise(weights, other_weights):
'''
w_range = np.ptp(other_weights.flatten())
noise_range = NOISE_RATIO * w_range
noise = np.random.uniform(-noise_range / 2.0, noise_range / 2.0, weights.shape)
noise = np.random.uniform(-noise_range / 2.0,
noise_range / 2.0, weights.shape)
return np.add(noise, weights)
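For reference, the re-wrapped call above does not change behaviour: the noise amplitude is a fixed fraction of the peak-to-peak range of the reference tensor. A minimal, self-contained sketch of that perturbation (NOISE_RATIO is assumed to be 1e-4 here purely for illustration; the module takes the real constant from its utils):
import numpy as np

NOISE_RATIO = 1e-4  # assumption for illustration only; the module defines the real value

def add_noise(weights, other_weights):
    # The peak-to-peak range of the reference weights sets the noise scale.
    w_range = np.ptp(other_weights.flatten())
    noise_range = NOISE_RATIO * w_range
    noise = np.random.uniform(-noise_range / 2.0, noise_range / 2.0, weights.shape)
    return np.add(noise, weights)

w = np.zeros((2, 3), dtype=np.float32)
perturbed = add_noise(w, np.array([0, 1]))          # reference range is 1
assert np.all(np.abs(perturbed) <= NOISE_RATIO / 2)  # noise stays within half the range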
......@@ -236,7 +242,8 @@ def init_dense_weight(layer):
weight = np.eye(units)
bias = np.zeros(units)
layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
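The identity/zero initialisation above is what makes the morphism function-preserving: the inserted dense layer starts out as (approximately) the identity map, so the child network initially computes the same function as its parent. A small numeric check of that intuition (whether the stub applies x @ W or x @ W.T is an assumption here, but with an identity matrix both conventions agree):
import numpy as np

units = 4
weight = np.eye(units)    # identity weight matrix
bias = np.zeros(units)    # zero bias
x = np.random.rand(units)
assert np.allclose(x @ weight + bias, x)  # the new layer passes inputs through unchanged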
......@@ -256,7 +263,8 @@ def init_conv_weight(layer):
bias = np.zeros(n_filters)
layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
......
......@@ -28,8 +28,10 @@ from nni.networkmorphism_tuner.utils import Constant
class AvgPool(nn.Module):
'''AvgPool Module.
'''
"""
AvgPool Module.
"""
def __init__(self):
super().__init__()
......@@ -39,8 +41,10 @@ class AvgPool(nn.Module):
class GlobalAvgPool1d(AvgPool):
'''GlobalAvgPool1d Module.
'''
"""
GlobalAvgPool1d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool1d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -48,8 +52,10 @@ class GlobalAvgPool1d(AvgPool):
class GlobalAvgPool2d(AvgPool):
'''GlobalAvgPool2d Module.
'''
"""
GlobalAvgPool2d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool2d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -57,8 +63,10 @@ class GlobalAvgPool2d(AvgPool):
class GlobalAvgPool3d(AvgPool):
'''GlobalAvgPool3d Module.
'''
"""
GlobalAvgPool3d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool3d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -66,70 +74,86 @@ class GlobalAvgPool3d(AvgPool):
class StubLayer:
'''StubLayer Module. Base Module.
'''
"""
StubLayer Module. Base Module.
"""
def __init__(self, input_node=None, output_node=None):
self.input = input_node
self.output = output_node
self.weights = None
def build(self, shape):
'''build shape.
'''
"""
build shape.
"""
def set_weights(self, weights):
'''set weights.
'''
"""
set weights.
"""
self.weights = weights
def import_weights(self, torch_layer):
'''import weights.
'''
"""
import weights.
"""
def import_weights_keras(self, keras_layer):
'''import weights from keras layer.
'''
"""
import weights from keras layer.
"""
def export_weights(self, torch_layer):
'''export weights.
'''
"""
export weights.
"""
def export_weights_keras(self, keras_layer):
'''export weights to keras layer.
'''
"""
export weights to keras layer.
"""
def get_weights(self):
'''get weights.
'''
"""
get weights.
"""
return self.weights
def size(self):
'''size().
'''
"""
size().
"""
return 0
@property
def output_shape(self):
'''output shape.
'''
"""
output shape.
"""
return self.input.shape
def to_real_layer(self):
'''to real layer.
'''
"""
to real layer.
"""
def __str__(self):
'''str() function to print.
'''
"""
str() function to print.
"""
return type(self).__name__[4:]
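One detail that is easy to miss among the docstring conversions: __str__ returns the class name with the 4-character "Stub" prefix stripped, so printing a stub layer yields the bare layer type. A quick illustrative sketch (import path assumed):
from nni.networkmorphism_tuner.layers import StubReLU, StubDropout2d  # assumed import path

print(StubReLU())          # -> ReLU
print(StubDropout2d(0.5))  # -> Dropout2d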
class StubWeightBiasLayer(StubLayer):
'''StubWeightBiasLayer Module to set the bias.
'''
"""
StubWeightBiasLayer Module to set the bias.
"""
def import_weights(self, torch_layer):
self.set_weights(
(torch_layer.weight.data.cpu().numpy(), torch_layer.bias.data.cpu().numpy())
(torch_layer.weight.data.cpu().numpy(),
torch_layer.bias.data.cpu().numpy())
)
def import_weights_keras(self, keras_layer):
......@@ -144,8 +168,10 @@ class StubWeightBiasLayer(StubLayer):
class StubBatchNormalization(StubWeightBiasLayer):
'''StubBatchNormalization Module. Batch Norm.
'''
"""
StubBatchNormalization Module. Batch Norm.
"""
def __init__(self, num_features, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.num_features = num_features
......@@ -175,29 +201,37 @@ class StubBatchNormalization(StubWeightBiasLayer):
class StubBatchNormalization1d(StubBatchNormalization):
'''StubBatchNormalization1d Module.
'''
"""
StubBatchNormalization1d Module.
"""
def to_real_layer(self):
return torch.nn.BatchNorm1d(self.num_features)
class StubBatchNormalization2d(StubBatchNormalization):
'''StubBatchNormalization2d Module.
'''
"""
StubBatchNormalization2d Module.
"""
def to_real_layer(self):
return torch.nn.BatchNorm2d(self.num_features)
class StubBatchNormalization3d(StubBatchNormalization):
'''StubBatchNormalization3d Module.
'''
"""
StubBatchNormalization3d Module.
"""
def to_real_layer(self):
return torch.nn.BatchNorm3d(self.num_features)
class StubDense(StubWeightBiasLayer):
'''StubDense Module. Linear.
'''
"""
StubDense Module. Linear.
"""
def __init__(self, input_units, units, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.input_units = input_units
......@@ -208,7 +242,9 @@ class StubDense(StubWeightBiasLayer):
return (self.units,)
def import_weights_keras(self, keras_layer):
self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1]))
self.set_weights(
(keras_layer.get_weights()[0].T,
keras_layer.get_weights()[1]))
def export_weights_keras(self, keras_layer):
keras_layer.set_weights((self.weights[0].T, self.weights[1]))
......@@ -221,9 +257,12 @@ class StubDense(StubWeightBiasLayer):
class StubConv(StubWeightBiasLayer):
'''StubConv Module. Conv.
'''
def __init__(self, input_channel, filters, kernel_size, stride=1, input_node=None, output_node=None):
"""
StubConv Module. Conv.
"""
def __init__(self, input_channel, filters, kernel_size,
stride=1, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.input_channel = input_channel
self.filters = filters
......@@ -242,13 +281,16 @@ class StubConv(StubWeightBiasLayer):
return tuple(ret)
def import_weights_keras(self, keras_layer):
self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1]))
self.set_weights(
(keras_layer.get_weights()[0].T,
keras_layer.get_weights()[1]))
def export_weights_keras(self, keras_layer):
keras_layer.set_weights((self.weights[0].T, self.weights[1]))
def size(self):
return (self.input_channel * self.kernel_size * self.kernel_size + 1) * self.filters
return (self.input_channel * self.kernel_size *
self.kernel_size + 1) * self.filters
@abstractmethod
def to_real_layer(self):
......@@ -272,8 +314,10 @@ class StubConv(StubWeightBiasLayer):
class StubConv1d(StubConv):
'''StubConv1d Module.
'''
"""
StubConv1d Module.
"""
def to_real_layer(self):
return torch.nn.Conv1d(
self.input_channel,
......@@ -285,8 +329,10 @@ class StubConv1d(StubConv):
class StubConv2d(StubConv):
'''StubConv2d Module.
'''
"""
StubConv2d Module.
"""
def to_real_layer(self):
return torch.nn.Conv2d(
self.input_channel,
......@@ -298,8 +344,10 @@ class StubConv2d(StubConv):
class StubConv3d(StubConv):
'''StubConv3d Module.
'''
"""
StubConv3d Module.
"""
def to_real_layer(self):
return torch.nn.Conv3d(
self.input_channel,
......@@ -311,8 +359,10 @@ class StubConv3d(StubConv):
class StubAggregateLayer(StubLayer):
'''StubAggregateLayer Module.
'''
"""
StubAggregateLayer Module.
"""
def __init__(self, input_nodes=None, output_node=None):
if input_nodes is None:
input_nodes = []
......@@ -320,8 +370,8 @@ class StubAggregateLayer(StubLayer):
class StubConcatenate(StubAggregateLayer):
'''StubConcatenate Module.
'''
"""StubConcatenate Module.
"""
@property
def output_shape(self):
ret = 0
......@@ -335,8 +385,9 @@ class StubConcatenate(StubAggregateLayer):
class StubAdd(StubAggregateLayer):
'''StubAdd Module.
'''
"""
StubAdd Module.
"""
@property
def output_shape(self):
return self.input[0].shape
......@@ -346,8 +397,9 @@ class StubAdd(StubAggregateLayer):
class StubFlatten(StubLayer):
'''StubFlatten Module.
'''
"""
StubFlatten Module.
"""
@property
def output_shape(self):
ret = 1
......@@ -360,22 +412,28 @@ class StubFlatten(StubLayer):
class StubReLU(StubLayer):
'''StubReLU Module.
'''
"""
StubReLU Module.
"""
def to_real_layer(self):
return torch.nn.ReLU()
class StubSoftmax(StubLayer):
'''StubSoftmax Module.
'''
"""
StubSoftmax Module.
"""
def to_real_layer(self):
return torch.nn.LogSoftmax(dim=1)
class StubDropout(StubLayer):
'''StubDropout Module.
'''
"""
StubDropout Module.
"""
def __init__(self, rate, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.rate = rate
......@@ -386,36 +444,45 @@ class StubDropout(StubLayer):
class StubDropout1d(StubDropout):
'''StubDropout1d Module.
'''
"""
StubDropout1d Module.
"""
def to_real_layer(self):
return torch.nn.Dropout(self.rate)
class StubDropout2d(StubDropout):
'''StubDropout2d Module.
'''
"""
StubDropout2d Module.
"""
def to_real_layer(self):
return torch.nn.Dropout2d(self.rate)
class StubDropout3d(StubDropout):
'''StubDropout3d Module.
'''
"""
StubDropout3d Module.
"""
def to_real_layer(self):
return torch.nn.Dropout3d(self.rate)
class StubInput(StubLayer):
'''StubInput Module.
'''
"""
StubInput Module.
"""
def __init__(self, input_node=None, output_node=None):
super().__init__(input_node, output_node)
class StubPooling(StubLayer):
'''StubPooling Module.
'''
"""
StubPooling Module.
"""
def __init__(self,
kernel_size=None,
......@@ -444,30 +511,37 @@ class StubPooling(StubLayer):
class StubPooling1d(StubPooling):
'''StubPooling1d Module.
'''
"""
StubPooling1d Module.
"""
def to_real_layer(self):
return torch.nn.MaxPool1d(self.kernel_size, stride=self.stride)
class StubPooling2d(StubPooling):
'''StubPooling2d Module.
'''
"""
StubPooling2d Module.
"""
def to_real_layer(self):
return torch.nn.MaxPool2d(self.kernel_size, stride=self.stride)
class StubPooling3d(StubPooling):
'''StubPooling3d Module.
'''
"""
StubPooling3d Module.
"""
def to_real_layer(self):
return torch.nn.MaxPool3d(self.kernel_size, stride=self.stride)
class StubGlobalPooling(StubLayer):
'''StubGlobalPooling Module.
'''
"""
StubGlobalPooling Module.
"""
def __init__(self, input_node=None, output_node=None):
super().__init__(input_node, output_node)
......@@ -481,49 +555,63 @@ class StubGlobalPooling(StubLayer):
class StubGlobalPooling1d(StubGlobalPooling):
'''StubGlobalPooling1d Module.
'''
"""
StubGlobalPooling1d Module.
"""
def to_real_layer(self):
return GlobalAvgPool1d()
class StubGlobalPooling2d(StubGlobalPooling):
'''StubGlobalPooling2d Module.
'''
"""
StubGlobalPooling2d Module.
"""
def to_real_layer(self):
return GlobalAvgPool2d()
class StubGlobalPooling3d(StubGlobalPooling):
'''StubGlobalPooling3d Module.
'''
"""
StubGlobalPooling3d Module.
"""
def to_real_layer(self):
return GlobalAvgPool3d()
class TorchConcatenate(nn.Module):
'''TorchConcatenate Module.
'''
"""
TorchConcatenate Module.
"""
def forward(self, input_list):
return torch.cat(input_list, dim=1)
class TorchAdd(nn.Module):
'''TorchAdd Module.
'''
"""
TorchAdd Module.
"""
def forward(self, input_list):
return input_list[0] + input_list[1]
class TorchFlatten(nn.Module):
'''TorchFlatten Module.
'''
"""
TorchFlatten Module.
"""
def forward(self, input_tensor):
return input_tensor.view(input_tensor.size(0), -1)
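TorchFlatten keeps the batch dimension and collapses everything else, so an (N, C, H, W) activation becomes (N, C*H*W). A minimal sketch (the class is defined in this file; the import path below is an assumption):
import torch
from nni.networkmorphism_tuner.layers import TorchFlatten  # assumed import path

x = torch.zeros(8, 16, 4, 4)
print(TorchFlatten()(x).shape)  # torch.Size([8, 256])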
def keras_dropout(layer, rate):
'''keras dropout layer.
'''
"""
Keras dropout layer.
"""
from keras import layers
......@@ -539,8 +627,9 @@ def keras_dropout(layer, rate):
def to_real_keras_layer(layer):
''' real keras layer.
'''
"""
Real keras layer.
"""
from keras import layers
if is_layer(layer, "Dense"):
......@@ -574,10 +663,14 @@ def to_real_keras_layer(layer):
def is_layer(layer, layer_type):
'''judge the layer type.
Returns:
"""
Judge the layer type.
Returns
-------
bool
boolean -- True or False
'''
"""
if layer_type == "Input":
return isinstance(layer, StubInput)
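For context, is_layer dispatches on a type-name string instead of scattering isinstance checks across call sites; only the "Input" branch is visible before the truncation. A hedged usage sketch (import path assumed; the "Conv" branch is assumed to match any StubConv subclass, as the calls in graph.py suggest):
from nni.networkmorphism_tuner.layers import StubConv2d, StubReLU, is_layer  # assumed path

conv = StubConv2d(input_channel=3, filters=16, kernel_size=3)
assert is_layer(conv, "Conv")        # StubConv2d is a StubConv subclass
assert not is_layer(conv, "ReLU")
assert is_layer(StubReLU(), "ReLU")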
......@@ -607,8 +700,9 @@ def is_layer(layer, layer_type):
def layer_description_extractor(layer, node_to_id):
'''get layer description.
'''
"""
Get layer description.
"""
layer_input = layer.input
layer_output = layer.output
......@@ -641,7 +735,8 @@ def layer_description_extractor(layer, node_to_id):
layer.units,
]
elif isinstance(layer, (StubBatchNormalization,)):
return (type(layer).__name__, layer_input, layer_output, layer.num_features)
return (type(layer).__name__, layer_input,
layer_output, layer.num_features)
elif isinstance(layer, (StubDropout,)):
return (type(layer).__name__, layer_input, layer_output, layer.rate)
elif isinstance(layer, StubPooling):
......@@ -658,8 +753,8 @@ def layer_description_extractor(layer, node_to_id):
def layer_description_builder(layer_information, id_to_node):
'''build layer from description.
'''
"""build layer from description.
"""
layer_type = layer_information[0]
layer_input_ids = layer_information[1]
......@@ -696,8 +791,9 @@ def layer_description_builder(layer_information, id_to_node):
def layer_width(layer):
'''get layer width.
'''
"""
Get layer width.
"""
if is_layer(layer, "Dense"):
return layer.units
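Only the Dense branch of layer_width survives the truncation above; for a dense layer the "width" is simply its output unit count. Illustrative use (import path assumed):
from nni.networkmorphism_tuner.layers import StubDense, layer_width  # assumed import path

assert layer_width(StubDense(input_units=128, units=64)) == 64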
......
......@@ -17,11 +17,13 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
"""
networkmorphism_tuner.py
"""
import logging
import os
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward
from nni.networkmorphism_tuner.bayesian import BayesianOptimizer
......@@ -34,7 +36,35 @@ logger = logging.getLogger("NetworkMorphism_AutoML")
class NetworkMorphismTuner(Tuner):
"""NetworkMorphismTuner is a tuner which using network morphism techniques."""
"""
NetworkMorphismTuner is a tuner which uses network morphism techniques.
Attributes
----------
n_classes : int
The class number or output node number (default: ``10``)
input_shape : tuple
A tuple including: (input_width, input_width, input_channel)
t_min : float
The minimum temperature for simulated annealing. (default: ``Constant.T_MIN``)
beta : float
The beta in acquisition function. (default: ``Constant.BETA``)
algorithm_name : str
Algorithm name used in the network morphism (default: ``"Bayesian"``)
optimize_mode : str
Optimize mode, "minimize" or "maximize" (default: ``"minimize"``)
verbose : bool
Whether to print verbose logs (default: ``True``)
bo : BayesianOptimizer
The optimizer used in the network morphism tuner.
max_model_size : int
Max model size of the graph (default: ``Constant.MAX_MODEL_SIZE``)
default_model_len : int
Default model length (default: ``Constant.MODEL_LEN``)
default_model_width : int
Default model width (default: ``Constant.MODEL_WIDTH``)
search_space : dict
"""
def __init__(
self,
......@@ -52,36 +82,8 @@ class NetworkMorphismTuner(Tuner):
default_model_len=Constant.MODEL_LEN,
default_model_width=Constant.MODEL_WIDTH,
):
""" initilizer of the NetworkMorphismTuner.
Parameters
----------
task : str
task mode, such as "cv","common" etc. (default: {"cv"})
input_width : int
input sample shape (default: {32})
input_channel : int
input sample shape (default: {3})
n_output_node : int
output node number (default: {10})
algorithm_name : str
algorithm name used in the network morphism (default: {"Bayesian"})
optimize_mode : str
optimize mode "minimize" or "maximize" (default: {"minimize"})
path : str
default mode path to save the model file (default: {"model_path"})
verbose : bool
verbose to print the log (default: {True})
beta : float
The beta in acquisition function. (default: {Constant.BETA})
t_min : float
The minimum temperature for simulated annealing. (default: {Constant.T_MIN})
max_model_size : int
max model size to the graph (default: {Constant.MAX_MODEL_SIZE})
default_model_len : int
default model length (default: {Constant.MODEL_LEN})
default_model_width : int
default model width (default: {Constant.MODEL_WIDTH})
"""
Initializer of the NetworkMorphismTuner.
"""
if not os.path.exists(path):
......@@ -92,7 +94,8 @@ class NetworkMorphismTuner(Tuner):
elif task == "common":
self.generators = [MlpGenerator]
else:
raise NotImplementedError('{} task not supported in List ["cv","common"]')
raise NotImplementedError(
'{} task not supported in List ["cv","common"]'.format(task))
self.n_classes = n_output_node
self.input_shape = (input_width, input_width, input_channel)
......@@ -106,7 +109,8 @@ class NetworkMorphismTuner(Tuner):
self.verbose = verbose
self.model_count = 0
self.bo = BayesianOptimizer(self, self.t_min, self.optimize_mode, self.beta)
self.bo = BayesianOptimizer(
self, self.t_min, self.optimize_mode, self.beta)
self.training_queue = []
self.descriptors = []
self.history = []
......@@ -117,6 +121,7 @@ class NetworkMorphismTuner(Tuner):
self.search_space = dict()
def update_search_space(self, search_space):
"""
Update search space definition in tuner by search_space in neural architecture.
......@@ -140,7 +145,8 @@ class NetworkMorphismTuner(Tuner):
new_father_id, generated_graph = self.generate()
new_model_id = self.model_count
self.model_count += 1
self.training_queue.append((generated_graph, new_father_id, new_model_id))
self.training_queue.append(
(generated_graph, new_father_id, new_model_id))
self.descriptors.append(generated_graph.extract_descriptor())
graph, father_id, model_id = self.training_queue.pop(0)
......@@ -153,12 +159,15 @@ class NetworkMorphismTuner(Tuner):
return json_out
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
""" Record an observation of the objective function.
"""
Record an observation of the objective function.
Parameters
----------
parameter_id : int
the id of a group of parameters generated by the nni manager.
parameters : dict
A group of parameters.
value : dict/float
If value is a dict, it should have a "default" key.
"""
......@@ -175,8 +184,11 @@ class NetworkMorphismTuner(Tuner):
self.add_model(reward, model_id)
self.update(father_id, graph, reward, model_id)
def init_search(self):
"""Call the generators to generate the initial architectures for the search."""
"""
Call the generators to generate the initial architectures for the search.
"""
if self.verbose:
logger.info("Initializing search.")
for generator in self.generators:
......@@ -191,14 +203,16 @@ class NetworkMorphismTuner(Tuner):
if self.verbose:
logger.info("Initialization finished.")
def generate(self):
"""Generate the next neural architecture.
"""
Generate the next neural architecture.
Returns
-------
other_info: any object
other_info : any object
Anything to be saved in the training queue together with the architecture.
generated_graph: Graph
generated_graph : Graph
An instance of Graph.
"""
generated_graph, new_father_id = self.bo.generate(self.descriptors)
......@@ -211,7 +225,8 @@ class NetworkMorphismTuner(Tuner):
return new_father_id, generated_graph
def update(self, other_info, graph, metric_value, model_id):
""" Update the controller with evaluation result of a neural architecture.
"""
Update the controller with evaluation result of a neural architecture.
Parameters
----------
......@@ -228,7 +243,8 @@ class NetworkMorphismTuner(Tuner):
self.bo.add_child(father_id, model_id)
def add_model(self, metric_value, model_id):
""" Add model to the history, x_queue and y_queue
"""
Add model to the history, x_queue and y_queue
Parameters
----------
......@@ -252,16 +268,21 @@ class NetworkMorphismTuner(Tuner):
file.close()
return ret
def get_best_model_id(self):
""" Get the best model_id from history using the metric value
"""
Get the best model_id from history using the metric value
"""
if self.optimize_mode is OptimizeMode.Maximize:
return max(self.history, key=lambda x: x["metric_value"])["model_id"]
return max(self.history, key=lambda x: x["metric_value"])[
"model_id"]
return min(self.history, key=lambda x: x["metric_value"])["model_id"]
def load_model_by_id(self, model_id):
"""Get the model by model_id
"""
Get the model by model_id
Parameters
----------
......@@ -281,7 +302,8 @@ class NetworkMorphismTuner(Tuner):
return load_model
def load_best_model(self):
""" Get the best model by model id
"""
Get the best model by model id
Returns
-------
......@@ -291,7 +313,8 @@ class NetworkMorphismTuner(Tuner):
return self.load_model_by_id(self.get_best_model_id())
def get_metric_value_by_id(self, model_id):
""" Get the model metric valud by its model_id
"""
Get the model metric value by its model_id
Parameters
----------
......
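Taken together, the bookkeeping methods above let callers recover the best architecture once trials report back. A hedged sketch of that flow (module path and constructor arguments assumed from the docstrings in this file):
from nni.networkmorphism_tuner.networkmorphism_tuner import NetworkMorphismTuner  # assumed path

tuner = NetworkMorphismTuner(task="cv", n_output_node=10, optimize_mode="maximize")
# ... the NNI manager drives the trial loop and calls receive_trial_result() as trials finish ...
best_id = tuner.get_best_model_id()             # arg-max over history by metric_value
best_graph = tuner.load_model_by_id(best_id)    # equivalent to tuner.load_best_model()
print(tuner.get_metric_value_by_id(best_id))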
......@@ -92,17 +92,25 @@ class CnnGenerator(NetworkGenerator):
for i in range(model_len):
output_node_id = graph.add_layer(StubReLU(), output_node_id)
output_node_id = graph.add_layer(
self.batch_norm(graph.node_list[output_node_id].shape[-1]), output_node_id
self.batch_norm(
graph.node_list[output_node_id].shape[-1]), output_node_id
)
output_node_id = graph.add_layer(
self.conv(temp_input_channel, model_width, kernel_size=3, stride=stride),
self.conv(
temp_input_channel,
model_width,
kernel_size=3,
stride=stride),
output_node_id,
)
temp_input_channel = model_width
if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1):
output_node_id = graph.add_layer(self.pooling(), output_node_id)
if pooling_len == 0 or (
(i + 1) % pooling_len == 0 and i != model_len - 1):
output_node_id = graph.add_layer(
self.pooling(), output_node_id)
output_node_id = graph.add_layer(self.global_avg_pooling(), output_node_id)
output_node_id = graph.add_layer(
self.global_avg_pooling(), output_node_id)
output_node_id = graph.add_layer(
self.dropout(Constant.CONV_DROPOUT_RATE), output_node_id
)
......@@ -111,7 +119,11 @@ class CnnGenerator(NetworkGenerator):
output_node_id,
)
output_node_id = graph.add_layer(StubReLU(), output_node_id)
graph.add_layer(StubDense(model_width, self.n_output_node), output_node_id)
graph.add_layer(
StubDense(
model_width,
self.n_output_node),
output_node_id)
return graph
......@@ -145,7 +157,8 @@ class MlpGenerator(NetworkGenerator):
if model_width is None:
model_width = Constant.MODEL_WIDTH
if isinstance(model_width, list) and not len(model_width) == model_len:
raise ValueError("The length of 'model_width' does not match 'model_len'")
raise ValueError(
"The length of 'model_width' does not match 'model_len'")
elif isinstance(model_width, int):
model_width = [model_width] * model_len
......@@ -162,5 +175,9 @@ class MlpGenerator(NetworkGenerator):
output_node_id = graph.add_layer(StubReLU(), output_node_id)
n_nodes_prev_layer = width
graph.add_layer(StubDense(n_nodes_prev_layer, self.n_output_node), output_node_id)
graph.add_layer(
StubDense(
n_nodes_prev_layer,
self.n_output_node),
output_node_id)
return graph
......@@ -59,9 +59,12 @@ class NetworkMorphismTestCase(TestCase):
graph_recover.layer_id_to_input_node_ids,
)
self.assertEqual(graph_init.adj_list, graph_recover.adj_list)
self.assertEqual(graph_init.reverse_adj_list, graph_recover.reverse_adj_list)
self.assertEqual(
len(graph_init.operation_history), len(graph_recover.operation_history)
graph_init.reverse_adj_list,
graph_recover.reverse_adj_list)
self.assertEqual(
len(graph_init.operation_history), len(
graph_recover.operation_history)
)
self.assertEqual(graph_init.n_dim, graph_recover.n_dim)
self.assertEqual(graph_init.conv, graph_recover.conv)
......@@ -71,7 +74,8 @@ class NetworkMorphismTestCase(TestCase):
node_list_init = [node.shape for node in graph_init.node_list]
node_list_recover = [node.shape for node in graph_recover.node_list]
self.assertEqual(node_list_init, node_list_recover)
self.assertEqual(len(graph_init.node_to_id), len(graph_recover.node_to_id))
self.assertEqual(len(graph_init.node_to_id),
len(graph_recover.node_to_id))
layer_list_init = [
layer_description_extractor(item, graph_init.node_to_id)
for item in graph_init.layer_list
......@@ -82,7 +86,8 @@ class NetworkMorphismTestCase(TestCase):
]
self.assertEqual(layer_list_init, layer_list_recover)
node_to_id_init = [graph_init.node_to_id[node] for node in graph_init.node_list]
node_to_id_init = [graph_init.node_to_id[node]
for node in graph_init.node_list]
node_to_id_recover = [
graph_recover.node_to_id[node] for node in graph_recover.node_list
]
......
......@@ -77,7 +77,7 @@ class PdType:
class CategoricalPd(Pd):
"""
categorical prossibility distribution
Categorical probability distribution
"""
def __init__(self, logits, mask_npinf, nsteps, size, is_act_model):
self.logits = logits
......@@ -154,7 +154,7 @@ class CategoricalPd(Pd):
class CategoricalPdType(PdType):
"""
to create CategoricalPd
To create CategoricalPd
"""
def __init__(self, ncat, nsteps, np_mask, is_act_model):
self.ncat = ncat
......@@ -180,7 +180,7 @@ class CategoricalPdType(PdType):
def _matching_fc(tensor, name, size, nsteps, init_scale, init_bias, np_mask, is_act_model):
"""
add fc op, and add mask op when not in action mode
Add fc op, and add mask op when not in action mode
"""
if tensor.shape[-1] == size:
assert False
......