Unverified Commit 0168ff1c authored by xuehui's avatar xuehui Committed by GitHub
Browse files

update docstring and pylint (#1662)

* update docstring of batchtuner

* update docstring of batch tuner

* update docstring of evolution tuner

* update docstring and pylint of metis_tuner

* fix pylint related to logger in metis_tuner

* fix pylint

* update

* fix pylint in metis_tuner

* update in networkmorphism_tuner

* update

* update

* update docstring in hyperopt_tuner

* update batch_tuner

* delete unused space

* update in metis

* update sdk_reference.rst

* update networkmorphism

* update networkmorphism

* update batch_tuner

* update batch_tuner

* update

* update metis

* roll back to print

* update Returns

* update

* delete white space
parent 803f056a
......@@ -36,6 +36,9 @@ Tuner
.. autoclass:: nni.metis_tuner.metis_tuner.MetisTuner
:members:
.. autoclass:: nni.batch_tuner.batch_tuner.BatchTuner
:members:
Assessor
------------------------
.. autoclass:: nni.assessor.Assessor
......
......@@ -31,22 +31,27 @@ TYPE = '_type'
CHOICE = 'choice'
VALUE = '_value'
logger = logging.getLogger('batch_tuner_AutoML')
LOGGER = logging.getLogger('batch_tuner_AutoML')
class BatchTuner(Tuner):
"""
BatchTuner is a tuner that runs, in order, every configuration the user lists in the search space.
Examples
--------
The search space is only accepted in the following form:
```
{
'combine_params': { '_type': 'choice',
'_value': '[{...}, {...}, {...}]',
}
}
```
"""
def __init__(self):
self.count = -1
self.values = []
self._count = -1
self._values = []
def is_valid(self, search_space):
"""
......@@ -55,6 +60,11 @@ class BatchTuner(Tuner):
Parameters
----------
search_space : dict
Returns
-------
None or list
If valid, return candidate values; else return None.
"""
if not len(search_space) == 1:
raise RuntimeError('BatchTuner only supprt one combined-paramreters key.')
......@@ -62,11 +72,14 @@ class BatchTuner(Tuner):
for param in search_space:
param_type = search_space[param][TYPE]
if not param_type == CHOICE:
raise RuntimeError('BatchTuner only supprt one combined-paramreters type is choice.')
else:
if isinstance(search_space[param][VALUE], list):
return search_space[param][VALUE]
raise RuntimeError('The combined-paramreters value in BatchTuner is not a list.')
raise RuntimeError('BatchTuner only supprt \
one combined-paramreters type is choice.')
if isinstance(search_space[param][VALUE], list):
return search_space[param][VALUE]
raise RuntimeError('The combined-paramreters \
value in BatchTuner is not a list.')
return None
def update_search_space(self, search_space):
......@@ -76,7 +89,7 @@ class BatchTuner(Tuner):
----------
search_space : dict
"""
self.values = self.is_valid(search_space)
self._values = self.is_valid(search_space)
def generate_parameters(self, parameter_id, **kwargs):
"""Returns a dict of trial (hyper-)parameters, as a serializable object.
......@@ -84,41 +97,49 @@ class BatchTuner(Tuner):
Parameters
----------
parameter_id : int
Returns
-------
dict
A candidate parameter group.
"""
self.count += 1
if self.count > len(self.values) - 1:
self._count += 1
if self._count > len(self._values) - 1:
raise nni.NoMoreTrialError('no more parameters now.')
return self.values[self.count]
return self._values[self._count]
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """
    Accept a trial result and discard it.

    This method does nothing with its arguments and always returns None.

    Parameters
    ----------
    parameter_id : int
    parameters : dict
    value : dict/float
    """
def import_data(self, data):
    """
    Import additional data for tuning.

    Candidates that were already handed out are dropped and the cursor is
    reset. Then every remaining candidate that matches the 'parameter' of an
    imported trial with a truthy 'value' is removed, so it is not run again.

    Parameters
    ----------
    data : list of dict
        Each dict has at least two keys, 'parameter' and 'value'.

    Raises
    ------
    AssertionError
        If a trial record lacks the 'parameter' or the 'value' key.
    """
    if not self._values:
        LOGGER.info("Search space has not been initialized, skip this data import")
        return

    # Keep only the candidates that have not been handed out yet and restart
    # the cursor so the next request begins at the first remaining one.
    self._values = self._values[(self._count+1):]
    self._count = -1

    _completed_num = 0
    for trial_info in data:
        # Adjacent literals are joined at compile time, so the message does
        # not pick up source indentation the way a backslash continuation
        # inside the literal would.
        LOGGER.info("Importing data, current processing "
                    "progress %s / %s", _completed_num, len(data))
        # simply validate data format
        assert "parameter" in trial_info
        _params = trial_info["parameter"]
        assert "value" in trial_info
        _value = trial_info['value']
        if not _value:
            # NOTE(review): a falsy metric such as 0 is also skipped here;
            # confirm that this is intended.
            LOGGER.info("Useless trial data, value is %s, skip this trial data.", _value)
            continue
        _completed_num += 1
        if _params in self._values:
            self._values.remove(_params)
    LOGGER.info("Successfully import data to batch tuner, "
                "total data: %d, imported data: %d.", len(data), _completed_num)
......@@ -32,7 +32,9 @@ import nni.parameter_expressions as parameter_expressions
def json2space(x, oldy=None, name=NodeType.ROOT):
"""Change search space from json format to hyperopt format
"""
Change search space from json format to hyperopt format
"""
y = list()
if isinstance(x, dict):
......@@ -59,7 +61,9 @@ def json2space(x, oldy=None, name=NodeType.ROOT):
return y
def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeType.ROOT):
"""Json to pramaters.
"""
Json to pramaters.
"""
if isinstance(x, dict):
if NodeType.TYPE in x.keys():
......@@ -117,6 +121,17 @@ def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeTyp
class Individual:
"""
Individual class to store the individual's info.
Attributes
----------
config : str
Search space.
info : str
The str to save information of individual.
result : float
The final metric of a individual.
store_dir : str
save_dir : str
"""
def __init__(self, config=None, info=None, result=None, save_dir=None):
......@@ -124,6 +139,7 @@ class Individual:
Parameters
----------
config : str
A config to represent a group of parameters.
info : str
result : float
save_dir : str
......@@ -140,6 +156,8 @@ class Individual:
def mutation(self, config=None, info=None, save_dir=None):
"""
Mutation by reset state information.
Parameters
----------
config : str
......@@ -177,8 +195,11 @@ class EvolutionTuner(Tuner):
self.population = None
self.space = None
def update_search_space(self, search_space):
"""Update search space.
"""
Update search space.
Search_space contains the information that user pre-defined.
Parameters
......@@ -191,15 +212,19 @@ class EvolutionTuner(Tuner):
self.random_state = np.random.RandomState()
self.population = []
is_rand = dict()
for item in self.space:
is_rand[item] = True
for _ in range(self.population_size):
config = json2parameter(
self.searchspace_json, is_rand, self.random_state)
self.population.append(Individual(config=config))
def generate_parameters(self, parameter_id, **kwargs):
"""Returns a dict of trial (hyper-)parameters, as a serializable object.
"""
This function returns a dict of trial (hyper-)parameters, as a serializable object.
Parameters
----------
......@@ -207,15 +232,19 @@ class EvolutionTuner(Tuner):
Returns
-------
config : dict
dict
A group of candidate parameters that the evolution tuner generated.
"""
if not self.population:
raise RuntimeError('The population is empty')
pos = -1
for i in range(len(self.population)):
if self.population[i].result is None:
pos = i
break
if pos != -1:
indiv = copy.deepcopy(self.population[pos])
self.population.pop(pos)
......@@ -230,6 +259,7 @@ class EvolutionTuner(Tuner):
self.population[0].config)
is_rand = dict()
mutation_pos = space[random.randint(0, len(space)-1)]
for i in range(len(self.space)):
is_rand[self.space[i]] = (self.space[i] == mutation_pos)
config = json2parameter(
......@@ -238,21 +268,27 @@ class EvolutionTuner(Tuner):
# remove "_index" from config and save params-id
total_config = config
self.total_data[parameter_id] = total_config
config = split_index(total_config)
return config
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
'''Record the result from a trial
"""
Record the result from a trial
Parameters
----------
parameters: dict
parameter_id : int
parameters : dict
value : dict/float
if value is dict, it should have "default" key.
value is final metrics of the trial.
'''
"""
reward = extract_scalar_reward(value)
if parameter_id not in self.total_data:
raise RuntimeError('Received parameter_id not in total_data.')
# restore the paramsters contains "_index"
......
......@@ -422,7 +422,8 @@ class HyperoptTuner(Tuner):
misc_by_id[tid]['vals'][key] = [val]
def get_suggestion(self, random_search=False):
"""get suggestion from hyperopt
"""
get suggestion from hyperopt
Parameters
----------
......@@ -473,7 +474,8 @@ class HyperoptTuner(Tuner):
return total_params
def import_data(self, data):
"""Import additional data for tuning
"""
Import additional data for tuning
Parameters
----------
......
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os
import sys
......@@ -31,7 +32,8 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
'''
Create the Gaussian Mixture Model
'''
samples = [samples_x[i] + [samples_y_aggregation[i]] for i in range(0, len(samples_x))]
samples = [samples_x[i] + [samples_y_aggregation[i]]
for i in range(0, len(samples_x))]
# Sorts so that we can get the top samples
samples = sorted(samples, key=itemgetter(-1))
......@@ -39,13 +41,16 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
samples_goodbatch = samples[0:samples_goodbatch_size]
samples_badbatch = samples[samples_goodbatch_size:]
samples_x_goodbatch = [sample_goodbatch[0:-1] for sample_goodbatch in samples_goodbatch]
samples_x_goodbatch = [sample_goodbatch[0:-1]
for sample_goodbatch in samples_goodbatch]
#samples_y_goodbatch = [sample_goodbatch[-1] for sample_goodbatch in samples_goodbatch]
samples_x_badbatch = [sample_badbatch[0:-1] for sample_badbatch in samples_badbatch]
samples_x_badbatch = [sample_badbatch[0:-1]
for sample_badbatch in samples_badbatch]
# === Trains GMM clustering models === #
#sys.stderr.write("[%s] Train GMM's GMM model\n" % (os.path.basename(__file__)))
bgmm_goodbatch = mm.BayesianGaussianMixture(n_components=max(1, samples_goodbatch_size - 1))
bgmm_goodbatch = mm.BayesianGaussianMixture(
n_components=max(1, samples_goodbatch_size - 1))
bad_n_components = max(1, len(samples_x) - samples_goodbatch_size - 1)
bgmm_badbatch = mm.BayesianGaussianMixture(n_components=bad_n_components)
bgmm_goodbatch.fit(samples_x_goodbatch)
......@@ -55,4 +60,3 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
model['clusteringmodel_good'] = bgmm_goodbatch
model['clusteringmodel_bad'] = bgmm_badbatch
return model
\ No newline at end of file
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os
import random
......@@ -33,14 +34,17 @@ CONSTRAINT_UPPERBOUND = None
CONSTRAINT_PARAMS_IDX = []
def _ratio_scores(parameters_value, clusteringmodel_gmm_good,
                  clusteringmodel_gmm_bad):
    '''
    The ratio is smaller the better

    Parameters
    ----------
    parameters_value : list
        One candidate point; it is wrapped in a list before being scored.
    clusteringmodel_gmm_good : object
        Model exposing ``score(samples)``; its score is the numerator.
    clusteringmodel_gmm_bad : object
        Model exposing ``score(samples)``; its score is the denominator.

    Returns
    -------
    tuple
        ``(ratio, sigma)`` where ``ratio`` is the good-model score divided by
        the bad-model score, and ``sigma`` is always 0.
    '''
    score_good = clusteringmodel_gmm_good.score([parameters_value])
    score_bad = clusteringmodel_gmm_bad.score([parameters_value])
    ratio = score_good / score_bad
    sigma = 0
    return ratio, sigma
def selection_r(x_bounds,
x_types,
clusteringmodel_gmm_good,
......@@ -60,6 +64,7 @@ def selection_r(x_bounds,
return outputs
def selection(x_bounds,
x_types,
clusteringmodel_gmm_good,
......@@ -69,13 +74,14 @@ def selection(x_bounds,
'''
Select the lowest mu value
'''
results = lib_acquisition_function.next_hyperparameter_lowest_mu(\
_ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],\
x_bounds, x_types, minimize_starting_points, \
minimize_constraints_fun=minimize_constraints_fun)
results = lib_acquisition_function.next_hyperparameter_lowest_mu(
_ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],
x_bounds, x_types, minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
return results
def _rand_with_constraints(x_bounds, x_types):
'''
Random generate the variable with constraints
......@@ -96,6 +102,7 @@ def _rand_with_constraints(x_bounds, x_types):
outputs[i] = random.randint(x_bounds[i][0], x_bounds[i][1])
return outputs
def _minimize_constraints_fun_summation(x):
'''
Minimize constraints fun summation
......
......@@ -17,7 +17,9 @@
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
OutlierDetection.py
"""
import os
import sys
......@@ -30,19 +32,21 @@ sys.path.insert(1, os.path.join(sys.path[0], '..'))
def _outlierDetection_threaded(inputs):
'''
"""
Detect the outlier
'''
"""
[samples_idx, samples_x, samples_y_aggregation] = inputs
sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"\
% (os.path.basename(__file__), samples_idx + 1, len(samples_x)))
sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"
% (os.path.basename(__file__), samples_idx + 1, len(samples_x)))
outlier = None
# Create a diagnostic regression model which removes the sample that we want to evaluate
diagnostic_regressor_gp = gp_create_model.create_model(\
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(samples_x[samples_idx], diagnostic_regressor_gp['model'])
# Create a diagnostic regression model which removes the sample that we
# want to evaluate
diagnostic_regressor_gp = gp_create_model.create_model(
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(
samples_x[samples_idx], diagnostic_regressor_gp['model'])
# 2.33 is the z-score for 98% confidence level
if abs(samples_y_aggregation[samples_idx] - mu) > (2.33 * sigma):
......@@ -52,16 +56,18 @@ def _outlierDetection_threaded(inputs):
"difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)}
return outlier
def outlierDetection_threaded(samples_x, samples_y_aggregation):
'''
"""
Use Multi-thread to detect the outlier
'''
"""
outliers = []
threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]\
for samples_idx in range(0, len(samples_x))]
threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]
for samples_idx in range(0, len(samples_x))]
threads_pool = ThreadPool(min(4, len(threads_inputs)))
threads_results = threads_pool.map(_outlierDetection_threaded, threads_inputs)
threads_results = threads_pool.map(
_outlierDetection_threaded, threads_inputs)
threads_pool.close()
threads_pool.join()
......@@ -69,15 +75,13 @@ def outlierDetection_threaded(samples_x, samples_y_aggregation):
if threads_result is not None:
outliers.append(threads_result)
else:
print("error here.")
print("Error: threads_result is None.")
outliers = outliers if outliers else None
return outliers
def outlierDetection(samples_x, samples_y_aggregation):
'''
TODO
'''
outliers = []
for samples_idx, _ in enumerate(samples_x):
#sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n"
......@@ -92,7 +96,8 @@ def outlierDetection(samples_x, samples_y_aggregation):
outliers.append({"samples_idx": samples_idx,
"expected_mu": mu,
"expected_sigma": sigma,
"difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)})
"difference": \
abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)})
outliers = outliers if outliers else None
return outliers
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
lib_acquisition_function.py
"""
import sys
import numpy
......@@ -33,9 +37,9 @@ def next_hyperparameter_expected_improvement(fun_prediction,
samples_y_aggregation,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Expected Improvement" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -70,6 +74,7 @@ def next_hyperparameter_expected_improvement(fun_prediction,
return outputs
def _expected_improvement(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, samples_y_aggregation,
minimize_constraints_fun):
......@@ -77,7 +82,8 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args,
x = lib_data.match_val_type(x, x_bounds, x_types)
expected_improvement = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, sigma = fun_prediction(x, *fun_prediction_args)
loss_optimum = min(samples_y_aggregation)
......@@ -87,7 +93,7 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args,
with numpy.errstate(divide="ignore"):
Z = scaling_factor * (mu - loss_optimum) / sigma
expected_improvement = scaling_factor * (mu - loss_optimum) * \
norm.cdf(Z) + sigma * norm.pdf(Z)
norm.cdf(Z) + sigma * norm.pdf(Z)
expected_improvement = 0.0 if sigma == 0.0 else expected_improvement
# We want expected_improvement to be as large as possible
......@@ -101,9 +107,9 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
x_bounds, x_types,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Lowest Confidence" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -120,10 +126,12 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
x_types,
minimize_constraints_fun))
if (best_acquisition_value) is None or (res.fun < best_acquisition_value):
if (best_acquisition_value) is None or (
res.fun < best_acquisition_value):
res.x = numpy.ndarray.tolist(res.x)
res.x = lib_data.match_val_type(res.x, x_bounds, x_types)
if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(res.x) is True):
best_acquisition_value = res.fun
best_x = res.x
......@@ -134,13 +142,15 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
'expected_sigma': sigma, 'acquisition_func': "lc"}
return outputs
def _lowest_confidence(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun):
# This is only for step-wise optimization
x = lib_data.match_val_type(x, x_bounds, x_types)
ci = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, sigma = fun_prediction(x, *fun_prediction_args)
ci = (sigma * 1.96 * 2) / mu
# We want ci to be as large as possible
......@@ -156,9 +166,9 @@ def next_hyperparameter_lowest_mu(fun_prediction,
x_bounds, x_types,
minimize_starting_points,
minimize_constraints_fun=None):
'''
"""
"Lowest Mu" acquisition function
'''
"""
best_x = None
best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
......@@ -169,13 +179,15 @@ def next_hyperparameter_lowest_mu(fun_prediction,
x0=starting_point.reshape(1, -1),
bounds=x_bounds_minmax,
method="L-BFGS-B",
args=(fun_prediction, fun_prediction_args, \
args=(fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun))
if (best_acquisition_value is None) or (res.fun < best_acquisition_value):
if (best_acquisition_value is None) or (
res.fun < best_acquisition_value):
res.x = numpy.ndarray.tolist(res.x)
res.x = lib_data.match_val_type(res.x, x_bounds, x_types)
if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True):
if (minimize_constraints_fun is None) or (
minimize_constraints_fun(res.x) is True):
best_acquisition_value = res.fun
best_x = res.x
......@@ -189,14 +201,14 @@ def next_hyperparameter_lowest_mu(fun_prediction,
def _lowest_mu(x, fun_prediction, fun_prediction_args,
               x_bounds, x_types, minimize_constraints_fun):
    """
    Calculate the lowest mu

    Parameters
    ----------
    x : list
        Candidate point; it is first passed through
        ``lib_data.match_val_type`` together with x_bounds and x_types.
    fun_prediction : callable
        Called as ``fun_prediction(x, *fun_prediction_args)``; the first
        element of its result is used as mu.
    fun_prediction_args : list
        Extra positional arguments for fun_prediction.
    x_bounds : list
    x_types : list
    minimize_constraints_fun : callable or None
        Optional constraint check; the point counts as feasible only when
        it returns exactly True.

    Returns
    -------
    mu
        The predicted mu for a feasible point, otherwise ``sys.maxsize``.
    """
    # This is only for step-wise optimization
    x = lib_data.match_val_type(x, x_bounds, x_types)

    # Infeasible points keep this sentinel, so a minimizer steers away from them.
    mu = sys.maxsize
    if (minimize_constraints_fun is None) or (
            minimize_constraints_fun(x) is True):
        mu, _ = fun_prediction(x, *fun_prediction_args)
    return mu
\ No newline at end of file
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
lib_constraint_summation.py
"""
import math
import random
......@@ -39,6 +43,7 @@ def check_feasibility(x_bounds, lowerbound, upperbound):
return (x_bounds_lowerbound <= lowerbound <= x_bounds_upperbound) or \
(x_bounds_lowerbound <= upperbound <= x_bounds_upperbound)
def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
'''
Key idea is that we try to move towards upperbound, by randomly choose one
......@@ -55,7 +60,8 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
if x_types[i] == "discrete_int":
x_idx_sorted.append([i, len(x_bounds[i])])
elif (x_types[i] == "range_int") or (x_types[i] == "range_continuous"):
x_idx_sorted.append([i, math.floor(x_bounds[i][1] - x_bounds[i][0])])
x_idx_sorted.append(
[i, math.floor(x_bounds[i][1] - x_bounds[i][0])])
x_idx_sorted = sorted(x_idx_sorted, key=itemgetter(1))
for _ in range(max_retries):
......@@ -77,12 +83,13 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
temp.append(j)
# Randomly pick a number from the integer array
if temp:
outputs[x_idx] = temp[random.randint(0, len(temp) - 1)]
outputs[x_idx] = temp[random.randint(
0, len(temp) - 1)]
elif (x_types[x_idx] == "range_int") or \
(x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(x_bounds[x_idx][0],
min(x_bounds[x_idx][-1], budget_max))
(x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(
x_bounds[x_idx][0], min(x_bounds[x_idx][-1], budget_max))
else:
# The last x that we need to assign a random number
......@@ -91,26 +98,28 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
# This check:
# is our smallest possible value going to overflow the available budget space,
# and is our largest possible value going to underflow the lower bound
# and is our largest possible value going to underflow the
# lower bound
if (x_bounds[x_idx][0] <= budget_max) and \
(x_bounds[x_idx][-1] >= randint_lowerbound):
if x_types[x_idx] == "discrete_int":
temp = []
for j in x_bounds[x_idx]:
# if (j <= budget_max) and (j >= randint_lowerbound):
# if (j <= budget_max) and (j >=
# randint_lowerbound):
if randint_lowerbound <= j <= budget_max:
temp.append(j)
if temp:
outputs[x_idx] = temp[random.randint(0, len(temp) - 1)]
outputs[x_idx] = temp[random.randint(
0, len(temp) - 1)]
elif (x_types[x_idx] == "range_int") or \
(x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(randint_lowerbound,
min(x_bounds[x_idx][1], budget_max))
outputs[x_idx] = random.randint(
randint_lowerbound, min(
x_bounds[x_idx][1], budget_max))
if outputs[x_idx] is None:
break
else:
budget_allocated += outputs[x_idx]
budget_allocated += outputs[x_idx]
if None not in outputs:
break
return outputs
\ No newline at end of file
......@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import math
import random
......@@ -56,7 +57,7 @@ def rand(x_bounds, x_types):
temp = x_bounds[i][random.randint(0, len(x_bounds[i]) - 1)]
outputs.append(temp)
elif x_types[i] == "range_int":
temp = random.randint(x_bounds[i][0], x_bounds[i][1] -1)
temp = random.randint(x_bounds[i][0], x_bounds[i][1] - 1)
outputs.append(temp)
elif x_types[i] == "range_continuous":
temp = random.uniform(x_bounds[i][0], x_bounds[i][1])
......
......@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
metis_tuner.py
"""
import copy
import logging
......@@ -51,10 +55,45 @@ class MetisTuner(Tuner):
More algorithm information you could reference here:
https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/
Attributes
----------
optimize_mode : str
optimize_mode is a string that including two mode "maximize" and "minimize"
no_resampling : bool
True or False.
Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free,
then you do not need re-sampling.
no_candidates : bool
True or False.
Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks,
Metis can skip this step.
selection_num_starting_points : int
How many times Metis should try to find the global optimal in the search space?
The higher the number, the longer it takes to output the solution.
cold_start_num : int
Metis need some trial result to get cold start.
when the number of trial result is less than
cold_start_num, Metis will randomly sample hyper-parameter for trial.
exploration_probability: float
The probability of Metis to select parameter from exploration instead of exploitation.
"""
def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=False,
selection_num_starting_points=600, cold_start_num=10, exploration_probability=0.9):
def __init__(
self,
optimize_mode="maximize",
no_resampling=True,
no_candidates=False,
selection_num_starting_points=600,
cold_start_num=10,
exploration_probability=0.9):
"""
Parameters
----------
......@@ -62,23 +101,34 @@ class MetisTuner(Tuner):
optimize_mode is a string that including two mode "maximize" and "minimize"
no_resampling : bool
True or False. Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free, then you do not need re-sampling.
no_candidates: bool
True or False. Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks, Metis can skip this step.
selection_num_starting_points: int
how many times Metis should try to find the global optimal in the search space?
The higher the number, the longer it takes to output the solution.
True or False.
Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free,
then you do not need re-sampling.
no_candidates : bool
True or False.
Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks,
Metis can skip this step.
selection_num_starting_points : int
How many times Metis should try to find the global optimal in the search space?
The higher the number, the longer it takes to output the solution.
cold_start_num : int
Metis need some trial result to get cold start.
when the number of trial result is less than
cold_start_num, Metis will randomly sample hyper-parameter for trial.
exploration_probability : float
The probability of Metis to select parameter from exploration instead of exploitation.
cold_start_num: int
Metis need some trial result to get cold start. when the number of trial result is less than
cold_start_num, Metis will randomly sample hyper-parameter for trial.
x_bounds : list
The constration of parameters.
exploration_probability: float
The probability of Metis to select parameter from exploration instead of exploitation.
x_types : list
The type of parameters.
"""
self.samples_x = []
......@@ -101,7 +151,8 @@ class MetisTuner(Tuner):
def update_search_space(self, search_space):
"""Update the self.x_bounds and self.x_types by the search_space.json
"""
Update the self.x_bounds and self.x_types by the search_space.json
Parameters
----------
......@@ -120,12 +171,20 @@ class MetisTuner(Tuner):
key_range = search_space[key]['_value']
idx = self.key_order.index(key)
if key_type == 'quniform':
if key_range[2] == 1 and key_range[0].is_integer() and key_range[1].is_integer():
self.x_bounds[idx] = [key_range[0], key_range[1]+1]
if key_range[2] == 1 and key_range[0].is_integer(
) and key_range[1].is_integer():
self.x_bounds[idx] = [key_range[0], key_range[1] + 1]
self.x_types[idx] = 'range_int'
else:
low, high, q = key_range
bounds = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high)
bounds = np.clip(
np.arange(
np.round(
low / q),
np.round(
high / q) + 1) * q,
low,
high)
self.x_bounds[idx] = bounds
self.x_types[idx] = 'discrete_int'
elif key_type == 'randint':
......@@ -139,22 +198,28 @@ class MetisTuner(Tuner):
for key_value in key_range:
if not isinstance(key_value, (int, float)):
raise RuntimeError("Metis Tuner only support numerical choice.")
raise RuntimeError(
"Metis Tuner only support numerical choice.")
self.x_types[idx] = 'discrete_int'
else:
logger.info("Metis Tuner doesn't support this kind of variable: %s", key_type)
raise RuntimeError("Metis Tuner doesn't support this kind of variable: " + str(key_type))
logger.info(
"Metis Tuner doesn't support this kind of variable: %s",
str(key_type))
raise RuntimeError(
"Metis Tuner doesn't support this kind of variable: %s" %
str(key_type))
else:
logger.info("The format of search space is not a dict.")
raise RuntimeError("The format of search space is not a dict.")
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
self.selection_num_starting_points)
self.minimize_starting_points = _rand_init(
self.x_bounds, self.x_types, self.selection_num_starting_points)
def _pack_output(self, init_parameter):
"""Pack the output
"""
Pack the output
Parameters
----------
......@@ -167,14 +232,18 @@ class MetisTuner(Tuner):
output = {}
for i, param in enumerate(init_parameter):
output[self.key_order[i]] = param
return output
def generate_parameters(self, parameter_id, **kwargs):
"""Generate next parameter for trial
"""
Generate next parameter for trial
If the number of trial results is lower than the cold start number,
metis will first randomly generate some parameters.
Otherwise, metis will choose the parameters by the Gaussian Process Model and the Gaussian Mixture Model.
Otherwise, metis will choose the parameters by
the Gaussian Process Model and the Gaussian Mixture Model.
Parameters
----------
......@@ -188,26 +257,34 @@ class MetisTuner(Tuner):
init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
results = self._pack_output(init_parameter)
else:
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
self.selection_num_starting_points)
results = self._selection(self.samples_x, self.samples_y_aggregation, self.samples_y,
self.x_bounds, self.x_types,
threshold_samplessize_resampling=(None if self.no_resampling is True else 50),
no_candidates=self.no_candidates,
minimize_starting_points=self.minimize_starting_points,
minimize_constraints_fun=self.minimize_constraints_fun)
logger.info("Generate paramageters:\n%s", results)
self.minimize_starting_points = _rand_init(
self.x_bounds, self.x_types, self.selection_num_starting_points)
results = self._selection(
self.samples_x,
self.samples_y_aggregation,
self.samples_y,
self.x_bounds,
self.x_types,
threshold_samplessize_resampling=(
None if self.no_resampling is True else 50),
no_candidates=self.no_candidates,
minimize_starting_points=self.minimize_starting_points,
minimize_constraints_fun=self.minimize_constraints_fun)
logger.info("Generate paramageters: \n%s", str(results))
return results
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
"""Tuner receive result from trial.
"""
Tuner receive result from trial.
Parameters
----------
parameter_id : int
The id of parameters, generated by nni manager.
parameters : dict
A group of parameters that trial has tried.
value : dict/float
if value is dict, it should have "default" key.
"""
......@@ -216,8 +293,8 @@ class MetisTuner(Tuner):
value = -value
logger.info("Received trial result.")
logger.info("value is :%s", value)
logger.info("parameter is : %s", parameters)
logger.info("value is : %s", str(value))
logger.info("parameter is : %s", str(parameters))
# parse parameter to sample_x
sample_x = [0 for i in range(len(self.key_order))]
......@@ -244,11 +321,19 @@ class MetisTuner(Tuner):
self.samples_y_aggregation.append([value])
def _selection(self, samples_x, samples_y_aggregation, samples_y,
x_bounds, x_types, max_resampling_per_x=3,
threshold_samplessize_exploitation=12,
threshold_samplessize_resampling=50, no_candidates=False,
minimize_starting_points=None, minimize_constraints_fun=None):
def _selection(
self,
samples_x,
samples_y_aggregation,
samples_y,
x_bounds,
x_types,
max_resampling_per_x=3,
threshold_samplessize_exploitation=12,
threshold_samplessize_resampling=50,
no_candidates=False,
minimize_starting_points=None,
minimize_constraints_fun=None):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
......@@ -259,7 +344,8 @@ class MetisTuner(Tuner):
samples_size_unique = len(samples_y)
# ===== STEP 1: Compute the current optimum =====
gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation)
gp_model = gp_create_model.create_model(
samples_x, samples_y_aggregation)
lm_current = gp_selection.selection(
"lm",
samples_y_aggregation,
......@@ -278,7 +364,7 @@ class MetisTuner(Tuner):
})
if no_candidates is False:
# ===== STEP 2: Get recommended configurations for exploration =====
# ===== STEP 2: Get recommended configurations for exploration ====
results_exploration = gp_selection.selection(
"lc",
samples_y_aggregation,
......@@ -303,25 +389,31 @@ class MetisTuner(Tuner):
else:
logger.info("DEBUG: No suitable exploration candidates were")
# ===== STEP 3: Get recommended configurations for exploitation =====
# ===== STEP 3: Get recommended configurations for exploitation ===
if samples_size_all >= threshold_samplessize_exploitation:
logger.info("Getting candidates for exploitation...\n")
try:
gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation)
gmm = gmm_create_model.create_model(
samples_x, samples_y_aggregation)
if ("discrete_int" in x_types) or ("range_int" in x_types):
results_exploitation = gmm_selection.selection(x_bounds, x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
results_exploitation = gmm_selection.selection(
x_bounds,
x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
else:
# If all parameters are of "range_continuous", let's use GMM to generate random starting points
results_exploitation = gmm_selection.selection_r(x_bounds, x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
num_starting_points=self.selection_num_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
# If all parameters are of "range_continuous",
# let's use GMM to generate random starting points
results_exploitation = gmm_selection.selection_r(
x_bounds,
x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
num_starting_points=self.selection_num_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
if results_exploitation is not None:
if _num_past_samples(results_exploitation['hyperparameter'], samples_x, samples_y) == 0:
......@@ -335,24 +427,30 @@ class MetisTuner(Tuner):
}
candidates.append(temp_candidate)
logger.info("DEBUG: 1 exploitation_gmm candidate selected\n")
logger.info(
"DEBUG: 1 exploitation_gmm candidate selected\n")
logger.info(temp_candidate)
else:
logger.info("DEBUG: No suitable exploitation_gmm candidates were found\n")
logger.info(
"DEBUG: No suitable exploitation_gmm candidates were found\n")
except ValueError as exception:
# The exception: ValueError: Fitting the mixture model failed
# because some components have ill-defined empirical covariance
# (for instance caused by singleton or collapsed samples).
# Try to decrease the number of components, or increase reg_covar.
logger.info("DEBUG: No suitable exploitation_gmm candidates were found due to exception.")
# Try to decrease the number of components, or increase
# reg_covar.
logger.info(
"DEBUG: No suitable exploitation_gmm \
candidates were found due to exception.")
logger.info(exception)
# ===== STEP 4: Get a list of outliers =====
if (threshold_samplessize_resampling is not None) and \
(samples_size_unique >= threshold_samplessize_resampling):
(samples_size_unique >= threshold_samplessize_resampling):
logger.info("Getting candidates for re-sampling...\n")
results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation)
results_outliers = gp_outlier_detection.outlierDetection_threaded(
samples_x, samples_y_aggregation)
if results_outliers is not None:
for results_outlier in results_outliers: # pylint: disable=not-an-iterable
......@@ -365,11 +463,13 @@ class MetisTuner(Tuner):
logger.info("DEBUG: %d re-sampling candidates selected\n")
logger.info(temp_candidate)
else:
logger.info("DEBUG: No suitable resampling candidates were found\n")
logger.info(
"DEBUG: No suitable resampling candidates were found\n")
if candidates:
# ===== STEP 5: Compute the information gain of each candidate towards the optimum =====
logger.info("Evaluating information gain of %d candidates...\n")
# ===== STEP 5: Compute the information gain of each candidate
logger.info(
"Evaluating information gain of %d candidates...\n")
next_improvement = 0
threads_inputs = [[
......@@ -377,36 +477,45 @@ class MetisTuner(Tuner):
minimize_constraints_fun, minimize_starting_points
] for candidate in candidates]
threads_pool = ThreadPool(4)
# Evaluate what would happen if we actually sample each candidate
threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs)
# Evaluate what would happen if we actually sample each
# candidate
threads_results = threads_pool.map(
_calculate_lowest_mu_threaded, threads_inputs)
threads_pool.close()
threads_pool.join()
for threads_result in threads_results:
if threads_result['expected_lowest_mu'] < lm_current['expected_mu']:
# Information gain
temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu']
temp_improvement = threads_result['expected_lowest_mu'] - \
lm_current['expected_mu']
if next_improvement > temp_improvement:
next_improvement = temp_improvement
next_candidate = threads_result['candidate']
else:
# ===== STEP 6: If we have no candidates, randomly pick one =====
# ===== STEP 6: If we have no candidates, randomly pick one ===
logger.info(
"DEBUG: No candidates from exploration, exploitation,\
and resampling. We will random a candidate for next_candidate\n"
)
next_candidate = _rand_with_constraints(x_bounds, x_types) \
if minimize_starting_points is None else minimize_starting_points[0]
next_candidate = lib_data.match_val_type(next_candidate, x_bounds, x_types)
expected_mu, expected_sigma = gp_prediction.predict(next_candidate, gp_model['model'])
next_candidate = {'hyperparameter': next_candidate, 'reason': "random",
'expected_mu': expected_mu, 'expected_sigma': expected_sigma}
# ===== STEP 7 =====
# If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold,
# take next config as exploration step
next_candidate = _rand_with_constraints(
x_bounds,
x_types) if minimize_starting_points is None else minimize_starting_points[0]
next_candidate = lib_data.match_val_type(
next_candidate, x_bounds, x_types)
expected_mu, expected_sigma = gp_prediction.predict(
next_candidate, gp_model['model'])
next_candidate = {
'hyperparameter': next_candidate,
'reason': "random",
'expected_mu': expected_mu,
'expected_sigma': expected_sigma}
# STEP 7: If current optimal hyperparameter occurs in the history
# or exploration probability is less than the threshold, take next
# config as exploration step
outputs = self._pack_output(lm_current['hyperparameter'])
ap = random.uniform(0, 1)
if outputs in self.total_data or ap <= self.exploration_probability:
......@@ -419,11 +528,13 @@ class MetisTuner(Tuner):
return outputs
def import_data(self, data):
"""Import additional data for tuning
"""
Import additional data for tuning
Parameters
----------
data:
a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
data : a list of dict
each of which has at least two keys: 'parameter' and 'value'.
"""
_completed_num = 0
for trial_info in data:
......@@ -437,18 +548,26 @@ class MetisTuner(Tuner):
logger.info("Useless trial data, value is %s, skip this trial data.", _value)
continue
self.supplement_data_num += 1
_parameter_id = '_'.join(["ImportData", str(self.supplement_data_num)])
_parameter_id = '_'.join(
["ImportData", str(self.supplement_data_num)])
self.total_data.append(_params)
self.receive_trial_result(parameter_id=_parameter_id, parameters=_params, value=_value)
self.receive_trial_result(
parameter_id=_parameter_id,
parameters=_params,
value=_value)
logger.info("Successfully import data to metis tuner.")
def _rand_with_constraints(x_bounds, x_types):
outputs = None
x_bounds_withconstraints = [x_bounds[i] for i in CONSTRAINT_PARAMS_IDX]
x_types_withconstraints = [x_types[i] for i in CONSTRAINT_PARAMS_IDX]
x_val_withconstraints = lib_constraint_summation.rand(x_bounds_withconstraints,\
x_types_withconstraints, CONSTRAINT_LOWERBOUND, CONSTRAINT_UPPERBOUND)
x_val_withconstraints = lib_constraint_summation.rand(
x_bounds_withconstraints,
x_types_withconstraints,
CONSTRAINT_LOWERBOUND,
CONSTRAINT_UPPERBOUND)
if not x_val_withconstraints:
outputs = [None] * len(x_bounds)
......@@ -462,12 +581,18 @@ def _rand_with_constraints(x_bounds, x_types):
def _calculate_lowest_mu_threaded(inputs):
[candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points] = inputs
[candidate, samples_x, samples_y, x_bounds, x_types,
minimize_constraints_fun, minimize_starting_points] = inputs
outputs = {"candidate": candidate, "expected_lowest_mu": None}
for expected_mu in [candidate['expected_mu'] + 1.96 * candidate['expected_sigma'],
candidate['expected_mu'] - 1.96 * candidate['expected_sigma']]:
for expected_mu in [
candidate['expected_mu'] +
1.96 *
candidate['expected_sigma'],
candidate['expected_mu'] -
1.96 *
candidate['expected_sigma']]:
temp_samples_x = copy.deepcopy(samples_x)
temp_samples_y = copy.deepcopy(samples_y)
......@@ -480,8 +605,10 @@ def _calculate_lowest_mu_threaded(inputs):
temp_samples_y.append([expected_mu])
# Aggregates multiple observation of the sample sampling points
temp_y_aggregation = [statistics.median(temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(temp_samples_x, temp_y_aggregation)
temp_y_aggregation = [statistics.median(
temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(
temp_samples_x, temp_y_aggregation)
temp_results = gp_selection.selection(
"lm",
temp_y_aggregation,
......@@ -491,7 +618,8 @@ def _calculate_lowest_mu_threaded(inputs):
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
if outputs["expected_lowest_mu"] is None or outputs["expected_lowest_mu"] > temp_results['expected_mu']:
if outputs["expected_lowest_mu"] is None \
or outputs["expected_lowest_mu"] > temp_results['expected_mu']:
outputs["expected_lowest_mu"] = temp_results['expected_mu']
return outputs
......@@ -510,18 +638,19 @@ def _rand_init(x_bounds, x_types, selection_num_starting_points):
'''
Random sample some init seed within bounds.
'''
return [lib_data.rand(x_bounds, x_types) for i \
in range(0, selection_num_starting_points)]
return [lib_data.rand(x_bounds, x_types) for i
in range(0, selection_num_starting_points)]
def get_median(temp_list):
    """
    Return the median of a list of numbers.

    The input list is left untouched: the values are ordered in a sorted
    copy, so callers keep their original ordering.

    Parameters
    ----------
    temp_list : list
        Non-empty list of mutually comparable numeric values.

    Returns
    -------
    number
        The middle element when the length is odd, otherwise the mean of
        the two middle elements.

    Raises
    ------
    IndexError
        If ``temp_list`` is empty.
    """
    # Sort a copy instead of calling ``temp_list.sort()`` so the caller's
    # data is not reordered as a hidden side effect.
    ordered = sorted(temp_list)
    num = len(ordered)
    # Integer floor division keeps the index exact; ``int(num / 2)`` goes
    # through a float first.
    mid = num // 2
    if num % 2 == 0:
        # Even length: average the two central values. For an empty list
        # ``ordered[mid]`` raises IndexError, as the lookup always did.
        return (ordered[mid] + ordered[mid - 1]) / 2
    return ordered[mid]
......@@ -38,7 +38,7 @@ from nni.networkmorphism_tuner.layers import is_layer
def layer_distance(a, b):
"""The distance between two layers."""
# pylint: disable=unidiomatic-typecheck
if type(a) != type(b):
if not isinstance(a, type(b)):
return 1.0
if is_layer(a, "Conv"):
att_diff = [
......@@ -96,7 +96,8 @@ def skip_connection_distance(a, b):
return 1.0
len_a = abs(a[1] - a[0])
len_b = abs(b[1] - b[0])
return (abs(a[0] - b[0]) + abs(len_a - len_b)) / (max(a[0], b[0]) + max(len_a, len_b))
return (abs(a[0] - b[0]) + abs(len_a - len_b)) / \
(max(a[0], b[0]) + max(len_a, len_b))
def skip_connections_distance(list_a, list_b):
......@@ -161,7 +162,8 @@ class IncrementalGaussianProcess:
def incremental_fit(self, train_x, train_y):
""" Incrementally fit the regressor. """
if not self._first_fitted:
raise ValueError("The first_fit function needs to be called first.")
raise ValueError(
"The first_fit function needs to be called first.")
train_x, train_y = np.array(train_x), np.array(train_y)
......@@ -174,7 +176,7 @@ class IncrementalGaussianProcess:
temp_distance_matrix = np.concatenate((up_k, down_k), axis=0)
k_matrix = bourgain_embedding_matrix(temp_distance_matrix)
diagonal = np.diag_indices_from(k_matrix)
diagonal = (diagonal[0][-len(train_x) :], diagonal[1][-len(train_x) :])
diagonal = (diagonal[0][-len(train_x):], diagonal[1][-len(train_x):])
k_matrix[diagonal] += self.alpha
try:
......@@ -186,7 +188,8 @@ class IncrementalGaussianProcess:
self._y = np.concatenate((self._y, train_y), axis=0)
self._distance_matrix = temp_distance_matrix
self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3
self._alpha_vector = cho_solve(
(self._l_matrix, True), self._y) # Line 3
return self
......@@ -209,7 +212,8 @@ class IncrementalGaussianProcess:
self._l_matrix = cholesky(k_matrix, lower=True) # Line 2
self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3
self._alpha_vector = cho_solve(
(self._l_matrix, True), self._y) # Line 3
self._first_fitted = True
return self
......@@ -227,7 +231,9 @@ class IncrementalGaussianProcess:
# compute inverse K_inv of K based on its Cholesky
# decomposition L and its inverse L_inv
l_inv = solve_triangular(self._l_matrix.T, np.eye(self._l_matrix.shape[0]))
l_inv = solve_triangular(
self._l_matrix.T, np.eye(
self._l_matrix.shape[0]))
k_inv = l_inv.dot(l_inv.T)
# Compute variance of predictive distribution
y_var = np.ones(len(train_x), dtype=np.float)
......@@ -378,7 +384,11 @@ class BayesianOptimizer:
continue
temp_acq_value = self.acq(temp_graph)
pq.put(elem_class(temp_acq_value, elem.father_id, temp_graph))
pq.put(
elem_class(
temp_acq_value,
elem.father_id,
temp_graph))
descriptors.append(temp_graph.extract_descriptor())
if self._accept_new_acq_value(opt_acq, temp_acq_value):
opt_acq = temp_acq_value
......
......@@ -249,7 +249,8 @@ class Graph:
self.reverse_adj_list[v_id].remove(edge_tuple)
break
self.reverse_adj_list[new_v_id].append((u_id, layer_id))
for index, value in enumerate(self.layer_id_to_output_node_ids[layer_id]):
for index, value in enumerate(
self.layer_id_to_output_node_ids[layer_id]):
if value == v_id:
self.layer_id_to_output_node_ids[layer_id][index] = new_v_id
break
......@@ -350,7 +351,8 @@ class Graph:
self._replace_layer(layer_id, new_layer)
elif is_layer(layer, "BatchNormalization"):
new_layer = wider_bn(layer, start_dim, total_dim, n_add, self.weighted)
new_layer = wider_bn(
layer, start_dim, total_dim, n_add, self.weighted)
self._replace_layer(layer_id, new_layer)
self._search(v, start_dim, total_dim, n_add)
......@@ -405,7 +407,8 @@ class Graph:
target_id: A convolutional layer ID. The new block should be inserted after the block.
new_layer: An instance of StubLayer subclasses.
"""
self.operation_history.append(("to_deeper_model", target_id, new_layer))
self.operation_history.append(
("to_deeper_model", target_id, new_layer))
input_id = self.layer_id_to_input_node_ids[target_id][0]
output_id = self.layer_id_to_output_node_ids[target_id][0]
if self.weighted:
......@@ -478,14 +481,20 @@ class Graph:
pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0]
end_node_id = self.layer_id_to_output_node_ids[end_id][0]
skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id)
skip_output_id = self._insert_pooling_layer_chain(
start_node_id, end_node_id)
# Add the conv layer
new_conv_layer = get_conv_class(self.n_dim)(filters_start, filters_end, 1)
new_conv_layer = get_conv_class(
self.n_dim)(
filters_start,
filters_end,
1)
skip_output_id = self.add_layer(new_conv_layer, skip_output_id)
# Add the add layer.
add_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id]))
add_input_node_id = self._add_node(
deepcopy(self.node_list[end_node_id]))
add_layer = StubAdd()
self._redirect_edge(pre_end_node_id, end_node_id, add_input_node_id)
......@@ -504,7 +513,8 @@ class Graph:
weights = np.zeros((filters_end, filters_start) + filter_shape)
bias = np.zeros(filters_end)
new_conv_layer.set_weights(
(add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weights, np.array([0, 1])), add_noise(
bias, np.array([0, 1])))
)
def to_concat_skip_model(self, start_id, end_id):
......@@ -513,7 +523,8 @@ class Graph:
start_id: The convolutional layer ID, after which to start the skip-connection.
end_id: The convolutional layer ID, after which to end the skip-connection.
"""
self.operation_history.append(("to_concat_skip_model", start_id, end_id))
self.operation_history.append(
("to_concat_skip_model", start_id, end_id))
filters_end = self.layer_list[end_id].output.shape[-1]
filters_start = self.layer_list[start_id].output.shape[-1]
start_node_id = self.layer_id_to_output_node_ids[start_id][0]
......@@ -521,9 +532,11 @@ class Graph:
pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0]
end_node_id = self.layer_id_to_output_node_ids[end_id][0]
skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id)
skip_output_id = self._insert_pooling_layer_chain(
start_node_id, end_node_id)
concat_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id]))
concat_input_node_id = self._add_node(
deepcopy(self.node_list[end_node_id]))
self._redirect_edge(pre_end_node_id, end_node_id, concat_input_node_id)
concat_layer = StubConcatenate()
......@@ -532,7 +545,10 @@ class Graph:
self.node_list[skip_output_id],
]
concat_output_node_id = self._add_node(Node(concat_layer.output_shape))
self._add_edge(concat_layer, concat_input_node_id, concat_output_node_id)
self._add_edge(
concat_layer,
concat_input_node_id,
concat_output_node_id)
self._add_edge(concat_layer, skip_output_id, concat_output_node_id)
concat_layer.output = self.node_list[concat_output_node_id]
self.node_list[concat_output_node_id].shape = concat_layer.output_shape
......@@ -559,7 +575,8 @@ class Graph:
)
bias = np.zeros(filters_end)
new_conv_layer.set_weights(
(add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weights, np.array([0, 1])), add_noise(
bias, np.array([0, 1])))
)
def _insert_pooling_layer_chain(self, start_node_id, end_node_id):
......@@ -568,7 +585,8 @@ class Graph:
new_layer = deepcopy(layer)
if is_layer(new_layer, "Conv"):
filters = self.node_list[start_node_id].shape[-1]
new_layer = get_conv_class(self.n_dim)(filters, filters, 1, layer.stride)
new_layer = get_conv_class(self.n_dim)(
filters, filters, 1, layer.stride)
if self.weighted:
init_conv_weight(new_layer)
else:
......@@ -601,8 +619,10 @@ class Graph:
temp_v = v
temp_layer_id = layer_id
skip_type = None
while not (temp_v in index_in_main_chain and temp_u in index_in_main_chain):
if is_layer(self.layer_list[temp_layer_id], "Concatenate"):
while not (
temp_v in index_in_main_chain and temp_u in index_in_main_chain):
if is_layer(
self.layer_list[temp_layer_id], "Concatenate"):
skip_type = NetworkDescriptor.CONCAT_CONNECT
if is_layer(self.layer_list[temp_layer_id], "Add"):
skip_type = NetworkDescriptor.ADD_CONNECT
......@@ -711,7 +731,8 @@ class Graph:
def wide_layer_ids(self):
return (
self._conv_layer_ids_in_order()[:-1] + self._dense_layer_ids_in_order()[:-1]
self._conv_layer_ids_in_order(
)[:-1] + self._dense_layer_ids_in_order()[:-1]
)
def skip_connection_layer_ids(self):
......@@ -810,7 +831,8 @@ class KerasModel:
topo_node_list = self.graph.topological_order
output_id = topo_node_list[-1]
input_id = topo_node_list[0]
input_tensor = keras.layers.Input(shape=graph.node_list[input_id].shape)
input_tensor = keras.layers.Input(
shape=graph.node_list[input_id].shape)
node_list = deepcopy(self.graph.node_list)
node_list[input_id] = input_tensor
......@@ -838,7 +860,8 @@ class KerasModel:
output_tensor = keras.layers.Activation("softmax", name="activation_add")(
output_tensor
)
self.model = keras.models.Model(inputs=input_tensor, outputs=output_tensor)
self.model = keras.models.Model(
inputs=input_tensor, outputs=output_tensor)
if graph.weighted:
for index, layer in enumerate(self.layers):
......@@ -892,7 +915,8 @@ class JSONModel:
for layer_id, item in enumerate(graph.layer_list):
layer = graph.layer_list[layer_id]
layer_information = layer_description_extractor(layer, graph.node_to_id)
layer_information = layer_description_extractor(
layer, graph.node_to_id)
layer_list.append((layer_id, layer_information))
data["node_list"] = node_list
......@@ -938,7 +962,8 @@ def json_to_graph(json_model: str):
graph.input_shape = input_shape
vis = json_model["vis"]
graph.vis = {tuple(item): True for item in vis} if vis is not None else None
graph.vis = {
tuple(item): True for item in vis} if vis is not None else None
graph.weighted = json_model["weighted"]
layer_id_to_input_node_ids = json_model["layer_id_to_input_node_ids"]
graph.layer_id_to_input_node_ids = {
......
......@@ -40,7 +40,8 @@ def to_wider_graph(graph):
'''
weighted_layer_ids = graph.wide_layer_ids()
weighted_layer_ids = list(
filter(lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids)
filter(
lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids)
)
wider_layers = sample(weighted_layer_ids, 1)
......@@ -58,12 +59,14 @@ def to_wider_graph(graph):
def to_skip_connection_graph(graph):
''' skip connection graph
'''
# The last conv layer cannot be widened since the wider operator cannot be done over the two sides of flatten.
# The last conv layer cannot be widened since the wider operator cannot be
# done over the two sides of flatten.
weighted_layer_ids = graph.skip_connection_layer_ids()
valid_connection = []
for skip_type in sorted([NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]):
for skip_type in sorted(
[NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]):
for index_a in range(len(weighted_layer_ids)):
for index_b in range(len(weighted_layer_ids))[index_a + 1 :]:
for index_b in range(len(weighted_layer_ids))[index_a + 1:]:
valid_connection.append((index_a, index_b, skip_type))
if not valid_connection:
......@@ -84,9 +87,14 @@ def create_new_layer(layer, n_dim):
input_shape = layer.output.shape
dense_deeper_classes = [StubDense, get_dropout_class(n_dim), StubReLU]
conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim), StubReLU]
conv_deeper_classes = [
get_conv_class(n_dim),
get_batch_norm_class(n_dim),
StubReLU]
if is_layer(layer, "ReLU"):
conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim)]
conv_deeper_classes = [
get_conv_class(n_dim),
get_batch_norm_class(n_dim)]
dense_deeper_classes = [StubDense, get_dropout_class(n_dim)]
elif is_layer(layer, "Dropout"):
dense_deeper_classes = [StubDense, StubReLU]
......
......@@ -52,7 +52,8 @@ def deeper_conv_block(conv_layer, kernel_size, weighted=True):
if weighted:
new_conv_layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
new_weights = [
add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])),
......@@ -74,7 +75,8 @@ def dense_to_deeper_block(dense_layer, weighted=True):
new_dense_layer = StubDense(units, units)
if weighted:
new_dense_layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
return [StubReLU(), new_dense_layer]
......@@ -97,8 +99,11 @@ def wider_pre_dense(layer, n_add, weighted=True):
teacher_index = rand[i]
new_weight = teacher_w[teacher_index, :]
new_weight = new_weight[np.newaxis, :]
student_w = np.concatenate((student_w, add_noise(new_weight, student_w)), axis=0)
student_b = np.append(student_b, add_noise(teacher_b[teacher_index], student_b))
student_w = np.concatenate(
(student_w, add_noise(new_weight, student_w)), axis=0)
student_b = np.append(
student_b, add_noise(
teacher_b[teacher_index], student_b))
new_pre_layer = StubDense(layer.input_units, n_units2 + n_add)
new_pre_layer.set_weights((student_w, student_b))
......@@ -209,7 +214,7 @@ def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True):
student_w[:, : start_dim * n_units_each_channel],
add_noise(new_weight, student_w),
student_w[
:, start_dim * n_units_each_channel : total_dim * n_units_each_channel
:, start_dim * n_units_each_channel: total_dim * n_units_each_channel
],
),
axis=1,
......@@ -225,7 +230,8 @@ def add_noise(weights, other_weights):
'''
w_range = np.ptp(other_weights.flatten())
noise_range = NOISE_RATIO * w_range
noise = np.random.uniform(-noise_range / 2.0, noise_range / 2.0, weights.shape)
noise = np.random.uniform(-noise_range / 2.0,
noise_range / 2.0, weights.shape)
return np.add(noise, weights)
......@@ -236,7 +242,8 @@ def init_dense_weight(layer):
weight = np.eye(units)
bias = np.zeros(units)
layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
......@@ -256,7 +263,8 @@ def init_conv_weight(layer):
bias = np.zeros(n_filters)
layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))
(add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
)
......
......@@ -28,8 +28,10 @@ from nni.networkmorphism_tuner.utils import Constant
class AvgPool(nn.Module):
'''AvgPool Module.
'''
"""
AvgPool Module.
"""
def __init__(self):
super().__init__()
......@@ -39,8 +41,10 @@ class AvgPool(nn.Module):
class GlobalAvgPool1d(AvgPool):
'''GlobalAvgPool1d Module.
'''
"""
GlobalAvgPool1d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool1d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -48,8 +52,10 @@ class GlobalAvgPool1d(AvgPool):
class GlobalAvgPool2d(AvgPool):
'''GlobalAvgPool2d Module.
'''
"""
GlobalAvgPool2d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool2d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -57,8 +63,10 @@ class GlobalAvgPool2d(AvgPool):
class GlobalAvgPool3d(AvgPool):
'''GlobalAvgPool3d Module.
'''
"""
GlobalAvgPool3d Module.
"""
def forward(self, input_tensor):
return functional.avg_pool3d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2]
......@@ -66,70 +74,86 @@ class GlobalAvgPool3d(AvgPool):
class StubLayer:
    """
    Base class of the stub layers.

    A stub layer records its input node, its output node and its weights.
    The hook methods defined here have empty bodies and return ``None``
    unless a subclass overrides them.

    Parameters
    ----------
    input_node : object, optional
        Stored as ``self.input``; ``output_shape`` reads its ``shape``.
    output_node : object, optional
        Stored as ``self.output``.
    """

    def __init__(self, input_node=None, output_node=None):
        self.input, self.output, self.weights = input_node, output_node, None

    def build(self, shape):
        """
        Build hook taking a shape; does nothing in the base class.
        """

    def set_weights(self, weights):
        """
        Store ``weights`` on the layer.
        """
        self.weights = weights

    def import_weights(self, torch_layer):
        """
        Import weights from ``torch_layer``; does nothing in the base class.
        """

    def import_weights_keras(self, keras_layer):
        """
        Import weights from ``keras_layer``; does nothing in the base class.
        """

    def export_weights(self, torch_layer):
        """
        Export weights to ``torch_layer``; does nothing in the base class.
        """

    def export_weights_keras(self, keras_layer):
        """
        Export weights to ``keras_layer``; does nothing in the base class.
        """

    def get_weights(self):
        """
        Return the weights last stored with ``set_weights`` (``None`` if unset).
        """
        stored = self.weights
        return stored

    def size(self):
        """
        Return the size of the layer, which is always ``0`` in the base class.
        """
        return 0

    @property
    def output_shape(self):
        """
        Shape of the output, taken unchanged from the input node's ``shape``.
        """
        input_node = self.input
        return input_node.shape

    def to_real_layer(self):
        """
        Conversion hook to a real layer; does nothing in the base class.
        """

    def __str__(self):
        """
        Return the class name without its first four characters,
        e.g. ``StubLayer`` gives ``Layer``.
        """
        class_name = type(self).__name__
        return class_name[4:]
class StubWeightBiasLayer(StubLayer):
'''StubWeightBiasLayer Module to set the bias.
'''
"""
StubWeightBiasLayer Module to set the bias.
"""
def import_weights(self, torch_layer):
self.set_weights(
(torch_layer.weight.data.cpu().numpy(), torch_layer.bias.data.cpu().numpy())
(torch_layer.weight.data.cpu().numpy(),
torch_layer.bias.data.cpu().numpy())
)
def import_weights_keras(self, keras_layer):
......@@ -144,8 +168,10 @@ class StubWeightBiasLayer(StubLayer):
class StubBatchNormalization(StubWeightBiasLayer):
'''StubBatchNormalization Module. Batch Norm.
'''
"""
StubBatchNormalization Module. Batch Norm.
"""
def __init__(self, num_features, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.num_features = num_features
......@@ -175,29 +201,37 @@ class StubBatchNormalization(StubWeightBiasLayer):
class StubBatchNormalization1d(StubBatchNormalization):
    """
    Batch normalization stub backed by ``torch.nn.BatchNorm1d``.
    """

    def to_real_layer(self):
        """
        Build the real layer for this stub.

        Returns
        -------
        torch.nn.BatchNorm1d
            A new layer constructed with ``self.num_features``.
        """
        num_features = self.num_features
        return torch.nn.BatchNorm1d(num_features)
class StubBatchNormalization2d(StubBatchNormalization):
    """
    Batch normalization stub backed by ``torch.nn.BatchNorm2d``.
    """

    def to_real_layer(self):
        """
        Build the real layer for this stub.

        Returns
        -------
        torch.nn.BatchNorm2d
            A new layer constructed with ``self.num_features``.
        """
        num_features = self.num_features
        return torch.nn.BatchNorm2d(num_features)
class StubBatchNormalization3d(StubBatchNormalization):
    """
    StubBatchNormalization3d Module.
    """

    def to_real_layer(self):
        """
        Build a ``torch.nn.BatchNorm3d`` with ``self.num_features`` features.
        """
        return torch.nn.BatchNorm3d(self.num_features)
class StubDense(StubWeightBiasLayer):
'''StubDense Module. Linear.
'''
"""
StubDense Module. Linear.
"""
def __init__(self, input_units, units, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.input_units = input_units
......@@ -208,7 +242,9 @@ class StubDense(StubWeightBiasLayer):
return (self.units,)
def import_weights_keras(self, keras_layer):
    """
    Import weights from a keras layer.

    The first array returned by ``keras_layer.get_weights()`` is stored
    transposed; the second is stored as-is.

    Parameters
    ----------
    keras_layer : object
        A layer whose ``get_weights()`` returns at least two arrays.
    """
    # Fetch once; the original queried the layer twice for the same list.
    keras_weights = keras_layer.get_weights()
    self.set_weights((keras_weights[0].T, keras_weights[1]))
def export_weights_keras(self, keras_layer):
    """
    Export weights to a keras layer.

    ``self.weights[0]`` is passed transposed and ``self.weights[1]``
    as-is, mirroring the transpose applied on import.

    Parameters
    ----------
    keras_layer : object
        A layer exposing ``set_weights``.
    """
    keras_layer.set_weights((self.weights[0].T, self.weights[1]))
......@@ -221,9 +257,12 @@ class StubDense(StubWeightBiasLayer):
class StubConv(StubWeightBiasLayer):
'''StubConv Module. Conv.
'''
def __init__(self, input_channel, filters, kernel_size, stride=1, input_node=None, output_node=None):
"""
StubConv Module. Conv.
"""
def __init__(self, input_channel, filters, kernel_size,
stride=1, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.input_channel = input_channel
self.filters = filters
......@@ -242,13 +281,16 @@ class StubConv(StubWeightBiasLayer):
return tuple(ret)
def import_weights_keras(self, keras_layer):
    """
    Import weights from a keras layer.

    The first array returned by ``keras_layer.get_weights()`` is stored
    transposed; the second is stored as-is.

    Parameters
    ----------
    keras_layer : object
        A layer whose ``get_weights()`` returns at least two arrays.
    """
    # Fetch once; the original queried the layer twice for the same list.
    keras_weights = keras_layer.get_weights()
    self.set_weights((keras_weights[0].T, keras_weights[1]))
def export_weights_keras(self, keras_layer):
    """
    Export weights to a keras layer.

    ``self.weights[0]`` is passed transposed and ``self.weights[1]``
    as-is, mirroring the transpose applied on import.

    Parameters
    ----------
    keras_layer : object
        A layer exposing ``set_weights``.
    """
    keras_layer.set_weights((self.weights[0].T, self.weights[1]))
def size(self):
    """
    Size of the convolution layer.

    Returns
    -------
    int
        ``(input_channel * kernel_size * kernel_size + 1) * filters``.
    """
    # NOTE(review): the formula squares kernel_size regardless of the
    # conv dimensionality; presumably an approximation for 1d/3d - confirm.
    return (self.input_channel * self.kernel_size *
            self.kernel_size + 1) * self.filters
@abstractmethod
def to_real_layer(self):
......@@ -272,8 +314,10 @@ class StubConv(StubWeightBiasLayer):
class StubConv1d(StubConv):
'''StubConv1d Module.
'''
"""
StubConv1d Module.
"""
def to_real_layer(self):
return torch.nn.Conv1d(
self.input_channel,
......@@ -285,8 +329,10 @@ class StubConv1d(StubConv):
class StubConv2d(StubConv):
'''StubConv2d Module.
'''
"""
StubConv2d Module.
"""
def to_real_layer(self):
return torch.nn.Conv2d(
self.input_channel,
......@@ -298,8 +344,10 @@ class StubConv2d(StubConv):
class StubConv3d(StubConv):
'''StubConv3d Module.
'''
"""
StubConv3d Module.
"""
def to_real_layer(self):
return torch.nn.Conv3d(
self.input_channel,
......@@ -311,8 +359,10 @@ class StubConv3d(StubConv):
class StubAggregateLayer(StubLayer):
'''StubAggregateLayer Module.
'''
"""
StubAggregateLayer Module.
"""
def __init__(self, input_nodes=None, output_node=None):
if input_nodes is None:
input_nodes = []
......@@ -320,8 +370,8 @@ class StubAggregateLayer(StubLayer):
class StubConcatenate(StubAggregateLayer):
'''StubConcatenate Module.
'''
"""StubConcatenate Module.
"""
@property
def output_shape(self):
ret = 0
......@@ -335,8 +385,9 @@ class StubConcatenate(StubAggregateLayer):
class StubAdd(StubAggregateLayer):
'''StubAdd Module.
'''
"""
StubAdd Module.
"""
@property
def output_shape(self):
return self.input[0].shape
......@@ -346,8 +397,9 @@ class StubAdd(StubAggregateLayer):
class StubFlatten(StubLayer):
'''StubFlatten Module.
'''
"""
StubFlatten Module.
"""
@property
def output_shape(self):
ret = 1
......@@ -360,22 +412,28 @@ class StubFlatten(StubLayer):
class StubReLU(StubLayer):
    """
    StubReLU Module.
    """

    def to_real_layer(self):
        """
        Build a ``torch.nn.ReLU`` layer.
        """
        return torch.nn.ReLU()
class StubSoftmax(StubLayer):
    """
    StubSoftmax Module.
    """

    def to_real_layer(self):
        """
        Build the real layer.

        Note that this returns ``torch.nn.LogSoftmax(dim=1)``, i.e. a
        log-softmax over dimension 1, not a plain softmax.
        """
        return torch.nn.LogSoftmax(dim=1)
class StubDropout(StubLayer):
'''StubDropout Module.
'''
"""
StubDropout Module.
"""
def __init__(self, rate, input_node=None, output_node=None):
super().__init__(input_node, output_node)
self.rate = rate
......@@ -386,36 +444,45 @@ class StubDropout(StubLayer):
class StubDropout1d(StubDropout):
    """
    StubDropout1d Module.
    """

    def to_real_layer(self):
        """
        Build a ``torch.nn.Dropout`` with probability ``self.rate``.
        """
        return torch.nn.Dropout(self.rate)
class StubDropout2d(StubDropout):
    """
    StubDropout2d Module.
    """

    def to_real_layer(self):
        """
        Build a ``torch.nn.Dropout2d`` with probability ``self.rate``.
        """
        return torch.nn.Dropout2d(self.rate)
class StubDropout3d(StubDropout):
    """
    StubDropout3d Module.
    """

    def to_real_layer(self):
        """
        Build a ``torch.nn.Dropout3d`` with probability ``self.rate``.
        """
        return torch.nn.Dropout3d(self.rate)
class StubInput(StubLayer):
    """
    StubInput Module.
    """

    def __init__(self, input_node=None, output_node=None):
        """
        Forward ``input_node`` and ``output_node`` to the base initializer.
        """
        super().__init__(input_node, output_node)
class StubPooling(StubLayer):
'''StubPooling Module.
'''
"""
StubPooling Module.
"""
def __init__(self,
kernel_size=None,
......@@ -444,30 +511,37 @@ class StubPooling(StubLayer):
class StubPooling1d(StubPooling):
    """
    StubPooling1d Module.
    """

    def to_real_layer(self):
        """
        Build a ``torch.nn.MaxPool1d`` from ``self.kernel_size`` and
        ``self.stride``.
        """
        return torch.nn.MaxPool1d(self.kernel_size, stride=self.stride)
class StubPooling2d(StubPooling):
    """
    StubPooling2d Module.
    """

    def to_real_layer(self):
        """
        Build a ``torch.nn.MaxPool2d`` from ``self.kernel_size`` and
        ``self.stride``.
        """
        return torch.nn.MaxPool2d(self.kernel_size, stride=self.stride)
class StubPooling3d(StubPooling):
    """
    StubPooling3d Module.
    """

    def to_real_layer(self):
        """
        Build a ``torch.nn.MaxPool3d`` from ``self.kernel_size`` and
        ``self.stride``.
        """
        return torch.nn.MaxPool3d(self.kernel_size, stride=self.stride)
class StubGlobalPooling(StubLayer):
'''StubGlobalPooling Module.
'''
"""
StubGlobalPooling Module.
"""
def __init__(self, input_node=None, output_node=None):
super().__init__(input_node, output_node)
......@@ -481,49 +555,63 @@ class StubGlobalPooling(StubLayer):
class StubGlobalPooling1d(StubGlobalPooling):
    """
    StubGlobalPooling1d Module.
    """

    def to_real_layer(self):
        """
        Build a ``GlobalAvgPool1d`` layer.
        """
        return GlobalAvgPool1d()
class StubGlobalPooling2d(StubGlobalPooling):
    """
    StubGlobalPooling2d Module.
    """

    def to_real_layer(self):
        """
        Build a ``GlobalAvgPool2d`` layer.
        """
        return GlobalAvgPool2d()
class StubGlobalPooling3d(StubGlobalPooling):
    """
    StubGlobalPooling3d Module.
    """

    def to_real_layer(self):
        """
        Build a ``GlobalAvgPool3d`` layer.
        """
        return GlobalAvgPool3d()
class TorchConcatenate(nn.Module):
    """
    TorchConcatenate Module.
    """

    def forward(self, input_list):
        """
        Concatenate the tensors in ``input_list`` along dimension 1.

        Parameters
        ----------
        input_list : list
            Tensors to concatenate.

        Returns
        -------
        torch.Tensor
            Result of ``torch.cat(input_list, dim=1)``.
        """
        return torch.cat(input_list, dim=1)
class TorchAdd(nn.Module):
    """
    TorchAdd Module.
    """

    def forward(self, input_list):
        """
        Add the first two entries of ``input_list``.

        Parameters
        ----------
        input_list : list
            Must hold at least two tensors; entries past the second are
            ignored.

        Returns
        -------
        torch.Tensor
            ``input_list[0] + input_list[1]``.
        """
        return input_list[0] + input_list[1]
class TorchFlatten(nn.Module):
    """
    TorchFlatten Module.
    """

    def forward(self, input_tensor):
        """
        Flatten every dimension after the first into one.

        Parameters
        ----------
        input_tensor : torch.Tensor
            Tensor to flatten; its first dimension is kept.

        Returns
        -------
        torch.Tensor
            A view of shape ``(input_tensor.size(0), -1)``.
        """
        return input_tensor.view(input_tensor.size(0), -1)
def keras_dropout(layer, rate):
'''keras dropout layer.
'''
"""
Keras dropout layer.
"""
from keras import layers
......@@ -539,8 +627,9 @@ def keras_dropout(layer, rate):
def to_real_keras_layer(layer):
''' real keras layer.
'''
"""
Real keras layer.
"""
from keras import layers
if is_layer(layer, "Dense"):
......@@ -574,10 +663,14 @@ def to_real_keras_layer(layer):
def is_layer(layer, layer_type):
'''judge the layer type.
Returns:
"""
Judge the layer type.
Returns
-------
bool
boolean -- True or False
'''
"""
if layer_type == "Input":
return isinstance(layer, StubInput)
......@@ -607,8 +700,9 @@ def is_layer(layer, layer_type):
def layer_description_extractor(layer, node_to_id):
'''get layer description.
'''
"""
Get layer description.
"""
layer_input = layer.input
layer_output = layer.output
......@@ -641,7 +735,8 @@ def layer_description_extractor(layer, node_to_id):
layer.units,
]
elif isinstance(layer, (StubBatchNormalization,)):
return (type(layer).__name__, layer_input, layer_output, layer.num_features)
return (type(layer).__name__, layer_input,
layer_output, layer.num_features)
elif isinstance(layer, (StubDropout,)):
return (type(layer).__name__, layer_input, layer_output, layer.rate)
elif isinstance(layer, StubPooling):
......@@ -658,8 +753,8 @@ def layer_description_extractor(layer, node_to_id):
def layer_description_builder(layer_information, id_to_node):
'''build layer from description.
'''
"""build layer from description.
"""
layer_type = layer_information[0]
layer_input_ids = layer_information[1]
......@@ -696,8 +791,9 @@ def layer_description_builder(layer_information, id_to_node):
def layer_width(layer):
'''get layer width.
'''
"""
Get layer width.
"""
if is_layer(layer, "Dense"):
return layer.units
......
......@@ -17,11 +17,13 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
"""
networkmorphism_tuner.py
"""
import logging
import os
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward
from nni.networkmorphism_tuner.bayesian import BayesianOptimizer
......@@ -34,7 +36,35 @@ logger = logging.getLogger("NetworkMorphism_AutoML")
class NetworkMorphismTuner(Tuner):
"""NetworkMorphismTuner is a tuner which using network morphism techniques."""
"""
NetworkMorphismTuner is a tuner that uses network morphism techniques.
Attributes
----------
n_classes : int
The class number or output node number (default: ``10``)
input_shape : tuple
A tuple including: (input_width, input_width, input_channel)
t_min : float
The minimum temperature for simulated annealing. (default: ``Constant.T_MIN``)
beta : float
The beta in acquisition function. (default: ``Constant.BETA``)
algorithm_name : str
algorithm name used in the network morphism (default: ``"Bayesian"``)
optimize_mode : str
optimize mode "minimize" or "maximize" (default: ``"minimize"``)
verbose : bool
verbose to print the log (default: ``True``)
bo : BayesianOptimizer
The optimizer used in the network morphism tuner.
max_model_size : int
max model size to the graph (default: ``Constant.MAX_MODEL_SIZE``)
default_model_len : int
default model length (default: ``Constant.MODEL_LEN``)
default_model_width : int
default model width (default: ``Constant.MODEL_WIDTH``)
search_space : dict
"""
def __init__(
self,
......@@ -52,36 +82,8 @@ class NetworkMorphismTuner(Tuner):
default_model_len=Constant.MODEL_LEN,
default_model_width=Constant.MODEL_WIDTH,
):
""" initilizer of the NetworkMorphismTuner.
Parameters
----------
task : str
task mode, such as "cv","common" etc. (default: {"cv"})
input_width : int
input sample shape (default: {32})
input_channel : int
input sample shape (default: {3})
n_output_node : int
output node number (default: {10})
algorithm_name : str
algorithm name used in the network morphism (default: {"Bayesian"})
optimize_mode : str
optimize mode "minimize" or "maximize" (default: {"minimize"})
path : str
default mode path to save the model file (default: {"model_path"})
verbose : bool
verbose to print the log (default: {True})
beta : float
The beta in acquisition function. (default: {Constant.BETA})
t_min : float
The minimum temperature for simulated annealing. (default: {Constant.T_MIN})
max_model_size : int
max model size to the graph (default: {Constant.MAX_MODEL_SIZE})
default_model_len : int
default model length (default: {Constant.MODEL_LEN})
default_model_width : int
default model width (default: {Constant.MODEL_WIDTH})
"""
Initializer of the NetworkMorphismTuner.
"""
if not os.path.exists(path):
......@@ -92,7 +94,8 @@ class NetworkMorphismTuner(Tuner):
elif task == "common":
self.generators = [MlpGenerator]
else:
raise NotImplementedError('{} task not supported in List ["cv","common"]')
raise NotImplementedError(
'{} task not supported in List ["cv","common"]')
self.n_classes = n_output_node
self.input_shape = (input_width, input_width, input_channel)
......@@ -106,7 +109,8 @@ class NetworkMorphismTuner(Tuner):
self.verbose = verbose
self.model_count = 0
self.bo = BayesianOptimizer(self, self.t_min, self.optimize_mode, self.beta)
self.bo = BayesianOptimizer(
self, self.t_min, self.optimize_mode, self.beta)
self.training_queue = []
self.descriptors = []
self.history = []
......@@ -117,6 +121,7 @@ class NetworkMorphismTuner(Tuner):
self.search_space = dict()
def update_search_space(self, search_space):
"""
Update search space definition in tuner by search_space in neural architecture.
......@@ -140,7 +145,8 @@ class NetworkMorphismTuner(Tuner):
new_father_id, generated_graph = self.generate()
new_model_id = self.model_count
self.model_count += 1
self.training_queue.append((generated_graph, new_father_id, new_model_id))
self.training_queue.append(
(generated_graph, new_father_id, new_model_id))
self.descriptors.append(generated_graph.extract_descriptor())
graph, father_id, model_id = self.training_queue.pop(0)
......@@ -153,12 +159,15 @@ class NetworkMorphismTuner(Tuner):
return json_out
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
""" Record an observation of the objective function.
"""
Record an observation of the objective function.
Parameters
----------
parameter_id : int
the id of a group of parameters generated by the nni manager.
parameters : dict
A group of parameters.
value : dict/float
if value is dict, it should have "default" key.
"""
......@@ -175,8 +184,11 @@ class NetworkMorphismTuner(Tuner):
self.add_model(reward, model_id)
self.update(father_id, graph, reward, model_id)
def init_search(self):
"""Call the generators to generate the initial architectures for the search."""
"""
Call the generators to generate the initial architectures for the search.
"""
if self.verbose:
logger.info("Initializing search.")
for generator in self.generators:
......@@ -191,14 +203,16 @@ class NetworkMorphismTuner(Tuner):
if self.verbose:
logger.info("Initialization finished.")
def generate(self):
"""Generate the next neural architecture.
"""
Generate the next neural architecture.
Returns
-------
other_info: any object
other_info : any object
Anything to be saved in the training queue together with the architecture.
generated_graph: Graph
generated_graph : Graph
An instance of Graph.
"""
generated_graph, new_father_id = self.bo.generate(self.descriptors)
......@@ -211,7 +225,8 @@ class NetworkMorphismTuner(Tuner):
return new_father_id, generated_graph
def update(self, other_info, graph, metric_value, model_id):
""" Update the controller with evaluation result of a neural architecture.
"""
Update the controller with evaluation result of a neural architecture.
Parameters
----------
......@@ -228,7 +243,8 @@ class NetworkMorphismTuner(Tuner):
self.bo.add_child(father_id, model_id)
def add_model(self, metric_value, model_id):
""" Add model to the history, x_queue and y_queue
"""
Add model to the history, x_queue and y_queue
Parameters
----------
......@@ -252,16 +268,21 @@ class NetworkMorphismTuner(Tuner):
file.close()
return ret
def get_best_model_id(self):
    """
    Get the best model_id from history using the metric value.

    Returns
    -------
    object
        The ``"model_id"`` of the history entry with the largest
        ``"metric_value"`` when ``self.optimize_mode`` is
        ``OptimizeMode.Maximize``, otherwise of the entry with the
        smallest one.

    Raises
    ------
    ValueError
        If ``self.history`` is empty (raised by ``max``/``min``).
    """
    # Same key and lookup for both modes; only the selector differs.
    select = max if self.optimize_mode is OptimizeMode.Maximize else min
    return select(self.history, key=lambda x: x["metric_value"])["model_id"]
def load_model_by_id(self, model_id):
"""Get the model by model_id
"""
Get the model by model_id
Parameters
----------
......@@ -281,7 +302,8 @@ class NetworkMorphismTuner(Tuner):
return load_model
def load_best_model(self):
""" Get the best model by model id
"""
Get the best model by model id
Returns
-------
......@@ -291,7 +313,8 @@ class NetworkMorphismTuner(Tuner):
return self.load_model_by_id(self.get_best_model_id())
def get_metric_value_by_id(self, model_id):
""" Get the model metric valud by its model_id
"""
Get the model metric value by its model_id
Parameters
----------
......
......@@ -92,17 +92,25 @@ class CnnGenerator(NetworkGenerator):
for i in range(model_len):
output_node_id = graph.add_layer(StubReLU(), output_node_id)
output_node_id = graph.add_layer(
self.batch_norm(graph.node_list[output_node_id].shape[-1]), output_node_id
self.batch_norm(
graph.node_list[output_node_id].shape[-1]), output_node_id
)
output_node_id = graph.add_layer(
self.conv(temp_input_channel, model_width, kernel_size=3, stride=stride),
self.conv(
temp_input_channel,
model_width,
kernel_size=3,
stride=stride),
output_node_id,
)
temp_input_channel = model_width
if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1):
output_node_id = graph.add_layer(self.pooling(), output_node_id)
if pooling_len == 0 or (
(i + 1) % pooling_len == 0 and i != model_len - 1):
output_node_id = graph.add_layer(
self.pooling(), output_node_id)
output_node_id = graph.add_layer(self.global_avg_pooling(), output_node_id)
output_node_id = graph.add_layer(
self.global_avg_pooling(), output_node_id)
output_node_id = graph.add_layer(
self.dropout(Constant.CONV_DROPOUT_RATE), output_node_id
)
......@@ -111,7 +119,11 @@ class CnnGenerator(NetworkGenerator):
output_node_id,
)
output_node_id = graph.add_layer(StubReLU(), output_node_id)
graph.add_layer(StubDense(model_width, self.n_output_node), output_node_id)
graph.add_layer(
StubDense(
model_width,
self.n_output_node),
output_node_id)
return graph
......@@ -145,7 +157,8 @@ class MlpGenerator(NetworkGenerator):
if model_width is None:
model_width = Constant.MODEL_WIDTH
if isinstance(model_width, list) and not len(model_width) == model_len:
raise ValueError("The length of 'model_width' does not match 'model_len'")
raise ValueError(
"The length of 'model_width' does not match 'model_len'")
elif isinstance(model_width, int):
model_width = [model_width] * model_len
......@@ -162,5 +175,9 @@ class MlpGenerator(NetworkGenerator):
output_node_id = graph.add_layer(StubReLU(), output_node_id)
n_nodes_prev_layer = width
graph.add_layer(StubDense(n_nodes_prev_layer, self.n_output_node), output_node_id)
graph.add_layer(
StubDense(
n_nodes_prev_layer,
self.n_output_node),
output_node_id)
return graph
......@@ -59,9 +59,12 @@ class NetworkMorphismTestCase(TestCase):
graph_recover.layer_id_to_input_node_ids,
)
self.assertEqual(graph_init.adj_list, graph_recover.adj_list)
self.assertEqual(graph_init.reverse_adj_list, graph_recover.reverse_adj_list)
self.assertEqual(
len(graph_init.operation_history), len(graph_recover.operation_history)
graph_init.reverse_adj_list,
graph_recover.reverse_adj_list)
self.assertEqual(
len(graph_init.operation_history), len(
graph_recover.operation_history)
)
self.assertEqual(graph_init.n_dim, graph_recover.n_dim)
self.assertEqual(graph_init.conv, graph_recover.conv)
......@@ -71,7 +74,8 @@ class NetworkMorphismTestCase(TestCase):
node_list_init = [node.shape for node in graph_init.node_list]
node_list_recover = [node.shape for node in graph_recover.node_list]
self.assertEqual(node_list_init, node_list_recover)
self.assertEqual(len(graph_init.node_to_id), len(graph_recover.node_to_id))
self.assertEqual(len(graph_init.node_to_id),
len(graph_recover.node_to_id))
layer_list_init = [
layer_description_extractor(item, graph_init.node_to_id)
for item in graph_init.layer_list
......@@ -82,7 +86,8 @@ class NetworkMorphismTestCase(TestCase):
]
self.assertEqual(layer_list_init, layer_list_recover)
node_to_id_init = [graph_init.node_to_id[node] for node in graph_init.node_list]
node_to_id_init = [graph_init.node_to_id[node]
for node in graph_init.node_list]
node_to_id_recover = [
graph_recover.node_to_id[node] for node in graph_recover.node_list
]
......@@ -192,8 +197,8 @@ class NetworkMorphismTestCase(TestCase):
"""
tuner = NetworkMorphismTuner()
tuner.add_model(0.8, 0)
tuner.add_model(0.9, 1)
tuner.add_model(0.8, 0)
tuner.add_model(0.9, 1)
self.assertEqual(tuner.get_best_model_id(), 1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment