Unverified commit 0168ff1c, authored by xuehui and committed by GitHub
Browse files

update docstring and pylint (#1662)

* update docstring of batchtuner

* update docstring of batch tuner

* update docstring of evolution tuner

* update docstring and pylint of metis_tuner

* fix pylint related to logger in metis_tuner

* fix pylint

* update

* fix pylint in metis_tuner

* update in networkmorphism_tuner

* update

* update

* update docstring in hyperopt_tuner

* update batch_tuner

* delete unused space

* update in metis

* update sdk_reference.rst

* update networkmorphism

* update networkmorphism

* update batch_tuner

* update batch_tuner

* update

* update metis

* roll back to print

* update Returns

* update

* delete white space
parent 803f056a
...@@ -36,6 +36,9 @@ Tuner ...@@ -36,6 +36,9 @@ Tuner
.. autoclass:: nni.metis_tuner.metis_tuner.MetisTuner .. autoclass:: nni.metis_tuner.metis_tuner.MetisTuner
:members: :members:
.. autoclass:: nni.batch_tuner.batch_tuner.BatchTuner
:members:
Assessor Assessor
------------------------ ------------------------
.. autoclass:: nni.assessor.Assessor .. autoclass:: nni.assessor.Assessor
......
...@@ -31,22 +31,27 @@ TYPE = '_type' ...@@ -31,22 +31,27 @@ TYPE = '_type'
CHOICE = 'choice' CHOICE = 'choice'
VALUE = '_value' VALUE = '_value'
logger = logging.getLogger('batch_tuner_AutoML') LOGGER = logging.getLogger('batch_tuner_AutoML')
class BatchTuner(Tuner): class BatchTuner(Tuner):
""" """
BatchTuner is tuner will running all the configure that user want to run batchly. BatchTuner is tuner will running all the configure that user want to run batchly.
Examples
--------
The search space only be accepted like: The search space only be accepted like:
```
{ {
'combine_params': { '_type': 'choice', 'combine_params': { '_type': 'choice',
'_value': '[{...}, {...}, {...}]', '_value': '[{...}, {...}, {...}]',
} }
} }
```
""" """
def __init__(self): def __init__(self):
self.count = -1 self._count = -1
self.values = [] self._values = []
def is_valid(self, search_space): def is_valid(self, search_space):
""" """
...@@ -55,6 +60,11 @@ class BatchTuner(Tuner): ...@@ -55,6 +60,11 @@ class BatchTuner(Tuner):
Parameters Parameters
---------- ----------
search_space : dict search_space : dict
Returns
-------
None or list
If valid, return candidate values; else return None.
""" """
if not len(search_space) == 1: if not len(search_space) == 1:
raise RuntimeError('BatchTuner only supprt one combined-paramreters key.') raise RuntimeError('BatchTuner only supprt one combined-paramreters key.')
...@@ -62,11 +72,14 @@ class BatchTuner(Tuner): ...@@ -62,11 +72,14 @@ class BatchTuner(Tuner):
for param in search_space: for param in search_space:
param_type = search_space[param][TYPE] param_type = search_space[param][TYPE]
if not param_type == CHOICE: if not param_type == CHOICE:
raise RuntimeError('BatchTuner only supprt one combined-paramreters type is choice.') raise RuntimeError('BatchTuner only supprt \
else: one combined-paramreters type is choice.')
if isinstance(search_space[param][VALUE], list):
return search_space[param][VALUE] if isinstance(search_space[param][VALUE], list):
raise RuntimeError('The combined-paramreters value in BatchTuner is not a list.') return search_space[param][VALUE]
raise RuntimeError('The combined-paramreters \
value in BatchTuner is not a list.')
return None return None
def update_search_space(self, search_space): def update_search_space(self, search_space):
...@@ -76,7 +89,7 @@ class BatchTuner(Tuner): ...@@ -76,7 +89,7 @@ class BatchTuner(Tuner):
---------- ----------
search_space : dict search_space : dict
""" """
self.values = self.is_valid(search_space) self._values = self.is_valid(search_space)
def generate_parameters(self, parameter_id, **kwargs): def generate_parameters(self, parameter_id, **kwargs):
"""Returns a dict of trial (hyper-)parameters, as a serializable object. """Returns a dict of trial (hyper-)parameters, as a serializable object.
...@@ -84,41 +97,49 @@ class BatchTuner(Tuner): ...@@ -84,41 +97,49 @@ class BatchTuner(Tuner):
Parameters Parameters
---------- ----------
parameter_id : int parameter_id : int
Returns
-------
dict
A candidate parameter group.
""" """
self.count += 1 self._count += 1
if self.count > len(self.values) - 1: if self._count > len(self._values) - 1:
raise nni.NoMoreTrialError('no more parameters now.') raise nni.NoMoreTrialError('no more parameters now.')
return self.values[self.count] return self._values[self._count]
def receive_trial_result(self, parameter_id, parameters, value, **kwargs): def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
pass pass
def import_data(self, data): def import_data(self, data):
"""Import additional data for tuning """Import additional data for tuning
Parameters Parameters
---------- ----------
data: data:
a list of dictionarys, each of which has at least two keys, 'parameter' and 'value' a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
""" """
if not self.values: if not self._values:
logger.info("Search space has not been initialized, skip this data import") LOGGER.info("Search space has not been initialized, skip this data import")
return return
self.values = self.values[(self.count+1):] self._values = self._values[(self._count+1):]
self.count = -1 self._count = -1
_completed_num = 0 _completed_num = 0
for trial_info in data: for trial_info in data:
logger.info("Importing data, current processing progress %s / %s", _completed_num, len(data)) LOGGER .info("Importing data, current processing \
progress %s / %s", _completed_num, len(data))
# simply validate data format # simply validate data format
assert "parameter" in trial_info assert "parameter" in trial_info
_params = trial_info["parameter"] _params = trial_info["parameter"]
assert "value" in trial_info assert "value" in trial_info
_value = trial_info['value'] _value = trial_info['value']
if not _value: if not _value:
logger.info("Useless trial data, value is %s, skip this trial data.", _value) LOGGER.info("Useless trial data, value is %s, skip this trial data.", _value)
continue continue
_completed_num += 1 _completed_num += 1
if _params in self.values: if _params in self._values:
self.values.remove(_params) self._values.remove(_params)
logger.info("Successfully import data to batch tuner, total data: %d, imported data: %d.", len(data), _completed_num) LOGGER .info("Successfully import data to batch tuner, \
total data: %d, imported data: %d.", len(data), _completed_num)
...@@ -32,7 +32,9 @@ import nni.parameter_expressions as parameter_expressions ...@@ -32,7 +32,9 @@ import nni.parameter_expressions as parameter_expressions
def json2space(x, oldy=None, name=NodeType.ROOT): def json2space(x, oldy=None, name=NodeType.ROOT):
"""Change search space from json format to hyperopt format """
Change search space from json format to hyperopt format
""" """
y = list() y = list()
if isinstance(x, dict): if isinstance(x, dict):
...@@ -59,7 +61,9 @@ def json2space(x, oldy=None, name=NodeType.ROOT): ...@@ -59,7 +61,9 @@ def json2space(x, oldy=None, name=NodeType.ROOT):
return y return y
def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeType.ROOT): def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeType.ROOT):
"""Json to pramaters. """
Json to pramaters.
""" """
if isinstance(x, dict): if isinstance(x, dict):
if NodeType.TYPE in x.keys(): if NodeType.TYPE in x.keys():
...@@ -117,6 +121,17 @@ def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeTyp ...@@ -117,6 +121,17 @@ def json2parameter(x, is_rand, random_state, oldy=None, Rand=False, name=NodeTyp
class Individual: class Individual:
""" """
Indicidual class to store the indv info. Indicidual class to store the indv info.
Attributes
----------
config : str
Search space.
info : str
The str to save information of individual.
result : float
The final metric of a individual.
store_dir : str
save_dir : str
""" """
def __init__(self, config=None, info=None, result=None, save_dir=None): def __init__(self, config=None, info=None, result=None, save_dir=None):
...@@ -124,6 +139,7 @@ class Individual: ...@@ -124,6 +139,7 @@ class Individual:
Parameters Parameters
---------- ----------
config : str config : str
A config to represent a group of parameters.
info : str info : str
result : float result : float
save_dir : str save_dir : str
...@@ -140,6 +156,8 @@ class Individual: ...@@ -140,6 +156,8 @@ class Individual:
def mutation(self, config=None, info=None, save_dir=None): def mutation(self, config=None, info=None, save_dir=None):
""" """
Mutation by reset state information.
Parameters Parameters
---------- ----------
config : str config : str
...@@ -177,8 +195,11 @@ class EvolutionTuner(Tuner): ...@@ -177,8 +195,11 @@ class EvolutionTuner(Tuner):
self.population = None self.population = None
self.space = None self.space = None
def update_search_space(self, search_space): def update_search_space(self, search_space):
"""Update search space. """
Update search space.
Search_space contains the information that user pre-defined. Search_space contains the information that user pre-defined.
Parameters Parameters
...@@ -191,15 +212,19 @@ class EvolutionTuner(Tuner): ...@@ -191,15 +212,19 @@ class EvolutionTuner(Tuner):
self.random_state = np.random.RandomState() self.random_state = np.random.RandomState()
self.population = [] self.population = []
is_rand = dict() is_rand = dict()
for item in self.space: for item in self.space:
is_rand[item] = True is_rand[item] = True
for _ in range(self.population_size): for _ in range(self.population_size):
config = json2parameter( config = json2parameter(
self.searchspace_json, is_rand, self.random_state) self.searchspace_json, is_rand, self.random_state)
self.population.append(Individual(config=config)) self.population.append(Individual(config=config))
def generate_parameters(self, parameter_id, **kwargs): def generate_parameters(self, parameter_id, **kwargs):
"""Returns a dict of trial (hyper-)parameters, as a serializable object. """
This function will returns a dict of trial (hyper-)parameters, as a serializable object.
Parameters Parameters
---------- ----------
...@@ -207,15 +232,19 @@ class EvolutionTuner(Tuner): ...@@ -207,15 +232,19 @@ class EvolutionTuner(Tuner):
Returns Returns
------- -------
config : dict dict
A group of candaidte parameters that evolution tuner generated.
""" """
if not self.population: if not self.population:
raise RuntimeError('The population is empty') raise RuntimeError('The population is empty')
pos = -1 pos = -1
for i in range(len(self.population)): for i in range(len(self.population)):
if self.population[i].result is None: if self.population[i].result is None:
pos = i pos = i
break break
if pos != -1: if pos != -1:
indiv = copy.deepcopy(self.population[pos]) indiv = copy.deepcopy(self.population[pos])
self.population.pop(pos) self.population.pop(pos)
...@@ -230,6 +259,7 @@ class EvolutionTuner(Tuner): ...@@ -230,6 +259,7 @@ class EvolutionTuner(Tuner):
self.population[0].config) self.population[0].config)
is_rand = dict() is_rand = dict()
mutation_pos = space[random.randint(0, len(space)-1)] mutation_pos = space[random.randint(0, len(space)-1)]
for i in range(len(self.space)): for i in range(len(self.space)):
is_rand[self.space[i]] = (self.space[i] == mutation_pos) is_rand[self.space[i]] = (self.space[i] == mutation_pos)
config = json2parameter( config = json2parameter(
...@@ -238,21 +268,27 @@ class EvolutionTuner(Tuner): ...@@ -238,21 +268,27 @@ class EvolutionTuner(Tuner):
# remove "_index" from config and save params-id # remove "_index" from config and save params-id
total_config = config total_config = config
self.total_data[parameter_id] = total_config self.total_data[parameter_id] = total_config
config = split_index(total_config) config = split_index(total_config)
return config return config
def receive_trial_result(self, parameter_id, parameters, value, **kwargs): def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
'''Record the result from a trial """
Record the result from a trial
Parameters Parameters
---------- ----------
parameters: dict parameter_id : int
parameters : dict
value : dict/float value : dict/float
if value is dict, it should have "default" key. if value is dict, it should have "default" key.
value is final metrics of the trial. value is final metrics of the trial.
''' """
reward = extract_scalar_reward(value) reward = extract_scalar_reward(value)
if parameter_id not in self.total_data: if parameter_id not in self.total_data:
raise RuntimeError('Received parameter_id not in total_data.') raise RuntimeError('Received parameter_id not in total_data.')
# restore the paramsters contains "_index" # restore the paramsters contains "_index"
......
...@@ -422,7 +422,8 @@ class HyperoptTuner(Tuner): ...@@ -422,7 +422,8 @@ class HyperoptTuner(Tuner):
misc_by_id[tid]['vals'][key] = [val] misc_by_id[tid]['vals'][key] = [val]
def get_suggestion(self, random_search=False): def get_suggestion(self, random_search=False):
"""get suggestion from hyperopt """
get suggestion from hyperopt
Parameters Parameters
---------- ----------
...@@ -473,7 +474,8 @@ class HyperoptTuner(Tuner): ...@@ -473,7 +474,8 @@ class HyperoptTuner(Tuner):
return total_params return total_params
def import_data(self, data): def import_data(self, data):
"""Import additional data for tuning """
Import additional data for tuning
Parameters Parameters
---------- ----------
......
...@@ -16,7 +16,8 @@ ...@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os import os
import sys import sys
...@@ -31,7 +32,8 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34): ...@@ -31,7 +32,8 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
''' '''
Create the Gaussian Mixture Model Create the Gaussian Mixture Model
''' '''
samples = [samples_x[i] + [samples_y_aggregation[i]] for i in range(0, len(samples_x))] samples = [samples_x[i] + [samples_y_aggregation[i]]
for i in range(0, len(samples_x))]
# Sorts so that we can get the top samples # Sorts so that we can get the top samples
samples = sorted(samples, key=itemgetter(-1)) samples = sorted(samples, key=itemgetter(-1))
...@@ -39,13 +41,16 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34): ...@@ -39,13 +41,16 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
samples_goodbatch = samples[0:samples_goodbatch_size] samples_goodbatch = samples[0:samples_goodbatch_size]
samples_badbatch = samples[samples_goodbatch_size:] samples_badbatch = samples[samples_goodbatch_size:]
samples_x_goodbatch = [sample_goodbatch[0:-1] for sample_goodbatch in samples_goodbatch] samples_x_goodbatch = [sample_goodbatch[0:-1]
for sample_goodbatch in samples_goodbatch]
#samples_y_goodbatch = [sample_goodbatch[-1] for sample_goodbatch in samples_goodbatch] #samples_y_goodbatch = [sample_goodbatch[-1] for sample_goodbatch in samples_goodbatch]
samples_x_badbatch = [sample_badbatch[0:-1] for sample_badbatch in samples_badbatch] samples_x_badbatch = [sample_badbatch[0:-1]
for sample_badbatch in samples_badbatch]
# === Trains GMM clustering models === # # === Trains GMM clustering models === #
#sys.stderr.write("[%s] Train GMM's GMM model\n" % (os.path.basename(__file__))) #sys.stderr.write("[%s] Train GMM's GMM model\n" % (os.path.basename(__file__)))
bgmm_goodbatch = mm.BayesianGaussianMixture(n_components=max(1, samples_goodbatch_size - 1)) bgmm_goodbatch = mm.BayesianGaussianMixture(
n_components=max(1, samples_goodbatch_size - 1))
bad_n_components = max(1, len(samples_x) - samples_goodbatch_size - 1) bad_n_components = max(1, len(samples_x) - samples_goodbatch_size - 1)
bgmm_badbatch = mm.BayesianGaussianMixture(n_components=bad_n_components) bgmm_badbatch = mm.BayesianGaussianMixture(n_components=bad_n_components)
bgmm_goodbatch.fit(samples_x_goodbatch) bgmm_goodbatch.fit(samples_x_goodbatch)
...@@ -55,4 +60,3 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34): ...@@ -55,4 +60,3 @@ def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
model['clusteringmodel_good'] = bgmm_goodbatch model['clusteringmodel_good'] = bgmm_goodbatch
model['clusteringmodel_bad'] = bgmm_badbatch model['clusteringmodel_bad'] = bgmm_badbatch
return model return model
\ No newline at end of file
...@@ -16,7 +16,8 @@ ...@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os import os
import random import random
...@@ -33,14 +34,17 @@ CONSTRAINT_UPPERBOUND = None ...@@ -33,14 +34,17 @@ CONSTRAINT_UPPERBOUND = None
CONSTRAINT_PARAMS_IDX = [] CONSTRAINT_PARAMS_IDX = []
def _ratio_scores(parameters_value, clusteringmodel_gmm_good, clusteringmodel_gmm_bad): def _ratio_scores(parameters_value, clusteringmodel_gmm_good,
clusteringmodel_gmm_bad):
''' '''
The ratio is smaller the better The ratio is smaller the better
''' '''
ratio = clusteringmodel_gmm_good.score([parameters_value]) / clusteringmodel_gmm_bad.score([parameters_value]) ratio = clusteringmodel_gmm_good.score(
[parameters_value]) / clusteringmodel_gmm_bad.score([parameters_value])
sigma = 0 sigma = 0
return ratio, sigma return ratio, sigma
def selection_r(x_bounds, def selection_r(x_bounds,
x_types, x_types,
clusteringmodel_gmm_good, clusteringmodel_gmm_good,
...@@ -60,6 +64,7 @@ def selection_r(x_bounds, ...@@ -60,6 +64,7 @@ def selection_r(x_bounds,
return outputs return outputs
def selection(x_bounds, def selection(x_bounds,
x_types, x_types,
clusteringmodel_gmm_good, clusteringmodel_gmm_good,
...@@ -69,13 +74,14 @@ def selection(x_bounds, ...@@ -69,13 +74,14 @@ def selection(x_bounds,
''' '''
Select the lowest mu value Select the lowest mu value
''' '''
results = lib_acquisition_function.next_hyperparameter_lowest_mu(\ results = lib_acquisition_function.next_hyperparameter_lowest_mu(
_ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],\ _ratio_scores, [clusteringmodel_gmm_good, clusteringmodel_gmm_bad],
x_bounds, x_types, minimize_starting_points, \ x_bounds, x_types, minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun) minimize_constraints_fun=minimize_constraints_fun)
return results return results
def _rand_with_constraints(x_bounds, x_types): def _rand_with_constraints(x_bounds, x_types):
''' '''
Random generate the variable with constraints Random generate the variable with constraints
...@@ -96,6 +102,7 @@ def _rand_with_constraints(x_bounds, x_types): ...@@ -96,6 +102,7 @@ def _rand_with_constraints(x_bounds, x_types):
outputs[i] = random.randint(x_bounds[i][0], x_bounds[i][1]) outputs[i] = random.randint(x_bounds[i][0], x_bounds[i][1])
return outputs return outputs
def _minimize_constraints_fun_summation(x): def _minimize_constraints_fun_summation(x):
''' '''
Minimize constraints fun summation Minimize constraints fun summation
......
...@@ -17,7 +17,9 @@ ...@@ -17,7 +17,9 @@
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
OutlierDectection.py
"""
import os import os
import sys import sys
...@@ -30,19 +32,21 @@ sys.path.insert(1, os.path.join(sys.path[0], '..')) ...@@ -30,19 +32,21 @@ sys.path.insert(1, os.path.join(sys.path[0], '..'))
def _outlierDetection_threaded(inputs): def _outlierDetection_threaded(inputs):
''' """
Detect the outlier Detect the outlier
''' """
[samples_idx, samples_x, samples_y_aggregation] = inputs [samples_idx, samples_x, samples_y_aggregation] = inputs
sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"\ sys.stderr.write("[%s] DEBUG: Evaluating %dth of %d samples\n"
% (os.path.basename(__file__), samples_idx + 1, len(samples_x))) % (os.path.basename(__file__), samples_idx + 1, len(samples_x)))
outlier = None outlier = None
# Create a diagnostic regression model which removes the sample that we want to evaluate # Create a diagnostic regression model which removes the sample that we
diagnostic_regressor_gp = gp_create_model.create_model(\ # want to evaluate
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\ diagnostic_regressor_gp = gp_create_model.create_model(
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:]) samples_x[0:samples_idx] + samples_x[samples_idx + 1:],
mu, sigma = gp_prediction.predict(samples_x[samples_idx], diagnostic_regressor_gp['model']) samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(
samples_x[samples_idx], diagnostic_regressor_gp['model'])
# 2.33 is the z-score for 98% confidence level # 2.33 is the z-score for 98% confidence level
if abs(samples_y_aggregation[samples_idx] - mu) > (2.33 * sigma): if abs(samples_y_aggregation[samples_idx] - mu) > (2.33 * sigma):
...@@ -52,16 +56,18 @@ def _outlierDetection_threaded(inputs): ...@@ -52,16 +56,18 @@ def _outlierDetection_threaded(inputs):
"difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)} "difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)}
return outlier return outlier
def outlierDetection_threaded(samples_x, samples_y_aggregation): def outlierDetection_threaded(samples_x, samples_y_aggregation):
''' """
Use Multi-thread to detect the outlier Use Multi-thread to detect the outlier
''' """
outliers = [] outliers = []
threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]\ threads_inputs = [[samples_idx, samples_x, samples_y_aggregation]
for samples_idx in range(0, len(samples_x))] for samples_idx in range(0, len(samples_x))]
threads_pool = ThreadPool(min(4, len(threads_inputs))) threads_pool = ThreadPool(min(4, len(threads_inputs)))
threads_results = threads_pool.map(_outlierDetection_threaded, threads_inputs) threads_results = threads_pool.map(
_outlierDetection_threaded, threads_inputs)
threads_pool.close() threads_pool.close()
threads_pool.join() threads_pool.join()
...@@ -69,15 +75,13 @@ def outlierDetection_threaded(samples_x, samples_y_aggregation): ...@@ -69,15 +75,13 @@ def outlierDetection_threaded(samples_x, samples_y_aggregation):
if threads_result is not None: if threads_result is not None:
outliers.append(threads_result) outliers.append(threads_result)
else: else:
print("error here.") print("Error: threads_result is None.")
outliers = outliers if outliers else None outliers = outliers if outliers else None
return outliers return outliers
def outlierDetection(samples_x, samples_y_aggregation): def outlierDetection(samples_x, samples_y_aggregation):
'''
TODO
'''
outliers = [] outliers = []
for samples_idx, _ in enumerate(samples_x): for samples_idx, _ in enumerate(samples_x):
#sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n" #sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n"
...@@ -92,7 +96,8 @@ def outlierDetection(samples_x, samples_y_aggregation): ...@@ -92,7 +96,8 @@ def outlierDetection(samples_x, samples_y_aggregation):
outliers.append({"samples_idx": samples_idx, outliers.append({"samples_idx": samples_idx,
"expected_mu": mu, "expected_mu": mu,
"expected_sigma": sigma, "expected_sigma": sigma,
"difference": abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)}) "difference": \
abs(samples_y_aggregation[samples_idx] - mu) - (2.33 * sigma)})
outliers = outliers if outliers else None outliers = outliers if outliers else None
return outliers return outliers
...@@ -16,7 +16,11 @@ ...@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
lib_acquisition_function.py
"""
import sys import sys
import numpy import numpy
...@@ -33,9 +37,9 @@ def next_hyperparameter_expected_improvement(fun_prediction, ...@@ -33,9 +37,9 @@ def next_hyperparameter_expected_improvement(fun_prediction,
samples_y_aggregation, samples_y_aggregation,
minimize_starting_points, minimize_starting_points,
minimize_constraints_fun=None): minimize_constraints_fun=None):
''' """
"Expected Improvement" acquisition function "Expected Improvement" acquisition function
''' """
best_x = None best_x = None
best_acquisition_value = None best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds] x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
...@@ -70,6 +74,7 @@ def next_hyperparameter_expected_improvement(fun_prediction, ...@@ -70,6 +74,7 @@ def next_hyperparameter_expected_improvement(fun_prediction,
return outputs return outputs
def _expected_improvement(x, fun_prediction, fun_prediction_args, def _expected_improvement(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, samples_y_aggregation, x_bounds, x_types, samples_y_aggregation,
minimize_constraints_fun): minimize_constraints_fun):
...@@ -77,7 +82,8 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args, ...@@ -77,7 +82,8 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args,
x = lib_data.match_val_type(x, x_bounds, x_types) x = lib_data.match_val_type(x, x_bounds, x_types)
expected_improvement = sys.maxsize expected_improvement = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True): if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, sigma = fun_prediction(x, *fun_prediction_args) mu, sigma = fun_prediction(x, *fun_prediction_args)
loss_optimum = min(samples_y_aggregation) loss_optimum = min(samples_y_aggregation)
...@@ -87,7 +93,7 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args, ...@@ -87,7 +93,7 @@ def _expected_improvement(x, fun_prediction, fun_prediction_args,
with numpy.errstate(divide="ignore"): with numpy.errstate(divide="ignore"):
Z = scaling_factor * (mu - loss_optimum) / sigma Z = scaling_factor * (mu - loss_optimum) / sigma
expected_improvement = scaling_factor * (mu - loss_optimum) * \ expected_improvement = scaling_factor * (mu - loss_optimum) * \
norm.cdf(Z) + sigma * norm.pdf(Z) norm.cdf(Z) + sigma * norm.pdf(Z)
expected_improvement = 0.0 if sigma == 0.0 else expected_improvement expected_improvement = 0.0 if sigma == 0.0 else expected_improvement
# We want expected_improvement to be as large as possible # We want expected_improvement to be as large as possible
...@@ -101,9 +107,9 @@ def next_hyperparameter_lowest_confidence(fun_prediction, ...@@ -101,9 +107,9 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
x_bounds, x_types, x_bounds, x_types,
minimize_starting_points, minimize_starting_points,
minimize_constraints_fun=None): minimize_constraints_fun=None):
''' """
"Lowest Confidence" acquisition function "Lowest Confidence" acquisition function
''' """
best_x = None best_x = None
best_acquisition_value = None best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds] x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
...@@ -120,10 +126,12 @@ def next_hyperparameter_lowest_confidence(fun_prediction, ...@@ -120,10 +126,12 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
x_types, x_types,
minimize_constraints_fun)) minimize_constraints_fun))
if (best_acquisition_value) is None or (res.fun < best_acquisition_value): if (best_acquisition_value) is None or (
res.fun < best_acquisition_value):
res.x = numpy.ndarray.tolist(res.x) res.x = numpy.ndarray.tolist(res.x)
res.x = lib_data.match_val_type(res.x, x_bounds, x_types) res.x = lib_data.match_val_type(res.x, x_bounds, x_types)
if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True): if (minimize_constraints_fun is None) or (
minimize_constraints_fun(res.x) is True):
best_acquisition_value = res.fun best_acquisition_value = res.fun
best_x = res.x best_x = res.x
...@@ -134,13 +142,15 @@ def next_hyperparameter_lowest_confidence(fun_prediction, ...@@ -134,13 +142,15 @@ def next_hyperparameter_lowest_confidence(fun_prediction,
'expected_sigma': sigma, 'acquisition_func': "lc"} 'expected_sigma': sigma, 'acquisition_func': "lc"}
return outputs return outputs
def _lowest_confidence(x, fun_prediction, fun_prediction_args, def _lowest_confidence(x, fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun): x_bounds, x_types, minimize_constraints_fun):
# This is only for step-wise optimization # This is only for step-wise optimization
x = lib_data.match_val_type(x, x_bounds, x_types) x = lib_data.match_val_type(x, x_bounds, x_types)
ci = sys.maxsize ci = sys.maxsize
if (minimize_constraints_fun is None) or (minimize_constraints_fun(x) is True): if (minimize_constraints_fun is None) or (
minimize_constraints_fun(x) is True):
mu, sigma = fun_prediction(x, *fun_prediction_args) mu, sigma = fun_prediction(x, *fun_prediction_args)
ci = (sigma * 1.96 * 2) / mu ci = (sigma * 1.96 * 2) / mu
# We want ci to be as large as possible # We want ci to be as large as possible
...@@ -156,9 +166,9 @@ def next_hyperparameter_lowest_mu(fun_prediction, ...@@ -156,9 +166,9 @@ def next_hyperparameter_lowest_mu(fun_prediction,
x_bounds, x_types, x_bounds, x_types,
minimize_starting_points, minimize_starting_points,
minimize_constraints_fun=None): minimize_constraints_fun=None):
''' """
"Lowest Mu" acquisition function "Lowest Mu" acquisition function
''' """
best_x = None best_x = None
best_acquisition_value = None best_acquisition_value = None
x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds] x_bounds_minmax = [[i[0], i[-1]] for i in x_bounds]
...@@ -169,13 +179,15 @@ def next_hyperparameter_lowest_mu(fun_prediction, ...@@ -169,13 +179,15 @@ def next_hyperparameter_lowest_mu(fun_prediction,
x0=starting_point.reshape(1, -1), x0=starting_point.reshape(1, -1),
bounds=x_bounds_minmax, bounds=x_bounds_minmax,
method="L-BFGS-B", method="L-BFGS-B",
args=(fun_prediction, fun_prediction_args, \ args=(fun_prediction, fun_prediction_args,
x_bounds, x_types, minimize_constraints_fun)) x_bounds, x_types, minimize_constraints_fun))
if (best_acquisition_value is None) or (res.fun < best_acquisition_value): if (best_acquisition_value is None) or (
res.fun < best_acquisition_value):
res.x = numpy.ndarray.tolist(res.x) res.x = numpy.ndarray.tolist(res.x)
res.x = lib_data.match_val_type(res.x, x_bounds, x_types) res.x = lib_data.match_val_type(res.x, x_bounds, x_types)
if (minimize_constraints_fun is None) or (minimize_constraints_fun(res.x) is True): if (minimize_constraints_fun is None) or (
minimize_constraints_fun(res.x) is True):
best_acquisition_value = res.fun best_acquisition_value = res.fun
best_x = res.x best_x = res.x
...@@ -189,14 +201,14 @@ def next_hyperparameter_lowest_mu(fun_prediction, ...@@ -189,14 +201,14 @@ def next_hyperparameter_lowest_mu(fun_prediction,
def _lowest_mu(x, fun_prediction, fun_prediction_args,
               x_bounds, x_types, minimize_constraints_fun):
    """
    Evaluate the "Lowest Mu" objective at a single point.

    Parameters
    ----------
    x : sequence
        Point to evaluate. It is first passed through
        ``lib_data.match_val_type`` together with ``x_bounds`` and ``x_types``.
    fun_prediction : callable
        Called as ``fun_prediction(x, *fun_prediction_args)``; it must return
        a pair whose first element is the predicted mean.
    fun_prediction_args : sequence
        Extra positional arguments forwarded to ``fun_prediction``.
    x_bounds : list
        Bounds of each parameter, forwarded to ``lib_data.match_val_type``.
    x_types : list
        Type of each parameter, forwarded to ``lib_data.match_val_type``.
    minimize_constraints_fun : callable or None
        Optional feasibility check. The point counts as feasible only when
        this is ``None`` or the call returns exactly ``True``.

    Returns
    -------
    number
        The predicted mean for a feasible point, otherwise ``sys.maxsize``.
    """
    # This is only for step-wise optimization
    candidate = lib_data.match_val_type(x, x_bounds, x_types)

    # Guard clause: an infeasible point gets a huge value instead of a
    # prediction (presumably so a minimizer steers away from it).
    if minimize_constraints_fun is not None \
            and minimize_constraints_fun(candidate) is not True:
        return sys.maxsize

    predicted_mu, _ = fun_prediction(candidate, *fun_prediction_args)
    return predicted_mu
\ No newline at end of file
...@@ -16,7 +16,11 @@ ...@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
lib_constraint_summation.py
"""
import math import math
import random import random
...@@ -39,6 +43,7 @@ def check_feasibility(x_bounds, lowerbound, upperbound): ...@@ -39,6 +43,7 @@ def check_feasibility(x_bounds, lowerbound, upperbound):
return (x_bounds_lowerbound <= lowerbound <= x_bounds_upperbound) or \ return (x_bounds_lowerbound <= lowerbound <= x_bounds_upperbound) or \
(x_bounds_lowerbound <= upperbound <= x_bounds_upperbound) (x_bounds_lowerbound <= upperbound <= x_bounds_upperbound)
def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100): def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
''' '''
Key idea is that we try to move towards upperbound, by randomly choose one Key idea is that we try to move towards upperbound, by randomly choose one
...@@ -55,7 +60,8 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100): ...@@ -55,7 +60,8 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
if x_types[i] == "discrete_int": if x_types[i] == "discrete_int":
x_idx_sorted.append([i, len(x_bounds[i])]) x_idx_sorted.append([i, len(x_bounds[i])])
elif (x_types[i] == "range_int") or (x_types[i] == "range_continuous"): elif (x_types[i] == "range_int") or (x_types[i] == "range_continuous"):
x_idx_sorted.append([i, math.floor(x_bounds[i][1] - x_bounds[i][0])]) x_idx_sorted.append(
[i, math.floor(x_bounds[i][1] - x_bounds[i][0])])
x_idx_sorted = sorted(x_idx_sorted, key=itemgetter(1)) x_idx_sorted = sorted(x_idx_sorted, key=itemgetter(1))
for _ in range(max_retries): for _ in range(max_retries):
...@@ -77,12 +83,13 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100): ...@@ -77,12 +83,13 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
temp.append(j) temp.append(j)
# Randomly pick a number from the integer array # Randomly pick a number from the integer array
if temp: if temp:
outputs[x_idx] = temp[random.randint(0, len(temp) - 1)] outputs[x_idx] = temp[random.randint(
0, len(temp) - 1)]
elif (x_types[x_idx] == "range_int") or \ elif (x_types[x_idx] == "range_int") or \
(x_types[x_idx] == "range_continuous"): (x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(x_bounds[x_idx][0], outputs[x_idx] = random.randint(
min(x_bounds[x_idx][-1], budget_max)) x_bounds[x_idx][0], min(x_bounds[x_idx][-1], budget_max))
else: else:
# The last x that we need to assign a random number # The last x that we need to assign a random number
...@@ -91,26 +98,28 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100): ...@@ -91,26 +98,28 @@ def rand(x_bounds, x_types, lowerbound, upperbound, max_retries=100):
# This check: # This check:
# is our smallest possible value going to overflow the available budget space, # is our smallest possible value going to overflow the available budget space,
# and is our largest possible value going to underflow the lower bound # and is our largest possible value going to underflow the
# lower bound
if (x_bounds[x_idx][0] <= budget_max) and \ if (x_bounds[x_idx][0] <= budget_max) and \
(x_bounds[x_idx][-1] >= randint_lowerbound): (x_bounds[x_idx][-1] >= randint_lowerbound):
if x_types[x_idx] == "discrete_int": if x_types[x_idx] == "discrete_int":
temp = [] temp = []
for j in x_bounds[x_idx]: for j in x_bounds[x_idx]:
# if (j <= budget_max) and (j >= randint_lowerbound): # if (j <= budget_max) and (j >=
# randint_lowerbound):
if randint_lowerbound <= j <= budget_max: if randint_lowerbound <= j <= budget_max:
temp.append(j) temp.append(j)
if temp: if temp:
outputs[x_idx] = temp[random.randint(0, len(temp) - 1)] outputs[x_idx] = temp[random.randint(
0, len(temp) - 1)]
elif (x_types[x_idx] == "range_int") or \ elif (x_types[x_idx] == "range_int") or \
(x_types[x_idx] == "range_continuous"): (x_types[x_idx] == "range_continuous"):
outputs[x_idx] = random.randint(randint_lowerbound, outputs[x_idx] = random.randint(
min(x_bounds[x_idx][1], budget_max)) randint_lowerbound, min(
x_bounds[x_idx][1], budget_max))
if outputs[x_idx] is None: if outputs[x_idx] is None:
break break
else: budget_allocated += outputs[x_idx]
budget_allocated += outputs[x_idx]
if None not in outputs: if None not in outputs:
break break
return outputs return outputs
\ No newline at end of file
...@@ -16,7 +16,8 @@ ...@@ -16,7 +16,8 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import math import math
import random import random
...@@ -56,7 +57,7 @@ def rand(x_bounds, x_types): ...@@ -56,7 +57,7 @@ def rand(x_bounds, x_types):
temp = x_bounds[i][random.randint(0, len(x_bounds[i]) - 1)] temp = x_bounds[i][random.randint(0, len(x_bounds[i]) - 1)]
outputs.append(temp) outputs.append(temp)
elif x_types[i] == "range_int": elif x_types[i] == "range_int":
temp = random.randint(x_bounds[i][0], x_bounds[i][1] -1) temp = random.randint(x_bounds[i][0], x_bounds[i][1] - 1)
outputs.append(temp) outputs.append(temp)
elif x_types[i] == "range_continuous": elif x_types[i] == "range_continuous":
temp = random.uniform(x_bounds[i][0], x_bounds[i][1]) temp = random.uniform(x_bounds[i][0], x_bounds[i][1])
......
...@@ -16,7 +16,11 @@ ...@@ -16,7 +16,11 @@
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
"""
metis_tuner.py
"""
import copy import copy
import logging import logging
...@@ -51,10 +55,45 @@ class MetisTuner(Tuner): ...@@ -51,10 +55,45 @@ class MetisTuner(Tuner):
More algorithm information you could reference here: More algorithm information you could reference here:
https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/ https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/
Attributes
----------
optimize_mode : str
optimize_mode is a string with two possible modes: "maximize" and "minimize".
no_resampling : bool
True or False.
Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free,
then you do not need re-sampling.
no_candidates : bool
True or False.
Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks,
Metis can skip this step.
selection_num_starting_points : int
How many times Metis should try to find the global optimal in the search space?
The higher the number, the longer it takes to output the solution.
cold_start_num : int
Metis needs some trial results for a cold start.
When the number of trial results is less than
cold_start_num, Metis will randomly sample hyper-parameters for the trial.
exploration_probability: float
The probability of Metis to select parameter from exploration instead of exploitation.
""" """
def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=False, def __init__(
selection_num_starting_points=600, cold_start_num=10, exploration_probability=0.9): self,
optimize_mode="maximize",
no_resampling=True,
no_candidates=False,
selection_num_starting_points=600,
cold_start_num=10,
exploration_probability=0.9):
""" """
Parameters Parameters
---------- ----------
...@@ -62,23 +101,34 @@ class MetisTuner(Tuner): ...@@ -62,23 +101,34 @@ class MetisTuner(Tuner):
optimize_mode is a string that including two mode "maximize" and "minimize" optimize_mode is a string that including two mode "maximize" and "minimize"
no_resampling : bool no_resampling : bool
True or False. Should Metis consider re-sampling as part of the search strategy? True or False.
If you are confident that the training dataset is noise-free, then you do not need re-sampling. Should Metis consider re-sampling as part of the search strategy?
If you are confident that the training dataset is noise-free,
no_candidates: bool then you do not need re-sampling.
True or False. Should Metis suggest parameters for the next benchmark?
If you do not plan to do more benchmarks, Metis can skip this step. no_candidates : bool
True or False.
selection_num_starting_points: int Should Metis suggest parameters for the next benchmark?
how many times Metis should try to find the global optimal in the search space? If you do not plan to do more benchmarks,
The higher the number, the longer it takes to output the solution. Metis can skip this step.
selection_num_starting_points : int
How many times Metis should try to find the global optimal in the search space?
The higher the number, the longer it takes to output the solution.
cold_start_num : int
Metis need some trial result to get cold start.
when the number of trial result is less than
cold_start_num, Metis will randomly sample hyper-parameter for trial.
exploration_probability : float
The probability of Metis to select parameter from exploration instead of exploitation.
cold_start_num: int x_bounds : list
Metis need some trial result to get cold start. when the number of trial result is less than The constraints of parameters.
cold_start_num, Metis will randomly sample hyper-parameter for trial.
exploration_probability: float x_types : list
The probability of Metis to select parameter from exploration instead of exploitation. The type of parameters.
""" """
self.samples_x = [] self.samples_x = []
...@@ -101,7 +151,8 @@ class MetisTuner(Tuner): ...@@ -101,7 +151,8 @@ class MetisTuner(Tuner):
def update_search_space(self, search_space): def update_search_space(self, search_space):
"""Update the self.x_bounds and self.x_types by the search_space.json """
Update the self.x_bounds and self.x_types by the search_space.json
Parameters Parameters
---------- ----------
...@@ -120,12 +171,20 @@ class MetisTuner(Tuner): ...@@ -120,12 +171,20 @@ class MetisTuner(Tuner):
key_range = search_space[key]['_value'] key_range = search_space[key]['_value']
idx = self.key_order.index(key) idx = self.key_order.index(key)
if key_type == 'quniform': if key_type == 'quniform':
if key_range[2] == 1 and key_range[0].is_integer() and key_range[1].is_integer(): if key_range[2] == 1 and key_range[0].is_integer(
self.x_bounds[idx] = [key_range[0], key_range[1]+1] ) and key_range[1].is_integer():
self.x_bounds[idx] = [key_range[0], key_range[1] + 1]
self.x_types[idx] = 'range_int' self.x_types[idx] = 'range_int'
else: else:
low, high, q = key_range low, high, q = key_range
bounds = np.clip(np.arange(np.round(low/q), np.round(high/q)+1) * q, low, high) bounds = np.clip(
np.arange(
np.round(
low / q),
np.round(
high / q) + 1) * q,
low,
high)
self.x_bounds[idx] = bounds self.x_bounds[idx] = bounds
self.x_types[idx] = 'discrete_int' self.x_types[idx] = 'discrete_int'
elif key_type == 'randint': elif key_type == 'randint':
...@@ -139,22 +198,28 @@ class MetisTuner(Tuner): ...@@ -139,22 +198,28 @@ class MetisTuner(Tuner):
for key_value in key_range: for key_value in key_range:
if not isinstance(key_value, (int, float)): if not isinstance(key_value, (int, float)):
raise RuntimeError("Metis Tuner only support numerical choice.") raise RuntimeError(
"Metis Tuner only support numerical choice.")
self.x_types[idx] = 'discrete_int' self.x_types[idx] = 'discrete_int'
else: else:
logger.info("Metis Tuner doesn't support this kind of variable: %s", key_type) logger.info(
raise RuntimeError("Metis Tuner doesn't support this kind of variable: " + str(key_type)) "Metis Tuner doesn't support this kind of variable: %s",
str(key_type))
raise RuntimeError(
"Metis Tuner doesn't support this kind of variable: %s" %
str(key_type))
else: else:
logger.info("The format of search space is not a dict.") logger.info("The format of search space is not a dict.")
raise RuntimeError("The format of search space is not a dict.") raise RuntimeError("The format of search space is not a dict.")
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \ self.minimize_starting_points = _rand_init(
self.selection_num_starting_points) self.x_bounds, self.x_types, self.selection_num_starting_points)
def _pack_output(self, init_parameter): def _pack_output(self, init_parameter):
"""Pack the output """
Pack the output
Parameters Parameters
---------- ----------
...@@ -167,14 +232,18 @@ class MetisTuner(Tuner): ...@@ -167,14 +232,18 @@ class MetisTuner(Tuner):
output = {} output = {}
for i, param in enumerate(init_parameter): for i, param in enumerate(init_parameter):
output[self.key_order[i]] = param output[self.key_order[i]] = param
return output return output
def generate_parameters(self, parameter_id, **kwargs): def generate_parameters(self, parameter_id, **kwargs):
"""Generate next parameter for trial """
Generate next parameter for trial
If the number of trial result is lower than cold start number, If the number of trial result is lower than cold start number,
metis will first random generate some parameters. metis will first random generate some parameters.
Otherwise, metis will choose the parameters by the Gaussian Process Model and the Gaussian Mixture Model. Otherwise, metis will choose the parameters by
the Gaussian Process Model and the Gaussian Mixture Model.
Parameters Parameters
---------- ----------
...@@ -188,26 +257,34 @@ class MetisTuner(Tuner): ...@@ -188,26 +257,34 @@ class MetisTuner(Tuner):
init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0] init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
results = self._pack_output(init_parameter) results = self._pack_output(init_parameter)
else: else:
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \ self.minimize_starting_points = _rand_init(
self.selection_num_starting_points) self.x_bounds, self.x_types, self.selection_num_starting_points)
results = self._selection(self.samples_x, self.samples_y_aggregation, self.samples_y, results = self._selection(
self.x_bounds, self.x_types, self.samples_x,
threshold_samplessize_resampling=(None if self.no_resampling is True else 50), self.samples_y_aggregation,
no_candidates=self.no_candidates, self.samples_y,
minimize_starting_points=self.minimize_starting_points, self.x_bounds,
minimize_constraints_fun=self.minimize_constraints_fun) self.x_types,
threshold_samplessize_resampling=(
logger.info("Generate paramageters:\n%s", results) None if self.no_resampling is True else 50),
no_candidates=self.no_candidates,
minimize_starting_points=self.minimize_starting_points,
minimize_constraints_fun=self.minimize_constraints_fun)
logger.info("Generate paramageters: \n%s", str(results))
return results return results
def receive_trial_result(self, parameter_id, parameters, value, **kwargs): def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
"""Tuner receive result from trial. """
Tuner receive result from trial.
Parameters Parameters
---------- ----------
parameter_id : int parameter_id : int
The id of parameters, generated by nni manager.
parameters : dict parameters : dict
A group of parameters that trial has tried.
value : dict/float value : dict/float
if value is dict, it should have "default" key. if value is dict, it should have "default" key.
""" """
...@@ -216,8 +293,8 @@ class MetisTuner(Tuner): ...@@ -216,8 +293,8 @@ class MetisTuner(Tuner):
value = -value value = -value
logger.info("Received trial result.") logger.info("Received trial result.")
logger.info("value is :%s", value) logger.info("value is : %s", str(value))
logger.info("parameter is : %s", parameters) logger.info("parameter is : %s", str(parameters))
# parse parameter to sample_x # parse parameter to sample_x
sample_x = [0 for i in range(len(self.key_order))] sample_x = [0 for i in range(len(self.key_order))]
...@@ -244,11 +321,19 @@ class MetisTuner(Tuner): ...@@ -244,11 +321,19 @@ class MetisTuner(Tuner):
self.samples_y_aggregation.append([value]) self.samples_y_aggregation.append([value])
def _selection(self, samples_x, samples_y_aggregation, samples_y, def _selection(
x_bounds, x_types, max_resampling_per_x=3, self,
threshold_samplessize_exploitation=12, samples_x,
threshold_samplessize_resampling=50, no_candidates=False, samples_y_aggregation,
minimize_starting_points=None, minimize_constraints_fun=None): samples_y,
x_bounds,
x_types,
max_resampling_per_x=3,
threshold_samplessize_exploitation=12,
threshold_samplessize_resampling=50,
no_candidates=False,
minimize_starting_points=None,
minimize_constraints_fun=None):
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
...@@ -259,7 +344,8 @@ class MetisTuner(Tuner): ...@@ -259,7 +344,8 @@ class MetisTuner(Tuner):
samples_size_unique = len(samples_y) samples_size_unique = len(samples_y)
# ===== STEP 1: Compute the current optimum ===== # ===== STEP 1: Compute the current optimum =====
gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation) gp_model = gp_create_model.create_model(
samples_x, samples_y_aggregation)
lm_current = gp_selection.selection( lm_current = gp_selection.selection(
"lm", "lm",
samples_y_aggregation, samples_y_aggregation,
...@@ -278,7 +364,7 @@ class MetisTuner(Tuner): ...@@ -278,7 +364,7 @@ class MetisTuner(Tuner):
}) })
if no_candidates is False: if no_candidates is False:
# ===== STEP 2: Get recommended configurations for exploration ===== # ===== STEP 2: Get recommended configurations for exploration ====
results_exploration = gp_selection.selection( results_exploration = gp_selection.selection(
"lc", "lc",
samples_y_aggregation, samples_y_aggregation,
...@@ -303,25 +389,31 @@ class MetisTuner(Tuner): ...@@ -303,25 +389,31 @@ class MetisTuner(Tuner):
else: else:
logger.info("DEBUG: No suitable exploration candidates were") logger.info("DEBUG: No suitable exploration candidates were")
# ===== STEP 3: Get recommended configurations for exploitation ===== # ===== STEP 3: Get recommended configurations for exploitation ===
if samples_size_all >= threshold_samplessize_exploitation: if samples_size_all >= threshold_samplessize_exploitation:
logger.info("Getting candidates for exploitation...\n") logger.info("Getting candidates for exploitation...\n")
try: try:
gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation) gmm = gmm_create_model.create_model(
samples_x, samples_y_aggregation)
if ("discrete_int" in x_types) or ("range_int" in x_types): if ("discrete_int" in x_types) or ("range_int" in x_types):
results_exploitation = gmm_selection.selection(x_bounds, x_types, results_exploitation = gmm_selection.selection(
gmm['clusteringmodel_good'], x_bounds,
gmm['clusteringmodel_bad'], x_types,
minimize_starting_points, gmm['clusteringmodel_good'],
minimize_constraints_fun=minimize_constraints_fun) gmm['clusteringmodel_bad'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
else: else:
# If all parameters are of "range_continuous", let's use GMM to generate random starting points # If all parameters are of "range_continuous",
results_exploitation = gmm_selection.selection_r(x_bounds, x_types, # let's use GMM to generate random starting points
gmm['clusteringmodel_good'], results_exploitation = gmm_selection.selection_r(
gmm['clusteringmodel_bad'], x_bounds,
num_starting_points=self.selection_num_starting_points, x_types,
minimize_constraints_fun=minimize_constraints_fun) gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
num_starting_points=self.selection_num_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
if results_exploitation is not None: if results_exploitation is not None:
if _num_past_samples(results_exploitation['hyperparameter'], samples_x, samples_y) == 0: if _num_past_samples(results_exploitation['hyperparameter'], samples_x, samples_y) == 0:
...@@ -335,24 +427,30 @@ class MetisTuner(Tuner): ...@@ -335,24 +427,30 @@ class MetisTuner(Tuner):
} }
candidates.append(temp_candidate) candidates.append(temp_candidate)
logger.info("DEBUG: 1 exploitation_gmm candidate selected\n") logger.info(
"DEBUG: 1 exploitation_gmm candidate selected\n")
logger.info(temp_candidate) logger.info(temp_candidate)
else: else:
logger.info("DEBUG: No suitable exploitation_gmm candidates were found\n") logger.info(
"DEBUG: No suitable exploitation_gmm candidates were found\n")
except ValueError as exception: except ValueError as exception:
# The exception: ValueError: Fitting the mixture model failed # The exception: ValueError: Fitting the mixture model failed
# because some components have ill-defined empirical covariance # because some components have ill-defined empirical covariance
# (for instance caused by singleton or collapsed samples). # (for instance caused by singleton or collapsed samples).
# Try to decrease the number of components, or increase reg_covar. # Try to decrease the number of components, or increase
logger.info("DEBUG: No suitable exploitation_gmm candidates were found due to exception.") # reg_covar.
logger.info(
"DEBUG: No suitable exploitation_gmm \
candidates were found due to exception.")
logger.info(exception) logger.info(exception)
# ===== STEP 4: Get a list of outliers ===== # ===== STEP 4: Get a list of outliers =====
if (threshold_samplessize_resampling is not None) and \ if (threshold_samplessize_resampling is not None) and \
(samples_size_unique >= threshold_samplessize_resampling): (samples_size_unique >= threshold_samplessize_resampling):
logger.info("Getting candidates for re-sampling...\n") logger.info("Getting candidates for re-sampling...\n")
results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation) results_outliers = gp_outlier_detection.outlierDetection_threaded(
samples_x, samples_y_aggregation)
if results_outliers is not None: if results_outliers is not None:
for results_outlier in results_outliers: # pylint: disable=not-an-iterable for results_outlier in results_outliers: # pylint: disable=not-an-iterable
...@@ -365,11 +463,13 @@ class MetisTuner(Tuner): ...@@ -365,11 +463,13 @@ class MetisTuner(Tuner):
logger.info("DEBUG: %d re-sampling candidates selected\n") logger.info("DEBUG: %d re-sampling candidates selected\n")
logger.info(temp_candidate) logger.info(temp_candidate)
else: else:
logger.info("DEBUG: No suitable resampling candidates were found\n") logger.info(
"DEBUG: No suitable resampling candidates were found\n")
if candidates: if candidates:
# ===== STEP 5: Compute the information gain of each candidate towards the optimum ===== # ===== STEP 5: Compute the information gain of each candidate
logger.info("Evaluating information gain of %d candidates...\n") logger.info(
"Evaluating information gain of %d candidates...\n")
next_improvement = 0 next_improvement = 0
threads_inputs = [[ threads_inputs = [[
...@@ -377,36 +477,45 @@ class MetisTuner(Tuner): ...@@ -377,36 +477,45 @@ class MetisTuner(Tuner):
minimize_constraints_fun, minimize_starting_points minimize_constraints_fun, minimize_starting_points
] for candidate in candidates] ] for candidate in candidates]
threads_pool = ThreadPool(4) threads_pool = ThreadPool(4)
# Evaluate what would happen if we actually sample each candidate # Evaluate what would happen if we actually sample each
threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs) # candidate
threads_results = threads_pool.map(
_calculate_lowest_mu_threaded, threads_inputs)
threads_pool.close() threads_pool.close()
threads_pool.join() threads_pool.join()
for threads_result in threads_results: for threads_result in threads_results:
if threads_result['expected_lowest_mu'] < lm_current['expected_mu']: if threads_result['expected_lowest_mu'] < lm_current['expected_mu']:
# Information gain # Information gain
temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu'] temp_improvement = threads_result['expected_lowest_mu'] - \
lm_current['expected_mu']
if next_improvement > temp_improvement: if next_improvement > temp_improvement:
next_improvement = temp_improvement next_improvement = temp_improvement
next_candidate = threads_result['candidate'] next_candidate = threads_result['candidate']
else: else:
# ===== STEP 6: If we have no candidates, randomly pick one ===== # ===== STEP 6: If we have no candidates, randomly pick one ===
logger.info( logger.info(
"DEBUG: No candidates from exploration, exploitation,\ "DEBUG: No candidates from exploration, exploitation,\
and resampling. We will random a candidate for next_candidate\n" and resampling. We will random a candidate for next_candidate\n"
) )
next_candidate = _rand_with_constraints(x_bounds, x_types) \ next_candidate = _rand_with_constraints(
if minimize_starting_points is None else minimize_starting_points[0] x_bounds,
next_candidate = lib_data.match_val_type(next_candidate, x_bounds, x_types) x_types) if minimize_starting_points is None else minimize_starting_points[0]
expected_mu, expected_sigma = gp_prediction.predict(next_candidate, gp_model['model']) next_candidate = lib_data.match_val_type(
next_candidate = {'hyperparameter': next_candidate, 'reason': "random", next_candidate, x_bounds, x_types)
'expected_mu': expected_mu, 'expected_sigma': expected_sigma} expected_mu, expected_sigma = gp_prediction.predict(
next_candidate, gp_model['model'])
# ===== STEP 7 ===== next_candidate = {
# If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold, 'hyperparameter': next_candidate,
# take next config as exploration step 'reason': "random",
'expected_mu': expected_mu,
'expected_sigma': expected_sigma}
# STEP 7: If current optimal hyperparameter occurs in the history
# or exploration probability is less than the threshold, take next
# config as exploration step
outputs = self._pack_output(lm_current['hyperparameter']) outputs = self._pack_output(lm_current['hyperparameter'])
ap = random.uniform(0, 1) ap = random.uniform(0, 1)
if outputs in self.total_data or ap <= self.exploration_probability: if outputs in self.total_data or ap <= self.exploration_probability:
...@@ -419,11 +528,13 @@ class MetisTuner(Tuner): ...@@ -419,11 +528,13 @@ class MetisTuner(Tuner):
return outputs return outputs
def import_data(self, data): def import_data(self, data):
"""Import additional data for tuning """
Import additional data for tuning
Parameters Parameters
---------- ----------
data: data : a list of dict
a list of dictionarys, each of which has at least two keys, 'parameter' and 'value' each of which has at least two keys: 'parameter' and 'value'.
""" """
_completed_num = 0 _completed_num = 0
for trial_info in data: for trial_info in data:
...@@ -437,18 +548,26 @@ class MetisTuner(Tuner): ...@@ -437,18 +548,26 @@ class MetisTuner(Tuner):
logger.info("Useless trial data, value is %s, skip this trial data.", _value) logger.info("Useless trial data, value is %s, skip this trial data.", _value)
continue continue
self.supplement_data_num += 1 self.supplement_data_num += 1
_parameter_id = '_'.join(["ImportData", str(self.supplement_data_num)]) _parameter_id = '_'.join(
["ImportData", str(self.supplement_data_num)])
self.total_data.append(_params) self.total_data.append(_params)
self.receive_trial_result(parameter_id=_parameter_id, parameters=_params, value=_value) self.receive_trial_result(
parameter_id=_parameter_id,
parameters=_params,
value=_value)
logger.info("Successfully import data to metis tuner.") logger.info("Successfully import data to metis tuner.")
def _rand_with_constraints(x_bounds, x_types): def _rand_with_constraints(x_bounds, x_types):
outputs = None outputs = None
x_bounds_withconstraints = [x_bounds[i] for i in CONSTRAINT_PARAMS_IDX] x_bounds_withconstraints = [x_bounds[i] for i in CONSTRAINT_PARAMS_IDX]
x_types_withconstraints = [x_types[i] for i in CONSTRAINT_PARAMS_IDX] x_types_withconstraints = [x_types[i] for i in CONSTRAINT_PARAMS_IDX]
x_val_withconstraints = lib_constraint_summation.rand(x_bounds_withconstraints,\ x_val_withconstraints = lib_constraint_summation.rand(
x_types_withconstraints, CONSTRAINT_LOWERBOUND, CONSTRAINT_UPPERBOUND) x_bounds_withconstraints,
x_types_withconstraints,
CONSTRAINT_LOWERBOUND,
CONSTRAINT_UPPERBOUND)
if not x_val_withconstraints: if not x_val_withconstraints:
outputs = [None] * len(x_bounds) outputs = [None] * len(x_bounds)
...@@ -462,12 +581,18 @@ def _rand_with_constraints(x_bounds, x_types): ...@@ -462,12 +581,18 @@ def _rand_with_constraints(x_bounds, x_types):
def _calculate_lowest_mu_threaded(inputs): def _calculate_lowest_mu_threaded(inputs):
[candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points] = inputs [candidate, samples_x, samples_y, x_bounds, x_types,
minimize_constraints_fun, minimize_starting_points] = inputs
outputs = {"candidate": candidate, "expected_lowest_mu": None} outputs = {"candidate": candidate, "expected_lowest_mu": None}
for expected_mu in [candidate['expected_mu'] + 1.96 * candidate['expected_sigma'], for expected_mu in [
candidate['expected_mu'] - 1.96 * candidate['expected_sigma']]: candidate['expected_mu'] +
1.96 *
candidate['expected_sigma'],
candidate['expected_mu'] -
1.96 *
candidate['expected_sigma']]:
temp_samples_x = copy.deepcopy(samples_x) temp_samples_x = copy.deepcopy(samples_x)
temp_samples_y = copy.deepcopy(samples_y) temp_samples_y = copy.deepcopy(samples_y)
...@@ -480,8 +605,10 @@ def _calculate_lowest_mu_threaded(inputs): ...@@ -480,8 +605,10 @@ def _calculate_lowest_mu_threaded(inputs):
temp_samples_y.append([expected_mu]) temp_samples_y.append([expected_mu])
# Aggregates multiple observation of the sample sampling points # Aggregates multiple observation of the sample sampling points
temp_y_aggregation = [statistics.median(temp_sample_y) for temp_sample_y in temp_samples_y] temp_y_aggregation = [statistics.median(
temp_gp = gp_create_model.create_model(temp_samples_x, temp_y_aggregation) temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(
temp_samples_x, temp_y_aggregation)
temp_results = gp_selection.selection( temp_results = gp_selection.selection(
"lm", "lm",
temp_y_aggregation, temp_y_aggregation,
...@@ -491,7 +618,8 @@ def _calculate_lowest_mu_threaded(inputs): ...@@ -491,7 +618,8 @@ def _calculate_lowest_mu_threaded(inputs):
minimize_starting_points, minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun) minimize_constraints_fun=minimize_constraints_fun)
if outputs["expected_lowest_mu"] is None or outputs["expected_lowest_mu"] > temp_results['expected_mu']: if outputs["expected_lowest_mu"] is None \
or outputs["expected_lowest_mu"] > temp_results['expected_mu']:
outputs["expected_lowest_mu"] = temp_results['expected_mu'] outputs["expected_lowest_mu"] = temp_results['expected_mu']
return outputs return outputs
...@@ -510,18 +638,19 @@ def _rand_init(x_bounds, x_types, selection_num_starting_points): ...@@ -510,18 +638,19 @@ def _rand_init(x_bounds, x_types, selection_num_starting_points):
''' '''
Random sample some init seed within bounds. Random sample some init seed within bounds.
''' '''
return [lib_data.rand(x_bounds, x_types) for i \ return [lib_data.rand(x_bounds, x_types) for i
in range(0, selection_num_starting_points)] in range(0, selection_num_starting_points)]
def get_median(temp_list): def get_median(temp_list):
"""Return median """
Return median
""" """
num = len(temp_list) num = len(temp_list)
temp_list.sort() temp_list.sort()
print(temp_list) print(temp_list)
if num % 2 == 0: if num % 2 == 0:
median = (temp_list[int(num/2)] + temp_list[int(num/2) - 1]) / 2 median = (temp_list[int(num / 2)] + temp_list[int(num / 2) - 1]) / 2
else: else:
median = temp_list[int(num/2)] median = temp_list[int(num / 2)]
return median return median
...@@ -38,7 +38,7 @@ from nni.networkmorphism_tuner.layers import is_layer ...@@ -38,7 +38,7 @@ from nni.networkmorphism_tuner.layers import is_layer
def layer_distance(a, b): def layer_distance(a, b):
"""The distance between two layers.""" """The distance between two layers."""
# pylint: disable=unidiomatic-typecheck # pylint: disable=unidiomatic-typecheck
if type(a) != type(b): if not isinstance(a, type(b)):
return 1.0 return 1.0
if is_layer(a, "Conv"): if is_layer(a, "Conv"):
att_diff = [ att_diff = [
...@@ -96,7 +96,8 @@ def skip_connection_distance(a, b): ...@@ -96,7 +96,8 @@ def skip_connection_distance(a, b):
return 1.0 return 1.0
len_a = abs(a[1] - a[0]) len_a = abs(a[1] - a[0])
len_b = abs(b[1] - b[0]) len_b = abs(b[1] - b[0])
return (abs(a[0] - b[0]) + abs(len_a - len_b)) / (max(a[0], b[0]) + max(len_a, len_b)) return (abs(a[0] - b[0]) + abs(len_a - len_b)) / \
(max(a[0], b[0]) + max(len_a, len_b))
def skip_connections_distance(list_a, list_b): def skip_connections_distance(list_a, list_b):
...@@ -161,7 +162,8 @@ class IncrementalGaussianProcess: ...@@ -161,7 +162,8 @@ class IncrementalGaussianProcess:
def incremental_fit(self, train_x, train_y): def incremental_fit(self, train_x, train_y):
""" Incrementally fit the regressor. """ """ Incrementally fit the regressor. """
if not self._first_fitted: if not self._first_fitted:
raise ValueError("The first_fit function needs to be called first.") raise ValueError(
"The first_fit function needs to be called first.")
train_x, train_y = np.array(train_x), np.array(train_y) train_x, train_y = np.array(train_x), np.array(train_y)
...@@ -174,7 +176,7 @@ class IncrementalGaussianProcess: ...@@ -174,7 +176,7 @@ class IncrementalGaussianProcess:
temp_distance_matrix = np.concatenate((up_k, down_k), axis=0) temp_distance_matrix = np.concatenate((up_k, down_k), axis=0)
k_matrix = bourgain_embedding_matrix(temp_distance_matrix) k_matrix = bourgain_embedding_matrix(temp_distance_matrix)
diagonal = np.diag_indices_from(k_matrix) diagonal = np.diag_indices_from(k_matrix)
diagonal = (diagonal[0][-len(train_x) :], diagonal[1][-len(train_x) :]) diagonal = (diagonal[0][-len(train_x):], diagonal[1][-len(train_x):])
k_matrix[diagonal] += self.alpha k_matrix[diagonal] += self.alpha
try: try:
...@@ -186,7 +188,8 @@ class IncrementalGaussianProcess: ...@@ -186,7 +188,8 @@ class IncrementalGaussianProcess:
self._y = np.concatenate((self._y, train_y), axis=0) self._y = np.concatenate((self._y, train_y), axis=0)
self._distance_matrix = temp_distance_matrix self._distance_matrix = temp_distance_matrix
self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3 self._alpha_vector = cho_solve(
(self._l_matrix, True), self._y) # Line 3
return self return self
...@@ -209,7 +212,8 @@ class IncrementalGaussianProcess: ...@@ -209,7 +212,8 @@ class IncrementalGaussianProcess:
self._l_matrix = cholesky(k_matrix, lower=True) # Line 2 self._l_matrix = cholesky(k_matrix, lower=True) # Line 2
self._alpha_vector = cho_solve((self._l_matrix, True), self._y) # Line 3 self._alpha_vector = cho_solve(
(self._l_matrix, True), self._y) # Line 3
self._first_fitted = True self._first_fitted = True
return self return self
...@@ -227,7 +231,9 @@ class IncrementalGaussianProcess: ...@@ -227,7 +231,9 @@ class IncrementalGaussianProcess:
# compute inverse K_inv of K based on its Cholesky # compute inverse K_inv of K based on its Cholesky
# decomposition L and its inverse L_inv # decomposition L and its inverse L_inv
l_inv = solve_triangular(self._l_matrix.T, np.eye(self._l_matrix.shape[0])) l_inv = solve_triangular(
self._l_matrix.T, np.eye(
self._l_matrix.shape[0]))
k_inv = l_inv.dot(l_inv.T) k_inv = l_inv.dot(l_inv.T)
# Compute variance of predictive distribution # Compute variance of predictive distribution
y_var = np.ones(len(train_x), dtype=np.float) y_var = np.ones(len(train_x), dtype=np.float)
...@@ -378,7 +384,11 @@ class BayesianOptimizer: ...@@ -378,7 +384,11 @@ class BayesianOptimizer:
continue continue
temp_acq_value = self.acq(temp_graph) temp_acq_value = self.acq(temp_graph)
pq.put(elem_class(temp_acq_value, elem.father_id, temp_graph)) pq.put(
elem_class(
temp_acq_value,
elem.father_id,
temp_graph))
descriptors.append(temp_graph.extract_descriptor()) descriptors.append(temp_graph.extract_descriptor())
if self._accept_new_acq_value(opt_acq, temp_acq_value): if self._accept_new_acq_value(opt_acq, temp_acq_value):
opt_acq = temp_acq_value opt_acq = temp_acq_value
......
...@@ -249,7 +249,8 @@ class Graph: ...@@ -249,7 +249,8 @@ class Graph:
self.reverse_adj_list[v_id].remove(edge_tuple) self.reverse_adj_list[v_id].remove(edge_tuple)
break break
self.reverse_adj_list[new_v_id].append((u_id, layer_id)) self.reverse_adj_list[new_v_id].append((u_id, layer_id))
for index, value in enumerate(self.layer_id_to_output_node_ids[layer_id]): for index, value in enumerate(
self.layer_id_to_output_node_ids[layer_id]):
if value == v_id: if value == v_id:
self.layer_id_to_output_node_ids[layer_id][index] = new_v_id self.layer_id_to_output_node_ids[layer_id][index] = new_v_id
break break
...@@ -350,7 +351,8 @@ class Graph: ...@@ -350,7 +351,8 @@ class Graph:
self._replace_layer(layer_id, new_layer) self._replace_layer(layer_id, new_layer)
elif is_layer(layer, "BatchNormalization"): elif is_layer(layer, "BatchNormalization"):
new_layer = wider_bn(layer, start_dim, total_dim, n_add, self.weighted) new_layer = wider_bn(
layer, start_dim, total_dim, n_add, self.weighted)
self._replace_layer(layer_id, new_layer) self._replace_layer(layer_id, new_layer)
self._search(v, start_dim, total_dim, n_add) self._search(v, start_dim, total_dim, n_add)
...@@ -405,7 +407,8 @@ class Graph: ...@@ -405,7 +407,8 @@ class Graph:
target_id: A convolutional layer ID. The new block should be inserted after the block. target_id: A convolutional layer ID. The new block should be inserted after the block.
new_layer: An instance of StubLayer subclasses. new_layer: An instance of StubLayer subclasses.
""" """
self.operation_history.append(("to_deeper_model", target_id, new_layer)) self.operation_history.append(
("to_deeper_model", target_id, new_layer))
input_id = self.layer_id_to_input_node_ids[target_id][0] input_id = self.layer_id_to_input_node_ids[target_id][0]
output_id = self.layer_id_to_output_node_ids[target_id][0] output_id = self.layer_id_to_output_node_ids[target_id][0]
if self.weighted: if self.weighted:
...@@ -478,14 +481,20 @@ class Graph: ...@@ -478,14 +481,20 @@ class Graph:
pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0] pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0]
end_node_id = self.layer_id_to_output_node_ids[end_id][0] end_node_id = self.layer_id_to_output_node_ids[end_id][0]
skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id) skip_output_id = self._insert_pooling_layer_chain(
start_node_id, end_node_id)
# Add the conv layer # Add the conv layer
new_conv_layer = get_conv_class(self.n_dim)(filters_start, filters_end, 1) new_conv_layer = get_conv_class(
self.n_dim)(
filters_start,
filters_end,
1)
skip_output_id = self.add_layer(new_conv_layer, skip_output_id) skip_output_id = self.add_layer(new_conv_layer, skip_output_id)
# Add the add layer. # Add the add layer.
add_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id])) add_input_node_id = self._add_node(
deepcopy(self.node_list[end_node_id]))
add_layer = StubAdd() add_layer = StubAdd()
self._redirect_edge(pre_end_node_id, end_node_id, add_input_node_id) self._redirect_edge(pre_end_node_id, end_node_id, add_input_node_id)
...@@ -504,7 +513,8 @@ class Graph: ...@@ -504,7 +513,8 @@ class Graph:
weights = np.zeros((filters_end, filters_start) + filter_shape) weights = np.zeros((filters_end, filters_start) + filter_shape)
bias = np.zeros(filters_end) bias = np.zeros(filters_end)
new_conv_layer.set_weights( new_conv_layer.set_weights(
(add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) (add_noise(weights, np.array([0, 1])), add_noise(
bias, np.array([0, 1])))
) )
def to_concat_skip_model(self, start_id, end_id): def to_concat_skip_model(self, start_id, end_id):
...@@ -513,7 +523,8 @@ class Graph: ...@@ -513,7 +523,8 @@ class Graph:
start_id: The convolutional layer ID, after which to start the skip-connection. start_id: The convolutional layer ID, after which to start the skip-connection.
end_id: The convolutional layer ID, after which to end the skip-connection. end_id: The convolutional layer ID, after which to end the skip-connection.
""" """
self.operation_history.append(("to_concat_skip_model", start_id, end_id)) self.operation_history.append(
("to_concat_skip_model", start_id, end_id))
filters_end = self.layer_list[end_id].output.shape[-1] filters_end = self.layer_list[end_id].output.shape[-1]
filters_start = self.layer_list[start_id].output.shape[-1] filters_start = self.layer_list[start_id].output.shape[-1]
start_node_id = self.layer_id_to_output_node_ids[start_id][0] start_node_id = self.layer_id_to_output_node_ids[start_id][0]
...@@ -521,9 +532,11 @@ class Graph: ...@@ -521,9 +532,11 @@ class Graph:
pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0] pre_end_node_id = self.layer_id_to_input_node_ids[end_id][0]
end_node_id = self.layer_id_to_output_node_ids[end_id][0] end_node_id = self.layer_id_to_output_node_ids[end_id][0]
skip_output_id = self._insert_pooling_layer_chain(start_node_id, end_node_id) skip_output_id = self._insert_pooling_layer_chain(
start_node_id, end_node_id)
concat_input_node_id = self._add_node(deepcopy(self.node_list[end_node_id])) concat_input_node_id = self._add_node(
deepcopy(self.node_list[end_node_id]))
self._redirect_edge(pre_end_node_id, end_node_id, concat_input_node_id) self._redirect_edge(pre_end_node_id, end_node_id, concat_input_node_id)
concat_layer = StubConcatenate() concat_layer = StubConcatenate()
...@@ -532,7 +545,10 @@ class Graph: ...@@ -532,7 +545,10 @@ class Graph:
self.node_list[skip_output_id], self.node_list[skip_output_id],
] ]
concat_output_node_id = self._add_node(Node(concat_layer.output_shape)) concat_output_node_id = self._add_node(Node(concat_layer.output_shape))
self._add_edge(concat_layer, concat_input_node_id, concat_output_node_id) self._add_edge(
concat_layer,
concat_input_node_id,
concat_output_node_id)
self._add_edge(concat_layer, skip_output_id, concat_output_node_id) self._add_edge(concat_layer, skip_output_id, concat_output_node_id)
concat_layer.output = self.node_list[concat_output_node_id] concat_layer.output = self.node_list[concat_output_node_id]
self.node_list[concat_output_node_id].shape = concat_layer.output_shape self.node_list[concat_output_node_id].shape = concat_layer.output_shape
...@@ -559,7 +575,8 @@ class Graph: ...@@ -559,7 +575,8 @@ class Graph:
) )
bias = np.zeros(filters_end) bias = np.zeros(filters_end)
new_conv_layer.set_weights( new_conv_layer.set_weights(
(add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) (add_noise(weights, np.array([0, 1])), add_noise(
bias, np.array([0, 1])))
) )
def _insert_pooling_layer_chain(self, start_node_id, end_node_id): def _insert_pooling_layer_chain(self, start_node_id, end_node_id):
...@@ -568,7 +585,8 @@ class Graph: ...@@ -568,7 +585,8 @@ class Graph:
new_layer = deepcopy(layer) new_layer = deepcopy(layer)
if is_layer(new_layer, "Conv"): if is_layer(new_layer, "Conv"):
filters = self.node_list[start_node_id].shape[-1] filters = self.node_list[start_node_id].shape[-1]
new_layer = get_conv_class(self.n_dim)(filters, filters, 1, layer.stride) new_layer = get_conv_class(self.n_dim)(
filters, filters, 1, layer.stride)
if self.weighted: if self.weighted:
init_conv_weight(new_layer) init_conv_weight(new_layer)
else: else:
...@@ -601,8 +619,10 @@ class Graph: ...@@ -601,8 +619,10 @@ class Graph:
temp_v = v temp_v = v
temp_layer_id = layer_id temp_layer_id = layer_id
skip_type = None skip_type = None
while not (temp_v in index_in_main_chain and temp_u in index_in_main_chain): while not (
if is_layer(self.layer_list[temp_layer_id], "Concatenate"): temp_v in index_in_main_chain and temp_u in index_in_main_chain):
if is_layer(
self.layer_list[temp_layer_id], "Concatenate"):
skip_type = NetworkDescriptor.CONCAT_CONNECT skip_type = NetworkDescriptor.CONCAT_CONNECT
if is_layer(self.layer_list[temp_layer_id], "Add"): if is_layer(self.layer_list[temp_layer_id], "Add"):
skip_type = NetworkDescriptor.ADD_CONNECT skip_type = NetworkDescriptor.ADD_CONNECT
...@@ -711,7 +731,8 @@ class Graph: ...@@ -711,7 +731,8 @@ class Graph:
def wide_layer_ids(self): def wide_layer_ids(self):
return ( return (
self._conv_layer_ids_in_order()[:-1] + self._dense_layer_ids_in_order()[:-1] self._conv_layer_ids_in_order(
)[:-1] + self._dense_layer_ids_in_order()[:-1]
) )
def skip_connection_layer_ids(self): def skip_connection_layer_ids(self):
...@@ -810,7 +831,8 @@ class KerasModel: ...@@ -810,7 +831,8 @@ class KerasModel:
topo_node_list = self.graph.topological_order topo_node_list = self.graph.topological_order
output_id = topo_node_list[-1] output_id = topo_node_list[-1]
input_id = topo_node_list[0] input_id = topo_node_list[0]
input_tensor = keras.layers.Input(shape=graph.node_list[input_id].shape) input_tensor = keras.layers.Input(
shape=graph.node_list[input_id].shape)
node_list = deepcopy(self.graph.node_list) node_list = deepcopy(self.graph.node_list)
node_list[input_id] = input_tensor node_list[input_id] = input_tensor
...@@ -838,7 +860,8 @@ class KerasModel: ...@@ -838,7 +860,8 @@ class KerasModel:
output_tensor = keras.layers.Activation("softmax", name="activation_add")( output_tensor = keras.layers.Activation("softmax", name="activation_add")(
output_tensor output_tensor
) )
self.model = keras.models.Model(inputs=input_tensor, outputs=output_tensor) self.model = keras.models.Model(
inputs=input_tensor, outputs=output_tensor)
if graph.weighted: if graph.weighted:
for index, layer in enumerate(self.layers): for index, layer in enumerate(self.layers):
...@@ -892,7 +915,8 @@ class JSONModel: ...@@ -892,7 +915,8 @@ class JSONModel:
for layer_id, item in enumerate(graph.layer_list): for layer_id, item in enumerate(graph.layer_list):
layer = graph.layer_list[layer_id] layer = graph.layer_list[layer_id]
layer_information = layer_description_extractor(layer, graph.node_to_id) layer_information = layer_description_extractor(
layer, graph.node_to_id)
layer_list.append((layer_id, layer_information)) layer_list.append((layer_id, layer_information))
data["node_list"] = node_list data["node_list"] = node_list
...@@ -938,7 +962,8 @@ def json_to_graph(json_model: str): ...@@ -938,7 +962,8 @@ def json_to_graph(json_model: str):
graph.input_shape = input_shape graph.input_shape = input_shape
vis = json_model["vis"] vis = json_model["vis"]
graph.vis = {tuple(item): True for item in vis} if vis is not None else None graph.vis = {
tuple(item): True for item in vis} if vis is not None else None
graph.weighted = json_model["weighted"] graph.weighted = json_model["weighted"]
layer_id_to_input_node_ids = json_model["layer_id_to_input_node_ids"] layer_id_to_input_node_ids = json_model["layer_id_to_input_node_ids"]
graph.layer_id_to_input_node_ids = { graph.layer_id_to_input_node_ids = {
......
...@@ -40,7 +40,8 @@ def to_wider_graph(graph): ...@@ -40,7 +40,8 @@ def to_wider_graph(graph):
''' '''
weighted_layer_ids = graph.wide_layer_ids() weighted_layer_ids = graph.wide_layer_ids()
weighted_layer_ids = list( weighted_layer_ids = list(
filter(lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids) filter(
lambda x: graph.layer_list[x].output.shape[-1], weighted_layer_ids)
) )
wider_layers = sample(weighted_layer_ids, 1) wider_layers = sample(weighted_layer_ids, 1)
...@@ -58,12 +59,14 @@ def to_wider_graph(graph): ...@@ -58,12 +59,14 @@ def to_wider_graph(graph):
def to_skip_connection_graph(graph): def to_skip_connection_graph(graph):
''' skip connection graph ''' skip connection graph
''' '''
# The last conv layer cannot be widen since wider operator cannot be done over the two sides of flatten. # The last conv layer cannot be widen since wider operator cannot be done
# over the two sides of flatten.
weighted_layer_ids = graph.skip_connection_layer_ids() weighted_layer_ids = graph.skip_connection_layer_ids()
valid_connection = [] valid_connection = []
for skip_type in sorted([NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]): for skip_type in sorted(
[NetworkDescriptor.ADD_CONNECT, NetworkDescriptor.CONCAT_CONNECT]):
for index_a in range(len(weighted_layer_ids)): for index_a in range(len(weighted_layer_ids)):
for index_b in range(len(weighted_layer_ids))[index_a + 1 :]: for index_b in range(len(weighted_layer_ids))[index_a + 1:]:
valid_connection.append((index_a, index_b, skip_type)) valid_connection.append((index_a, index_b, skip_type))
if not valid_connection: if not valid_connection:
...@@ -84,9 +87,14 @@ def create_new_layer(layer, n_dim): ...@@ -84,9 +87,14 @@ def create_new_layer(layer, n_dim):
input_shape = layer.output.shape input_shape = layer.output.shape
dense_deeper_classes = [StubDense, get_dropout_class(n_dim), StubReLU] dense_deeper_classes = [StubDense, get_dropout_class(n_dim), StubReLU]
conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim), StubReLU] conv_deeper_classes = [
get_conv_class(n_dim),
get_batch_norm_class(n_dim),
StubReLU]
if is_layer(layer, "ReLU"): if is_layer(layer, "ReLU"):
conv_deeper_classes = [get_conv_class(n_dim), get_batch_norm_class(n_dim)] conv_deeper_classes = [
get_conv_class(n_dim),
get_batch_norm_class(n_dim)]
dense_deeper_classes = [StubDense, get_dropout_class(n_dim)] dense_deeper_classes = [StubDense, get_dropout_class(n_dim)]
elif is_layer(layer, "Dropout"): elif is_layer(layer, "Dropout"):
dense_deeper_classes = [StubDense, StubReLU] dense_deeper_classes = [StubDense, StubReLU]
......
...@@ -52,7 +52,8 @@ def deeper_conv_block(conv_layer, kernel_size, weighted=True): ...@@ -52,7 +52,8 @@ def deeper_conv_block(conv_layer, kernel_size, weighted=True):
if weighted: if weighted:
new_conv_layer.set_weights( new_conv_layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) (add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
) )
new_weights = [ new_weights = [
add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])), add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])),
...@@ -74,7 +75,8 @@ def dense_to_deeper_block(dense_layer, weighted=True): ...@@ -74,7 +75,8 @@ def dense_to_deeper_block(dense_layer, weighted=True):
new_dense_layer = StubDense(units, units) new_dense_layer = StubDense(units, units)
if weighted: if weighted:
new_dense_layer.set_weights( new_dense_layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) (add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
) )
return [StubReLU(), new_dense_layer] return [StubReLU(), new_dense_layer]
...@@ -97,8 +99,11 @@ def wider_pre_dense(layer, n_add, weighted=True): ...@@ -97,8 +99,11 @@ def wider_pre_dense(layer, n_add, weighted=True):
teacher_index = rand[i] teacher_index = rand[i]
new_weight = teacher_w[teacher_index, :] new_weight = teacher_w[teacher_index, :]
new_weight = new_weight[np.newaxis, :] new_weight = new_weight[np.newaxis, :]
student_w = np.concatenate((student_w, add_noise(new_weight, student_w)), axis=0) student_w = np.concatenate(
student_b = np.append(student_b, add_noise(teacher_b[teacher_index], student_b)) (student_w, add_noise(new_weight, student_w)), axis=0)
student_b = np.append(
student_b, add_noise(
teacher_b[teacher_index], student_b))
new_pre_layer = StubDense(layer.input_units, n_units2 + n_add) new_pre_layer = StubDense(layer.input_units, n_units2 + n_add)
new_pre_layer.set_weights((student_w, student_b)) new_pre_layer.set_weights((student_w, student_b))
...@@ -209,7 +214,7 @@ def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True): ...@@ -209,7 +214,7 @@ def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True):
student_w[:, : start_dim * n_units_each_channel], student_w[:, : start_dim * n_units_each_channel],
add_noise(new_weight, student_w), add_noise(new_weight, student_w),
student_w[ student_w[
:, start_dim * n_units_each_channel : total_dim * n_units_each_channel :, start_dim * n_units_each_channel: total_dim * n_units_each_channel
], ],
), ),
axis=1, axis=1,
...@@ -225,7 +230,8 @@ def add_noise(weights, other_weights): ...@@ -225,7 +230,8 @@ def add_noise(weights, other_weights):
''' '''
w_range = np.ptp(other_weights.flatten()) w_range = np.ptp(other_weights.flatten())
noise_range = NOISE_RATIO * w_range noise_range = NOISE_RATIO * w_range
noise = np.random.uniform(-noise_range / 2.0, noise_range / 2.0, weights.shape) noise = np.random.uniform(-noise_range / 2.0,
noise_range / 2.0, weights.shape)
return np.add(noise, weights) return np.add(noise, weights)
...@@ -236,7 +242,8 @@ def init_dense_weight(layer): ...@@ -236,7 +242,8 @@ def init_dense_weight(layer):
weight = np.eye(units) weight = np.eye(units)
bias = np.zeros(units) bias = np.zeros(units)
layer.set_weights( layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) (add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
) )
...@@ -256,7 +263,8 @@ def init_conv_weight(layer): ...@@ -256,7 +263,8 @@ def init_conv_weight(layer):
bias = np.zeros(n_filters) bias = np.zeros(n_filters)
layer.set_weights( layer.set_weights(
(add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1]))) (add_noise(weight, np.array([0, 1])),
add_noise(bias, np.array([0, 1])))
) )
......
...@@ -28,8 +28,10 @@ from nni.networkmorphism_tuner.utils import Constant ...@@ -28,8 +28,10 @@ from nni.networkmorphism_tuner.utils import Constant
class AvgPool(nn.Module): class AvgPool(nn.Module):
'''AvgPool Module. """
''' AvgPool Module.
"""
def __init__(self): def __init__(self):
super().__init__() super().__init__()
...@@ -39,8 +41,10 @@ class AvgPool(nn.Module): ...@@ -39,8 +41,10 @@ class AvgPool(nn.Module):
class GlobalAvgPool1d(AvgPool): class GlobalAvgPool1d(AvgPool):
'''GlobalAvgPool1d Module. """
''' GlobalAvgPool1d Module.
"""
def forward(self, input_tensor): def forward(self, input_tensor):
return functional.avg_pool1d(input_tensor, input_tensor.size()[2:]).view( return functional.avg_pool1d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2] input_tensor.size()[:2]
...@@ -48,8 +52,10 @@ class GlobalAvgPool1d(AvgPool): ...@@ -48,8 +52,10 @@ class GlobalAvgPool1d(AvgPool):
class GlobalAvgPool2d(AvgPool): class GlobalAvgPool2d(AvgPool):
'''GlobalAvgPool2d Module. """
''' GlobalAvgPool2d Module.
"""
def forward(self, input_tensor): def forward(self, input_tensor):
return functional.avg_pool2d(input_tensor, input_tensor.size()[2:]).view( return functional.avg_pool2d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2] input_tensor.size()[:2]
...@@ -57,8 +63,10 @@ class GlobalAvgPool2d(AvgPool): ...@@ -57,8 +63,10 @@ class GlobalAvgPool2d(AvgPool):
class GlobalAvgPool3d(AvgPool): class GlobalAvgPool3d(AvgPool):
'''GlobalAvgPool3d Module. """
''' GlobalAvgPool3d Module.
"""
def forward(self, input_tensor): def forward(self, input_tensor):
return functional.avg_pool3d(input_tensor, input_tensor.size()[2:]).view( return functional.avg_pool3d(input_tensor, input_tensor.size()[2:]).view(
input_tensor.size()[:2] input_tensor.size()[:2]
...@@ -66,70 +74,86 @@ class GlobalAvgPool3d(AvgPool): ...@@ -66,70 +74,86 @@ class GlobalAvgPool3d(AvgPool):
class StubLayer: class StubLayer:
'''StubLayer Module. Base Module. """
''' StubLayer Module. Base Module.
"""
def __init__(self, input_node=None, output_node=None): def __init__(self, input_node=None, output_node=None):
self.input = input_node self.input = input_node
self.output = output_node self.output = output_node
self.weights = None self.weights = None
def build(self, shape): def build(self, shape):
'''build shape. """
''' build shape.
"""
def set_weights(self, weights): def set_weights(self, weights):
'''set weights. """
''' set weights.
"""
self.weights = weights self.weights = weights
def import_weights(self, torch_layer): def import_weights(self, torch_layer):
'''import weights. """
''' import weights.
"""
def import_weights_keras(self, keras_layer): def import_weights_keras(self, keras_layer):
'''import weights from keras layer. """
''' import weights from keras layer.
"""
def export_weights(self, torch_layer): def export_weights(self, torch_layer):
'''export weights. """
''' export weights.
"""
def export_weights_keras(self, keras_layer): def export_weights_keras(self, keras_layer):
'''export weights to keras layer. """
''' export weights to keras layer.
"""
def get_weights(self): def get_weights(self):
'''get weights. """
''' get weights.
"""
return self.weights return self.weights
def size(self): def size(self):
'''size(). """
''' size().
"""
return 0 return 0
@property @property
def output_shape(self): def output_shape(self):
'''output shape. """
''' output shape.
"""
return self.input.shape return self.input.shape
def to_real_layer(self): def to_real_layer(self):
'''to real layer. """
''' to real layer.
"""
def __str__(self): def __str__(self):
'''str() function to print. """
''' str() function to print.
"""
return type(self).__name__[4:] return type(self).__name__[4:]
class StubWeightBiasLayer(StubLayer): class StubWeightBiasLayer(StubLayer):
'''StubWeightBiasLayer Module to set the bias. """
''' StubWeightBiasLayer Module to set the bias.
"""
def import_weights(self, torch_layer): def import_weights(self, torch_layer):
self.set_weights( self.set_weights(
(torch_layer.weight.data.cpu().numpy(), torch_layer.bias.data.cpu().numpy()) (torch_layer.weight.data.cpu().numpy(),
torch_layer.bias.data.cpu().numpy())
) )
def import_weights_keras(self, keras_layer): def import_weights_keras(self, keras_layer):
...@@ -144,8 +168,10 @@ class StubWeightBiasLayer(StubLayer): ...@@ -144,8 +168,10 @@ class StubWeightBiasLayer(StubLayer):
class StubBatchNormalization(StubWeightBiasLayer): class StubBatchNormalization(StubWeightBiasLayer):
'''StubBatchNormalization Module. Batch Norm. """
''' StubBatchNormalization Module. Batch Norm.
"""
def __init__(self, num_features, input_node=None, output_node=None): def __init__(self, num_features, input_node=None, output_node=None):
super().__init__(input_node, output_node) super().__init__(input_node, output_node)
self.num_features = num_features self.num_features = num_features
...@@ -175,29 +201,37 @@ class StubBatchNormalization(StubWeightBiasLayer): ...@@ -175,29 +201,37 @@ class StubBatchNormalization(StubWeightBiasLayer):
class StubBatchNormalization1d(StubBatchNormalization): class StubBatchNormalization1d(StubBatchNormalization):
'''StubBatchNormalization1d Module. """
''' StubBatchNormalization1d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.BatchNorm1d(self.num_features) return torch.nn.BatchNorm1d(self.num_features)
class StubBatchNormalization2d(StubBatchNormalization): class StubBatchNormalization2d(StubBatchNormalization):
'''StubBatchNormalization2d Module. """
''' StubBatchNormalization2d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.BatchNorm2d(self.num_features) return torch.nn.BatchNorm2d(self.num_features)
class StubBatchNormalization3d(StubBatchNormalization): class StubBatchNormalization3d(StubBatchNormalization):
'''StubBatchNormalization3d Module. """
''' StubBatchNormalization3d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.BatchNorm3d(self.num_features) return torch.nn.BatchNorm3d(self.num_features)
class StubDense(StubWeightBiasLayer): class StubDense(StubWeightBiasLayer):
'''StubDense Module. Linear. """
''' StubDense Module. Linear.
"""
def __init__(self, input_units, units, input_node=None, output_node=None): def __init__(self, input_units, units, input_node=None, output_node=None):
super().__init__(input_node, output_node) super().__init__(input_node, output_node)
self.input_units = input_units self.input_units = input_units
...@@ -208,7 +242,9 @@ class StubDense(StubWeightBiasLayer): ...@@ -208,7 +242,9 @@ class StubDense(StubWeightBiasLayer):
return (self.units,) return (self.units,)
def import_weights_keras(self, keras_layer): def import_weights_keras(self, keras_layer):
self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1])) self.set_weights(
(keras_layer.get_weights()[0].T,
keras_layer.get_weights()[1]))
def export_weights_keras(self, keras_layer): def export_weights_keras(self, keras_layer):
keras_layer.set_weights((self.weights[0].T, self.weights[1])) keras_layer.set_weights((self.weights[0].T, self.weights[1]))
...@@ -221,9 +257,12 @@ class StubDense(StubWeightBiasLayer): ...@@ -221,9 +257,12 @@ class StubDense(StubWeightBiasLayer):
class StubConv(StubWeightBiasLayer): class StubConv(StubWeightBiasLayer):
'''StubConv Module. Conv. """
''' StubConv Module. Conv.
def __init__(self, input_channel, filters, kernel_size, stride=1, input_node=None, output_node=None): """
def __init__(self, input_channel, filters, kernel_size,
stride=1, input_node=None, output_node=None):
super().__init__(input_node, output_node) super().__init__(input_node, output_node)
self.input_channel = input_channel self.input_channel = input_channel
self.filters = filters self.filters = filters
...@@ -242,13 +281,16 @@ class StubConv(StubWeightBiasLayer): ...@@ -242,13 +281,16 @@ class StubConv(StubWeightBiasLayer):
return tuple(ret) return tuple(ret)
def import_weights_keras(self, keras_layer): def import_weights_keras(self, keras_layer):
self.set_weights((keras_layer.get_weights()[0].T, keras_layer.get_weights()[1])) self.set_weights(
(keras_layer.get_weights()[0].T,
keras_layer.get_weights()[1]))
def export_weights_keras(self, keras_layer): def export_weights_keras(self, keras_layer):
keras_layer.set_weights((self.weights[0].T, self.weights[1])) keras_layer.set_weights((self.weights[0].T, self.weights[1]))
def size(self): def size(self):
return (self.input_channel * self.kernel_size * self.kernel_size + 1) * self.filters return (self.input_channel * self.kernel_size *
self.kernel_size + 1) * self.filters
@abstractmethod @abstractmethod
def to_real_layer(self): def to_real_layer(self):
...@@ -272,8 +314,10 @@ class StubConv(StubWeightBiasLayer): ...@@ -272,8 +314,10 @@ class StubConv(StubWeightBiasLayer):
class StubConv1d(StubConv): class StubConv1d(StubConv):
'''StubConv1d Module. """
''' StubConv1d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.Conv1d( return torch.nn.Conv1d(
self.input_channel, self.input_channel,
...@@ -285,8 +329,10 @@ class StubConv1d(StubConv): ...@@ -285,8 +329,10 @@ class StubConv1d(StubConv):
class StubConv2d(StubConv): class StubConv2d(StubConv):
'''StubConv2d Module. """
''' StubConv2d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.Conv2d( return torch.nn.Conv2d(
self.input_channel, self.input_channel,
...@@ -298,8 +344,10 @@ class StubConv2d(StubConv): ...@@ -298,8 +344,10 @@ class StubConv2d(StubConv):
class StubConv3d(StubConv): class StubConv3d(StubConv):
'''StubConv3d Module. """
''' StubConv3d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.Conv3d( return torch.nn.Conv3d(
self.input_channel, self.input_channel,
...@@ -311,8 +359,10 @@ class StubConv3d(StubConv): ...@@ -311,8 +359,10 @@ class StubConv3d(StubConv):
class StubAggregateLayer(StubLayer): class StubAggregateLayer(StubLayer):
'''StubAggregateLayer Module. """
''' StubAggregateLayer Module.
"""
def __init__(self, input_nodes=None, output_node=None): def __init__(self, input_nodes=None, output_node=None):
if input_nodes is None: if input_nodes is None:
input_nodes = [] input_nodes = []
...@@ -320,8 +370,8 @@ class StubAggregateLayer(StubLayer): ...@@ -320,8 +370,8 @@ class StubAggregateLayer(StubLayer):
class StubConcatenate(StubAggregateLayer): class StubConcatenate(StubAggregateLayer):
'''StubConcatenate Module. """StubConcatenate Module.
''' """
@property @property
def output_shape(self): def output_shape(self):
ret = 0 ret = 0
...@@ -335,8 +385,9 @@ class StubConcatenate(StubAggregateLayer): ...@@ -335,8 +385,9 @@ class StubConcatenate(StubAggregateLayer):
class StubAdd(StubAggregateLayer): class StubAdd(StubAggregateLayer):
'''StubAdd Module. """
''' StubAdd Module.
"""
@property @property
def output_shape(self): def output_shape(self):
return self.input[0].shape return self.input[0].shape
...@@ -346,8 +397,9 @@ class StubAdd(StubAggregateLayer): ...@@ -346,8 +397,9 @@ class StubAdd(StubAggregateLayer):
class StubFlatten(StubLayer): class StubFlatten(StubLayer):
'''StubFlatten Module. """
''' StubFlatten Module.
"""
@property @property
def output_shape(self): def output_shape(self):
ret = 1 ret = 1
...@@ -360,22 +412,28 @@ class StubFlatten(StubLayer): ...@@ -360,22 +412,28 @@ class StubFlatten(StubLayer):
class StubReLU(StubLayer): class StubReLU(StubLayer):
'''StubReLU Module. """
''' StubReLU Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.ReLU() return torch.nn.ReLU()
class StubSoftmax(StubLayer): class StubSoftmax(StubLayer):
'''StubSoftmax Module. """
''' StubSoftmax Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.LogSoftmax(dim=1) return torch.nn.LogSoftmax(dim=1)
class StubDropout(StubLayer): class StubDropout(StubLayer):
'''StubDropout Module. """
''' StubDropout Module.
"""
def __init__(self, rate, input_node=None, output_node=None): def __init__(self, rate, input_node=None, output_node=None):
super().__init__(input_node, output_node) super().__init__(input_node, output_node)
self.rate = rate self.rate = rate
...@@ -386,36 +444,45 @@ class StubDropout(StubLayer): ...@@ -386,36 +444,45 @@ class StubDropout(StubLayer):
class StubDropout1d(StubDropout): class StubDropout1d(StubDropout):
'''StubDropout1d Module. """
''' StubDropout1d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.Dropout(self.rate) return torch.nn.Dropout(self.rate)
class StubDropout2d(StubDropout): class StubDropout2d(StubDropout):
'''StubDropout2d Module. """
''' StubDropout2d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.Dropout2d(self.rate) return torch.nn.Dropout2d(self.rate)
class StubDropout3d(StubDropout): class StubDropout3d(StubDropout):
'''StubDropout3d Module. """
''' StubDropout3d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.Dropout3d(self.rate) return torch.nn.Dropout3d(self.rate)
class StubInput(StubLayer): class StubInput(StubLayer):
'''StubInput Module. """
''' StubInput Module.
"""
def __init__(self, input_node=None, output_node=None): def __init__(self, input_node=None, output_node=None):
super().__init__(input_node, output_node) super().__init__(input_node, output_node)
class StubPooling(StubLayer): class StubPooling(StubLayer):
'''StubPooling Module. """
''' StubPooling Module.
"""
def __init__(self, def __init__(self,
kernel_size=None, kernel_size=None,
...@@ -444,30 +511,37 @@ class StubPooling(StubLayer): ...@@ -444,30 +511,37 @@ class StubPooling(StubLayer):
class StubPooling1d(StubPooling): class StubPooling1d(StubPooling):
'''StubPooling1d Module. """
''' StubPooling1d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.MaxPool1d(self.kernel_size, stride=self.stride) return torch.nn.MaxPool1d(self.kernel_size, stride=self.stride)
class StubPooling2d(StubPooling): class StubPooling2d(StubPooling):
'''StubPooling2d Module. """
''' StubPooling2d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.MaxPool2d(self.kernel_size, stride=self.stride) return torch.nn.MaxPool2d(self.kernel_size, stride=self.stride)
class StubPooling3d(StubPooling): class StubPooling3d(StubPooling):
'''StubPooling3d Module. """
''' StubPooling3d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return torch.nn.MaxPool3d(self.kernel_size, stride=self.stride) return torch.nn.MaxPool3d(self.kernel_size, stride=self.stride)
class StubGlobalPooling(StubLayer): class StubGlobalPooling(StubLayer):
'''StubGlobalPooling Module. """
''' StubGlobalPooling Module.
"""
def __init__(self, input_node=None, output_node=None): def __init__(self, input_node=None, output_node=None):
super().__init__(input_node, output_node) super().__init__(input_node, output_node)
...@@ -481,49 +555,63 @@ class StubGlobalPooling(StubLayer): ...@@ -481,49 +555,63 @@ class StubGlobalPooling(StubLayer):
class StubGlobalPooling1d(StubGlobalPooling): class StubGlobalPooling1d(StubGlobalPooling):
'''StubGlobalPooling1d Module. """
''' StubGlobalPooling1d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return GlobalAvgPool1d() return GlobalAvgPool1d()
class StubGlobalPooling2d(StubGlobalPooling): class StubGlobalPooling2d(StubGlobalPooling):
'''StubGlobalPooling2d Module. """
''' StubGlobalPooling2d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return GlobalAvgPool2d() return GlobalAvgPool2d()
class StubGlobalPooling3d(StubGlobalPooling): class StubGlobalPooling3d(StubGlobalPooling):
'''StubGlobalPooling3d Module. """
''' StubGlobalPooling3d Module.
"""
def to_real_layer(self): def to_real_layer(self):
return GlobalAvgPool3d() return GlobalAvgPool3d()
class TorchConcatenate(nn.Module): class TorchConcatenate(nn.Module):
'''TorchConcatenate Module. """
''' TorchConcatenate Module.
"""
def forward(self, input_list): def forward(self, input_list):
return torch.cat(input_list, dim=1) return torch.cat(input_list, dim=1)
class TorchAdd(nn.Module): class TorchAdd(nn.Module):
'''TorchAdd Module. """
''' TorchAdd Module.
"""
def forward(self, input_list): def forward(self, input_list):
return input_list[0] + input_list[1] return input_list[0] + input_list[1]
class TorchFlatten(nn.Module): class TorchFlatten(nn.Module):
'''TorchFlatten Module. """
''' TorchFlatten Module.
"""
def forward(self, input_tensor): def forward(self, input_tensor):
return input_tensor.view(input_tensor.size(0), -1) return input_tensor.view(input_tensor.size(0), -1)
def keras_dropout(layer, rate): def keras_dropout(layer, rate):
'''keras dropout layer. """
''' Keras dropout layer.
"""
from keras import layers from keras import layers
...@@ -539,8 +627,9 @@ def keras_dropout(layer, rate): ...@@ -539,8 +627,9 @@ def keras_dropout(layer, rate):
def to_real_keras_layer(layer): def to_real_keras_layer(layer):
''' real keras layer. """
''' Real keras layer.
"""
from keras import layers from keras import layers
if is_layer(layer, "Dense"): if is_layer(layer, "Dense"):
...@@ -574,10 +663,14 @@ def to_real_keras_layer(layer): ...@@ -574,10 +663,14 @@ def to_real_keras_layer(layer):
def is_layer(layer, layer_type): def is_layer(layer, layer_type):
'''judge the layer type. """
Returns: Judge the layer type.
Returns
-------
bool
boolean -- True or False boolean -- True or False
''' """
if layer_type == "Input": if layer_type == "Input":
return isinstance(layer, StubInput) return isinstance(layer, StubInput)
...@@ -607,8 +700,9 @@ def is_layer(layer, layer_type): ...@@ -607,8 +700,9 @@ def is_layer(layer, layer_type):
def layer_description_extractor(layer, node_to_id): def layer_description_extractor(layer, node_to_id):
'''get layer description. """
''' Get layer description.
"""
layer_input = layer.input layer_input = layer.input
layer_output = layer.output layer_output = layer.output
...@@ -641,7 +735,8 @@ def layer_description_extractor(layer, node_to_id): ...@@ -641,7 +735,8 @@ def layer_description_extractor(layer, node_to_id):
layer.units, layer.units,
] ]
elif isinstance(layer, (StubBatchNormalization,)): elif isinstance(layer, (StubBatchNormalization,)):
return (type(layer).__name__, layer_input, layer_output, layer.num_features) return (type(layer).__name__, layer_input,
layer_output, layer.num_features)
elif isinstance(layer, (StubDropout,)): elif isinstance(layer, (StubDropout,)):
return (type(layer).__name__, layer_input, layer_output, layer.rate) return (type(layer).__name__, layer_input, layer_output, layer.rate)
elif isinstance(layer, StubPooling): elif isinstance(layer, StubPooling):
...@@ -658,8 +753,8 @@ def layer_description_extractor(layer, node_to_id): ...@@ -658,8 +753,8 @@ def layer_description_extractor(layer, node_to_id):
def layer_description_builder(layer_information, id_to_node): def layer_description_builder(layer_information, id_to_node):
'''build layer from description. """build layer from description.
''' """
layer_type = layer_information[0] layer_type = layer_information[0]
layer_input_ids = layer_information[1] layer_input_ids = layer_information[1]
...@@ -696,8 +791,9 @@ def layer_description_builder(layer_information, id_to_node): ...@@ -696,8 +791,9 @@ def layer_description_builder(layer_information, id_to_node):
def layer_width(layer): def layer_width(layer):
'''get layer width. """
''' Get layer width.
"""
if is_layer(layer, "Dense"): if is_layer(layer, "Dense"):
return layer.units return layer.units
......
...@@ -17,11 +17,13 @@ ...@@ -17,11 +17,13 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ================================================================================================== # ==================================================================================================
"""
networkmorphism_tuner.py
"""
import logging import logging
import os import os
from nni.tuner import Tuner from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward from nni.utils import OptimizeMode, extract_scalar_reward
from nni.networkmorphism_tuner.bayesian import BayesianOptimizer from nni.networkmorphism_tuner.bayesian import BayesianOptimizer
...@@ -34,7 +36,35 @@ logger = logging.getLogger("NetworkMorphism_AutoML") ...@@ -34,7 +36,35 @@ logger = logging.getLogger("NetworkMorphism_AutoML")
class NetworkMorphismTuner(Tuner): class NetworkMorphismTuner(Tuner):
"""NetworkMorphismTuner is a tuner which using network morphism techniques.""" """
NetworkMorphismTuner is a tuner which uses network morphism techniques.
Attributes
----------
n_classes : int
The class number or output node number (default: ``10``)
input_shape : tuple
A tuple including: (input_width, input_width, input_channel)
t_min : float
The minimum temperature for simulated annealing. (default: ``Constant.T_MIN``)
beta : float
The beta in acquisition function. (default: ``Constant.BETA``)
algorithm_name : str
algorithm name used in the network morphism (default: ``"Bayesian"``)
optimize_mode : str
optimize mode "minimize" or "maximize" (default: ``"minimize"``)
verbose : bool
verbose to print the log (default: ``True``)
bo : BayesianOptimizer
The optimizer used in the network morphism tuner.
max_model_size : int
max model size to the graph (default: ``Constant.MAX_MODEL_SIZE``)
default_model_len : int
default model length (default: ``Constant.MODEL_LEN``)
default_model_width : int
default model width (default: ``Constant.MODEL_WIDTH``)
search_space : dict
"""
def __init__( def __init__(
self, self,
...@@ -52,36 +82,8 @@ class NetworkMorphismTuner(Tuner): ...@@ -52,36 +82,8 @@ class NetworkMorphismTuner(Tuner):
default_model_len=Constant.MODEL_LEN, default_model_len=Constant.MODEL_LEN,
default_model_width=Constant.MODEL_WIDTH, default_model_width=Constant.MODEL_WIDTH,
): ):
""" initilizer of the NetworkMorphismTuner. """
Initializer of the NetworkMorphismTuner.
Parameters
----------
task : str
task mode, such as "cv","common" etc. (default: {"cv"})
input_width : int
input sample shape (default: {32})
input_channel : int
input sample shape (default: {3})
n_output_node : int
output node number (default: {10})
algorithm_name : str
algorithm name used in the network morphism (default: {"Bayesian"})
optimize_mode : str
optimize mode "minimize" or "maximize" (default: {"minimize"})
path : str
default mode path to save the model file (default: {"model_path"})
verbose : bool
verbose to print the log (default: {True})
beta : float
The beta in acquisition function. (default: {Constant.BETA})
t_min : float
The minimum temperature for simulated annealing. (default: {Constant.T_MIN})
max_model_size : int
max model size to the graph (default: {Constant.MAX_MODEL_SIZE})
default_model_len : int
default model length (default: {Constant.MODEL_LEN})
default_model_width : int
default model width (default: {Constant.MODEL_WIDTH})
""" """
if not os.path.exists(path): if not os.path.exists(path):
...@@ -92,7 +94,8 @@ class NetworkMorphismTuner(Tuner): ...@@ -92,7 +94,8 @@ class NetworkMorphismTuner(Tuner):
elif task == "common": elif task == "common":
self.generators = [MlpGenerator] self.generators = [MlpGenerator]
else: else:
raise NotImplementedError('{} task not supported in List ["cv","common"]') raise NotImplementedError(
'{} task not supported in List ["cv","common"]')
self.n_classes = n_output_node self.n_classes = n_output_node
self.input_shape = (input_width, input_width, input_channel) self.input_shape = (input_width, input_width, input_channel)
...@@ -106,7 +109,8 @@ class NetworkMorphismTuner(Tuner): ...@@ -106,7 +109,8 @@ class NetworkMorphismTuner(Tuner):
self.verbose = verbose self.verbose = verbose
self.model_count = 0 self.model_count = 0
self.bo = BayesianOptimizer(self, self.t_min, self.optimize_mode, self.beta) self.bo = BayesianOptimizer(
self, self.t_min, self.optimize_mode, self.beta)
self.training_queue = [] self.training_queue = []
self.descriptors = [] self.descriptors = []
self.history = [] self.history = []
...@@ -117,6 +121,7 @@ class NetworkMorphismTuner(Tuner): ...@@ -117,6 +121,7 @@ class NetworkMorphismTuner(Tuner):
self.search_space = dict() self.search_space = dict()
def update_search_space(self, search_space): def update_search_space(self, search_space):
""" """
Update search space definition in tuner by search_space in neural architecture. Update search space definition in tuner by search_space in neural architecture.
...@@ -140,7 +145,8 @@ class NetworkMorphismTuner(Tuner): ...@@ -140,7 +145,8 @@ class NetworkMorphismTuner(Tuner):
new_father_id, generated_graph = self.generate() new_father_id, generated_graph = self.generate()
new_model_id = self.model_count new_model_id = self.model_count
self.model_count += 1 self.model_count += 1
self.training_queue.append((generated_graph, new_father_id, new_model_id)) self.training_queue.append(
(generated_graph, new_father_id, new_model_id))
self.descriptors.append(generated_graph.extract_descriptor()) self.descriptors.append(generated_graph.extract_descriptor())
graph, father_id, model_id = self.training_queue.pop(0) graph, father_id, model_id = self.training_queue.pop(0)
...@@ -153,12 +159,15 @@ class NetworkMorphismTuner(Tuner): ...@@ -153,12 +159,15 @@ class NetworkMorphismTuner(Tuner):
return json_out return json_out
def receive_trial_result(self, parameter_id, parameters, value, **kwargs): def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
""" Record an observation of the objective function. """
Record an observation of the objective function.
Parameters Parameters
---------- ----------
parameter_id : int parameter_id : int
The id of a group of parameters generated by the nni manager.
parameters : dict parameters : dict
A group of parameters.
value : dict/float value : dict/float
if value is dict, it should have "default" key. if value is dict, it should have "default" key.
""" """
...@@ -175,8 +184,11 @@ class NetworkMorphismTuner(Tuner): ...@@ -175,8 +184,11 @@ class NetworkMorphismTuner(Tuner):
self.add_model(reward, model_id) self.add_model(reward, model_id)
self.update(father_id, graph, reward, model_id) self.update(father_id, graph, reward, model_id)
def init_search(self): def init_search(self):
"""Call the generators to generate the initial architectures for the search.""" """
Call the generators to generate the initial architectures for the search.
"""
if self.verbose: if self.verbose:
logger.info("Initializing search.") logger.info("Initializing search.")
for generator in self.generators: for generator in self.generators:
...@@ -191,14 +203,16 @@ class NetworkMorphismTuner(Tuner): ...@@ -191,14 +203,16 @@ class NetworkMorphismTuner(Tuner):
if self.verbose: if self.verbose:
logger.info("Initialization finished.") logger.info("Initialization finished.")
def generate(self): def generate(self):
"""Generate the next neural architecture. """
Generate the next neural architecture.
Returns Returns
------- -------
other_info: any object other_info : any object
Anything to be saved in the training queue together with the architecture. Anything to be saved in the training queue together with the architecture.
generated_graph: Graph generated_graph : Graph
An instance of Graph. An instance of Graph.
""" """
generated_graph, new_father_id = self.bo.generate(self.descriptors) generated_graph, new_father_id = self.bo.generate(self.descriptors)
...@@ -211,7 +225,8 @@ class NetworkMorphismTuner(Tuner): ...@@ -211,7 +225,8 @@ class NetworkMorphismTuner(Tuner):
return new_father_id, generated_graph return new_father_id, generated_graph
def update(self, other_info, graph, metric_value, model_id): def update(self, other_info, graph, metric_value, model_id):
""" Update the controller with evaluation result of a neural architecture. """
Update the controller with evaluation result of a neural architecture.
Parameters Parameters
---------- ----------
...@@ -228,7 +243,8 @@ class NetworkMorphismTuner(Tuner): ...@@ -228,7 +243,8 @@ class NetworkMorphismTuner(Tuner):
self.bo.add_child(father_id, model_id) self.bo.add_child(father_id, model_id)
def add_model(self, metric_value, model_id): def add_model(self, metric_value, model_id):
""" Add model to the history, x_queue and y_queue """
Add model to the history, x_queue and y_queue
Parameters Parameters
---------- ----------
...@@ -252,16 +268,21 @@ class NetworkMorphismTuner(Tuner): ...@@ -252,16 +268,21 @@ class NetworkMorphismTuner(Tuner):
file.close() file.close()
return ret return ret
def get_best_model_id(self): def get_best_model_id(self):
""" Get the best model_id from history using the metric value """
Get the best model_id from history using the metric value
""" """
if self.optimize_mode is OptimizeMode.Maximize: if self.optimize_mode is OptimizeMode.Maximize:
return max(self.history, key=lambda x: x["metric_value"])["model_id"] return max(self.history, key=lambda x: x["metric_value"])[
"model_id"]
return min(self.history, key=lambda x: x["metric_value"])["model_id"] return min(self.history, key=lambda x: x["metric_value"])["model_id"]
def load_model_by_id(self, model_id): def load_model_by_id(self, model_id):
"""Get the model by model_id """
Get the model by model_id
Parameters Parameters
---------- ----------
...@@ -281,7 +302,8 @@ class NetworkMorphismTuner(Tuner): ...@@ -281,7 +302,8 @@ class NetworkMorphismTuner(Tuner):
return load_model return load_model
def load_best_model(self): def load_best_model(self):
""" Get the best model by model id """
Get the best model by model id
Returns Returns
------- -------
...@@ -291,7 +313,8 @@ class NetworkMorphismTuner(Tuner): ...@@ -291,7 +313,8 @@ class NetworkMorphismTuner(Tuner):
return self.load_model_by_id(self.get_best_model_id()) return self.load_model_by_id(self.get_best_model_id())
def get_metric_value_by_id(self, model_id): def get_metric_value_by_id(self, model_id):
""" Get the model metric valud by its model_id """
Get the model metric value by its model_id
Parameters Parameters
---------- ----------
......
...@@ -92,17 +92,25 @@ class CnnGenerator(NetworkGenerator): ...@@ -92,17 +92,25 @@ class CnnGenerator(NetworkGenerator):
for i in range(model_len): for i in range(model_len):
output_node_id = graph.add_layer(StubReLU(), output_node_id) output_node_id = graph.add_layer(StubReLU(), output_node_id)
output_node_id = graph.add_layer( output_node_id = graph.add_layer(
self.batch_norm(graph.node_list[output_node_id].shape[-1]), output_node_id self.batch_norm(
graph.node_list[output_node_id].shape[-1]), output_node_id
) )
output_node_id = graph.add_layer( output_node_id = graph.add_layer(
self.conv(temp_input_channel, model_width, kernel_size=3, stride=stride), self.conv(
temp_input_channel,
model_width,
kernel_size=3,
stride=stride),
output_node_id, output_node_id,
) )
temp_input_channel = model_width temp_input_channel = model_width
if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1): if pooling_len == 0 or (
output_node_id = graph.add_layer(self.pooling(), output_node_id) (i + 1) % pooling_len == 0 and i != model_len - 1):
output_node_id = graph.add_layer(
self.pooling(), output_node_id)
output_node_id = graph.add_layer(self.global_avg_pooling(), output_node_id) output_node_id = graph.add_layer(
self.global_avg_pooling(), output_node_id)
output_node_id = graph.add_layer( output_node_id = graph.add_layer(
self.dropout(Constant.CONV_DROPOUT_RATE), output_node_id self.dropout(Constant.CONV_DROPOUT_RATE), output_node_id
) )
...@@ -111,7 +119,11 @@ class CnnGenerator(NetworkGenerator): ...@@ -111,7 +119,11 @@ class CnnGenerator(NetworkGenerator):
output_node_id, output_node_id,
) )
output_node_id = graph.add_layer(StubReLU(), output_node_id) output_node_id = graph.add_layer(StubReLU(), output_node_id)
graph.add_layer(StubDense(model_width, self.n_output_node), output_node_id) graph.add_layer(
StubDense(
model_width,
self.n_output_node),
output_node_id)
return graph return graph
...@@ -145,7 +157,8 @@ class MlpGenerator(NetworkGenerator): ...@@ -145,7 +157,8 @@ class MlpGenerator(NetworkGenerator):
if model_width is None: if model_width is None:
model_width = Constant.MODEL_WIDTH model_width = Constant.MODEL_WIDTH
if isinstance(model_width, list) and not len(model_width) == model_len: if isinstance(model_width, list) and not len(model_width) == model_len:
raise ValueError("The length of 'model_width' does not match 'model_len'") raise ValueError(
"The length of 'model_width' does not match 'model_len'")
elif isinstance(model_width, int): elif isinstance(model_width, int):
model_width = [model_width] * model_len model_width = [model_width] * model_len
...@@ -162,5 +175,9 @@ class MlpGenerator(NetworkGenerator): ...@@ -162,5 +175,9 @@ class MlpGenerator(NetworkGenerator):
output_node_id = graph.add_layer(StubReLU(), output_node_id) output_node_id = graph.add_layer(StubReLU(), output_node_id)
n_nodes_prev_layer = width n_nodes_prev_layer = width
graph.add_layer(StubDense(n_nodes_prev_layer, self.n_output_node), output_node_id) graph.add_layer(
StubDense(
n_nodes_prev_layer,
self.n_output_node),
output_node_id)
return graph return graph
...@@ -59,9 +59,12 @@ class NetworkMorphismTestCase(TestCase): ...@@ -59,9 +59,12 @@ class NetworkMorphismTestCase(TestCase):
graph_recover.layer_id_to_input_node_ids, graph_recover.layer_id_to_input_node_ids,
) )
self.assertEqual(graph_init.adj_list, graph_recover.adj_list) self.assertEqual(graph_init.adj_list, graph_recover.adj_list)
self.assertEqual(graph_init.reverse_adj_list, graph_recover.reverse_adj_list)
self.assertEqual( self.assertEqual(
len(graph_init.operation_history), len(graph_recover.operation_history) graph_init.reverse_adj_list,
graph_recover.reverse_adj_list)
self.assertEqual(
len(graph_init.operation_history), len(
graph_recover.operation_history)
) )
self.assertEqual(graph_init.n_dim, graph_recover.n_dim) self.assertEqual(graph_init.n_dim, graph_recover.n_dim)
self.assertEqual(graph_init.conv, graph_recover.conv) self.assertEqual(graph_init.conv, graph_recover.conv)
...@@ -71,7 +74,8 @@ class NetworkMorphismTestCase(TestCase): ...@@ -71,7 +74,8 @@ class NetworkMorphismTestCase(TestCase):
node_list_init = [node.shape for node in graph_init.node_list] node_list_init = [node.shape for node in graph_init.node_list]
node_list_recover = [node.shape for node in graph_recover.node_list] node_list_recover = [node.shape for node in graph_recover.node_list]
self.assertEqual(node_list_init, node_list_recover) self.assertEqual(node_list_init, node_list_recover)
self.assertEqual(len(graph_init.node_to_id), len(graph_recover.node_to_id)) self.assertEqual(len(graph_init.node_to_id),
len(graph_recover.node_to_id))
layer_list_init = [ layer_list_init = [
layer_description_extractor(item, graph_init.node_to_id) layer_description_extractor(item, graph_init.node_to_id)
for item in graph_init.layer_list for item in graph_init.layer_list
...@@ -82,7 +86,8 @@ class NetworkMorphismTestCase(TestCase): ...@@ -82,7 +86,8 @@ class NetworkMorphismTestCase(TestCase):
] ]
self.assertEqual(layer_list_init, layer_list_recover) self.assertEqual(layer_list_init, layer_list_recover)
node_to_id_init = [graph_init.node_to_id[node] for node in graph_init.node_list] node_to_id_init = [graph_init.node_to_id[node]
for node in graph_init.node_list]
node_to_id_recover = [ node_to_id_recover = [
graph_recover.node_to_id[node] for node in graph_recover.node_list graph_recover.node_to_id[node] for node in graph_recover.node_list
] ]
...@@ -192,8 +197,8 @@ class NetworkMorphismTestCase(TestCase): ...@@ -192,8 +197,8 @@ class NetworkMorphismTestCase(TestCase):
""" """
tuner = NetworkMorphismTuner() tuner = NetworkMorphismTuner()
tuner.add_model(0.8, 0) tuner.add_model(0.8, 0)
tuner.add_model(0.9, 1) tuner.add_model(0.9, 1)
self.assertEqual(tuner.get_best_model_id(), 1) self.assertEqual(tuner.get_best_model_id(), 1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment