"examples/vscode:/vscode.git/clone" did not exist on "f36b62a91f612342492c4c7c1cfff52b24966537"
Commit 1b4bcdba authored by Lee's avatar Lee Committed by xuehui
Browse files

Optimize MetisTuner (#811)

* add different tuner config files for config_test

* change MetisTuner config test due to no lightgbm python module in integration test

* install smac package in azure-pipelines

* SMAC needs swig to be installed

* Try to install swig from source code

* remove SMAC test because the dependency can not be installed

* use sudo to install the swig

* sleep 10s to make sure the port has been released

* remove tuner test for networkmorphism because it uses more than 30s to release the tcp port

* change the word "down" to "done"

* add config test for Curvefitting assessor

* change file name

* Fix data type not match bug

* Optimize MetisTuner

* prettify the code

* Follow the review comment

* add exploration probability
parent 33ad0f9d
......@@ -39,7 +39,7 @@ def _outlierDetection_threaded(inputs):
outlier = None
# Create a diagnostic regression model which removes the sample that we want to evaluate
diagnostic_regressor_gp = gp_create_model.createModel(\
diagnostic_regressor_gp = gp_create_model.create_model(\
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(samples_x[samples_idx], diagnostic_regressor_gp['model'])
......@@ -81,7 +81,7 @@ def outlierDetection(samples_x, samples_y_aggregation):
for samples_idx in range(0, len(samples_x)):
#sys.stderr.write("[%s] DEBUG: Evaluating %d of %d samples\n"
# \ % (os.path.basename(__file__), samples_idx + 1, len(samples_x)))
diagnostic_regressor_gp = gp_create_model.createModel(\
diagnostic_regressor_gp = gp_create_model.create_model(\
samples_x[0:samples_idx] + samples_x[samples_idx + 1:],\
samples_y_aggregation[0:samples_idx] + samples_y_aggregation[samples_idx + 1:])
mu, sigma = gp_prediction.predict(samples_x[samples_idx],
......@@ -95,5 +95,3 @@ def outlierDetection(samples_x, samples_y_aggregation):
outliers = None if len(outliers) == 0 else outliers
return outliers
\ No newline at end of file
......@@ -24,22 +24,20 @@ import os
import random
import statistics
import sys
import numpy as np
from enum import Enum, unique
from multiprocessing.dummy import Pool as ThreadPool
from nni.tuner import Tuner
import numpy as np
import nni.metis_tuner.lib_data as lib_data
import nni.metis_tuner.lib_constraint_summation as lib_constraint_summation
import nni.metis_tuner.Regression_GP.CreateModel as gp_create_model
import nni.metis_tuner.Regression_GP.Selection as gp_selection
import nni.metis_tuner.Regression_GP.Prediction as gp_prediction
import nni.metis_tuner.Regression_GP.OutlierDetection as gp_outlier_detection
import nni.metis_tuner.lib_data as lib_data
import nni.metis_tuner.Regression_GMM.CreateModel as gmm_create_model
import nni.metis_tuner.Regression_GMM.Selection as gmm_selection
import nni.metis_tuner.Regression_GP.CreateModel as gp_create_model
import nni.metis_tuner.Regression_GP.OutlierDetection as gp_outlier_detection
import nni.metis_tuner.Regression_GP.Prediction as gp_prediction
import nni.metis_tuner.Regression_GP.Selection as gp_selection
from nni.tuner import Tuner
logger = logging.getLogger("Metis_Tuner_AutoML")
......@@ -67,7 +65,7 @@ class MetisTuner(Tuner):
"""
def __init__(self, optimize_mode="maximize", no_resampling=True, no_candidates=True,
selection_num_starting_points=10, cold_start_num=10):
selection_num_starting_points=600, cold_start_num=10, exploration_probability=0.1):
"""
Parameters
----------
......@@ -89,11 +87,15 @@ class MetisTuner(Tuner):
cold_start_num: int
Metis needs some trial results for a cold start. When the number of trial results is less than
cold_start_num, Metis will randomly sample hyper-parameters for the trial.
exploration_probability: float
The probability that Metis selects parameters from exploration instead of exploitation.
"""
self.samples_x = []
self.samples_y = []
self.samples_y_aggregation = []
self.history_parameters = set()
self.space = None
self.no_resampling = no_resampling
self.no_candidates = no_candidates
......@@ -101,6 +103,7 @@ class MetisTuner(Tuner):
self.key_order = []
self.cold_start_num = cold_start_num
self.selection_num_starting_points = selection_num_starting_points
self.exploration_probability = exploration_probability
self.minimize_constraints_fun = None
self.minimize_starting_points = None
......@@ -128,7 +131,7 @@ class MetisTuner(Tuner):
except Exception as ex:
logger.exception(ex)
raise RuntimeError("The format search space contains \
some key that didn't define in key_order.")
some key that didn't define in key_order." )
if key_type == 'quniform':
if key_range[2] == 1:
......@@ -200,6 +203,8 @@ class MetisTuner(Tuner):
init_parameter = _rand_init(self.x_bounds, self.x_types, 1)[0]
results = self._pack_output(init_parameter)
else:
self.minimize_starting_points = _rand_init(self.x_bounds, self.x_types, \
self.selection_num_starting_points)
results = self._selection(self.samples_x, self.samples_y_aggregation, self.samples_y,
self.x_bounds, self.x_types,
threshold_samplessize_resampling=(None if self.no_resampling is True else 50),
......@@ -245,7 +250,7 @@ class MetisTuner(Tuner):
# calculate y aggregation
median = get_median(temp_y)
self.samples_y_aggregation[idx] = median
self.samples_y_aggregation[idx] = [median]
else:
self.samples_x.append(sample_x)
self.samples_y.append([value])
......@@ -268,8 +273,12 @@ class MetisTuner(Tuner):
# ===== STEP 1: Compute the current optimum =====
#sys.stderr.write("[%s] Predicting the optimal configuration from the current training dataset...\n" % (os.path.basename(__file__)))
gp_model = gp_create_model.create_model(samples_x, samples_y_aggregation)
lm_current = gp_selection.selection("lm", samples_y_aggregation, x_bounds,
x_types, gp_model['model'],
lm_current = gp_selection.selection(
"lm",
samples_y_aggregation,
x_bounds,
x_types,
gp_model['model'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
if not lm_current:
......@@ -284,8 +293,12 @@ class MetisTuner(Tuner):
# ===== STEP 2: Get recommended configurations for exploration =====
#sys.stderr.write("[%s] Getting candidates for exploration...\n"
#% \(os.path.basename(__file__)))
results_exploration = gp_selection.selection("lc", samples_y_aggregation,
x_bounds, x_types, gp_model['model'],
results_exploration = gp_selection.selection(
"lc",
samples_y_aggregation,
x_bounds,
x_types,
gp_model['model'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
......@@ -308,7 +321,8 @@ class MetisTuner(Tuner):
print("Getting candidates for exploitation...\n")
try:
gmm = gmm_create_model.create_model(samples_x, samples_y_aggregation)
results_exploitation = gmm_selection.selection(x_bounds,
results_exploitation = gmm_selection.selection(
x_bounds,
x_types,
gmm['clusteringmodel_good'],
gmm['clusteringmodel_bad'],
......@@ -340,7 +354,6 @@ class MetisTuner(Tuner):
results_outliers = gp_outlier_detection.outlierDetection_threaded(samples_x, samples_y_aggregation)
if results_outliers is not None:
#temp = len(candidates)
for results_outlier in results_outliers:
if _num_past_samples(samples_x[results_outlier['samples_idx']], samples_x, samples_y) < max_resampling_per_x:
......@@ -357,7 +370,10 @@ class MetisTuner(Tuner):
logger.info("Evaluating information gain of %d candidates...\n")
next_improvement = 0
threads_inputs = [[candidate, samples_x, samples_y, x_bounds, x_types, minimize_constraints_fun, minimize_starting_points] for candidate in candidates]
threads_inputs = [[
candidate, samples_x, samples_y, x_bounds, x_types,
minimize_constraints_fun, minimize_starting_points
] for candidate in candidates]
threads_pool = ThreadPool(4)
# Evaluate what would happen if we actually sample each candidate
threads_results = threads_pool.map(_calculate_lowest_mu_threaded, threads_inputs)
......@@ -370,19 +386,21 @@ class MetisTuner(Tuner):
temp_improvement = threads_result['expected_lowest_mu'] - lm_current['expected_mu']
if next_improvement > temp_improvement:
# logger.info("DEBUG: \"next_candidate\" changed: \
# lowest mu might reduce from %f (%s) to %f (%s), %s\n" %\
# lm_current['expected_mu'], str(lm_current['hyperparameter']),\
# threads_result['expected_lowest_mu'],\
# str(threads_result['candidate']['hyperparameter']),\
# threads_result['candidate']['reason'])
logger.info("DEBUG: \"next_candidate\" changed: \
lowest mu might reduce from %f (%s) to %f (%s), %s\n" %\
lm_current['expected_mu'], str(lm_current['hyperparameter']),\
threads_result['expected_lowest_mu'],\
str(threads_result['candidate']['hyperparameter']),\
threads_result['candidate']['reason'])
next_improvement = temp_improvement
next_candidate = threads_result['candidate']
else:
# ===== STEP 6: If we have no candidates, randomly pick one =====
logger.info("DEBUG: No candidates from exploration, exploitation,\
and resampling. We will random a candidate for next_candidate\n")
logger.info(
"DEBUG: No candidates from exploration, exploitation,\
and resampling. We will random a candidate for next_candidate\n"
)
next_candidate = _rand_with_constraints(x_bounds, x_types) \
if minimize_starting_points is None else minimize_starting_points[0]
......@@ -391,7 +409,12 @@ class MetisTuner(Tuner):
next_candidate = {'hyperparameter': next_candidate, 'reason': "random",
'expected_mu': expected_mu, 'expected_sigma': expected_sigma}
# ===== STEP 7: If current optimal hyperparameter occurs in the history or exploration probability is less than the threshold, take next config as exploration step =====
outputs = self._pack_output(lm_current['hyperparameter'])
ap = random.uniform(0, 1)
if outputs in self.history_parameters or ap<=self.exploration_probability:
outputs = self._pack_output(next_candidate['hyperparameter'])
self.history_parameters.add(outputs)
return outputs
......@@ -437,8 +460,12 @@ def _calculate_lowest_mu_threaded(inputs):
# Aggregates multiple observation of the sample sampling points
temp_y_aggregation = [statistics.median(temp_sample_y) for temp_sample_y in temp_samples_y]
temp_gp = gp_create_model.create_model(temp_samples_x, temp_y_aggregation)
temp_results = gp_selection.selection("lm", temp_y_aggregation,
x_bounds, x_types, temp_gp['model'],
temp_results = gp_selection.selection(
"lm",
temp_y_aggregation,
x_bounds,
x_types,
temp_gp['model'],
minimize_starting_points,
minimize_constraints_fun=minimize_constraints_fun)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment