OpenDAS / nni · Commits

Commit a911b856 (unverified)
Resolve conflicts for #4760 (#4762)

Authored Apr 21, 2022 by Yuge Zhang; committed by GitHub on Apr 21, 2022.
Parent: 14d2966b
Changes: showing 20 of the commit's 688 changed files, with 655 additions and 304 deletions (+655 −304).
Files in this page:

nni/algorithms/compression/v2/pytorch/utils/__init__.py            +3    −0
nni/algorithms/compression/v2/pytorch/utils/constructor_helper.py  +6    −6
nni/algorithms/compression/v2/pytorch/utils/pruning.py             +12   −1
nni/algorithms/feature_engineering/gbdt_selector/__init__.py       +3    −0
nni/algorithms/feature_engineering/gradient_selector/__init__.py   +3    −0
nni/algorithms/hpo/batch_tuner.py                                  +48   −35
nni/algorithms/hpo/bohb_advisor/__init__.py                        +3    −0
nni/algorithms/hpo/bohb_advisor/bohb_advisor.py                    +131  −18
nni/algorithms/hpo/curvefitting_assessor/curvefitting_assessor.py  +38   −32
nni/algorithms/hpo/dngo_tuner.py                                   +17   −1
nni/algorithms/hpo/evolution_tuner.py                              +45   −26
nni/algorithms/hpo/gp_tuner/__init__.py                            +3    −0
nni/algorithms/hpo/gp_tuner/gp_tuner.py                            +58   −10
nni/algorithms/hpo/gridsearch_tuner.py                             +36   −6
nni/algorithms/hpo/hyperband_advisor.py                            +124  −11
nni/algorithms/hpo/hyperopt_tuner.py                               +26   −57
nni/algorithms/hpo/medianstop_assessor.py                          +26   −34
nni/algorithms/hpo/metis_tuner/__init__.py                         +3    −0
nni/algorithms/hpo/metis_tuner/metis_tuner.py                      +67   −67
nni/algorithms/hpo/networkmorphism_tuner/__init__.py               +3    −0
nni/algorithms/compression/v2/pytorch/utils/__init__.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

 from .config_validation import CompressorSchema
 from .pruning import (
     config_list_canonical,
...
nni/algorithms/compression/v2/pytorch/utils/constructor_helper.py  (view file @ a911b856)

...
@@ -10,7 +10,7 @@ from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
 from nni.common.serializer import _trace_cls
-from nni.common.serializer import Traceable
+from nni.common.serializer import Traceable, is_traceable

 __all__ = ['OptimizerConstructHelper', 'LRSchedulerConstructHelper']
...
@@ -80,14 +80,14 @@ class OptimizerConstructHelper(ConstructHelper):
     @staticmethod
     def from_trace(model: Module, optimizer_trace: Traceable):
-        assert isinstance(optimizer_trace, Traceable), \
+        assert is_traceable(optimizer_trace), \
             'Please use nni.trace to wrap the optimizer class before initialize the optimizer.'
         assert isinstance(optimizer_trace, Optimizer), \
             'It is not an instance of torch.nn.Optimizer.'
         return OptimizerConstructHelper(model,
-                                        optimizer_trace._get_nni_attr('symbol'),
-                                        *optimizer_trace._get_nni_attr('args'),
-                                        **optimizer_trace._get_nni_attr('kwargs'))
+                                        optimizer_trace.trace_symbol,
+                                        *optimizer_trace.trace_args,
+                                        **optimizer_trace.trace_kwargs)

 class LRSchedulerConstructHelper(ConstructHelper):
...
@@ -112,7 +112,7 @@ class LRSchedulerConstructHelper(ConstructHelper):
     @staticmethod
     def from_trace(lr_scheduler_trace: Traceable):
-        assert isinstance(lr_scheduler_trace, Traceable), \
+        assert is_traceable(lr_scheduler_trace), \
             'Please use nni.trace to wrap the lr scheduler class before initialize the scheduler.'
         assert isinstance(lr_scheduler_trace, _LRScheduler), \
             'It is not an instance of torch.nn.lr_scheduler._LRScheduler.'
...
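The assertion messages above state the calling convention without showing it. For orientation only (this is not part of the commit), here is a minimal sketch of what a traced optimizer looks like before it is handed to OptimizerConstructHelper.from_trace; the toy model and learning rate are illustrative.

    # Sketch only: wrap the optimizer class with nni.trace before instantiating it,
    # so the resulting object passes the new is_traceable() check and carries
    # trace_symbol / trace_args / trace_kwargs.
    import nni
    import torch
    from torch import nn

    model = nn.Linear(4, 2)                                      # toy model
    optimizer = nni.trace(torch.optim.SGD)(model.parameters(), lr=0.01)
    # OptimizerConstructHelper.from_trace(model, optimizer)      # as in the diff above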
nni/algorithms/compression/v2/pytorch/utils/pruning.py  (view file @ a911b856)

...
@@ -198,7 +198,18 @@ def compute_sparsity(origin_model: Module, compact_model: Module, compact_model_
     The current state means `compact_model` + `compact_model_masks`
     (i.e., `compact_model_masks` applied on `compact_model`).
     The compact model is the origin model after pruning,
-    and it may have different structure with origin_model cause of speed up.
+    and it may have different structure with origin_model cause of speedup.
+
+    Parameters
+    ----------
+    origin_model : torch.nn.Module
+        The original un-pruned model.
+    compact_model : torch.nn.Module
+        The model after speedup or original model.
+    compact_model_masks: Dict[str, Dict[str, Tensor]]
+        The masks applied on the compact model, if the original model have been speedup, this should be {}.
+    config_list : List[Dict]
+        The config_list used by pruning the original model.

     Returns
     -------
...
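The docstring added above talks about "masks applied on the compact model". As orientation only, here is a standalone sketch of what mask-induced sparsity means per layer; it is not the commit's compute_sparsity implementation, and the layer names and mask shapes are invented.

    # Sketch: sparsity of a layer under a 0/1 weight mask = fraction of zeroed weights.
    import torch
    from torch import nn

    def masked_sparsity(model: nn.Module, masks: dict) -> dict:
        """masks maps layer name -> {'weight': 0/1 tensor shaped like the weight}."""
        result = {}
        for name, module in model.named_modules():
            if name in masks and hasattr(module, 'weight'):
                mask = masks[name]['weight']
                result[name] = 1.0 - mask.count_nonzero().item() / mask.numel()
        return result

    model = nn.Sequential(nn.Linear(8, 4))
    masks = {'0': {'weight': (torch.rand(4, 8) > 0.5).float()}}
    print(masked_sparsity(model, masks))   # e.g. {'0': ~0.5}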
nni/algorithms/feature_engineering/gbdt_selector/__init__.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

 from .gbdt_selector import GBDTSelector
 \ No newline at end of file
nni/algorithms/feature_engineering/gradient_selector/__init__.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

 from .gradient_selector import FeatureGradientSelector
 \ No newline at end of file
nni/algorithms/hpo/batch_tuner.py  (view file @ a911b856)

...
@@ -20,27 +20,64 @@ LOGGER = logging.getLogger('batch_tuner_AutoML')
 class BatchTuner(Tuner):
     """
-    BatchTuner is tuner will running all the configure that user want to run batchly.
+    Batch tuner is a special tuner that allows users to simply provide several hyperparameter sets,
+    and it will evaluate each set.
+
+    Batch tuner does **not** support standard search space.
+
+    Search space of batch tuner looks like a single ``choice`` in standard search space,
+    but it has different meaning.
+
+    Consider following search space:
+
+    .. code-block::
+
+        'combine_params': {
+            '_type': 'choice',
+            '_value': [
+                {'x': 0, 'y': 1},
+                {'x': 1, 'y': 2},
+                {'x': 1, 'y': 3},
+            ]
+        }
+
+    Batch tuner will generate following 4 hyperparameter sets:
+
+    1. {'x': 0, 'y': 1}
+    2. {'x': 1, 'y': 2}
+    3. {'x': 1, 'y': 3}
+
+    If this search space was used with grid search tuner, it would instead generate:
+
+    1. {'combine_params': {'x': 0, 'y': 1 }}
+    2. {'combine_params': {'x': 1, 'y': 2 }}
+    3. {'combine_params': {'x': 1, 'y': 3 }}

     Examples
     --------
-    The search space only be accepted like:
-
-    ::
-
-        {'combine_params':
-            { '_type': 'choice',
-              '_value': '[{...}, {...}, {...}]',
-            }
+    .. code-block::
+
+        config.search_space = {
+            'combine_params': {
+                '_type': 'choice',
+                '_value': [
+                    {'optimizer': 'Adam', 'learning_rate': 0.001},
+                    {'optimizer': 'Adam', 'learning_rate': 0.0001},
+                    {'optimizer': 'Adam', 'learning_rate': 0.00001},
+                    {'optimizer': 'SGD', 'learning_rate': 0.01},
+                    {'optimizer': 'SGD', 'learning_rate': 0.005},
+                ]
+            }
+        }
+        config.tuner.name = 'BatchTuner'
     """

     def __init__(self):
         self._count = -1
         self._values = []

-    def is_valid(self, search_space):
+    def _is_valid(self, search_space):
         """
         Check the search space is valid: only contains 'choice' type
...
@@ -70,27 +107,10 @@ class BatchTuner(Tuner):
         return None

     def update_search_space(self, search_space):
-        """Update the search space
-
-        Parameters
-        ----------
-        search_space : dict
-        """
         validate_search_space(search_space, ['choice'])
-        self._values = self.is_valid(search_space)
+        self._values = self._is_valid(search_space)

     def generate_parameters(self, parameter_id, **kwargs):
-        """Returns a dict of trial (hyper-)parameters, as a serializable object.
-
-        Parameters
-        ----------
-        parameter_id : int
-
-        Returns
-        -------
-        dict
-            A candidate parameter group.
-        """
         self._count += 1
         if self._count > len(self._values) - 1:
             raise nni.NoMoreTrialError('no more parameters now.')
...
@@ -100,13 +120,6 @@ class BatchTuner(Tuner):
         pass

     def import_data(self, data):
-        """Import additional data for tuning
-
-        Parameters
-        ----------
-        data:
-            a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
-        """
         if not self._values:
             LOGGER.info("Search space has not been initialized, skip this data import")
             return
...
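For readers skimming the new docstring, the described behaviour boils down to handing out each entry of the single ``choice`` list once and stopping when the list is exhausted. The toy walk-through below (not the tuner's code) restates that; the values are taken from the docstring above.

    # Each item of '_value' becomes one trial, in order.
    search_space = {
        'combine_params': {
            '_type': 'choice',
            '_value': [
                {'x': 0, 'y': 1},
                {'x': 1, 'y': 2},
                {'x': 1, 'y': 3},
            ],
        }
    }

    values = search_space['combine_params']['_value']
    for parameter_id, params in enumerate(values):
        print(parameter_id, params)
    # a further request would raise nni.NoMoreTrialError in the real tuner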
nni/algorithms/hpo/bohb_advisor/__init__.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

 from .bohb_advisor import BOHB, BOHBClassArgsValidator
nni/algorithms/hpo/bohb_advisor/bohb_advisor.py  (view file @ a911b856)

...
@@ -249,20 +249,52 @@ class BOHBClassArgsValidator(ClassArgsValidator):
 class BOHB(MsgDispatcherBase):
     """
-    BOHB performs robust and efficient hyperparameter optimization
-    at scale by combining the speed of Hyperband searches with the
-    guidance and guarantees of convergence of Bayesian Optimization.
-    Instead of sampling new configurations at random, BOHB uses
-    kernel density estimators to select promising candidates.
+    `BOHB <https://arxiv.org/abs/1807.01774>`__ is a robust and efficient hyperparameter tuning algorithm at scale.
+    BO is an abbreviation for "Bayesian Optimization" and HB is an abbreviation for "Hyperband".
+
+    BOHB relies on HB (Hyperband) to determine how many configurations to evaluate with which budget,
+    but it replaces the random selection of configurations at the beginning of each HB iteration
+    by a model-based search (Bayesian Optimization).
+    Once the desired number of configurations for the iteration is reached,
+    the standard successive halving procedure is carried out using these configurations.
+    It keeps track of the performance of all function evaluations g(x, b) of configurations x
+    on all budgets b to use as a basis for our models in later iterations.
+    Please refer to the paper :footcite:t:`falkner2018bohb` for detailed algorithm.
+
+    Note that BOHB needs additional installation using the following command:
+
+    .. code-block:: bash
+
+        pip install nni[BOHB]
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.advisor.name = 'BOHB'
+        config.advisor.class_args = {
+            'optimize_mode': 'maximize',
+            'min_budget': 1,
+            'max_budget': 27,
+            'eta': 3,
+            'min_points_in_model': 7,
+            'top_n_percent': 15,
+            'num_samples': 64,
+            'random_fraction': 0.33,
+            'bandwidth_factor': 3.0,
+            'min_bandwidth': 0.001
+        }

     Parameters
     ----------
     optimize_mode: str
-        optimize mode, 'maximize' or 'minimize'
+        Optimize mode, 'maximize' or 'minimize'.
     min_budget: float
-        The smallest budget to consider. Needs to be positive!
+        The smallest budget to assign to a trial job, (budget can be the number of mini-batches or epochs).
+        Needs to be positive.
     max_budget: float
-        The largest budget to consider. Needs to be larger than min_budget!
+        The largest budget to assign to a trial job. Needs to be larger than min_budget.
         The budgets will be geometrically distributed
         :math:`a^2 + b^2 = c^2 \\sim \\eta^k` for :math:`k \\in [0, 1, ... , num\\_subsets - 1]`.
     eta: int
...
@@ -271,21 +303,102 @@ class BOHB(MsgDispatcherBase):
         1/eta of them 'advances' to the next round.
         Must be greater or equal to 2.
     min_points_in_model: int
-        number of observations to start building a KDE. Default 'None' means
-        dim+1, the bare minimum.
+        Number of observations to start building a KDE. Default 'None' means dim+1;
+        when the number of completed trials in this budget is equal to or larger than ``max{dim+1, min_points_in_model}``,
+        BOHB will start to build a KDE model of this budget then use said KDE model to guide configuration selection.
+        Needs to be positive. (dim means the number of hyperparameters in search space)
     top_n_percent: int
-        percentage ( between 1 and 99, default 15) of the observations that are considered good.
+        Percentage (between 1 and 99, default 15) of the observations which are considered good.
+        Good points and bad points are used for building KDE models.
+        For example, if you have 100 observed trials and top_n_percent is 15,
+        then the top 15% of points will be used for building the good points models "l(x)".
+        The remaining 85% of points will be used for building the bad point models "g(x)".
     num_samples: int
-        number of samples to optimize EI (default 64)
+        Number of samples to optimize EI (default 64).
+        In this case, it will sample "num_samples" points and compare the result of l(x)/g(x).
+        Then it will return the one with the maximum l(x)/g(x) value as the next configuration
+        if the optimize_mode is ``maximize``. Otherwise, it returns the smallest one.
     random_fraction: float
-        fraction of purely random configurations that are sampled from the
-        prior without the model.
+        Fraction of purely random configurations that are sampled from the prior without the model.
     bandwidth_factor: float
-        to encourage diversity, the points proposed to optimize EI, are sampled
-        from a 'widened' KDE where the bandwidth is multiplied by this factor (default: 3)
+        To encourage diversity, the points proposed to optimize EI are sampled
+        from a 'widened' KDE where the bandwidth is multiplied by this factor (default: 3).
+        It is suggested to use the default value if you are not familiar with KDE.
     min_bandwidth: float
-        to keep diversity, even when all (good) samples have the same value for one of the parameters,
-        a minimum bandwidth (Default: 1e-3) is used instead of zero.
+        To keep diversity, even when all (good) samples have the same value for one of the parameters,
+        a minimum bandwidth (default: 1e-3) is used instead of zero.
+        It is suggested to use the default value if you are not familiar with KDE.
+    config_space: str
+        Directly use a .pcs file serialized by `ConfigSpace <https://automl.github.io/ConfigSpace/>` in "pcs new" format.
+        In this case, search space file (if provided in config) will be ignored.
+        Note that this path needs to be an absolute path. Relative path is currently not supported.
+
+    Notes
+    -----
+    Below is the introduction of the BOHB process separated in two parts:
+
+    **The first part HB (Hyperband).**
+
+    BOHB follows Hyperband's way of choosing the budgets and continue to use SuccessiveHalving.
+    For more details, you can refer to the :class:`nni.algorithms.hpo.hyperband_advisor.Hyperband`
+    and the `reference paper for Hyperband <https://arxiv.org/abs/1603.06560>`__.
+    This procedure is summarized by the pseudocode below.
+
+    .. image:: ../../img/bohb_1.png
+        :scale: 80 %
+        :align: center
+
+    **The second part BO (Bayesian Optimization)**
+
+    The BO part of BOHB closely resembles TPE with one major difference:
+    It opted for a single multidimensional KDE compared to the hierarchy of one-dimensional KDEs used in TPE
+    in order to better handle interaction effects in the input space.
+    Tree Parzen Estimator (TPE): uses a KDE (kernel density estimator) to model the densities.
+
+    .. image:: ../../img/bohb_2.png
+        :scale: 80 %
+        :align: center
+
+    To fit useful KDEs, we require a minimum number of data points Nmin;
+    this is set to d + 1 for our experiments, where d is the number of hyperparameters.
+    To build a model as early as possible, we do not wait until Nb = \|Db\|,
+    where the number of observations for budget b is large enough to satisfy q · Nb ≥ Nmin.
+    Instead, after initializing with Nmin + 2 random configurations, we choose the
+    best and worst configurations, respectively, to model the two densities.
+    Note that it also samples a constant fraction named **random fraction** of the configurations uniformly at random.
+
+    .. image:: ../../img/bohb_3.png
+        :scale: 80 %
+        :align: center
+
+    .. image:: ../../img/bohb_6.jpg
+        :scale: 65 %
+        :align: center
+
+    **The above image shows the workflow of BOHB.**
+
+    Here set max_budget = 9, min_budget = 1, eta = 3, others as default.
+    In this case, s_max = 2, so we will continuously run the {s=2, s=1, s=0, s=2, s=1, s=0, ...} cycle.
+    In each stage of SuccessiveHalving (the orange box), it will pick the top 1/eta configurations and run them again with more budget,
+    repeating the SuccessiveHalving stage until the end of this iteration.
+    At the same time, it collects the configurations, budgets and final metrics of each trial
+    and use these to build a multidimensional KDE model with the key "budget".
+    Multidimensional KDE is used to guide the selection of configurations for the next iteration.
+    The sampling procedure (using Multidimensional KDE to guide selection) is summarized by the pseudocode below.
+
+    .. image:: ../../img/bohb_4.png
+        :scale: 80 %
+        :align: center
+
+    **Here is a simple experiment which tunes MNIST with BOHB.**
+
+    Code implementation: :githublink:`examples/trials/mnist-advisor <examples/trials/mnist-advisor>`
+
+    The following is the experimental final results:
+
+    .. image:: ../../img/bohb_5.png
+        :scale: 80 %
+        :align: center
+
+    More experimental results can be found in the `reference paper <https://arxiv.org/abs/1807.01774>`__.
+    It shows that BOHB makes good use of previous results and has a balanced trade-off in exploration and exploitation.
     """

     def __init__(self,
...
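The parameter descriptions above (top_n_percent, num_samples) refer to the l(x)/g(x) selection without showing it. Below is a minimal, self-contained sketch of that idea using a one-dimensional SciPy KDE; it is not the advisor's implementation (BOHB builds a multidimensional KDE per budget and widens its bandwidth), and the toy objective, sizes, and seeds are invented.

    # Sketch: split observations into "good" and "bad" by top_n_percent, fit a KDE
    # to each, then pick the sampled candidate with the largest l(x)/g(x) ratio.
    import numpy as np
    from scipy.stats import gaussian_kde

    rng = np.random.default_rng(0)
    observed_x = rng.uniform(0, 1, 100)             # 100 observed configurations (1 hyperparameter)
    observed_y = (observed_x - 0.3) ** 2            # toy objective, lower is better here

    top_n_percent = 15
    order = np.argsort(observed_y)
    n_good = len(observed_x) * top_n_percent // 100
    good_kde = gaussian_kde(observed_x[order[:n_good]])    # "l(x)" in the docstring
    bad_kde = gaussian_kde(observed_x[order[n_good:]])     # "g(x)" in the docstring

    num_samples = 64
    candidates = good_kde.resample(num_samples, seed=1)[0]  # sample candidates from the good-points KDE
    best = candidates[np.argmax(good_kde(candidates) / bad_kde(candidates))]
    print(round(float(best), 3))                    # candidate maximizing l(x)/g(x)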
nni/algorithms/hpo/curvefitting_assessor/curvefitting_assessor.py  (view file @ a911b856)

...
@@ -22,18 +22,52 @@ class CurvefittingClassArgsValidator(ClassArgsValidator):
         }).validate(kwargs)

 class CurvefittingAssessor(Assessor):
-    """CurvefittingAssessor uses learning curve fitting algorithm to predict the learning curve performance in the future.
+    """
+    CurvefittingAssessor uses learning curve fitting algorithm to predict the learning curve performance in the future.
+
+    The intermediate result **must** be accuracy. Curve fitting does not support minimizing loss.
+
+    Curve fitting assessor is an LPA (learning, predicting, assessing) algorithm.
+    It stops a pending trial X at step S if the trial's forecast result at target step is convergence and lower than the
+    best performance in the history.
+
+    Paper: `Speeding up Automatic Hyperparameter Optimization of Deep Neural Networks by Extrapolation of Learning Curves
+    <https://ml.informatik.uni-freiburg.de/wp-content/uploads/papers/15-IJCAI-Extrapolation_of_Learning_Curves.pdf>`__
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.assessor.name = 'Curvefitting'
+        config.tuner.class_args = {
+            'epoch_num': 20,
+            'start_step': 6,
+            'threshold': 9,
+            'gap': 1,
+        }

     Parameters
     ----------
     epoch_num : int
-        The total number of epoch
+        The total number of epochs.
+        We need to know the number of epochs to determine which points we need to predict.
     start_step : int
-        only after receiving start_step number of reported intermediate results
+        A trial is determined to be stopped or not only after receiving start_step number of intermediate results.
     threshold : float
-        The threshold that we decide to early stop the worse performance curve.
+        The threshold that we use to decide to early stop the worst performance curve.
+        For example: if threshold = 0.95, and the best performance in the history is 0.9,
+        then we will stop the trial who's predicted value is lower than 0.95 * 0.9 = 0.855.
     gap : int
         The gap interval between assessor judgements.
+        For example: if gap = 2, start_step = 6,
+        then we will assess the result when we get 6, 8, 10, 12, ... intermediate results.
     """

     def __init__(self, epoch_num=20, start_step=6, threshold=0.95, gap=1):
...
@@ -56,15 +90,6 @@ class CurvefittingAssessor(Assessor):
         logger.info('Successfully initials the curvefitting assessor')

     def trial_end(self, trial_job_id, success):
-        """update the best performance of completed trial job
-
-        Parameters
-        ----------
-        trial_job_id : int
-            trial job id
-        success : bool
-            True if succssfully finish the experiment, False otherwise
-        """
         if success:
             if self.set_best_performance:
                 self.completed_best_performance = max(self.completed_best_performance, self.trial_history[-1])
...
@@ -76,25 +101,6 @@ class CurvefittingAssessor(Assessor):
             logger.info('No need to update, trial job id: %s', trial_job_id)

     def assess_trial(self, trial_job_id, trial_history):
-        """assess whether a trial should be early stop by curve fitting algorithm
-
-        Parameters
-        ----------
-        trial_job_id : int
-            trial job id
-        trial_history : list
-            The history performance matrix of each trial
-
-        Returns
-        -------
-        bool
-            AssessResult.Good or AssessResult.Bad
-
-        Raises
-        ------
-        Exception
-            unrecognize exception in curvefitting_assessor
-        """
         scalar_trial_history = extract_scalar_history(trial_history)
         self.trial_history = scalar_trial_history
         if not self.set_best_performance:
...
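The threshold description above already contains the arithmetic; the restatement below (not the assessor's code) just makes the rule executable.

    # Stop a trial whose predicted final accuracy falls below threshold * best-so-far.
    def should_early_stop(predicted_final_accuracy, best_history_accuracy, threshold=0.95):
        return predicted_final_accuracy < threshold * best_history_accuracy

    print(should_early_stop(0.80, 0.90))   # True: 0.80 < 0.95 * 0.90 = 0.855
    print(should_early_stop(0.86, 0.90))   # False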
nni/algorithms/hpo/dngo_tuner.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

 import logging
 import warnings
...
@@ -44,7 +47,20 @@ def _random_config(search_space, random_state):
 class DNGOTuner(Tuner):
+    """
+    Use neural networks as an alternative to GPs to model distributions over functions in bayesian optimization.
+
+    Parameters
+    ----------
+    optimize : maximize | minimize, default = maximize
+        If 'maximize', the tuner will target to maximize metrics. If 'minimize', the tuner will target to minimize metrics.
+    sample_size : int, default = 1000
+        Number of samples to select in each iteration. The best one will be picked from the samples as the next trial.
+    trials_per_update : int, default = 20
+        Number of trials to collect before updating the model.
+    num_epochs_per_training : int, default = 500
+        Number of epochs to train DNGO model.
+    """

     def __init__(self, optimize_mode='maximize', sample_size=1000, trials_per_update=20, num_epochs_per_training=500):
         self.searchspace_json = None
         self.random_state = None
...
nni/algorithms/hpo/evolution_tuner.py  (view file @ a911b856)

...
@@ -4,6 +4,7 @@
 """
 evolution_tuner.py
 """
+from __future__ import annotations

 import copy
 import random
...
@@ -22,28 +23,19 @@ logger = logging.getLogger(__name__)
 class Individual:
     """
-    Indicidual class to store the indv info.
+    Individual class to store the indv info.

-    Attributes
+    Parameters
     ----------
-    config : str
+    config : str, default = None
         Search space.
-    info : str
+    info : str, default = None
         The str to save information of individual.
-    result : float
+    result : float, None = None
         The final metric of a individual.
     """

     def __init__(self, config=None, info=None, result=None):
-        """
-        Parameters
-        ----------
-        config : str
-            A config to represent a group of parameters.
-        info : str
-        result : float
-        save_dir : str
-        """
         self.config = config
         self.result = result
         self.info = info
...
@@ -61,18 +53,36 @@ class EvolutionClassArgsValidator(ClassArgsValidator):
 class EvolutionTuner(Tuner):
     """
-    EvolutionTuner is tuner using navie evolution algorithm.
+    Naive Evolution comes from `Large-Scale Evolution of Image Classifiers <https://arxiv.org/pdf/1703.01041.pdf>`__
+    It randomly initializes a population based on the search space.
+    For each generation, it chooses better ones and does some mutation.
+    (e.g., changes a hyperparameter, adds/removes one layer, etc.) on them to get the next generation.
+    Naive Evolution requires many trials to works but it's very simple and it's easily expanded with new features.
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.tuner.name = 'Evolution'
+        config.tuner.class_args = {
+            'optimize_mode': 'maximize',
+            'population_size': 100
+        }
+
+    Parameters
+    ----------
+    optimize_mode: str
+        Optimize mode, 'maximize' or 'minimize'.
+        If 'maximize', the tuner will try to maximize metrics. If 'minimize', the tuner will try to minimize metrics.
+    population_size: int
+        The initial size of the population (trial num) in the evolution tuner(default=32).
+        The larger population size, the better evolution performance.
+        It's suggested that ``population_size`` be much larger than ``concurrency`` so users can get the most out of the algorithm.
+        And at least ``concurrency``, or the tuner will fail on its first generation of parameters.
     """

-    def __init__(self, optimize_mode="maximize", population_size=32):
-        """
-        Parameters
-        ----------
-        optimize_mode : str, default 'maximize'
-        population_size : int
-            initial population size. The larger population size,
-            the better evolution performance.
-        """
+    def __init__(self, optimize_mode='maximize', population_size=32):
         self.optimize_mode = OptimizeMode(optimize_mode)
         self.population_size = population_size
...
@@ -89,11 +99,11 @@ class EvolutionTuner(Tuner):
     def update_search_space(self, search_space):
         """
         Update search space.

         Search_space contains the information that user pre-defined.

         Parameters
         ----------
         search_space : dict
         """
         self.searchspace_json = search_space
...
@@ -109,8 +119,10 @@ class EvolutionTuner(Tuner):
         """
         To deal with trial failure. If a trial fails,
         random generate the parameters and add into the population.

         Parameters
         ----------
         parameter_id : int
+            Unique identifier for hyper-parameters used by this trial.
         success : bool
...
@@ -136,12 +148,15 @@ class EvolutionTuner(Tuner):
     def generate_multiple_parameters(self, parameter_id_list, **kwargs):
         """
         Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.

         Parameters
         ----------
         parameter_id_list : list of int
+            Unique identifiers for each set of requested hyper-parameters.
+        **kwargs
+            Not used

         Returns
         -------
         list
...
@@ -182,12 +197,13 @@ class EvolutionTuner(Tuner):
         Parameters
         ----------
         parameter_id : int

         Returns
         -------
         dict
-            A group of candaidte parameters that evolution tuner generated.
+            A group of candidate parameters that evolution tuner generated.
         """
         pos = -1
...
@@ -234,10 +250,12 @@ class EvolutionTuner(Tuner):
         Parameters
         ----------
         parameter_id : int

         Returns
         -------
         dict
             One newly generated configuration.
         """
...
@@ -258,6 +276,7 @@ class EvolutionTuner(Tuner):
         Parameters
         ----------
         parameter_id : int
         parameters : dict
         value : dict/float
...
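The docstring above summarizes a generation as "chooses better ones and does some mutation (e.g., changes a hyperparameter)". A toy mutation step (not the tuner's code) under an invented two-parameter search space:

    # Copy a parent configuration and re-sample one hyperparameter at random.
    import copy
    import random

    search_space = {
        'learning_rate': [0.1, 0.01, 0.001],
        'optimizer': ['SGD', 'Adam'],
    }

    def mutate(parent_config):
        child = copy.deepcopy(parent_config)
        key = random.choice(list(search_space))
        child[key] = random.choice(search_space[key])
        return child

    parent = {'learning_rate': 0.01, 'optimizer': 'SGD'}
    print(mutate(parent))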
nni/algorithms/hpo/gp_tuner/__init__.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

 from .gp_tuner import GPTuner, GPClassArgsValidator
nni/algorithms/hpo/gp_tuner/gp_tuner.py  (view file @ a911b856)

...
@@ -41,29 +41,77 @@ class GPClassArgsValidator(ClassArgsValidator):
 class GPTuner(Tuner):
     """
-    GPTuner is a Bayesian Optimization method where Gaussian Process is used for modeling loss functions.
+    GPTuner is a Bayesian Optimization method where Gaussian Process
+    is used for modeling loss functions.
+
+    Bayesian optimization works by constructing a posterior distribution of functions
+    (a Gaussian Process) that best describes the function you want to optimize.
+    As the number of observations grows, the posterior distribution improves,
+    and the algorithm becomes more certain of which regions in parameter space
+    are worth exploring and which are not.
+
+    GPTuner is designed to minimize/maximize the number of steps required to find
+    a combination of parameters that are close to the optimal combination.
+    To do so, this method uses a proxy optimization problem (finding the maximum of
+    the acquisition function) that, albeit still a hard problem, is cheaper
+    (in the computational sense) to solve, and it's amenable to common tools.
+    Therefore, Bayesian Optimization is suggested for situations where sampling the function
+    to be optimized is very expensive.
+
+    Note that the only acceptable types in the :doc:`search space </hpo/search_space>` are
+    ``randint``, ``uniform``, ``quniform``, ``loguniform``, ``qloguniform``, and numerical ``choice``.
+
+    This optimization approach is described in Section 3 of the paper
+    `Algorithms for Hyper-Parameter Optimization <https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf>`__
+    ( :footcite:t:`bergstra2011algorithms` ).
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.tuner.name = 'GPTuner'
+        config.tuner.class_args = {
+            'optimize_mode': 'maximize',
+            'utility': 'ei',
+            'kappa': 5.0,
+            'xi': 0.0,
+            'nu': 2.5,
+            'alpha': 1e-6,
+            'cold_start_num': 10,
+            'selection_num_warm_up': 100000,
+            'selection_num_starting_points': 250
+        }

     Parameters
     ----------
     optimize_mode : str
-        optimize mode, 'maximize' or 'minimize', by default 'maximize'
+        Optimize mode, 'maximize' or 'minimize', by default 'maximize'
     utility : str
-        utility function (also called 'acquisition funcition') to use, which can be 'ei', 'ucb' or 'poi'. By default 'ei'.
+        Utility function (also called 'acquisition funcition') to use,
+        which can be 'ei', 'ucb' or 'poi'. By default 'ei'.
     kappa : float
-        value used by utility function 'ucb'. The bigger kappa is, the more the tuner will be exploratory. By default 5.
+        Value used by utility function 'ucb'. The bigger kappa is,
+        the more the tuner will be exploratory. By default 5.
     xi : float
-        used by utility function 'ei' and 'poi'. The bigger xi is, the more the tuner will be exploratory. By default 0.
+        Used by utility function 'ei' and 'poi'. The bigger xi is,
+        the more the tuner will be exploratory. By default 0.
     nu : float
-        used to specify Matern kernel. The smaller nu, the less smooth the approximated function is. By default 2.5.
+        Used to specify Matern kernel. The smaller nu,
+        the less smooth the approximated function is. By default 2.5.
     alpha : float
-        Used to specify Gaussian Process Regressor. Larger values correspond to increased noise level in the observations.
+        Used to specify Gaussian Process Regressor.
+        Larger values correspond to increased noise level in the observations.
         By default 1e-6.
     cold_start_num : int
-        Number of random exploration to perform before Gaussian Process. By default 10.
+        Number of random exploration to perform before Gaussian Process.
+        By default 10.
     selection_num_warm_up : int
-        Number of random points to evaluate for getting the point which maximizes the acquisition function. By default 100000
+        Number of random points to evaluate for getting the point which
+        maximizes the acquisition function. By default 100000
     selection_num_starting_points : int
-        Number of times to run L-BFGS-B from a random starting point after the warmup. By default 250.
+        Number of times to run L-BFGS-B from a random starting point after the warmup.
+        By default 250.
     """

     def __init__(self, optimize_mode="maximize", utility='ei', kappa=5, xi=0, nu=2.5, alpha=1e-6, cold_start_num=10,
...
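The kappa/nu/alpha descriptions above are easier to see against a concrete acquisition computation. The sketch below is not GPTuner's implementation; it fits a scikit-learn Gaussian Process with a Matern kernel and scores candidates with the 'ucb' utility, mean + kappa * std, on invented training points.

    # Larger kappa weights the posterior uncertainty more, i.e. more exploration.
    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import Matern

    x_train = np.array([[0.1], [0.4], [0.9]])
    y_train = np.array([0.3, 0.8, 0.5])

    gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6)
    gp.fit(x_train, y_train)

    candidates = np.linspace(0, 1, 101).reshape(-1, 1)
    mean, std = gp.predict(candidates, return_std=True)
    for kappa in (1.0, 5.0):
        idx = int(np.argmax(mean + kappa * std))
        print(kappa, float(candidates[idx, 0]))   # higher kappa tends toward less-explored points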
nni/algorithms/hpo/gridsearch_tuner.py  (view file @ a911b856)

...
@@ -2,14 +2,10 @@
 # Licensed under the MIT license.

 """
-Grid search tuner for hyper-parameter optimization.
-
-For categorical parameters this tuner fully explore all combinations.
-For numerical parameters it samples them at progressively decreased intervals.
-
-Use this tuner if you have abundant resource and want to find strictly optimal parameters.
-
-Grid search tuner has no argument.
+Grid search tuner.
 """

 __all__ = ['GridSearchTuner']
...
@@ -63,6 +59,35 @@ _logger = logging.getLogger('nni.tuner.gridsearch')
 ##

 class GridSearchTuner(Tuner):
+    """
+    Grid search tuner divides search space into evenly spaced grid, and performs brute-force traverse.
+
+    Recommended when the search space is small, or if you want to find strictly optimal hyperparameters.
+
+    **Implementation**
+
+    The original grid search approach performs an exhaustive search through a space consists of ``choice`` and ``randint``.
+
+    NNI's implementation extends grid search to support all search spaces types.
+
+    When the search space contains continuous parameters like ``normal`` and ``loguniform``,
+    grid search tuner works in following steps:
+
+    1. Divide the search space into a grid.
+    2. Perform an exhaustive searth through the grid.
+    3. Subdivide the grid into a finer-grained new grid.
+    4. Goto step 2, until experiment end.
+
+    As a deterministic algorithm, grid search has no argument.
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.tuner.name = 'GridSearch'
+    """
+
     def __init__(self):
         self.space = None
...
@@ -175,13 +200,18 @@ class GridSearchTuner(Tuner):
                 mid = (l + r) / 2
                 diff_l = _less(l, mid, spec)
                 diff_r = _less(mid, r, spec)
-                if diff_l and diff_r:
-                    # we can skip these for non-q, but it will complicate the code
+                # if l != 0 and r != 1, then they are already in the grid, else they are not
+                # the special case is needed because for normal distribution 0 and 1 will generate infinity
+                if (diff_l or l == 0.0) and (diff_r or r == 1.0):
+                    # we can skip these for non-q, but it will complicate the code
                     new_vals.append(mid)
                     updated = True
                 if diff_l:
                     new_divs.append((l, mid))
+                    updated = (updated or l == 0.0)
                 if diff_r:
                     new_divs.append((mid, r))
+                    updated = (updated or r == 1.0)
             self.grid[i] += new_vals
             self.divisions[i] = new_divs
...
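Steps 1-4 of the new docstring describe grid subdivision for continuous parameters. Below is a toy sketch of that refinement over a parameter normalized to [0, 1]; it is not the tuner's code, which also handles q-values and distribution endpoints as the second hunk above shows.

    # Each pass inserts the midpoints of the current intervals, giving a finer grid.
    def refine(grid):
        new_grid = []
        for left, right in zip(grid, grid[1:]):
            new_grid += [left, (left + right) / 2]
        return new_grid + [grid[-1]]

    grid = [0.0, 1.0]
    for _ in range(3):
        grid = refine(grid)
    print(grid)   # [0.0, 0.125, 0.25, ..., 0.875, 1.0]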
nni/algorithms/hpo/hyperband_advisor.py  (view file @ a911b856)

...
@@ -105,7 +105,8 @@ def json2parameter(ss_spec, random_state):
 class Bracket():
-    """A bracket in Hyperband, all the information of a bracket is managed by an instance of this class
+    """
+    A bracket in Hyperband, all the information of a bracket is managed by an instance of this class

     Parameters
     ----------
...
@@ -267,24 +268,136 @@ class HyperbandClassArgsValidator(ClassArgsValidator):
 class Hyperband(MsgDispatcherBase):
     """
-    Hyperband inherit from MsgDispatcherBase rather than Tuner, because it integrates both tuner's functions and assessor's functions.
-    This is an implementation that could fully leverage available resources or follow the algorithm process,
-    i.e., high parallelism or serial.
-    A single execution of Hyperband takes a finite budget of (s_max + 1)B.
+    `Hyperband <https://arxiv.org/pdf/1603.06560.pdf>`__ is a multi-fidelity hyperparameter tuning algorithm
+    based on successive halving.
+    The basic idea of Hyperband is to create several buckets,
+    each having ``n`` randomly generated hyperparameter configurations,
+    each configuration using ``r`` resources (e.g., epoch number, batch number).
+    After the ``n`` configurations are finished, it chooses the top ``n/eta`` configurations
+    and runs them using increased ``r*eta`` resources.
+    At last, it chooses the best configuration it has found so far.
+    Please refer to the paper :footcite:t:`li2017hyperband` for detailed algorithm.
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.advisor.name = 'Hyperband'
+        config.advisor.class_args = {
+            'optimize_mode': 'maximize',
+            'R': 60,
+            'eta': 3
+        }
+
+    Note that once you use Advisor, you are not allowed to add a Tuner and Assessor spec in the config file.
+
+    When Hyperband is used, the dict returned by :func:`nni.get_next_parameter` has one more key
+    called ``TRIAL_BUDGET`` besides the hyperparameters and their values.
+    **With this TRIAL_BUDGET, users can control in trial code how long a trial runs by following
+    the suggested trial budget from Hyperband.** ``TRIAL_BUDGET`` is a relative number,
+    users can interpret them as number of epochs, number of mini-batches, running time, etc.
+
+    Here is a concrete example of ``R=81`` and ``eta=3``:
+
+    .. list-table::
+        :header-rows: 1
+        :widths: auto
+
+        * -
+          - s=4
+          - s=3
+          - s=2
+          - s=1
+          - s=0
+        * - i
+          - n r
+          - n r
+          - n r
+          - n r
+          - n r
+        * - 0
+          - 81 1
+          - 27 3
+          - 9 9
+          - 6 27
+          - 5 81
+        * - 1
+          - 27 3
+          - 9 9
+          - 3 27
+          - 2 81
+          -
+        * - 2
+          - 9 9
+          - 3 27
+          - 1 81
+          -
+          -
+        * - 3
+          - 3 27
+          - 1 81
+          -
+          -
+          -
+        * - 4
+          - 1 81
+          -
+          -
+          -
+          -
+
+    ``s`` means bucket, ``n`` means the number of configurations that are generated,
+    the corresponding ``r`` means how many budgets these configurations run.
+    ``i`` means round, for example, bucket 4 has 5 rounds, bucket 3 has 4 rounds.
+
+    A complete example can be found :githublink:`examples/trials/mnist-advisor`.

     Parameters
     ----------
+    optimize_mode: str
+        Optimize mode, 'maximize' or 'minimize'.
     R: int
-        the maximum amount of resource that can be allocated to a single configuration
+        The maximum amount of budget that can be allocated to a single configuration.
+        Here, trial budget could mean the number of epochs, number of mini-batches, etc.,
+        depending on how users interpret it.
+        Each trial should use ``TRIAL_BUDGET`` to control how long it runs.
     eta: int
-        the variable that controls the proportion of configurations discarded in each round of SuccessiveHalving
-    optimize_mode: str
-        optimize mode, 'maximize' or 'minimize'
+        The variable that controls the proportion of configurations discarded in each round of SuccessiveHalving.
+        ``1/eta`` configurations will survive and rerun using more budgets in each round.
     exec_mode: str
-        execution mode, 'serial' or 'parallelism'
+        Execution mode, 'serial' or 'parallelism'.
+        If 'parallelism', the tuner will try to use available resources to start new bucket immediately.
+        If 'serial', the tuner will only start new bucket after the current bucket is done.
+
+    Notes
+    -----
+    First, Hyperband is an example of how to write an autoML algorithm based on MsgDispatcherBase,
+    rather than based on Tuner and Assessor. Hyperband is implemented in this way
+    because it integrates the functions of both Tuner and Assessor, thus, we call it Advisor.
+
+    Second, this implementation fully leverages Hyperband's internal parallelism.
+    Specifically, the next bucket is not started strictly after the current bucket.
+    Instead, it starts when there are available resources. If you want to use full parallelism mode,
+    set ``exec_mode`` to ``parallelism``.
+
+    Or if you want to set ``exec_mode`` with ``serial`` according to the original algorithm.
+    In this mode, the next bucket will start strictly after the current bucket.
+
+    ``parallelism`` mode may lead to multiple unfinished buckets,
+    in contrast, there is at most one unfinished bucket under ``serial`` mode.
+    The advantage of ``parallelism`` mode is to make full use of resources,
+    which may reduce the experiment duration multiple times.
     """

-    def __init__(self, R=60, eta=3, optimize_mode='maximize', exec_mode='parallelism'):
+    def __init__(self, optimize_mode='maximize', R=60, eta=3, exec_mode='parallelism'):
         """B = (s_max + 1)R"""
         super(Hyperband, self).__init__()
         self.R = R
...
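The R=81, eta=3 table above can be reproduced column by column with the successive-halving schedule it illustrates: within one bucket, keep the top 1/eta of the configurations each round and multiply their budget by eta. A small sketch follows; it is not the advisor's code, and the function name is invented.

    # (configs, budget per config) for each round i = 0..s of one bucket.
    def bucket_schedule(n, r, s, eta=3):
        return [(n // eta ** i, r * eta ** i) for i in range(s + 1)]

    # Reproduces the s=4 column of the table above:
    print(bucket_schedule(n=81, r=1, s=4))   # [(81, 1), (27, 3), (9, 9), (3, 27), (1, 81)]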
nni/algorithms/hpo/hyperopt_tuner.py  (view file @ a911b856)

...
@@ -191,23 +191,31 @@ class HyperoptClassArgsValidator(ClassArgsValidator):
 class HyperoptTuner(Tuner):
     """
-    HyperoptTuner is a tuner which using hyperopt algorithm.
+    NNI wraps `hyperopt <https://github.com/hyperopt/hyperopt>`__ to provide anneal tuner.
+
+    This simple annealing algorithm begins by sampling from the prior
+    but tends over time to sample from points closer and closer to the best ones observed.
+    This algorithm is a simple variation of random search that leverages smoothness in the response surface.
+    The annealing rate is not adaptive.
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.tuner.name = 'Anneal'
+        config.tuner.class_args = {
+            'optimize_mode': 'minimize'
+        }
+
+    Parameters
+    ----------
+    optimze_mode: 'minimize' or 'maximize'
+        Whether optimize to minimize or maximize trial result.
     """

     def __init__(self, algorithm_name, optimize_mode='minimize',
                  parallel_optimize=False, constant_liar_type='min'):
-        """
-        Parameters
-        ----------
-        algorithm_name : str
-            algorithm_name includes "tpe", "random_search" and anneal".
-        optimize_mode : str
-        parallel_optimize : bool
-            More detail could reference: docs/en_US/Tuner/HyperoptTuner.md
-        constant_liar_type : str
-            constant_liar_type including "min", "max" and "mean"
-            More detail could reference: docs/en_US/Tuner/HyperoptTuner.md
-        """
         self.algorithm_name = algorithm_name
         self.optimize_mode = OptimizeMode(optimize_mode)
         self.json = None
...
@@ -238,15 +246,6 @@ class HyperoptTuner(Tuner):
             raise RuntimeError('Not support tuner algorithm in hyperopt.')

     def update_search_space(self, search_space):
-        """
-        Update search space definition in tuner by search_space in parameters.
-
-        Will called when first setup experiemnt or update search space in WebUI.
-
-        Parameters
-        ----------
-        search_space : dict
-        """
         validate_search_space(search_space)
         self.json = search_space
...
@@ -266,22 +265,11 @@ class HyperoptTuner(Tuner):
         self.rval.catch_eval_exceptions = False

     def generate_parameters(self, parameter_id, **kwargs):
-        """
-        Returns a set of trial (hyper-)parameters, as a serializable object.
-
-        Parameters
-        ----------
-        parameter_id : int
-
-        Returns
-        -------
-        params : dict
-        """
-        total_params = self.get_suggestion(random_search=False)
+        total_params = self._get_suggestion(random_search=False)
         # avoid generating same parameter with concurrent trials because hyperopt doesn't support parallel mode
         if total_params in self.total_data.values():
             # but it can cause duplicate parameter rarely
-            total_params = self.get_suggestion(random_search=True)
+            total_params = self._get_suggestion(random_search=True)
         self.total_data[parameter_id] = total_params

         if self.parallel:
...
@@ -291,17 +279,6 @@ class HyperoptTuner(Tuner):
         return params

     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
-        """
-        Record an observation of the objective function
-
-        Parameters
-        ----------
-        parameter_id : int
-        parameters : dict
-        value : dict/float
-            if value is dict, it should have "default" key.
-            value is final metrics of the trial.
-        """
         reward = extract_scalar_reward(value)
         # restore the paramsters contains '_index'
         if parameter_id not in self.total_data:
...
@@ -369,7 +346,7 @@ class HyperoptTuner(Tuner):
             idxs[key] = [new_id]
             vals[key] = [vals[key]]

-        self.miscs_update_idxs_vals(rval_miscs,
+        self._miscs_update_idxs_vals(rval_miscs,
                                     idxs,
                                     vals,
                                     idxs_map={new_id: new_id},
...
@@ -382,7 +359,7 @@ class HyperoptTuner(Tuner):
         trials.insert_trial_docs([trial])
         trials.refresh()

-    def miscs_update_idxs_vals(self,
+    def _miscs_update_idxs_vals(self,
                                miscs,
                                idxs,
                                vals,
...
@@ -416,7 +393,7 @@ class HyperoptTuner(Tuner):
             misc_by_id[tid]['idxs'][key] = [tid]
             misc_by_id[tid]['vals'][key] = [val]

-    def get_suggestion(self, random_search=False):
+    def _get_suggestion(self, random_search=False):
         """
         get suggestion from hyperopt
...
@@ -469,14 +446,6 @@ class HyperoptTuner(Tuner):
         return total_params

     def import_data(self, data):
-        """
-        Import additional data for tuning
-
-        Parameters
-        ----------
-        data:
-            a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
-        """
         _completed_num = 0
         for trial_info in data:
             logger.info("Importing data, current processing progress %s / %s", _completed_num, len(data))
...
nni/algorithms/hpo/medianstop_assessor.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

+from __future__ import annotations
+
 import logging
 from schema import Schema, Optional

 from nni import ClassArgsValidator
 from nni.assessor import Assessor, AssessResult
+from nni.typehint import Literal
 from nni.utils import extract_scalar_history

 logger = logging.getLogger('medianstop_Assessor')
...
@@ -18,18 +21,35 @@ class MedianstopClassArgsValidator(ClassArgsValidator):
         }).validate(kwargs)

 class MedianstopAssessor(Assessor):
-    """MedianstopAssessor is The median stopping rule stops a pending trial X at step S
+    """
+    The median stopping rule stops a pending trial X at step S
     if the trial's best objective value by step S is strictly worse than the median value
     of the running averages of all completed trials' objectives reported up to step S

+    Paper: `Google Vizer: A Service for Black-Box Optimization
+    <https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46180.pdf>`__
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.assessor.name = 'Medianstop'
+        config.tuner.class_args = {
+            'optimize_mode': 'maximize',
+            'start_step': 5
+        }
+
     Parameters
     ----------
-    optimize_mode : str
-        optimize mode, 'maximize' or 'minimize'
-    start_step : int
-        only after receiving start_step number of reported intermediate results
+    optimize_mode
+        Whether optimize to minimize or maximize trial result.
+    start_step
+        A trial is determined to be stopped or not
+        only after receiving start_step number of reported intermediate results.
     """

-    def __init__(self, optimize_mode='maximize', start_step=0):
+    def __init__(self, optimize_mode: Literal['minimize', 'maximize'] = 'maximize', start_step: int = 0):
         self._start_step = start_step
         self._running_history = dict()
         self._completed_avg_history = dict()
...
@@ -56,15 +76,6 @@ class MedianstopAssessor(Assessor):
             self._running_history[trial_job_id].extend(trial_history[len(self._running_history[trial_job_id]):])

     def trial_end(self, trial_job_id, success):
-        """trial_end
-
-        Parameters
-        ----------
-        trial_job_id : int
-            trial job id
-        success : bool
-            True if succssfully finish the experiment, False otherwise
-        """
         if trial_job_id in self._running_history:
             if success:
                 cnt = 0
...
@@ -79,25 +90,6 @@ class MedianstopAssessor(Assessor):
             logger.warning('trial_end: trial_job_id does not exist in running_history')

     def assess_trial(self, trial_job_id, trial_history):
-        """assess_trial
-
-        Parameters
-        ----------
-        trial_job_id : int
-            trial job id
-        trial_history : list
-            The history performance matrix of each trial
-
-        Returns
-        -------
-        bool
-            AssessResult.Good or AssessResult.Bad
-
-        Raises
-        ------
-        Exception
-            unrecognize exception in medianstop_assessor
-        """
         curr_step = len(trial_history)
         if curr_step < self._start_step:
             return AssessResult.Good
...
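The rule quoted in the new docstring can be restated directly. A self-contained sketch (not the assessor's code), assuming higher intermediate results are better:

    # Stop if the trial's best value so far is worse than the median of the
    # completed trials' running averages at the same step.
    from statistics import median

    def median_stop(trial_history, completed_histories):
        step = len(trial_history)
        best_so_far = max(trial_history)
        running_averages = [
            sum(h[:step]) / step for h in completed_histories if len(h) >= step
        ]
        return bool(running_averages) and best_so_far < median(running_averages)

    completed = [[0.5, 0.6, 0.7], [0.4, 0.5, 0.6], [0.6, 0.7, 0.8]]
    print(median_stop([0.2, 0.3], completed))   # True: 0.3 < 0.55 (median of step-2 averages)
    print(median_stop([0.6, 0.7], completed))   # False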
nni/algorithms/hpo/metis_tuner/__init__.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

 from .metis_tuner import MetisTuner, MetisClassArgsValidator
nni/algorithms/hpo/metis_tuner/metis_tuner.py  (view file @ a911b856)

...
@@ -46,39 +46,74 @@ class MetisClassArgsValidator(ClassArgsValidator):
 class MetisTuner(Tuner):
     """
-    Metis Tuner
-
-    More algorithm information you could reference here:
-    https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/
-
-    Attributes
-    ----------
-    optimize_mode : str
-        optimize_mode is a string that including two mode "maximize" and "minimize"
-    no_resampling : bool
-        True or False.
-        Should Metis consider re-sampling as part of the search strategy?
-        If you are confident that the training dataset is noise-free,
-        then you do not need re-sampling.
-    no_candidates : bool
-        True or False.
-        Should Metis suggest parameters for the next benchmark?
-        If you do not plan to do more benchmarks,
-        Metis can skip this step.
-    selection_num_starting_points : int
-        How many times Metis should try to find the global optimal in the search space?
-        The higher the number, the longer it takes to output the solution.
-    cold_start_num : int
-        Metis need some trial result to get cold start.
-        when the number of trial result is less than
-        cold_start_num, Metis will randomly sample hyper-parameter for trial.
-    exploration_probability: float
-        The probability of Metis to select parameter from exploration instead of exploitation.
+    `Metis tuner <https://www.microsoft.com/en-us/research/publication/metis-robustly-tuning-tail-latencies-cloud-systems/>`__ offers
+    several benefits over other tuning algorithms.
+    While most tools only predict the optimal configuration, Metis gives you two outputs,
+    a prediction for the optimal configuration and a suggestion for the next trial.
+    No more guess work!
+
+    While most tools assume training datasets do not have noisy data,
+    Metis actually tells you if you need to resample a particular hyper-parameter.
+
+    While most tools have problems of being exploitation-heavy,
+    Metis' search strategy balances exploration, exploitation, and (optional) resampling.
+
+    Metis belongs to the class of sequential model-based optimization (SMBO) algorithms
+    and it is based on the Bayesian Optimization framework. To model the parameter-vs-performance space,
+    Metis uses both a Gaussian Process and GMM. Since each trial can impose a high time cost,
+    Metis heavily trades inference computations with naive trials.
+    At each iteration, Metis does two tasks (refer to :footcite:t:`li2018metis` for details):
+
+    1. It finds the global optimal point in the Gaussian Process space.
+       This point represents the optimal configuration.
+
+    2. It identifies the next hyper-parameter candidate.
+       This is achieved by inferring the potential information gain of
+       exploration, exploitation, and resampling.
+
+    Note that the only acceptable types in the :doc:`search space </hpo/search_space>` are
+    ``quniform``, ``uniform``, ``randint``, and numerical ``choice``.
+
+    Examples
+    --------
+
+    .. code-block::
+
+        config.tuner.name = 'MetisTuner'
+        config.tuner.class_args = {
+            'optimize_mode': 'maximize'
+        }
+
+    Parameters
+    ----------
+    optimize_mode : str
+        optimize_mode is a string that including two mode "maximize" and "minimize"
+    no_resampling : bool
+        True or False.
+        Should Metis consider re-sampling as part of the search strategy?
+        If you are confident that the training dataset is noise-free,
+        then you do not need re-sampling.
+    no_candidates : bool
+        True or False.
+        Should Metis suggest parameters for the next benchmark?
+        If you do not plan to do more benchmarks,
+        Metis can skip this step.
+    selection_num_starting_points : int
+        How many times Metis should try to find the global optimal in the search space?
+        The higher the number, the longer it takes to output the solution.
+    cold_start_num : int
+        Metis need some trial result to get cold start.
+        when the number of trial result is less than
+        cold_start_num, Metis will randomly sample hyper-parameter for trial.
+    exploration_probability: float
+        The probability of Metis to select parameter from exploration instead of exploitation.
     """

     def __init__(
...
@@ -89,43 +124,6 @@ class MetisTuner(Tuner):
             selection_num_starting_points=600,
             cold_start_num=10,
             exploration_probability=0.9):
-        """
-        Parameters
-        ----------
-        optimize_mode : str
-            optimize_mode is a string that including two mode "maximize" and "minimize"
-        no_resampling : bool
-            True or False.
-            Should Metis consider re-sampling as part of the search strategy?
-            If you are confident that the training dataset is noise-free,
-            then you do not need re-sampling.
-        no_candidates : bool
-            True or False.
-            Should Metis suggest parameters for the next benchmark?
-            If you do not plan to do more benchmarks,
-            Metis can skip this step.
-        selection_num_starting_points : int
-            How many times Metis should try to find the global optimal in the search space?
-            The higher the number, the longer it takes to output the solution.
-        cold_start_num : int
-            Metis need some trial result to get cold start.
-            when the number of trial result is less than
-            cold_start_num, Metis will randomly sample hyper-parameter for trial.
-        exploration_probability : float
-            The probability of Metis to select parameter from exploration instead of exploitation.
-        x_bounds : list
-            The constration of parameters.
-        x_types : list
-            The type of parameters.
-        """
         self.samples_x = []
         self.samples_y = []
         self.samples_y_aggregation = []
...
@@ -141,7 +139,9 @@ class MetisTuner(Tuner):
         self.minimize_constraints_fun = None
         self.minimize_starting_points = None
         self.supplement_data_num = 0

+        # The constration of parameters
         self.x_bounds = []
+        # The type of parameters
         self.x_types = []
...
nni/algorithms/hpo/networkmorphism_tuner/__init__.py  (view file @ a911b856)

 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

 from .networkmorphism_tuner import NetworkMorphismTuner, NetworkMorphismClassArgsValidator