Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
cb090e8c
Unverified
Commit
cb090e8c
authored
Dec 18, 2021
by
liuzhe-lz
Committed by
GitHub
Dec 18, 2021
Browse files
Grid search tuner (#4324)
parent
e0422c5e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
241 additions
and
198 deletions
+241
-198
nni/algorithms/hpo/gridsearch_tuner.py
nni/algorithms/hpo/gridsearch_tuner.py
+232
-190
test/ut/sdk/test_builtin_tuners.py
test/ut/sdk/test_builtin_tuners.py
+9
-8
No files found.
nni/algorithms/hpo/gridsearch_tuner.py
View file @
cb090e8c
...
@@ -2,209 +2,251 @@
...
@@ -2,209 +2,251 @@
# Licensed under the MIT license.
# Licensed under the MIT license.
"""
"""
gridsearch_tuner.py including:
Grid search tuner for hyper-parameter optimization.
class GridSearchTuner
For categorical parameters this tuner fully explores all combinations.
For numerical parameters it samples them at progressively decreasing intervals.
Use this tuner if you have abundant resources and want to find strictly optimal parameters.
Grid search tuner has no argument.
"""
"""
import
copy
__all__
=
[
'GridSearchTuner'
]
import
logging
import
logging
import
math
import
numpy
as
np
import
numpy
as
np
from
scipy.special
import
erfinv
# pylint: disable=no-name-in-module
import
nni
import
nni
from
nni.common.hpo_utils
import
validate
_search_space
from
nni.common.hpo_utils
import
ParameterSpec
,
deformat_parameters
,
format
_search_space
from
nni.tuner
import
Tuner
from
nni.tuner
import
Tuner
from
nni.utils
import
convert_dict2tuple
TYPE
=
'_type'
_logger
=
logging
.
getLogger
(
'nni.tuner.gridsearch'
)
CHOICE
=
'choice'
VALUE
=
'_value'
##
# Grid search is a simple algorithm if only categorical parameters are considered.
logger
=
logging
.
getLogger
(
'grid_search_AutoML'
)
# But to support continuous space, things get tricky.
#
# To support continuous space, we divide search process into "epochs".
# The first epoch only explores middle point of uniform and normal parameters.
# When first epoch is fully explored, the algorithm starts second epoch,
# where it divides non-categorical spaces by adding quartile points into the grid.
# Then in third epoch it adds [1/8, 3/8, 5/8, 7/8], and so on.
#
# We divide normal distributed spaces using inverse function of CDF.
# For example the 1/4 point of a normal distribution is defined as X where `normal_cdf(X) = 1/4`.
#
# Here is an example:
#
# search space:
# x: choices(5, 7)
# y: normal(0, 1)
# z: quniform(2, 3, 1)
#
# grid of first epoch:
# x: [5, 7]
# y: [1/2]
# z: [1/2] (results in [2], because round(2.5) == 2)
# generated parameters:
# (5,0,2) (7,0,2)
#
# grid of second epoch:
# x: [5, 7]
# y: [1/2, 1/4, 3/4] (results in [0, -0.67, 0.67])
# z: [1/2, 3/4] (results in [2, 3], 1/4 is eliminated due to duplication)
# generated parameters:
# (5,0,3) (5,-0.67,2) (5,-0.67,3) (5,0.67,2) (5,0.67,3)
# (7,0,3) (7,-0.67,2) (7,-0.67,3) (7,0.67,2) (7,0.67,3)
##
class
GridSearchTuner
(
Tuner
):
class
GridSearchTuner
(
Tuner
):
"""
def
__init__
(
self
):
GridSearchTuner will search all the possible configurations that the user defines in the search space.
self
.
space
=
None
The only acceptable types of search space are ``choice``, ``quniform`` and ``randint``.
# the grid to search in this epoch
Type ``choice`` will select one of the options. Note that it can also be nested.
# when the space is fully explored, grid is set to None
self
.
grid
=
None
# list[int | float]
# a parameter set is internally expressed as a vector
# for each dimension i, self.vector[i] is the parameter's index in self.grid[i]
# in second epoch of above example, vector [1, 2, 0] means parameters {x: 7, y: 0.67, z: 2}
self
.
vector
=
None
# list[int]
# this tells which parameters are derived from previous epoch
# in second epoch of above example, epoch_bar is [2, 1, 1]
self
.
epoch_bar
=
None
# list[int]
# this stores which intervals are possibly divisible (low < high after log and q)
# in first epoch of above example, divisions are:
# {1: [(0,1/2), (1/2,1)], 2: [(1/2,1)]}
# in second epoch:
# {1: [(0,1/4), (1/4,1/2), (1/2,3/4), (3/4,1)], 2: [(1/2,3/4)]}
# and in third epoch:
# {1: [(0,1/8), ..., (7/8,1)], 2: []}
self
.
divisions
=
{}
# dict[int, list[tuple[float, float]]]
# dumped JSON string of all tried parameters
self
.
history
=
set
()
def update_search_space(self, space):
    """
    Store the formatted search space and (re)build the initial grid.

    The raw ``space`` is passed through ``format_search_space`` and the result
    is kept in ``self.space`` before the grid is initialized.

    Raises
    ------
    ValueError
        If the formatted search space is empty.
    """
    formatted = format_search_space(space)
    self.space = formatted
    if not formatted:
        # the tuner will crash in this case, report it explicitly
        raise ValueError('Search space is empty')
    self._init_grid()
def generate_parameters(self, *args, **kwargs):
    """
    Return the next parameter set that has not been tried yet.

    Candidates come from ``self._suggest()``; each one is de-formatted against
    ``self.space`` and serialized with sorted keys. A candidate whose serialized
    form is already in ``self.history`` is skipped, otherwise it is recorded
    there and returned.

    Raises
    ------
    nni.NoMoreTrialError
        When ``self._suggest()`` returns ``None``.
    """
    candidate = self._suggest()
    while candidate is not None:
        candidate = deformat_parameters(candidate, self.space)
        serialized = nni.dump(candidate, sort_keys=True)
        if serialized not in self.history:
            self.history.add(serialized)
            return candidate
        # duplicate of an earlier suggestion, ask for the next one
        candidate = self._suggest()
    raise nni.NoMoreTrialError('Search space fully explored')
def receive_trial_result(self, *args, **kwargs):
    """Accept a trial result and ignore it; this method does nothing."""
    return None
Type ``quniform`` will receive three values [``low``, ``high``, ``q``],
def
import_data
(
self
,
data
):
where [``low``, ``high``] specifies a range and ``q`` specifies the interval.
# TODO
It will be sampled in a way that the first sampled value is ``low``,
# use tuple to dedup in case of order/precision issue causes matching failed
and each of the following values is 'interval' larger than the value in front of it.
# and remove `epoch_bar` to use uniform dedup mechanism
for
trial
in
data
:
params_str
=
nni
.
dump
(
trial
[
'parameter'
],
sort_keys
=
True
)
self
.
history
.
add
(
params_str
)
def
_suggest
(
self
):
# returns next parameter set, or None if the space is already fully explored
while
True
:
if
self
.
grid
is
None
:
# search space fully explored
return
None
self
.
_next_vector
()
if
self
.
vector
is
None
:
# epoch end, update grid and retry
self
.
_next_grid
()
continue
Type ``randint`` gives all possible integers in range [``low``, ``high``). Note that ``high`` is not included.
old
=
all
((
self
.
vector
[
i
]
<
self
.
epoch_bar
[
i
])
for
i
in
range
(
len
(
self
.
space
)))
"""
if
old
:
# already explored in past epochs
continue
def
__init__
(
self
):
# this vector is valid, stop
self
.
count
=
-
1
_logger
.
debug
(
f
'vector:
{
self
.
vector
}
'
)
self
.
expanded_search_space
=
[]
return
self
.
_current_parameters
()
self
.
supplement_data
=
dict
()
def
_next_vector
(
self
):
def
_json2parameter
(
self
,
ss_spec
):
# iterate to next vector of this epoch, set vector to None if epoch end
"""
if
self
.
vector
is
None
:
# first vector in this epoch
Generate all possible configs for hyperparameters from hyperparameter space.
self
.
vector
=
[
0
]
*
len
(
self
.
space
)
return
Parameters
----------
# deal with nested choice, don't touch nested spaces that are not chosen by current vector
ss_spec : dict or list
activated_dims
=
[]
Hyperparameter space or the ``_value`` of a hyperparameter
params
=
self
.
_current_parameters
()
for
i
,
spec
in
enumerate
(
self
.
space
.
values
()):
Returns
if
spec
.
is_activated_in
(
params
):
-------
activated_dims
.
append
(
i
)
list or dict
All the candidate choices of hyperparameters. For a single hyperparameter, chosen_params
for
i
in
reversed
(
activated_dims
):
is a list. For multiple hyperparameters (e.g., a search space), chosen_params is a dict.
if
self
.
vector
[
i
]
+
1
<
len
(
self
.
grid
[
i
]):
"""
self
.
vector
[
i
]
+=
1
if
isinstance
(
ss_spec
,
dict
):
return
if
'_type'
in
ss_spec
.
keys
():
_type
=
ss_spec
[
'_type'
]
_value
=
ss_spec
[
'_value'
]
chosen_params
=
list
()
if
_type
==
'choice'
:
for
value
in
_value
:
choice
=
self
.
_json2parameter
(
value
)
if
isinstance
(
choice
,
list
):
chosen_params
.
extend
(
choice
)
else
:
chosen_params
.
append
(
choice
)
elif
_type
==
'quniform'
:
chosen_params
=
self
.
_parse_quniform
(
_value
)
elif
_type
==
'randint'
:
chosen_params
=
self
.
_parse_randint
(
_value
)
else
:
else
:
raise
RuntimeError
(
"Not supported type: %s"
%
_type
)
self
.
vector
[
i
]
=
0
self
.
vector
=
None
# the loop ends without returning, no more vector in this epoch
def
_next_grid
(
self
):
# update grid information (grid, epoch_bar, divisions) for next epoch
updated
=
False
for
i
,
spec
in
enumerate
(
self
.
space
.
values
()):
self
.
epoch_bar
[
i
]
=
len
(
self
.
grid
[
i
])
if
not
spec
.
categorical
:
# further divide intervals
new_vals
=
[]
# values to append to grid
new_divs
=
[]
# sub-intervals
for
l
,
r
in
self
.
divisions
[
i
]:
mid
=
(
l
+
r
)
/
2
diff_l
=
_less
(
l
,
mid
,
spec
)
diff_r
=
_less
(
mid
,
r
,
spec
)
if
diff_l
and
diff_r
:
# we can skip these for non-q, but it will complicate the code
new_vals
.
append
(
mid
)
updated
=
True
if
diff_l
:
new_divs
.
append
((
l
,
mid
))
if
diff_r
:
new_divs
.
append
((
mid
,
r
))
self
.
grid
[
i
]
+=
new_vals
self
.
divisions
[
i
]
=
new_divs
if
not
updated
:
# fully explored
_logger
.
info
(
'Search space has been fully explored'
)
self
.
grid
=
None
else
:
else
:
chosen_params
=
dict
()
size
=
_grid_size_info
(
self
.
grid
)
for
key
in
ss_spec
.
keys
():
_logger
.
info
(
f
'Grid subdivided, new size:
{
size
}
'
)
chosen_params
[
key
]
=
self
.
_json2parameter
(
ss_spec
[
key
])
return
self
.
_expand_parameters
(
chosen_params
)
def
_init_grid
(
self
):
elif
isinstance
(
ss_spec
,
list
):
self
.
epoch_bar
=
[
0
for
_
in
self
.
space
]
chosen_params
=
list
()
self
.
grid
=
[
None
for
_
in
self
.
space
]
for
subspec
in
ss_spec
[
1
:]:
for
i
,
spec
in
enumerate
(
self
.
space
.
values
()):
choice
=
self
.
_json2parameter
(
subspec
)
if
spec
.
categorical
:
if
isinstance
(
choice
,
list
):
self
.
grid
[
i
]
=
list
(
range
(
spec
.
size
))
chosen_params
.
extend
(
choice
)
else
:
else
:
chosen_params
.
append
(
choice
)
self
.
grid
[
i
]
=
[
0.5
]
chosen_params
=
list
(
map
(
lambda
v
:
{
ss_spec
[
0
]:
v
},
chosen_params
))
self
.
divisions
[
i
]
=
[]
if
_less
(
0
,
0.5
,
spec
):
self
.
divisions
[
i
].
append
((
0
,
0.5
))
if
_less
(
0.5
,
1
,
spec
):
self
.
divisions
[
i
].
append
((
0.5
,
1
))
size
=
_grid_size_info
(
self
.
grid
)
_logger
.
info
(
f
'Grid initialized, size:
{
size
}
'
)
def
_current_parameters
(
self
):
# convert self.vector to "formatted" parameters
params
=
{}
for
i
,
spec
in
enumerate
(
self
.
space
.
values
()):
x
=
self
.
grid
[
i
][
self
.
vector
[
i
]]
if
spec
.
categorical
:
params
[
spec
.
key
]
=
x
else
:
else
:
chosen_params
=
copy
.
deepcopy
(
ss_spec
)
params
[
spec
.
key
]
=
_cdf_inverse
(
x
,
spec
)
return
chosen_params
return
params
def
_parse_quniform
(
self
,
param_value
):
def
_less
(
x
,
y
,
spec
):
"""
#if spec.q is None: # TODO: comment out because of edge case UT uniform(99.9, 99.9)
Parse type of quniform parameter and return a list
# return x < y
"""
real_x
=
_deformat_single_parameter
(
_cdf_inverse
(
x
,
spec
),
spec
)
low
,
high
,
q
=
param_value
[
0
],
param_value
[
1
],
param_value
[
2
]
real_y
=
_deformat_single_parameter
(
_cdf_inverse
(
y
,
spec
),
spec
)
return
np
.
clip
(
np
.
arange
(
np
.
round
(
low
/
q
),
np
.
round
(
high
/
q
)
+
1
)
*
q
,
low
,
high
)
return
real_x
<
real_y
def
_parse_randint
(
self
,
param_value
):
def
_cdf_inverse
(
x
,
spec
):
"""
# inverse function of spec's cumulative distribution function
Parse type of randint parameter and return a list
if
spec
.
normal_distributed
:
"""
return
spec
.
mu
+
spec
.
sigma
*
math
.
sqrt
(
2
)
*
erfinv
(
2
*
x
-
1
)
if
param_value
[
0
]
>=
param_value
[
1
]:
raise
ValueError
(
"Randint should contain at least 1 candidate, but [%s, %s) contains none."
,
param_value
[
0
],
param_value
[
1
])
return
np
.
arange
(
param_value
[
0
],
param_value
[
1
]).
tolist
()
def
_expand_parameters
(
self
,
para
):
"""
Enumerate all possible combinations of all parameters
Parameters
----------
para : dict
{key1: [v11, v12, ...], key2: [v21, v22, ...], ...}
Returns
-------
dict
{{key1: v11, key2: v21, ...}, {key1: v11, key2: v22, ...}, ...}
"""
if
len
(
para
)
==
1
:
for
key
,
values
in
para
.
items
():
return
list
(
map
(
lambda
v
:
{
key
:
v
},
values
))
key
=
list
(
para
)[
0
]
values
=
para
.
pop
(
key
)
rest_para
=
self
.
_expand_parameters
(
para
)
ret_para
=
list
()
for
val
in
values
:
for
config
in
rest_para
:
config
[
key
]
=
val
ret_para
.
append
(
copy
.
deepcopy
(
config
))
return
ret_para
def update_search_space(self, search_space):
    """
    Validate the search space, then expand it into ``self.expanded_search_space``.

    Only the types ``choice``, ``quniform`` and ``randint`` are accepted.

    Parameters
    ----------
    search_space : dict
        The format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
    """
    supported_types = ['choice', 'randint', 'quniform']
    validate_search_space(search_space, supported_types)
    expanded = self._json2parameter(search_space)
    self.expanded_search_space = expanded
def
generate_parameters
(
self
,
parameter_id
,
**
kwargs
):
"""
Generate parameters for one trial.
Parameters
----------
parameter_id : int
The id for the generated hyperparameter
**kwargs
Not used
Returns
-------
dict
One configuration from the expanded search space.
Raises
------
NoMoreTrialError
If all the configurations have been sent, raise :class:`~nni.NoMoreTrialError`.
"""
self
.
count
+=
1
while
self
.
count
<=
len
(
self
.
expanded_search_space
)
-
1
:
_params_tuple
=
convert_dict2tuple
(
copy
.
deepcopy
(
self
.
expanded_search_space
[
self
.
count
]))
if
_params_tuple
in
self
.
supplement_data
:
self
.
count
+=
1
else
:
else
:
return
self
.
expanded_search_space
[
self
.
count
]
return
spec
.
low
+
(
spec
.
high
-
spec
.
low
)
*
x
raise
nni
.
NoMoreTrialError
(
'no more parameters now.'
)
def
_deformat_single_parameter
(
x
,
spec
):
def
receive_trial_result
(
self
,
parameter_id
,
parameters
,
value
,
**
kwargs
):
if
math
.
isinf
(
x
):
"""
return
x
Receive a trial's final performance result reported through :func:`~nni.report_final_result` by the trial.
spec_dict
=
spec
.
_asdict
()
GridSearchTuner does not need trial's results.
spec_dict
[
'key'
]
=
(
spec
.
name
,)
"""
spec
=
ParameterSpec
(
**
spec_dict
)
pass
params
=
deformat_parameters
({
spec
.
key
:
x
},
{
spec
.
key
:
spec
})
return
params
[
spec
.
name
]
def
import_data
(
self
,
data
):
"""
def
_grid_size_info
(
grid
):
Import additional data for tuning
if
len
(
grid
)
==
1
:
return
str
(
len
(
grid
[
0
]))
Parameters
sizes
=
[
len
(
candidates
)
for
candidates
in
grid
]
----------
mul
=
'x'
.
join
(
str
(
s
)
for
s
in
sizes
)
list
total
=
np
.
prod
(
sizes
)
A list of dictionaries, each of which has at least two keys, ``parameter`` and ``value``
return
f
'(
{
mul
}
) =
{
total
}
'
"""
_completed_num
=
0
for
trial_info
in
data
:
logger
.
info
(
"Importing data, current processing progress %s / %s"
,
_completed_num
,
len
(
data
))
_completed_num
+=
1
assert
"parameter"
in
trial_info
_params
=
trial_info
[
"parameter"
]
assert
"value"
in
trial_info
_value
=
trial_info
[
'value'
]
if
not
_value
:
logger
.
info
(
"Useless trial data, value is %s, skip this trial data."
,
_value
)
continue
_params_tuple
=
convert_dict2tuple
(
copy
.
deepcopy
(
_params
))
self
.
supplement_data
[
_params_tuple
]
=
True
logger
.
info
(
"Successfully import data to grid search tuner."
)
test/ut/sdk/test_builtin_tuners.py
View file @
cb090e8c
...
@@ -253,7 +253,7 @@ class BuiltinTunersTestCase(TestCase):
...
@@ -253,7 +253,7 @@ class BuiltinTunersTestCase(TestCase):
logger
.
info
(
"Imported data successfully at the beginning with incomplete epoch"
)
logger
.
info
(
"Imported data successfully at the beginning with incomplete epoch"
)
shutil
.
rmtree
(
all_checkpoint_dir
)
shutil
.
rmtree
(
all_checkpoint_dir
)
def
import_data_test
(
self
,
tuner_factory
,
stype
=
"choice_str"
):
def
import_data_test
(
self
,
tuner_factory
,
stype
=
"choice_str"
,
support_middle
=
True
):
"""
"""
import data at the beginning with number value and dict value
import data at the beginning with number value and dict value
import data in the middle also with number value and dict value, and duplicate data record
import data in the middle also with number value and dict value, and duplicate data record
...
@@ -298,6 +298,8 @@ class BuiltinTunersTestCase(TestCase):
...
@@ -298,6 +298,8 @@ class BuiltinTunersTestCase(TestCase):
parameters
=
tuner
.
generate_multiple_parameters
(
list
(
range
(
3
)))
parameters
=
tuner
.
generate_multiple_parameters
(
list
(
range
(
3
)))
for
i
in
range
(
3
):
for
i
in
range
(
3
):
tuner
.
receive_trial_result
(
i
,
parameters
[
i
],
random
.
uniform
(
-
100
,
100
))
tuner
.
receive_trial_result
(
i
,
parameters
[
i
],
random
.
uniform
(
-
100
,
100
))
if
not
support_middle
:
return
# import data in the middle
# import data in the middle
if
stype
==
"choice_str"
:
if
stype
==
"choice_str"
:
data
=
[{
"parameter"
:
{
"choice_str"
:
"cat"
},
"value"
:
1.1
},
data
=
[{
"parameter"
:
{
"choice_str"
:
"cat"
},
"value"
:
1.1
},
...
@@ -316,9 +318,8 @@ class BuiltinTunersTestCase(TestCase):
...
@@ -316,9 +318,8 @@ class BuiltinTunersTestCase(TestCase):
def
test_grid_search
(
self
):
def
test_grid_search
(
self
):
self
.
exhaustive
=
True
self
.
exhaustive
=
True
tuner_fn
=
lambda
:
GridSearchTuner
()
tuner_fn
=
lambda
:
GridSearchTuner
()
self
.
search_space_test_all
(
tuner_fn
,
self
.
search_space_test_all
(
tuner_fn
)
supported_types
=
[
"choice"
,
"randint"
,
"quniform"
])
self
.
import_data_test
(
tuner_fn
,
support_middle
=
False
)
self
.
import_data_test
(
tuner_fn
)
def
test_tpe
(
self
):
def
test_tpe
(
self
):
tuner_fn
=
TpeTuner
tuner_fn
=
TpeTuner
...
@@ -397,6 +398,10 @@ class BuiltinTunersTestCase(TestCase):
...
@@ -397,6 +398,10 @@ class BuiltinTunersTestCase(TestCase):
"normal"
,
"lognormal"
,
"qnormal"
,
"qlognormal"
])
"normal"
,
"lognormal"
,
"qnormal"
,
"qlognormal"
])
self
.
import_data_test
(
tuner_fn
,
stype
=
'choice_num'
)
self
.
import_data_test
(
tuner_fn
,
stype
=
'choice_num'
)
def
test_regularized_evolution_tuner
(
self
):
tuner_fn
=
lambda
:
RegularizedEvolutionTuner
()
self
.
nas_search_space_test_all
(
tuner_fn
)
def
tearDown
(
self
):
def
tearDown
(
self
):
file_list
=
glob
.
glob
(
"smac3*"
)
+
[
"param_config_space.pcs"
,
"scenario.txt"
,
"model_path"
]
file_list
=
glob
.
glob
(
"smac3*"
)
+
[
"param_config_space.pcs"
,
"scenario.txt"
,
"model_path"
]
for
file
in
file_list
:
for
file
in
file_list
:
...
@@ -406,10 +411,6 @@ class BuiltinTunersTestCase(TestCase):
...
@@ -406,10 +411,6 @@ class BuiltinTunersTestCase(TestCase):
else
:
else
:
os
.
remove
(
file
)
os
.
remove
(
file
)
def
test_regularized_evolution_tuner
(
self
):
tuner_fn
=
lambda
:
RegularizedEvolutionTuner
()
self
.
nas_search_space_test_all
(
tuner_fn
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
main
()
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment