Unverified Commit b0f34da1 authored by cruiseliu's avatar cruiseliu Committed by GitHub
Browse files

Refactor Hyperopt Tuners (Stage 2) - util update and test (#4238)

parent 30643638
# Copyright (c) Microsoft Corporation. # Copyright (c) Microsoft Corporation.
# Licensed under the MIT license. # Licensed under the MIT license.
"""
Naive random tuner for hyper-parameter optimization.
You can specify an integer seed to determine random result.
"""
__all__ = ['RandomTuner', 'suggest', 'suggest_parameter']
import numpy as np import numpy as np
import schema import schema
...@@ -29,25 +37,15 @@ class RandomClassArgsValidator(ClassArgsValidator): ...@@ -29,25 +37,15 @@ class RandomClassArgsValidator(ClassArgsValidator):
def suggest(rng, space): def suggest(rng, space):
params = {} params = {}
for spec in space.values(): for key, spec in space.items():
if not spec.is_activated(params): if spec.is_activated_in(params):
continue params[key] = suggest_parameter(rng, spec)
if spec.categorical:
params[spec.key] = rng.integers(spec.size)
continue
if spec.normal_distributed:
if spec.log_distributed:
x = rng.lognormal(spec.mu, spec.sigma)
else:
x = rng.normal(spec.mu, spec.sigma)
else:
if spec.log_distributed:
x = np.exp(rng.uniform(np.log(spec.low), np.log(spec.high)))
else:
x = rng.uniform(spec.low, spec.high)
if spec.q is not None:
x = np.round(x / spec.q) * spec.q
params[spec.key] = x
return params return params
def suggest_parameter(rng, spec):
    """Draw one random value for a single parameter spec.

    Categorical specs yield ``rng.integers(spec.size)``.  Other specs yield
    ``rng.normal(spec.mu, spec.sigma)`` when ``spec.normal_distributed`` is
    truthy and ``rng.uniform(spec.low, spec.high)`` otherwise.

    NOTE(review): ``rng`` is presumably a ``numpy.random.Generator`` -- confirm
    against the caller.
    """
    if spec.categorical:
        return rng.integers(spec.size)
    if not spec.normal_distributed:
        return rng.uniform(spec.low, spec.high)
    return rng.normal(spec.mu, spec.sigma)
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
from .validation import validate_search_space from .validation import validate_search_space
from .formatting import * from .formatting import *
from .optimize_mode import OptimizeMode
...@@ -6,6 +6,9 @@ This script provides a more program-friendly representation of HPO search space. ...@@ -6,6 +6,9 @@ This script provides a more program-friendly representation of HPO search space.
The format is considered internal helper and is not visible to end users. The format is considered internal helper and is not visible to end users.
You will find this useful when you want to support nested search space. You will find this useful when you want to support nested search space.
The random tuner is an intuitive example for this utility.
You should check its code before reading docstrings in this file.
""" """
__all__ = [ __all__ = [
...@@ -15,11 +18,14 @@ __all__ = [ ...@@ -15,11 +18,14 @@ __all__ = [
] ]
import math import math
from types import SimpleNamespace
from typing import Any, List, NamedTuple, Optional, Tuple from typing import Any, List, NamedTuple, Optional, Tuple
class ParameterSpec(NamedTuple): class ParameterSpec(NamedTuple):
""" """
Specification (aka space / range) of one single parameter. Specification (aka space / range / domain) of one single parameter.
NOTE: For `loguniform` (and `qloguniform`), the fields `low` and `high` are logarithm of original values.
""" """
name: str # The object key in JSON name: str # The object key in JSON
...@@ -27,126 +33,137 @@ class ParameterSpec(NamedTuple): ...@@ -27,126 +33,137 @@ class ParameterSpec(NamedTuple):
values: List[Any] # "_value" in JSON values: List[Any] # "_value" in JSON
key: Tuple[str] # The "path" of this parameter key: Tuple[str] # The "path" of this parameter
parent_index: Optional[int] # If the parameter is in a nested choice, this is its parent's index;
# if the parameter is at top level, this is `None`.
categorical: bool # Whether this parameter is categorical (unordered) or numerical (ordered) categorical: bool # Whether this parameter is categorical (unordered) or numerical (ordered)
size: int = None # If it's categorical, how many canidiates it has size: int = None # If it's categorical, how many candidates it has
# uniform distributed # uniform distributed
low: float = None # Lower bound of uniform parameter low: float = None # Lower bound of uniform parameter
high: float = None # Upper bound of uniform parameter high: float = None # Upper bound of uniform parameter
normal_distributed: bool = None # Whether this parameter is uniform or normal distributed normal_distributed: bool = None # Whether this parameter is uniform or normal distributed
mu: float = None # Mean of normal parameter mu: float = None # µ of normal parameter
sigma: float = None # Scale of normal parameter sigma: float = None # σ of normal parameter
q: Optional[float] = None # If not `None`, the parameter value should be an integer multiple of this
clip: Optional[Tuple[float, float]] = None
# For q(log)uniform, this equals to "values[:2]"; for others this is None
q: Optional[float] = None # If not `None`, the value should be an integer multiple of this
log_distributed: bool = None # Whether this parameter is log distributed log_distributed: bool = None # Whether this parameter is log distributed
# When true, low/high/mu/sigma describes log of parameter value (like np.lognormal)
def is_activated(self, partial_parameters): def is_activated_in(self, partial_parameters):
""" """
For nested search space, check whether this parameter should be skipped for current set of parameters. For nested search space, check whether this parameter should be skipped for current set of parameters.
This function works because the return value of `format_search_space()` is sorted in a way that This function must be used in a pattern similar to random tuner. Otherwise it will misbehave.
parents always appear before children.
""" """
return self.parent_index is None or partial_parameters.get(self.key[:-1]) == self.parent_index if len(self.key) < 2 or isinstance(self.key[-2], str):
return True
return partial_parameters[self.key[:-2]] == self.key[-2]
def format_search_space(search_space, ordered_randint=False): def format_search_space(search_space):
formatted = _format_search_space(tuple(), None, search_space) """
if ordered_randint: Convert user provided search space into a dict of ParameterSpec.
for i, spec in enumerate(formatted): The dict key is dict value's `ParameterSpec.key`.
if spec.type == 'randint': """
formatted[i] = _format_ordered_randint(spec.key, spec.parent_index, spec.values) formatted = _format_search_space(tuple(), search_space)
# In CPython 3.6, dicts preserve order by internal implementation.
# In Python 3.7+, dicts preserve order by language spec.
# Python 3.6 is crappy enough. Don't bother to do extra work for it.
# Remove these comments when we drop 3.6 support.
return {spec.key: spec for spec in formatted} return {spec.key: spec for spec in formatted}
def deformat_parameters(parameters, formatted_search_space): def deformat_parameters(parameters, formatted_search_space):
""" """
`paramters` is a dict whose key is `ParamterSpec.key`, and value is integer index if the parameter is categorical. Convert internal format parameters to users' expected format.
Convert it to the format expected by end users.
"test/ut/sdk/test_hpo_formatting.py" provides examples of how this works.
The function does the following jobs:
1. For "choice" and "randint", convert index (integer) to corresponding value.
2. For "*log*", convert x to `exp(x)`.
3. For "q*", convert x to `round(x / q) * q`, then clip into range.
4. For nested choices, convert flattened key-value pairs into a nested structure.
""" """
ret = {} ret = {}
for key, x in parameters.items(): for key, x in parameters.items():
spec = formatted_search_space[key] spec = formatted_search_space[key]
if not spec.categorical: if spec.categorical:
_assign(ret, key, x) if spec.type == 'randint':
elif spec.type == 'randint': lower = min(math.ceil(float(x)) for x in spec.values)
lower = min(math.ceil(float(x)) for x in spec.values) _assign(ret, key, lower + x)
_assign(ret, key, lower + x) elif _is_nested_choices(spec.values):
elif _is_nested_choices(spec.values): _assign(ret, tuple([*key, '_name']), spec.values[x]['_name'])
_assign(ret, tuple([*key, '_name']), spec.values[x]['_name']) else:
_assign(ret, key, spec.values[x])
else: else:
_assign(ret, key, spec.values[x]) if spec.log_distributed:
x = math.exp(x)
if spec.q is not None:
x = round(x / spec.q) * spec.q
if spec.clip:
x = max(x, spec.clip[0])
x = min(x, spec.clip[1])
_assign(ret, key, x)
return ret return ret
def _format_search_space(parent_key, parent_index, space): def _format_search_space(parent_key, space):
formatted = [] formatted = []
for name, spec in space.items(): for name, spec in space.items():
if name == '_name': if name == '_name':
continue continue
key = tuple([*parent_key, name]) key = tuple([*parent_key, name])
formatted.append(_format_parameter(key, parent_index, spec['_type'], spec['_value'])) formatted.append(_format_parameter(key, spec['_type'], spec['_value']))
if spec['_type'] == 'choice' and _is_nested_choices(spec['_value']): if spec['_type'] == 'choice' and _is_nested_choices(spec['_value']):
for index, sub_space in enumerate(spec['_value']): for index, sub_space in enumerate(spec['_value']):
formatted += _format_search_space(key, index, sub_space) key = tuple([*parent_key, name, index])
formatted += _format_search_space(key, sub_space)
return formatted return formatted
def _format_parameter(key, parent_index, type_, values): def _format_parameter(key, type_, values):
spec = {} spec = SimpleNamespace(
spec['name'] = key[-1] name = key[-1],
spec['type'] = type_ type = type_,
spec['values'] = values values = values,
key = key,
spec['key'] = key categorical = type_ in ['choice', 'randint'],
spec['parent_index'] = parent_index )
if type_ in ['choice', 'randint']: if spec.categorical:
spec['categorical'] = True
if type_ == 'choice': if type_ == 'choice':
spec['size'] = len(values) spec.size = len(values)
else: else:
lower, upper = sorted(math.ceil(float(x)) for x in values) lower = math.ceil(float(values[0]))
spec['size'] = upper - lower upper = math.ceil(float(values[1]))
spec.size = upper - lower
else: else:
spec['categorical'] = False
if type_.startswith('q'): if type_.startswith('q'):
spec['q'] = float(values[2]) spec.q = float(values[2])
spec['log_distributed'] = ('log' in type_) else:
spec.q = None
spec.log_distributed = ('log' in type_)
if 'normal' in type_: if 'normal' in type_:
spec['normal_distributed'] = True spec.normal_distributed = True
spec['mu'] = float(values[0]) spec.mu = float(values[0])
spec['sigma'] = float(values[1]) spec.sigma = float(values[1])
else: else:
spec['normal_distributed'] = False spec.normal_distributed = False
spec['low'], spec['high'] = sorted(float(x) for x in values[:2]) spec.low = float(values[0])
if 'q' in spec: spec.high = float(values[1])
spec['low'] = math.ceil(spec['low'] / spec['q']) * spec['q'] if spec.q is not None:
spec['high'] = math.floor(spec['high'] / spec['q']) * spec['q'] spec.clip = (spec.low, spec.high)
if spec.log_distributed:
return ParameterSpec(**spec) # make it align with mu
spec.low = math.log(spec.low)
def _format_ordered_randint(key, parent_index, values): spec.high = math.log(spec.high)
lower, upper = sorted(math.ceil(float(x)) for x in values)
return ParameterSpec( return ParameterSpec(**spec.__dict__)
name = key[-1],
type = 'randint',
values = values,
key = key,
parent_index = parent_index,
categorical = False,
low = float(lower),
high = float(upper - 1),
normal_distributed = False,
q = 1.0,
log_distributed = False,
)
def _is_nested_choices(values): def _is_nested_choices(values):
if not values: assert values # choices should not be empty
return False
for value in values: for value in values:
if not isinstance(value, dict): if not isinstance(value, dict):
return False return False
...@@ -157,6 +174,8 @@ def _is_nested_choices(values): ...@@ -157,6 +174,8 @@ def _is_nested_choices(values):
def _assign(params, key, x): def _assign(params, key, x):
if len(key) == 1: if len(key) == 1:
params[key[0]] = x params[key[0]] = x
elif isinstance(key[0], int):
_assign(params, key[1:], x)
else: else:
if key[0] not in params: if key[0] not in params:
params[key[0]] = {} params[key[0]] = {}
......
from enum import Enum
class OptimizeMode(Enum):
    """Enumeration of the two optimization modes, 'minimize' and 'maximize'."""

    # Each member's value is the lowercase form of its name.
    Minimize = 'minimize'
    Maximize = 'maximize'
from nni.common.hpo_utils import format_search_space, deformat_parameters
# Search space in the user-facing format: every parameter maps to a dict with
# '_type' and '_value'.  The fixture mixes flat parameters (uniform, choice,
# qloguniform, randint, loguniform) with a nested choice named 'nested', whose
# options are dicts carrying a '_name' plus their own sub-parameters.  The
# 'double_nested' option contains a second nested choice, 'xy'.
user_space = {
'dropout_rate': { '_type': 'uniform', '_value': [0.5, 0.9] },
'conv_size': { '_type': 'choice', '_value': [2, 3, 5, 7] },
'hidden_size': { '_type': 'qloguniform', '_value': [128, 1024, 1] },
'batch_size': { '_type': 'randint', '_value': [16, 32] },
'learning_rate': { '_type': 'loguniform', '_value': [0.0001, 0.1] },
'nested': {
'_type': 'choice',
'_value': [
{
'_name': 'empty',
},
{
'_name': 'double_nested',
'xy': {
'_type': 'choice',
'_value': [
{
'_name': 'x',
'x': { '_type': 'normal', '_value': [0, 1.0] },
},
{
'_name': 'y',
'y': { '_type': 'qnormal', '_value': [0, 1, 0.5] },
},
],
},
'z': { '_type': 'quniform', '_value': [-0.5, 0.5, 0.1] },
},
{
'_name': 'common',
'x': { '_type': 'lognormal', '_value': [1, 0.1] },
'y': { '_type': 'qlognormal', '_value': [-1, 1, 0.1] },
},
],
},
}
# Expected formatted specs for user_space, listed in the order that
# test_format_search_space pairs them with format_search_space()'s output.
# '_value_names' is not compared as a field of its own: when present, the test
# compares it with the '_name' of each entry in the spec's `values`.
internal_space_simple = [ # the full internal space is too long, omit None and False values here
{'name':'dropout_rate', 'type':'uniform', 'values':[0.5,0.9], 'key':('dropout_rate',), 'low':0.5, 'high':0.9},
{'name':'conv_size', 'type':'choice', 'values':[2,3,5,7], 'key':('conv_size',), 'categorical':True, 'size':4},
{'name':'hidden_size', 'type':'qloguniform', 'values':[128,1024,1], 'key':('hidden_size',), 'low':128.0, 'high':1024.0, 'q':1.0, 'log_distributed':True},
{'name':'batch_size', 'type':'randint', 'values':[16,32], 'key':('batch_size',), 'categorical':True, 'size':16},
{'name':'learning_rate', 'type':'loguniform', 'values':[0.0001,0.1], 'key':('learning_rate',), 'low':0.0001, 'high':0.1, 'log_distributed':True},
{'name':'nested', 'type':'choice', '_value_names':['empty','double_nested','common'], 'key':('nested',), 'categorical':True, 'size':3, 'nested_choice':True},
{'name':'xy', 'type':'choice', '_value_names':['x','y'], 'key':('nested','xy'), 'parent_index':1, 'categorical':True, 'size':2, 'nested_choice':True},
{'name':'x', 'type':'normal', 'values':[0,1.0], 'key':('nested','xy','x'), 'parent_index':0, 'normal_distributed':True, 'mu':0.0, 'sigma':1.0},
{'name':'y', 'type':'qnormal', 'values':[0,1,0.5], 'key':('nested','xy','y'), 'parent_index':1, 'normal_distributed':True, 'mu':0.0, 'sigma':1.0, 'q':0.5},
{'name':'z', 'type':'quniform', 'values':[-0.5,0.5,0.1], 'key':('nested','z'), 'parent_index':1, 'low':-0.5, 'high':0.5, 'q':0.1},
{'name':'x', 'type':'lognormal', 'values':[1,0.1], 'key':('nested','x'), 'parent_index':2, 'normal_distributed':True, 'mu':1.0, 'sigma':0.1, 'log_distributed':True},
{'name':'y', 'type':'qlognormal', 'values':[-1,1,0.1], 'key':('nested','y'), 'parent_index':2, 'normal_distributed':True, 'mu':-1.0, 'sigma':1.0, 'q':0.1, 'log_distributed':True},
]
def test_format_search_space():
    """Check ``format_search_space(user_space)`` against ``internal_space_simple``.

    For every formatted spec, each field named in the matching expected entry
    must equal the expected value; any field not named there must be ``None``
    or compare equal to ``False``.  For nested choices the expected entry
    carries ``'_value_names'``, which is compared with the ``'_name'`` of each
    option in the spec's ``values``.
    """
    formatted = format_search_space(user_space)
    # zip() stops at the shorter input, so compare the lengths first;
    # otherwise missing or extra specs would go unnoticed.
    assert len(formatted) == len(internal_space_simple)
    for spec, expected in zip(formatted.values(), internal_space_simple):
        for field, value in spec._asdict().items():
            if field == 'values' and '_value_names' in expected:
                assert [option['_name'] for option in value] == expected['_value_names']
            elif field in expected:
                assert value == expected[field]
            else:
                # Fields omitted from the expected entry must be unset.
                assert value is None or value == False
# Flat, tuple-keyed parameters passed to deformat_parameters() in
# test_deformat_parameters.  Judging by the expected output below,
# 'conv_size', 'batch_size', 'nested' and ('nested', 'xy') hold indices
# rather than final values.
internal_parameters = {
('dropout_rate',): 0.7,
('conv_size',): 2,
('hidden_size',): 200.0,
('batch_size',): 3,
('learning_rate',): 0.0345,
('nested',): 1,
('nested', 'xy'): 0,
('nested', 'xy', 'x'): 0.123,
}
# Expected result of deformatting internal_parameters: a nested dict in which
# each selected nested option is identified by its '_name'.
user_parameters = {
'dropout_rate': 0.7,
'conv_size': 5,
'hidden_size': 200.0,
'batch_size': 19,
'learning_rate': 0.0345,
'nested': {
'_name': 'double_nested',
'xy': {
'_name': 'x',
'x': 0.123,
},
},
}
def test_deformat_parameters():
    """Deformatting ``internal_parameters`` must reproduce ``user_parameters``."""
    formatted_space = format_search_space(user_space)
    assert deformat_parameters(internal_parameters, formatted_space) == user_parameters
if __name__ == '__main__':
    # Run the tests in order when the file is executed directly.
    for run_test in (test_format_search_space, test_deformat_parameters):
        run_test()
from math import exp, log
from nni.common.hpo_utils import deformat_parameters, format_search_space
# Search space in the user-facing format.  'D' is a nested choice: each of its
# options is a dict with a '_name' ('UNIFORM', 'NORMAL', 'EMPTY') plus that
# option's own sub-parameters.  'not_nested' is a choice whose options are
# dicts without '_name'; the expected outputs in this file return those dicts
# unchanged as the parameter value.
user_space = {
'pool': { '_type': 'choice', '_value': ['max', 'min', 'avg'] },
'kernel': { '_type': 'randint', '_value': [2, 8] },
'D': { # distribution
'_type': 'choice',
'_value': [
{
'_name': 'UNIFORM',
'dropout': { '_type': 'uniform', '_value': [0.5, 0.9] },
'hidden': { '_type': 'quniform', '_value': [100, 1000, 3] },
'U_lr': { '_type': 'loguniform', '_value': [0.0001, 0.1] },
'U_batch': { '_type': 'qloguniform', '_value': [16.0, 128.0, 0.725] },
},
{
'_name': 'NORMAL',
'dropout': { '_type': 'normal', '_value': [0.7, 0.2] },
'hidden': { '_type': 'qnormal', '_value': [500, 200, 3] },
'N_lr': { '_type': 'lognormal', '_value': [-6, 3] },
'N_batch': { '_type': 'qlognormal', '_value': [3.5, 1.2, 0.725] },
},
{
'_name': 'EMPTY',
},
]
},
'not_nested': {
'_type': 'choice',
'_value': [
{'x': 0, 'y': 0},
{'x': 1, 'y': 2},
],
},
}
# Expected ParameterSpec fields for format_search_space(user_space), one list
# per field.  Position i in every list describes the i-th formatted spec, in
# the order test_formatting reads them from the returned dict.
# The loguniform / qloguniform entries of spec_lows and spec_highs are given as
# log() of the bounds written in user_space; spec_clips keeps the raw bounds.
spec_names = ['pool', 'kernel', 'D', 'dropout', 'hidden', 'U_lr', 'U_batch', 'dropout', 'hidden', 'N_lr', 'N_batch', 'not_nested']
spec_types = ['choice', 'randint', 'choice', 'uniform', 'quniform', 'loguniform', 'qloguniform', 'normal', 'qnormal', 'lognormal', 'qlognormal', 'choice']
spec_values = [['max','min','avg'], [2,8], user_space['D']['_value'], [0.5,0.9], [100.0,1000.0,3.0], [0.0001,0.1], [16.0,128.0,0.725], [0.7,0.2], [500.0,200.0,3.0], [-6.0,3.0], [3.5,1.2,0.725], [{'x':0,'y':0},{'x':1,'y':2}]]
spec_keys = [('pool',), ('kernel',), ('D',), ('D',0,'dropout'), ('D',0,'hidden'), ('D',0,'U_lr'), ('D',0,'U_batch'), ('D',1,'dropout'), ('D',1,'hidden'), ('D',1,'N_lr'), ('D',1,'N_batch'), ('not_nested',)]
spec_categoricals = [True, True, True, False, False, False, False, False, False, False, False, True]
spec_sizes = [3, 6, 3, None, None, None, None, None, None, None, None, 2]
spec_lows = [None, None, None, 0.5, 100.0, log(0.0001), log(16.0), None, None, None, None, None]
spec_highs = [None, None, None, 0.9, 1000.0, log(0.1), log(128.0), None, None, None, None, None]
spec_normals = [None, None, None, False, False, False, False, True, True, True, True, None]
spec_mus = [None, None, None, None, None, None, None, 0.7, 500.0, -6.0, 3.5, None]
spec_sigmas = [None, None, None, None, None, None, None, 0.2, 200.0, 3.0, 1.2, None]
spec_qs = [None, None, None, None, 3.0, None, 0.725, None, 3.0, None, 0.725, None]
spec_clips = [None, None, None, None, (100.0,1000.0), None, (16.0,128.0), None, None, None, None, None]
spec_logs = [None, None, None, False, False, True, True, False, False, True, True, None]
def test_formatting():
    """Compare every field of the formatted ``user_space`` with the ``spec_*`` lists.

    Also checks that each dict key of the formatted space equals the ``key``
    field of the spec stored under it.
    """
    formatted = format_search_space(user_space)
    for key, spec in formatted.items():
        assert key == spec.key

    specs = list(formatted.values())
    # (field name, expected per-spec values), in the order they are checked.
    expectations = [
        ('name', spec_names),
        ('type', spec_types),
        ('values', spec_values),
        ('key', spec_keys),
        ('categorical', spec_categoricals),
        ('size', spec_sizes),
        ('low', spec_lows),
        ('high', spec_highs),
        ('normal_distributed', spec_normals),
        ('mu', spec_mus),
        ('sigma', spec_sigmas),
        ('q', spec_qs),
        ('clip', spec_clips),
        ('log_distributed', spec_logs),
    ]
    for field, expected in expectations:
        assert expected == [getattr(spec, field) for spec in specs]
# Fixture pairs for test_deformatting: internal_params_N is the flat,
# tuple-keyed input to deformat_parameters() and user_params_N is the nested
# dict it is expected to return for this file's user_space.

# Case 1: option 0 of 'D' ('UNIFORM') is selected.
internal_params_1 = {
('pool',): 0,
('kernel',): 5,
('D',): 0,
('D',0,'dropout'): 0.7,
('D',0,'hidden'): 100.1, # round to 99.0, then clip to 100.0
('D',0,'U_lr'): -4.6,
('D',0,'U_batch'): 4.0,
('not_nested',): 0,
}
user_params_1 = {
'pool': 'max',
'kernel': 7,
'D': {
'_name': 'UNIFORM',
'dropout': 0.7,
'hidden': 100.0,
'U_lr': exp(-4.6),
'U_batch': 54.375,
},
'not_nested': {'x': 0, 'y': 0},
}
# Case 2: option 1 of 'D' ('NORMAL') is selected.  The same raw 'hidden' value
# as in case 1 is expected to come out as 99.0 here rather than 100.0.
internal_params_2 = {
('pool',): 2,
('kernel',): 0,
('D',): 1,
('D',1,'dropout'): 0.7,
('D',1,'hidden'): 100.1,
('D',1,'N_lr'): -4.6,
('D',1,'N_batch'): 4.0,
('not_nested',): 1,
}
user_params_2 = {
'pool': 'avg',
'kernel': 2,
'D': {
'_name': 'NORMAL',
'dropout': 0.7,
'hidden': 99.0,
'N_lr': exp(-4.6),
'N_batch': 54.375,
},
'not_nested': {'x': 1, 'y': 2},
}
# Case 3: option 2 of 'D' ('EMPTY') is selected; it has no sub-parameters.
internal_params_3 = {
('pool',): 1,
('kernel',): 1,
('D',): 2,
('not_nested',): 1,
}
user_params_3 = {
'pool': 'min',
'kernel': 3,
'D': {
'_name': 'EMPTY',
},
'not_nested': {'x': 1, 'y': 2},
}
def test_deformatting():
    """Each ``internal_params_N`` must deformat to the matching ``user_params_N``."""
    formatted = format_search_space(user_space)
    cases = [
        (internal_params_1, user_params_1),
        (internal_params_2, user_params_2),
        (internal_params_3, user_params_3),
    ]
    for internal, expected in cases:
        assert deformat_parameters(internal, formatted) == expected
def test_activate():
    """Check ``is_activated_in`` for top-level specs and for children of 'D'."""
    space = format_search_space(user_space)

    def activated(key, chosen):
        # Shorthand: is the spec stored under `key` active given `chosen`?
        return space[key].is_activated_in(chosen)

    # A top-level spec is active even when nothing has been chosen yet.
    assert activated(('pool',), {})

    # With option 0 of 'D' chosen, only the children of option 0 are active.
    option_0_chosen = {('pool',): 1, ('kernel',): 1, ('D',): 0}
    assert activated(('D', 0, 'dropout'), option_0_chosen)
    assert activated(('D', 0, 'U_lr'), option_0_chosen)
    assert not activated(('D', 1, 'dropout'), option_0_chosen)
    assert not activated(('D', 1, 'N_lr'), option_0_chosen)

    # With option 2 of 'D' chosen, no child of option 0 or 1 is active.
    option_2_chosen = {('pool',): 1, ('kernel',): 1, ('D',): 2}
    assert not activated(('D', 0, 'dropout'), option_2_chosen)
    assert not activated(('D', 0, 'U_lr'), option_2_chosen)
    assert not activated(('D', 1, 'dropout'), option_2_chosen)
    assert not activated(('D', 1, 'N_lr'), option_2_chosen)
    assert activated(('not_nested',), option_2_chosen)
if __name__ == '__main__':
    # Run the tests in order when the file is executed directly.
    for run_test in (test_formatting, test_deformatting, test_activate):
        run_test()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment