# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
This script provides a more program-friendly representation of HPO search space.
The format is considered an internal helper and is not visible to end users.

You will find this useful when you want to support nested search space.

The random tuner is an intuitive example for this utility.
You should check its code before reading docstrings in this file.
"""

__all__ = [
    'ParameterSpec',
    'deformat_parameters',
    'format_search_space',
]

import math
21
from types import SimpleNamespace
22
23
24
25
from typing import Any, List, NamedTuple, Optional, Tuple

class ParameterSpec(NamedTuple):
    """
    Specification (aka space / range / domain) of one single parameter.

    NOTE: For `loguniform` (and `qloguniform`), the fields `low` and `high` are logarithm of original values.
    """

    name: str                                   # The object key in JSON
    type: str                                   # "_type" in JSON
    values: List[Any]                           # "_value" in JSON

    key: Tuple[Any, ...]                        # The "path" of this parameter:
                                                # names (str), interleaved with choice indices (int) when nested

    categorical: bool                           # Whether this parameter is categorical (unordered) or numerical (ordered)
    size: Optional[int] = None                  # If it's categorical, how many candidates it has

    # uniform distributed
    low: Optional[float] = None                 # Lower bound of uniform parameter
    high: Optional[float] = None                # Upper bound of uniform parameter

    normal_distributed: Optional[bool] = None   # Whether this parameter is uniform or normal distributed
    mu: Optional[float] = None                  # µ of normal parameter
    sigma: Optional[float] = None               # σ of normal parameter

    q: Optional[float] = None                   # If not `None`, the parameter value should be an integer multiple of this
    clip: Optional[Tuple[float, float]] = None
                                                # For q(log)uniform, this equals to "values[:2]"; for others this is None

    log_distributed: Optional[bool] = None      # Whether this parameter is log distributed
                                                # When true, low/high/mu/sigma describes log of parameter value (like np.lognormal)

    def is_activated_in(self, partial_parameters):
        """
        For nested search space, check whether this parameter should be skipped for current set of parameters.
        This function must be used in a pattern similar to random tuner. Otherwise it will misbehave.

        A parameter nested inside a choice has a key shaped like ``(*parent_key, index, name)``,
        where ``index`` (an int) is the branch of the parent choice it belongs to.
        It is activated only when ``partial_parameters[parent_key]`` equals that index.

        ``partial_parameters`` is indexed by the parent's key, so the parent choice must
        already have been decided and stored there before this method is called.

        Returns True for top-level parameters (no int branch index in the key).
        """
        # Top-level parameter, or the second-to-last key element is a name rather than
        # a branch index: nothing can deactivate it.
        if len(self.key) < 2 or isinstance(self.key[-2], str):
            return True
        parent_key = self.key[:-2]
        branch_index = self.key[-2]
        return partial_parameters[parent_key] == branch_index
63

64
65
66
67
68
69
70
71
72
73
def format_search_space(search_space):
    """
    Convert user provided search space into a dict of ParameterSpec.
    The dict key is dict value's `ParameterSpec.key`.

    Nested choices are flattened: the specs come from `_format_search_space`,
    called with an empty parent key, and are inserted in the order it returns them.
    """
    specs = _format_search_space(tuple(), search_space)
    # The result relies on dict preserving insertion order.
    # In CPython 3.6, dicts preserve order by internal implementation.
    # In Python 3.7+, dicts preserve order by language spec.
    # No extra work is done for 3.6; remove these comments when 3.6 support is dropped.
    return {spec.key: spec for spec in specs}

def deformat_parameters(parameters, formatted_search_space):
    """
    Convert internal format parameters to users' expected format.

    "test/ut/sdk/test_hpo_formatting.py" provides examples of how this works.

    The function do following jobs:
     1. For "choice" and "randint", convert index (integer) to corresponding value.
     2. For "*log*", convert x to `exp(x)`.
     3. For "q*", convert x to `round(x / q) * q`, then clip into range.
     4. For nested choices, convert flatten key-value pairs into nested structure.

    `parameters` maps a formatted key (`ParameterSpec.key`) to its internal value;
    `formatted_search_space` maps the same keys to their `ParameterSpec`.
    Returns a new (possibly nested) dict; the inputs are not modified.
    """
    ret = {}
    for key, x in parameters.items():
        spec = formatted_search_space[key]

        if spec.categorical:
            if spec.type == 'randint':
                # x is an offset from the (ceiled) lower bound.
                lower = min(math.ceil(float(bound)) for bound in spec.values)
                _assign(ret, key, lower + x)
            elif _is_nested_choices(spec.values):
                # Only record which branch was chosen; the branch's own
                # parameters arrive as separate entries of `parameters`.
                _assign(ret, tuple([*key, '_name']), spec.values[x]['_name'])
            else:
                _assign(ret, key, spec.values[x])

        else:
            if spec.log_distributed:
                x = math.exp(x)
            if spec.q is not None:
                x = round(x / spec.q) * spec.q
            if spec.clip:
                # Rounding to a multiple of q may step outside the range.
                x = max(x, spec.clip[0])
                x = min(x, spec.clip[1])
            _assign(ret, key, x)

    return ret

110
def _format_search_space(parent_key, space):
    """
    Recursively flatten `space` into a list of ParameterSpec.

    `parent_key` is the key prefix (a tuple) of the enclosing nested choice branch,
    or an empty tuple at top level. Each parameter is emitted before the
    parameters nested inside its branches, in iteration order of `space`.
    """
    formatted = []
    for name, spec in space.items():
        # "_name" labels a nested choice branch; it is not a parameter.
        if name == '_name':
            continue
        key = tuple([*parent_key, name])
        formatted.append(_format_parameter(key, spec['_type'], spec['_value']))
        if spec['_type'] == 'choice' and _is_nested_choices(spec['_value']):
            for index, sub_space in enumerate(spec['_value']):
                # The branch index becomes part of the nested parameters' keys.
                branch_key = tuple([*parent_key, name, index])
                formatted += _format_search_space(branch_key, sub_space)
    return formatted

123
124
125
126
127
128
129
130
def _format_parameter(key, type_, values):
    """
    Build the ParameterSpec of one parameter.

    `key` is the parameter's path (its last element becomes `name`),
    `type_` is the "_type" string and `values` is the "_value" list.
    "choice" and "randint" are categorical; every other type is treated as numerical.
    """
    spec = SimpleNamespace(
        name = key[-1],
        type = type_,
        values = values,
        key = key,
        categorical = type_ in ['choice', 'randint'],
    )

    if spec.categorical:
        if type_ == 'choice':
            spec.size = len(values)
        else:
            # randint: number of integers between the ceiled bounds
            lower = math.ceil(float(values[0]))
            upper = math.ceil(float(values[1]))
            spec.size = upper - lower

    else:
        if type_.startswith('q'):
            spec.q = float(values[2])
        else:
            spec.q = None
        spec.log_distributed = ('log' in type_)

        if 'normal' in type_:
            spec.normal_distributed = True
            spec.mu = float(values[0])
            spec.sigma = float(values[1])

        else:
            spec.normal_distributed = False
            spec.low = float(values[0])
            spec.high = float(values[1])
            if spec.q is not None:
                # Captured before the log transform below, so clip stays in the original scale.
                spec.clip = (spec.low, spec.high)
            if spec.log_distributed:
                # make it align with mu
                spec.low = math.log(spec.low)
                spec.high = math.log(spec.high)

    # Fields never assigned above fall back to ParameterSpec's defaults (None).
    return ParameterSpec(**spec.__dict__)
164
165

def _is_nested_choices(values):
166
    assert values  # choices should not be empty
167
168
169
170
171
172
173
174
175
176
    for value in values:
        if not isinstance(value, dict):
            return False
        if '_name' not in value:
            return False
    return True

def _assign(params, key, x):
    if len(key) == 1:
        params[key[0]] = x
177
178
    elif isinstance(key[0], int):
        _assign(params, key[1:], x)
179
180
181
182
    else:
        if key[0] not in params:
            params[key[0]] = {}
        _assign(params[key[0]], key[1:], x)