smac_tuner.py 13.4 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
3
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

ShufanHuang's avatar
ShufanHuang committed
4
"""
QuanluZhang's avatar
QuanluZhang committed
5
smac_tuner.py
ShufanHuang's avatar
ShufanHuang committed
6
"""
QuanluZhang's avatar
QuanluZhang committed
7
8

import logging
Yuge Zhang's avatar
Yuge Zhang committed
9
import sys
QuanluZhang's avatar
QuanluZhang committed
10

Yuge Zhang's avatar
Yuge Zhang committed
11
import numpy as np
chicm-ms's avatar
chicm-ms committed
12
from schema import Schema, Optional
liuzhe-lz's avatar
liuzhe-lz committed
13

QuanluZhang's avatar
QuanluZhang committed
14
from smac.facade.epils_facade import EPILS
Yuge Zhang's avatar
Yuge Zhang committed
15
16
17
18
from smac.facade.roar_facade import ROAR
from smac.facade.smac_facade import SMAC
from smac.scenario.scenario import Scenario
from smac.utils.io.cmd_reader import CMDReader
QuanluZhang's avatar
QuanluZhang committed
19

liuzhe-lz's avatar
liuzhe-lz committed
20
21
from ConfigSpaceNNI import Configuration

22
import nni
chicm-ms's avatar
chicm-ms committed
23
from nni import ClassArgsValidator
liuzhe-lz's avatar
liuzhe-lz committed
24
25
26
from nni.tuner import Tuner
from nni.utils import OptimizeMode, extract_scalar_reward

QuanluZhang's avatar
QuanluZhang committed
27
from .convert_ss_to_scenario import generate_scenario
QuanluZhang's avatar
QuanluZhang committed
28

29
30
logger = logging.getLogger('smac_AutoML')

chicm-ms's avatar
chicm-ms committed
31
32
33
34
35
36
37
class SMACClassArgsValidator(ClassArgsValidator):
    """Validator for the class arguments accepted by ``SMACTuner``."""

    def validate_class_args(self, **kwargs):
        """
        Check the tuner's class arguments against a fixed schema.

        ``optimize_mode`` is a required key and must satisfy the rule built by
        ``self.choices`` (presumably restricting it to 'maximize' / 'minimize' --
        confirm against ``ClassArgsValidator``). ``config_dedup`` is optional and,
        when present, must be a bool.

        Parameters
        ----------
        **kwargs
            The class arguments to validate.
        """
        mode_rule = self.choices('optimize_mode', 'maximize', 'minimize')
        args_schema = Schema({
            'optimize_mode': mode_rule,
            Optional('config_dedup'): bool,
        })
        args_schema.validate(kwargs)

QuanluZhang's avatar
QuanluZhang committed
38
class SMACTuner(Tuner):
    """
    This is a wrapper of [SMAC](https://github.com/automl/SMAC3) following NNI tuner interface.
    It only supports ``SMAC`` mode, and does not support the multiple instances of SMAC3 (i.e.,
    the same configuration is run multiple times).
    """
44
    def __init__(self, optimize_mode="maximize", config_dedup=False):
        """
        Set up the tuner's bookkeeping. The wrapped SMAC objects are not created here;
        they stay ``None`` until ``update_search_space`` is called.

        Parameters
        ----------
        optimize_mode : str
            Optimize mode, 'maximize' or 'minimize', by default 'maximize'
        config_dedup : bool
            If True, the tuner will not generate a configuration that has been already generated.
            If False, a configuration may be generated twice, but it is rare for relatively large search space.
        """
        self.logger = logger
        self.optimize_mode = OptimizeMode(optimize_mode)
        self.dedup = config_dedup

        # parameter_id -> configuration object handed out for that id
        self.total_data = {}

        # Filled in during ``update_search_space`` (which calls ``_main_cli``).
        self.optimizer = self.smbo_solver = self.cs = None
        self.loguniform_key = set()
        self.categorical_dict = {}

        # first_one: no result has been fed to the solver yet.
        # update_ss_done: the one-time search space initialization has happened.
        self.first_one, self.update_ss_done = True, False
QuanluZhang's avatar
QuanluZhang committed
65
66

    def _main_cli(self):
        """
        Main function of SMAC for CLI interface. Some initializations of the wrapped SMAC are done
        in this function: the root logger is pointed at stdout, the scenario is loaded from
        ``args.scenario_file`` (its config space is kept in ``self.cs``), and the optimizer
        facade matching ``args.mode`` is constructed.

        Returns
        -------
        obj
            The object of the SMAC optimizer (``SMAC``, ``ROAR`` or ``EPILS``), or ``None``
            when ``args.mode`` is none of those.
        """
        self.logger.info("SMAC call: %s", " ".join(sys.argv))

        cmd_reader = CMDReader()
        args, _ = cmd_reader.read_cmd()

        root_logger = logging.getLogger()
        root_logger.setLevel(args.verbose_level)
        logger_handler = logging.StreamHandler(stream=sys.stdout)
        # Terse format at INFO and above, timestamped format for more verbose levels.
        if root_logger.level >= logging.INFO:
            formatter = logging.Formatter("%(levelname)s:\t%(message)s")
        else:
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
                "%Y-%m-%d %H:%M:%S")
        logger_handler.setFormatter(formatter)

        # Replace the default (first) handler with ours. The default handler is
        # captured *before* adding the new one: if the root logger has no handler
        # yet, indexing ``handlers[0]`` after ``addHandler`` would pick -- and
        # remove -- the handler that was just installed.
        default_handlers = root_logger.handlers[:1]
        root_logger.addHandler(logger_handler)
        for default_handler in default_handlers:
            root_logger.removeHandler(default_handler)

        # Create defaults
        rh = None
        initial_configs = None
        stats = None
        incumbent = None

        # Create scenario-object
        scen = Scenario(args.scenario_file, [])
        self.cs = scen.cs

        # Arguments shared by every facade.
        common_kwargs = dict(
            scenario=scen,
            rng=np.random.RandomState(args.seed),
            runhistory=rh,
            initial_configurations=initial_configs,
            run_id=args.seed)

        if args.mode == "SMAC":
            optimizer = SMAC(
                stats=stats,
                restore_incumbent=incumbent,
                **common_kwargs)
        elif args.mode == "ROAR":
            optimizer = ROAR(**common_kwargs)
        elif args.mode == "EPILS":
            optimizer = EPILS(**common_kwargs)
        else:
            # Keep returning None for backward compatibility, but leave a trace:
            # the caller will otherwise fail later with an unhelpful error.
            self.logger.error("Unsupported SMAC mode: %s", args.mode)
            optimizer = None

        return optimizer

    def update_search_space(self, search_space):
Yuge Zhang's avatar
Yuge Zhang committed
134
        """
135
136
137
138
139
        Convert search_space to the format that ``SMAC3`` could recognize, thus, not all the search space types
        are supported. In this function, we also do the initialization of `SMAC3`, i.e., calling ``self._main_cli``.

        NOTE: updating search space during experiment running is not supported.

ShufanHuang's avatar
ShufanHuang committed
140
141
        Parameters
        ----------
142
143
        search_space : dict
            The format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
ShufanHuang's avatar
ShufanHuang committed
144
        """
145
        self.logger.info('update search space in SMAC.')
QuanluZhang's avatar
QuanluZhang committed
146
        if not self.update_ss_done:
147
148
149
            self.categorical_dict = generate_scenario(search_space)
            if self.categorical_dict is None:
                raise RuntimeError('categorical dict is not correctly returned after parsing search space.')
150
151
            # TODO: this is ugly, we put all the initialization work in this method, because initialization relies
            #         on search space, also because update_search_space is called at the beginning.
QuanluZhang's avatar
QuanluZhang committed
152
153
            self.optimizer = self._main_cli()
            self.smbo_solver = self.optimizer.solver
Zejun Lin's avatar
Zejun Lin committed
154
            self.loguniform_key = {key for key in search_space.keys() if search_space[key]['_type'] == 'loguniform'}
QuanluZhang's avatar
QuanluZhang committed
155
156
157
158
            self.update_ss_done = True
        else:
            self.logger.warning('update search space is not supported.')

159
    def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
        """
        Pass a trial's final result (reported through :func:`nni.report_final_result`) on to
        the wrapped SMAC solver.

        The scalar reward extracted from ``value`` is negated in maximize mode before it is
        handed over. The very first result goes through the solver's "first run" entry point;
        every later one goes through the regular one.

        Parameters
        ----------
        parameter_id : int
            Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
        parameters : dict
            Hyper-parameters generated by :meth:`generate_parameters`. Not used here; the
            configuration stored under ``parameter_id`` is used instead.
        value : dict
            Result from trial (the return value of :func:`nni.report_final_result`).

        Raises
        ------
        RuntimeError
            Received parameter id not in ``self.total_data``
        """
        cost = extract_scalar_reward(value)
        if self.optimize_mode is OptimizeMode.Maximize:
            cost = -cost

        if parameter_id not in self.total_data:
            raise RuntimeError('Received parameter_id not in total_data.')
        config = self.total_data[parameter_id]

        if not self.first_one:
            self.smbo_solver.nni_smac_receive_runs(config, cost)
            return

        self.smbo_solver.nni_smac_receive_first_run(config, cost)
        self.first_one = False

Yuge Zhang's avatar
Yuge Zhang committed
190
191
    def param_postprocess(self, challenger_dict):
        """
192
193
194
195
196
        Postprocessing for a set of hyperparameters includes:
            1. Convert the values of type ``loguniform`` back to their initial range.
            2. Convert ``categorical``: categorical values in search space are changed to list of numbers before,
               those original values will be changed back in this function.

ShufanHuang's avatar
ShufanHuang committed
197
198
        Parameters
        ----------
199
        challenger_dict : dict
ShufanHuang's avatar
ShufanHuang committed
200
            challenger dict
201

ShufanHuang's avatar
ShufanHuang committed
202
203
204
        Returns
        -------
        dict
205
            dict which stores copy of challengers
ShufanHuang's avatar
ShufanHuang committed
206
        """
207
        converted_dict = {}
Zejun Lin's avatar
Zejun Lin committed
208
        for key, value in challenger_dict.items():
209
            # convert to loguniform
Zejun Lin's avatar
Zejun Lin committed
210
            if key in self.loguniform_key:
211
                converted_dict[key] = np.exp(challenger_dict[key])
212
            # convert categorical back to original value
213
            elif key in self.categorical_dict:
214
                idx = challenger_dict[key]
215
216
217
218
                converted_dict[key] = self.categorical_dict[key][idx]
            else:
                converted_dict[key] = value
        return converted_dict
Zejun Lin's avatar
Zejun Lin committed
219

220
    def generate_parameters(self, parameter_id, **kwargs):
221
222
223
224
        """
        Generate one instance of hyperparameters (i.e., one configuration).
        Get one from SMAC3's ``challengers``.

ShufanHuang's avatar
ShufanHuang committed
225
226
        Parameters
        ----------
227
228
229
230
231
        parameter_id : int
            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
        **kwargs
            Not used

ShufanHuang's avatar
ShufanHuang committed
232
233
        Returns
        -------
234
235
        dict
            One newly generated configuration
ShufanHuang's avatar
ShufanHuang committed
236
        """
QuanluZhang's avatar
QuanluZhang committed
237
238
239
        if self.first_one:
            init_challenger = self.smbo_solver.nni_smac_start()
            self.total_data[parameter_id] = init_challenger
Yuge Zhang's avatar
Yuge Zhang committed
240
            return self.param_postprocess(init_challenger.get_dictionary())
QuanluZhang's avatar
QuanluZhang committed
241
242
        else:
            challengers = self.smbo_solver.nni_smac_request_challengers()
243
            challengers_empty = True
QuanluZhang's avatar
QuanluZhang committed
244
            for challenger in challengers:
245
246
247
248
249
250
                challengers_empty = False
                if self.dedup:
                    match = [v for k, v in self.total_data.items() \
                             if v.get_dictionary() == challenger.get_dictionary()]
                    if match:
                        continue
QuanluZhang's avatar
QuanluZhang committed
251
                self.total_data[parameter_id] = challenger
Yuge Zhang's avatar
Yuge Zhang committed
252
                return self.param_postprocess(challenger.get_dictionary())
253
254
255
            assert challengers_empty is False, 'The case that challengers is empty is not handled.'
            self.logger.info('In generate_parameters: No more new parameters.')
            raise nni.NoMoreTrialError('No more new parameters.')
QuanluZhang's avatar
QuanluZhang committed
256

257
    def generate_multiple_parameters(self, parameter_id_list, **kwargs):
258
259
260
261
262
        """
        Generate mutiple instances of hyperparameters. If it is a first request,
        retrieve the instances from initial challengers. While if it is not, request
        new challengers and retrieve instances from the requested challengers.

ShufanHuang's avatar
ShufanHuang committed
263
264
        Parameters
        ----------
265
266
267
268
269
270
        parameter_id_list: list of int
            Unique identifiers for each set of requested hyper-parameters.
            These will later be used in :meth:`receive_trial_result`.
        **kwargs
            Not used

ShufanHuang's avatar
ShufanHuang committed
271
272
273
        Returns
        -------
        list
274
            a list of newly generated configurations
ShufanHuang's avatar
ShufanHuang committed
275
        """
QuanluZhang's avatar
QuanluZhang committed
276
277
278
279
280
        if self.first_one:
            params = []
            for one_id in parameter_id_list:
                init_challenger = self.smbo_solver.nni_smac_start()
                self.total_data[one_id] = init_challenger
Yuge Zhang's avatar
Yuge Zhang committed
281
                params.append(self.param_postprocess(init_challenger.get_dictionary()))
QuanluZhang's avatar
QuanluZhang committed
282
283
284
285
286
287
288
        else:
            challengers = self.smbo_solver.nni_smac_request_challengers()
            cnt = 0
            params = []
            for challenger in challengers:
                if cnt >= len(parameter_id_list):
                    break
289
290
291
292
293
                if self.dedup:
                    match = [v for k, v in self.total_data.items() \
                             if v.get_dictionary() == challenger.get_dictionary()]
                    if match:
                        continue
QuanluZhang's avatar
QuanluZhang committed
294
                self.total_data[parameter_id_list[cnt]] = challenger
Yuge Zhang's avatar
Yuge Zhang committed
295
                params.append(self.param_postprocess(challenger.get_dictionary()))
QuanluZhang's avatar
QuanluZhang committed
296
                cnt += 1
297
298
            if self.dedup and not params:
                self.logger.info('In generate_multiple_parameters: No more new parameters.')
QuanluZhang's avatar
QuanluZhang committed
299
        return params
300
301

    def import_data(self, data):
Yuge Zhang's avatar
Yuge Zhang committed
302
        """
303
304
        Import additional data for tuning.

QuanluZhang's avatar
QuanluZhang committed
305
306
        Parameters
        ----------
307
308
        data : list of dict
            Each of which has at least two keys, ``parameter`` and ``value``.
QuanluZhang's avatar
QuanluZhang committed
309
310
311
312
313
314
315
316
317
318
319
320
        """
        _completed_num = 0
        for trial_info in data:
            self.logger.info("Importing data, current processing progress %s / %s", _completed_num, len(data))
            # simply validate data format
            assert "parameter" in trial_info
            _params = trial_info["parameter"]
            assert "value" in trial_info
            _value = trial_info['value']
            if not _value:
                self.logger.info("Useless trial data, value is %s, skip this trial data.", _value)
                continue
321
            _value = extract_scalar_reward(_value)
QuanluZhang's avatar
QuanluZhang committed
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
            # convert the keys in loguniform and categorical types
            valid_entry = True
            for key, value in _params.items():
                if key in self.loguniform_key:
                    _params[key] = np.log(value)
                elif key in self.categorical_dict:
                    if value in self.categorical_dict[key]:
                        _params[key] = self.categorical_dict[key].index(value)
                    else:
                        self.logger.info("The value %s of key %s is not in search space.", str(value), key)
                        valid_entry = False
                        break
            if not valid_entry:
                continue
            # start import this data entry
            _completed_num += 1
            config = Configuration(self.cs, values=_params)
            if self.optimize_mode is OptimizeMode.Maximize:
                _value = -_value
            if self.first_one:
                self.smbo_solver.nni_smac_receive_first_run(config, _value)
                self.first_one = False
            else:
                self.smbo_solver.nni_smac_receive_runs(config, _value)
        self.logger.info("Successfully import data to smac tuner, total data: %d, imported data: %d.", len(data), _completed_num)