Unverified Commit 26b71c40 authored by J-shang's avatar J-shang Committed by GitHub
Browse files

Hyperband Refactor (#3040)

parent a3108caf
authorName: default authorName: default
experimentName: auto_rocksdb_SMAC experimentName: auto_rocksdb_SMAC
trialConcurrency: 1 trialConcurrency: 1
maxExecDuration: 12h maxExecDuration: 12h
maxTrialNum: 256 maxTrialNum: 256
#choice: local, remote, pai #choice: local, remote, pai
trainingServicePlatform: local trainingServicePlatform: local
searchSpacePath: search_space.json searchSpacePath: search_space.json
#choice: true, false #choice: true, false
useAnnotation: false useAnnotation: false
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl) #SMAC (SMAC should be installed through nnictl)
builtinTunerName: SMAC builtinTunerName: SMAC
classArgs: classArgs:
#choice: maximize, minimize #choice: maximize, minimize
optimize_mode: maximize optimize_mode: maximize
trial: trial:
command: python3 main.py command: python3 main.py
codeDir: . codeDir: .
gpuNum: 0 gpuNum: 0
authorName: default authorName: default
experimentName: auto_rocksdb_TPE experimentName: auto_rocksdb_TPE
trialConcurrency: 1 trialConcurrency: 1
maxExecDuration: 12h maxExecDuration: 12h
maxTrialNum: 256 maxTrialNum: 256
#choice: local, remote, pai #choice: local, remote, pai
trainingServicePlatform: local trainingServicePlatform: local
searchSpacePath: search_space.json searchSpacePath: search_space.json
#choice: true, false #choice: true, false
useAnnotation: false useAnnotation: false
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl) #SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE builtinTunerName: TPE
classArgs: classArgs:
#choice: maximize, minimize #choice: maximize, minimize
optimize_mode: maximize optimize_mode: maximize
trial: trial:
command: python3 main.py command: python3 main.py
codeDir: . codeDir: .
gpuNum: 0 gpuNum: 0
{ {
"write_buffer_size": { "write_buffer_size": {
"_type": "quniform", "_type": "quniform",
"_value": [2097152, 16777216, 1048576] "_value": [2097152, 16777216, 1048576]
}, },
"min_write_buffer_number_to_merge": { "min_write_buffer_number_to_merge": {
"_type": "quniform", "_type": "quniform",
"_value": [2, 16, 1] "_value": [2, 16, 1]
}, },
"level0_file_num_compaction_trigger": { "level0_file_num_compaction_trigger": {
"_type": "quniform", "_type": "quniform",
"_value": [2, 16, 1] "_value": [2, 16, 1]
} }
} }
...@@ -46,7 +46,7 @@ def create_bracket_parameter_id(brackets_id, brackets_curr_decay, increased_id=- ...@@ -46,7 +46,7 @@ def create_bracket_parameter_id(brackets_id, brackets_curr_decay, increased_id=-
Parameters Parameters
---------- ----------
brackets_id: int brackets_id: string
brackets id brackets id
brackets_curr_decay: brackets_curr_decay:
brackets curr decay brackets curr decay
...@@ -60,7 +60,7 @@ def create_bracket_parameter_id(brackets_id, brackets_curr_decay, increased_id=- ...@@ -60,7 +60,7 @@ def create_bracket_parameter_id(brackets_id, brackets_curr_decay, increased_id=-
""" """
if increased_id == -1: if increased_id == -1:
increased_id = str(create_parameter_id()) increased_id = str(create_parameter_id())
params_id = '_'.join([str(brackets_id), params_id = '_'.join([brackets_id,
str(brackets_curr_decay), str(brackets_curr_decay),
increased_id]) increased_id])
return params_id return params_id
...@@ -108,6 +108,8 @@ class Bracket(): ...@@ -108,6 +108,8 @@ class Bracket():
Parameters Parameters
---------- ----------
bracket_id: string
The id of this bracket, usually set to '{Hyperband index}-{SH iteration index}'
s: int s: int
The current SH iteration index. The current SH iteration index.
s_max: int s_max: int
...@@ -122,8 +124,9 @@ class Bracket(): ...@@ -122,8 +124,9 @@ class Bracket():
optimize mode, 'maximize' or 'minimize' optimize mode, 'maximize' or 'minimize'
""" """
def __init__(self, s, s_max, eta, R, optimize_mode): def __init__(self, bracket_id, s, s_max, eta, R, optimize_mode):
self.bracket_id = s self.bracket_id = bracket_id
self.s = s
self.s_max = s_max self.s_max = s_max
self.eta = eta self.eta = eta
self.n = math.ceil((s_max + 1) * (eta ** s) / (s + 1) - _epsilon) self.n = math.ceil((s_max + 1) * (eta ** s) / (s + 1) - _epsilon)
...@@ -147,7 +150,7 @@ class Bracket(): ...@@ -147,7 +150,7 @@ class Bracket():
def increase_i(self): def increase_i(self):
"""i means the ith round. Increase i by 1""" """i means the ith round. Increase i by 1"""
self.i += 1 self.i += 1
if self.i > self.bracket_id: if self.i > self.s:
self.no_more_trial = True self.no_more_trial = True
def set_config_perf(self, i, parameter_id, seq, value): def set_config_perf(self, i, parameter_id, seq, value):
...@@ -256,13 +259,14 @@ class HyperbandClassArgsValidator(ClassArgsValidator): ...@@ -256,13 +259,14 @@ class HyperbandClassArgsValidator(ClassArgsValidator):
def validate_class_args(self, **kwargs): def validate_class_args(self, **kwargs):
Schema({ Schema({
'optimize_mode': self.choices('optimize_mode', 'maximize', 'minimize'), 'optimize_mode': self.choices('optimize_mode', 'maximize', 'minimize'),
Optional('exec_mode'): self.choices('exec_mode', 'serial', 'parallelism'),
Optional('R'): int, Optional('R'): int,
Optional('eta'): int Optional('eta'): int
}).validate(kwargs) }).validate(kwargs)
class Hyperband(MsgDispatcherBase): class Hyperband(MsgDispatcherBase):
"""Hyperband inherits from MsgDispatcherBase rather than Tuner, because it integrates both tuner's functions and assessor's functions. """Hyperband inherits from MsgDispatcherBase rather than Tuner, because it integrates both tuner's functions and assessor's functions.
This is an implementation that could fully leverage available resources, i.e., high parallelism. This is an implementation that could fully leverage available resources or follow the algorithm process, i.e., high parallelism or serial.
A single execution of Hyperband takes a finite budget of (s_max + 1)B. A single execution of Hyperband takes a finite budget of (s_max + 1)B.
Parameters Parameters
...@@ -273,9 +277,11 @@ class Hyperband(MsgDispatcherBase): ...@@ -273,9 +277,11 @@ class Hyperband(MsgDispatcherBase):
the variable that controls the proportion of configurations discarded in each round of SuccessiveHalving the variable that controls the proportion of configurations discarded in each round of SuccessiveHalving
optimize_mode: str optimize_mode: str
optimize mode, 'maximize' or 'minimize' optimize mode, 'maximize' or 'minimize'
exec_mode: str
execution mode, 'serial' or 'parallelism'
""" """
def __init__(self, R=60, eta=3, optimize_mode='maximize'): def __init__(self, R=60, eta=3, optimize_mode='maximize', exec_mode='parallelism'):
"""B = (s_max + 1)R""" """B = (s_max + 1)R"""
super(Hyperband, self).__init__() super(Hyperband, self).__init__()
self.R = R self.R = R
...@@ -285,6 +291,9 @@ class Hyperband(MsgDispatcherBase): ...@@ -285,6 +291,9 @@ class Hyperband(MsgDispatcherBase):
self.completed_hyper_configs = [] # all the completed configs self.completed_hyper_configs = [] # all the completed configs
self.s_max = math.floor(math.log(self.R, self.eta) + _epsilon) self.s_max = math.floor(math.log(self.R, self.eta) + _epsilon)
self.curr_s = self.s_max self.curr_s = self.s_max
self.curr_hb = 0
self.exec_mode = exec_mode
self.curr_bracket_id = None
self.searchspace_json = None self.searchspace_json = None
self.random_state = None self.random_state = None
...@@ -316,25 +325,44 @@ class Hyperband(MsgDispatcherBase): ...@@ -316,25 +325,44 @@ class Hyperband(MsgDispatcherBase):
data: int data: int
number of trial jobs number of trial jobs
""" """
for _ in range(data): self.credit += data
ret = self._get_one_trial_job()
for _ in range(self.credit):
self._request_one_trial_job()
def _request_one_trial_job(self):
ret = self._get_one_trial_job()
if ret is not None:
send(CommandType.NewTrialJob, json_tricks.dumps(ret)) send(CommandType.NewTrialJob, json_tricks.dumps(ret))
self.credit -= 1
def _get_one_trial_job(self): def _get_one_trial_job(self):
"""get one trial job, i.e., one hyperparameter configuration.""" """get one trial job, i.e., one hyperparameter configuration."""
if not self.generated_hyper_configs: if not self.generated_hyper_configs:
if self.curr_s < 0: if self.exec_mode == 'parallelism' or \
self.curr_s = self.s_max (self.exec_mode == 'serial' and (self.curr_bracket_id is None or self.brackets[self.curr_bracket_id].is_completed())):
_logger.debug('create a new bracket, self.curr_s=%d', self.curr_s) if self.curr_s < 0:
self.brackets[self.curr_s] = Bracket(self.curr_s, self.s_max, self.eta, self.R, self.optimize_mode) self.curr_s = self.s_max
next_n, next_r = self.brackets[self.curr_s].get_n_r() self.curr_hb += 1
_logger.debug('new bracket, next_n=%d, next_r=%d', next_n, next_r) _logger.debug('create a new bracket, self.curr_hb=%d, self.curr_s=%d', self.curr_hb, self.curr_s)
assert self.searchspace_json is not None and self.random_state is not None self.curr_bracket_id = '{}-{}'.format(self.curr_hb, self.curr_s)
generated_hyper_configs = self.brackets[self.curr_s].get_hyperparameter_configurations(next_n, next_r, self.brackets[self.curr_bracket_id] = Bracket(self.curr_bracket_id, self.curr_s, self.s_max, self.eta, self.R, self.optimize_mode)
self.searchspace_json, next_n, next_r = self.brackets[self.curr_bracket_id].get_n_r()
self.random_state) _logger.debug('new bracket, next_n=%d, next_r=%d', next_n, next_r)
self.generated_hyper_configs = generated_hyper_configs.copy() assert self.searchspace_json is not None and self.random_state is not None
self.curr_s -= 1 generated_hyper_configs = self.brackets[self.curr_bracket_id].get_hyperparameter_configurations(next_n, next_r,
self.searchspace_json,
self.random_state)
self.generated_hyper_configs = generated_hyper_configs.copy()
self.curr_s -= 1
else:
ret = {
'parameter_id': '-1_0_0',
'parameter_source': 'algorithm',
'parameters': ''
}
send(CommandType.NoMoreTrialJobs, json_tricks.dumps(ret))
return None
assert self.generated_hyper_configs assert self.generated_hyper_configs
params = self.generated_hyper_configs.pop(0) params = self.generated_hyper_configs.pop(0)
...@@ -358,10 +386,12 @@ class Hyperband(MsgDispatcherBase): ...@@ -358,10 +386,12 @@ class Hyperband(MsgDispatcherBase):
parameter_id: parameter id of the finished config parameter_id: parameter id of the finished config
""" """
bracket_id, i, _ = parameter_id.split('_') bracket_id, i, _ = parameter_id.split('_')
hyper_configs = self.brackets[int(bracket_id)].inform_trial_end(int(i)) hyper_configs = self.brackets[bracket_id].inform_trial_end(int(i))
if hyper_configs is not None: if hyper_configs is not None:
_logger.debug('bracket %s next round %s, hyper_configs: %s', bracket_id, i, hyper_configs) _logger.debug('bracket %s next round %s, hyper_configs: %s', bracket_id, i, hyper_configs)
self.generated_hyper_configs = self.generated_hyper_configs + hyper_configs self.generated_hyper_configs = self.generated_hyper_configs + hyper_configs
for _ in range(self.credit):
self._request_one_trial_job()
def handle_trial_end(self, data): def handle_trial_end(self, data):
""" """
...@@ -392,6 +422,7 @@ class Hyperband(MsgDispatcherBase): ...@@ -392,6 +422,7 @@ class Hyperband(MsgDispatcherBase):
""" """
if 'value' in data: if 'value' in data:
data['value'] = json_tricks.loads(data['value']) data['value'] = json_tricks.loads(data['value'])
# multiphase? need to check
if data['type'] == MetricType.REQUEST_PARAMETER: if data['type'] == MetricType.REQUEST_PARAMETER:
assert multi_phase_enabled() assert multi_phase_enabled()
assert data['trial_job_id'] is not None assert data['trial_job_id'] is not None
...@@ -408,7 +439,6 @@ class Hyperband(MsgDispatcherBase): ...@@ -408,7 +439,6 @@ class Hyperband(MsgDispatcherBase):
else: else:
value = extract_scalar_reward(data['value']) value = extract_scalar_reward(data['value'])
bracket_id, i, _ = data['parameter_id'].split('_') bracket_id, i, _ = data['parameter_id'].split('_')
bracket_id = int(bracket_id)
# add <trial_job_id, parameter_id> to self.job_id_para_id_map here, # add <trial_job_id, parameter_id> to self.job_id_para_id_map here,
# because when the first parameter_id is created, trial_job_id is not known yet. # because when the first parameter_id is created, trial_job_id is not known yet.
......
...@@ -11,6 +11,7 @@ advisor: ...@@ -11,6 +11,7 @@ advisor:
optimize_mode: maximize optimize_mode: maximize
R: 60 R: 60
eta: 3 eta: 3
exec_mode: parallelism
trial: trial:
codeDir: ../../../examples/trials/mnist-advisor codeDir: ../../../examples/trials/mnist-advisor
command: python3 mnist.py command: python3 mnist.py
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment