Commit e29ab9f6 authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

fix reset parameter; re-define CVBooster (#166)

* fix reset parameter

* redefine CVBooster

* env.model won't be None

* update env.params
parent 96d08f42
......@@ -20,7 +20,7 @@ class EarlyStopException(Exception):
CallbackEnv = collections.namedtuple(
"LightGBMCallbackEnv",
["model",
"cvfolds",
"params",
"iteration",
"begin_iteration",
"end_iteration",
......@@ -121,13 +121,21 @@ def reset_parameter(**kwargs):
"""
def callback(env):
"""internal function"""
new_parameters = {}
for key, value in kwargs.items():
if key in ['num_class', 'boosting_type', 'metric']:
raise RuntimeError("cannot reset {} during training".format(repr(key)))
if isinstance(value, list):
if len(value) != env.end_iteration - env.begin_iteration:
raise ValueError("Length of list {} has to equal to 'num_boost_round'.".format(repr(key)))
env.model.reset_parameter({key: value[env.iteration - env.begin_iteration]})
new_param = value[env.iteration - env.begin_iteration]
else:
env.model.reset_parameter({key: value(env.iteration - env.begin_iteration)})
new_param = value(env.iteration - env.begin_iteration)
if new_param != env.params.get(key, None):
new_parameters[key] = new_param
if new_parameters:
env.model.reset_parameter(new_parameters)
env.params.update(new_parameters)
callback.before_iteration = True
callback.order = 10
return callback
......@@ -190,7 +198,6 @@ def early_stopping(stopping_rounds, verbose=True):
)
else:
if env.iteration - best_iter[i] >= stopping_rounds:
if env.model is not None:
env.model.set_attr(best_iteration=str(best_iter[i]))
if verbose:
print('Early stopping, best iteration is:')
......
......@@ -164,7 +164,7 @@ def train(params, train_set, num_boost_round=100,
for i in range(init_iteration, init_iteration + num_boost_round):
for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=booster,
cvfolds=None,
params=params,
iteration=i,
begin_iteration=init_iteration,
end_iteration=init_iteration + num_boost_round,
......@@ -181,7 +181,7 @@ def train(params, train_set, num_boost_round=100,
try:
for cb in callbacks_after_iter:
cb(callback.CallbackEnv(model=booster,
cvfolds=None,
params=params,
iteration=i,
begin_iteration=init_iteration,
end_iteration=init_iteration + num_boost_round,
......@@ -196,22 +196,23 @@ def train(params, train_set, num_boost_round=100,
class CVBooster(object):
""""Auxiliary datastruct to hold one fold of CV."""
def __init__(self, train_set, valid_test, params):
""""Initialize the CVBooster"""
self.train_set = train_set
self.valid_test = valid_test
self.booster = Booster(params=params, train_set=train_set)
self.booster.add_valid(valid_test, 'valid')
def update(self, fobj):
""""Update the boosters for one iteration"""
self.booster.update(fobj=fobj)
def eval(self, feval):
""""Evaluate the CVBooster for one iteration."""
return self.booster.eval_valid(feval)
""""Auxiliary data struct to hold all boosters of CV."""
def __init__(self):
self.boosters = []
def append(self, booster):
"""add a booster to CVBooster"""
self.boosters.append(booster)
def __getattr__(self, name):
"""redirect methods call of CVBooster"""
def handlerFunction(*args, **kwargs):
"""call methods with each booster, and concatenate their results"""
ret = []
for booster in self.boosters:
ret.append(getattr(booster, name)(*args, **kwargs))
return ret
return handlerFunction
try:
from sklearn.model_selection import StratifiedKFold
......@@ -226,7 +227,7 @@ except ImportError:
def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True):
"""
Make an n-fold list of CVBooster from random indices.
Make an n-fold list of Booster from random indices.
"""
np.random.seed(seed)
if stratified:
......@@ -242,7 +243,7 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
kstep = int(len(randidx) / nfold)
idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)]
ret = []
ret = CVBooster()
for k in range(nfold):
train_set = full_data.subset(np.concatenate([idset[i] for i in range(nfold) if k != i]))
valid_set = full_data.subset(idset[k])
......@@ -251,7 +252,9 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
train_set, valid_set, tparam = fpreproc(train_set, valid_set, params.copy())
else:
tparam = params
ret.append(CVBooster(train_set, valid_set, tparam))
cvbooster = Booster(tparam, train_set)
cvbooster.add_valid(valid_set, 'valid')
ret.append(cvbooster)
return ret
......@@ -377,22 +380,21 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
for i in range(num_boost_round):
for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=None,
cvfolds=cvfolds,
cb(callback.CallbackEnv(model=cvfolds,
params=params,
iteration=i,
begin_iteration=0,
end_iteration=num_boost_round,
evaluation_result_list=None))
for fold in cvfolds:
fold.update(fobj)
res = _agg_cv_result([f.eval(feval) for f in cvfolds])
cvfolds.update(fobj)
res = _agg_cv_result(cvfolds.eval_valid(feval))
for _, key, mean, _, std in res:
results[key + '-mean'].append(mean)
results[key + '-stdv'].append(std)
try:
for cb in callbacks_after_iter:
cb(callback.CallbackEnv(model=None,
cvfolds=cvfolds,
cb(callback.CallbackEnv(model=cvfolds,
params=params,
iteration=i,
begin_iteration=0,
end_iteration=num_boost_round,
......
......@@ -113,7 +113,8 @@ class TestEngine(unittest.TestCase):
def test_cv(self):
lgb_train, _ = test_template(return_data=True)
lgb.cv({'verbose': 0}, lgb_train, num_boost_round=20, nfold=5,
metrics='l1', verbose_eval=False)
metrics='l1', verbose_eval=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
def test_save_load_copy_pickle(self):
gbm = test_template(num_round=20, return_model=True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment