Commit e29ab9f6 authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

fix reset parameter; re-define CVBooster (#166)

* fix reset parameter

* redefine CVBooster

* env.model won't be None

* update env.params
parent 96d08f42
...@@ -20,7 +20,7 @@ class EarlyStopException(Exception): ...@@ -20,7 +20,7 @@ class EarlyStopException(Exception):
CallbackEnv = collections.namedtuple( CallbackEnv = collections.namedtuple(
"LightGBMCallbackEnv", "LightGBMCallbackEnv",
["model", ["model",
"cvfolds", "params",
"iteration", "iteration",
"begin_iteration", "begin_iteration",
"end_iteration", "end_iteration",
...@@ -121,13 +121,21 @@ def reset_parameter(**kwargs): ...@@ -121,13 +121,21 @@ def reset_parameter(**kwargs):
""" """
def callback(env):
    """Reset boosting parameters before the current iteration.

    For each configured parameter, pick this iteration's value — either
    by indexing a per-iteration list schedule or by calling a function of
    the iteration offset — and push only the values that actually differ
    from ``env.params`` into the model.
    """
    iteration_offset = env.iteration - env.begin_iteration
    num_rounds = env.end_iteration - env.begin_iteration
    updates = {}
    for key, value in kwargs.items():
        # These parameters define the model structure and cannot change mid-training.
        if key in ['num_class', 'boosting_type', 'metric']:
            raise RuntimeError("cannot reset {} during training".format(repr(key)))
        if isinstance(value, list):
            if len(value) != num_rounds:
                raise ValueError("Length of list {} has to equal to 'num_boost_round'.".format(repr(key)))
            candidate = value[iteration_offset]
        else:
            candidate = value(iteration_offset)
        # Skip no-op resets: only apply values that changed since last time.
        if candidate != env.params.get(key, None):
            updates[key] = candidate
    if updates:
        env.model.reset_parameter(updates)
        env.params.update(updates)
callback.before_iteration = True callback.before_iteration = True
callback.order = 10 callback.order = 10
return callback return callback
...@@ -190,7 +198,6 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -190,7 +198,6 @@ def early_stopping(stopping_rounds, verbose=True):
) )
else: else:
if env.iteration - best_iter[i] >= stopping_rounds: if env.iteration - best_iter[i] >= stopping_rounds:
if env.model is not None:
env.model.set_attr(best_iteration=str(best_iter[i])) env.model.set_attr(best_iteration=str(best_iter[i]))
if verbose: if verbose:
print('Early stopping, best iteration is:') print('Early stopping, best iteration is:')
......
...@@ -164,7 +164,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -164,7 +164,7 @@ def train(params, train_set, num_boost_round=100,
for i in range(init_iteration, init_iteration + num_boost_round): for i in range(init_iteration, init_iteration + num_boost_round):
for cb in callbacks_before_iter: for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=booster, cb(callback.CallbackEnv(model=booster,
cvfolds=None, params=params,
iteration=i, iteration=i,
begin_iteration=init_iteration, begin_iteration=init_iteration,
end_iteration=init_iteration + num_boost_round, end_iteration=init_iteration + num_boost_round,
...@@ -181,7 +181,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -181,7 +181,7 @@ def train(params, train_set, num_boost_round=100,
try: try:
for cb in callbacks_after_iter: for cb in callbacks_after_iter:
cb(callback.CallbackEnv(model=booster, cb(callback.CallbackEnv(model=booster,
cvfolds=None, params=params,
iteration=i, iteration=i,
begin_iteration=init_iteration, begin_iteration=init_iteration,
end_iteration=init_iteration + num_boost_round, end_iteration=init_iteration + num_boost_round,
...@@ -196,22 +196,23 @@ def train(params, train_set, num_boost_round=100, ...@@ -196,22 +196,23 @@ def train(params, train_set, num_boost_round=100,
class CVBooster(object):
    """Auxiliary data structure holding all per-fold boosters of a CV run.

    Any attribute access that is not defined on this class is turned into
    a dispatcher: calling it invokes the method of the same name on every
    stored booster and returns the per-fold results as a list.
    """

    def __init__(self):
        # Boosters are kept in fold order.
        self.boosters = []

    def append(self, booster):
        """Add one fold's booster to the collection."""
        self.boosters.append(booster)

    def __getattr__(self, name):
        """Return a dispatcher that forwards *name* to every booster."""
        def _dispatch(*args, **kwargs):
            """Call ``name`` on each booster and collect the results."""
            return [getattr(booster, name)(*args, **kwargs)
                    for booster in self.boosters]
        return _dispatch
try: try:
from sklearn.model_selection import StratifiedKFold from sklearn.model_selection import StratifiedKFold
...@@ -226,7 +227,7 @@ except ImportError: ...@@ -226,7 +227,7 @@ except ImportError:
def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True): def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True):
""" """
Make an n-fold list of CVBooster from random indices. Make an n-fold list of Booster from random indices.
""" """
np.random.seed(seed) np.random.seed(seed)
if stratified: if stratified:
...@@ -242,7 +243,7 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals ...@@ -242,7 +243,7 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
kstep = int(len(randidx) / nfold) kstep = int(len(randidx) / nfold)
idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)] idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)]
ret = [] ret = CVBooster()
for k in range(nfold): for k in range(nfold):
train_set = full_data.subset(np.concatenate([idset[i] for i in range(nfold) if k != i])) train_set = full_data.subset(np.concatenate([idset[i] for i in range(nfold) if k != i]))
valid_set = full_data.subset(idset[k]) valid_set = full_data.subset(idset[k])
...@@ -251,7 +252,9 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals ...@@ -251,7 +252,9 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
train_set, valid_set, tparam = fpreproc(train_set, valid_set, params.copy()) train_set, valid_set, tparam = fpreproc(train_set, valid_set, params.copy())
else: else:
tparam = params tparam = params
ret.append(CVBooster(train_set, valid_set, tparam)) cvbooster = Booster(tparam, train_set)
cvbooster.add_valid(valid_set, 'valid')
ret.append(cvbooster)
return ret return ret
...@@ -377,22 +380,21 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, ...@@ -377,22 +380,21 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
for i in range(num_boost_round): for i in range(num_boost_round):
for cb in callbacks_before_iter: for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=None, cb(callback.CallbackEnv(model=cvfolds,
cvfolds=cvfolds, params=params,
iteration=i, iteration=i,
begin_iteration=0, begin_iteration=0,
end_iteration=num_boost_round, end_iteration=num_boost_round,
evaluation_result_list=None)) evaluation_result_list=None))
for fold in cvfolds: cvfolds.update(fobj)
fold.update(fobj) res = _agg_cv_result(cvfolds.eval_valid(feval))
res = _agg_cv_result([f.eval(feval) for f in cvfolds])
for _, key, mean, _, std in res: for _, key, mean, _, std in res:
results[key + '-mean'].append(mean) results[key + '-mean'].append(mean)
results[key + '-stdv'].append(std) results[key + '-stdv'].append(std)
try: try:
for cb in callbacks_after_iter: for cb in callbacks_after_iter:
cb(callback.CallbackEnv(model=None, cb(callback.CallbackEnv(model=cvfolds,
cvfolds=cvfolds, params=params,
iteration=i, iteration=i,
begin_iteration=0, begin_iteration=0,
end_iteration=num_boost_round, end_iteration=num_boost_round,
......
...@@ -113,7 +113,8 @@ class TestEngine(unittest.TestCase): ...@@ -113,7 +113,8 @@ class TestEngine(unittest.TestCase):
def test_cv(self):
    """Smoke-test lgb.cv with a decaying learning-rate reset callback."""
    lgb_train, _ = test_template(return_data=True)
    lr_decay = lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)
    lgb.cv({'verbose': 0}, lgb_train, num_boost_round=20, nfold=5,
           metrics='l1', verbose_eval=False, callbacks=[lr_decay])
def test_save_load_copy_pickle(self): def test_save_load_copy_pickle(self):
gbm = test_template(num_round=20, return_model=True) gbm = test_template(num_round=20, return_model=True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment