Commit ca85b679 authored by kenmatsu4's avatar kenmatsu4 Committed by Tsukasa OMOTO
Browse files

[python] add flag of displaying train loss for lgb.cv() (#2089)

* [python] displaying train loss during training with lgb.cv

* modifying only display running type when disp_train_loss==True

* Add test for display train loss

* del .idea files

* Rename disp_train_loss to show_train_loss and revise comment.

* Change arg name show_train_loss -> eval_train_metric, and add a test item.

* Modifying comment of eval_train_metric.
parent 8ffd8d80
...@@ -267,7 +267,8 @@ class _CVBooster(object): ...@@ -267,7 +267,8 @@ class _CVBooster(object):
return handler_function return handler_function
def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True, shuffle=True): def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True,
shuffle=True, eval_train_metric=False):
"""Make a n-fold list of Booster from random indices.""" """Make a n-fold list of Booster from random indices."""
full_data = full_data.construct() full_data = full_data.construct()
num_data = full_data.num_data() num_data = full_data.num_data()
...@@ -317,19 +318,25 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -317,19 +318,25 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
else: else:
tparam = params tparam = params
cvbooster = Booster(tparam, train_set) cvbooster = Booster(tparam, train_set)
if eval_train_metric:
cvbooster.add_valid(train_set, 'train')
cvbooster.add_valid(valid_set, 'valid') cvbooster.add_valid(valid_set, 'valid')
ret.append(cvbooster) ret.append(cvbooster)
return ret return ret
def _agg_cv_result(raw_results): def _agg_cv_result(raw_results, eval_train_metric=False):
"""Aggregate cross-validation results.""" """Aggregate cross-validation results."""
cvmap = collections.defaultdict(list) cvmap = collections.defaultdict(list)
metric_type = {} metric_type = {}
for one_result in raw_results: for one_result in raw_results:
for one_line in one_result: for one_line in one_result:
metric_type[one_line[1]] = one_line[3] if eval_train_metric:
cvmap[one_line[1]].append(one_line[2]) key = "{} {}".format(one_line[0], one_line[1])
else:
key = one_line[1]
metric_type[key] = one_line[3]
cvmap[key].append(one_line[2])
return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()] return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]
...@@ -339,7 +346,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -339,7 +346,7 @@ def cv(params, train_set, num_boost_round=100,
feature_name='auto', categorical_feature='auto', feature_name='auto', categorical_feature='auto',
early_stopping_rounds=None, fpreproc=None, early_stopping_rounds=None, fpreproc=None,
verbose_eval=None, show_stdv=True, seed=0, verbose_eval=None, show_stdv=True, seed=0,
callbacks=None): callbacks=None, eval_train_metric=False):
"""Perform the cross-validation with given paramaters. """Perform the cross-validation with given paramaters.
Parameters Parameters
...@@ -412,6 +419,9 @@ def cv(params, train_set, num_boost_round=100, ...@@ -412,6 +419,9 @@ def cv(params, train_set, num_boost_round=100,
callbacks : list of callables or None, optional (default=None) callbacks : list of callables or None, optional (default=None)
List of callback functions that are applied at each iteration. List of callback functions that are applied at each iteration.
See Callbacks in Python API for more information. See Callbacks in Python API for more information.
eval_train_metric : bool, optional (default=False)
Whether to display the train metric in progress.
The score of the metric is calculated again after each training step, so there is some impact on performance.
Returns Returns
------- -------
...@@ -459,7 +469,8 @@ def cv(params, train_set, num_boost_round=100, ...@@ -459,7 +469,8 @@ def cv(params, train_set, num_boost_round=100,
results = collections.defaultdict(list) results = collections.defaultdict(list)
cvfolds = _make_n_folds(train_set, folds=folds, nfold=nfold, cvfolds = _make_n_folds(train_set, folds=folds, nfold=nfold,
params=params, seed=seed, fpreproc=fpreproc, params=params, seed=seed, fpreproc=fpreproc,
stratified=stratified, shuffle=shuffle) stratified=stratified, shuffle=shuffle,
eval_train_metric=eval_train_metric)
# setup callbacks # setup callbacks
if callbacks is None: if callbacks is None:
...@@ -489,7 +500,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -489,7 +500,7 @@ def cv(params, train_set, num_boost_round=100,
end_iteration=num_boost_round, end_iteration=num_boost_round,
evaluation_result_list=None)) evaluation_result_list=None))
cvfolds.update(fobj=fobj) cvfolds.update(fobj=fobj)
res = _agg_cv_result(cvfolds.eval_valid(feval)) res = _agg_cv_result(cvfolds.eval_valid(feval), eval_train_metric)
for _, key, mean, _, std in res: for _, key, mean, _, std in res:
results[key + '-mean'].append(mean) results[key + '-mean'].append(mean)
results[key + '-stdv'].append(std) results[key + '-stdv'].append(std)
......
...@@ -465,6 +465,16 @@ class TestEngine(unittest.TestCase): ...@@ -465,6 +465,16 @@ class TestEngine(unittest.TestCase):
callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)]) callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
self.assertIn('l1-mean', cv_res) self.assertIn('l1-mean', cv_res)
self.assertEqual(len(cv_res['l1-mean']), 10) self.assertEqual(len(cv_res['l1-mean']), 10)
# enable display training loss
cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
nfold=3, stratified=False, shuffle=False,
metrics='l1', verbose_eval=False, eval_train_metric=True)
self.assertIn('train l1-mean', cv_res)
self.assertIn('valid l1-mean', cv_res)
self.assertNotIn('train l2-mean', cv_res)
self.assertNotIn('valid l2-mean', cv_res)
self.assertEqual(len(cv_res['train l1-mean']), 10)
self.assertEqual(len(cv_res['valid l1-mean']), 10)
# self defined folds # self defined folds
tss = TimeSeriesSplit(3) tss = TimeSeriesSplit(3)
folds = tss.split(X_train) folds = tss.split(X_train)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment