Unverified Commit f91e5644 authored by Nikita Titov, committed by GitHub

[python] added ability to pass first_metric_only in params (#2175)

* added ability to pass first_metric_only in params

* simplified tests

* fixed test

* fixed punctuation
parent 2a369170
@@ -207,7 +207,7 @@ Note that ``train()`` will return a model from the best iteration.
 This works with both metrics to minimize (L2, log loss, etc.) and to maximize (NDCG, AUC, etc.).
 Note that if you specify more than one evaluation metric, all of them will be used for early stopping.
-However, you can change this behavior and make LightGBM check only the first metric for early stopping by creating ``early_stopping`` callback with ``first_metric_only=True``.
+However, you can change this behavior and make LightGBM check only the first metric for early stopping by passing ``first_metric_only=True`` in ``param`` or in the ``early_stopping`` callback constructor.

 Prediction
 ----------
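For context, a minimal sketch of the two equivalent routes the new wording describes; the synthetic data and parameter values below are illustrative, not part of the PR:

```python
import numpy as np
import lightgbm as lgb

X, y = np.random.rand(100, 5), np.random.rand(100)
train_data = lgb.Dataset(X[:80], label=y[:80])
valid_data = lgb.Dataset(X[80:], label=y[80:], reference=train_data)

# Route 1 (new in this PR): pass the flag directly in the params dict.
params = {'objective': 'regression', 'metric': ['l2', 'l1'],
          'first_metric_only': True}  # only 'l2' is checked for early stopping
bst = lgb.train(params, train_data, num_boost_round=100,
                valid_sets=[valid_data], early_stopping_rounds=5)

# Route 2 (already supported): pass it to the early_stopping callback.
bst = lgb.train({'objective': 'regression', 'metric': ['l2', 'l1']},
                train_data, num_boost_round=100, valid_sets=[valid_data],
                callbacks=[lgb.early_stopping(5, first_metric_only=True)])
```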
@@ -66,8 +66,7 @@ def train(params, train_set, num_boost_round=100,
         to continue training.
         Requires at least one validation data and one metric.
         If there's more than one, will check all of them. But the training data is ignored anyway.
-        To check only the first metric you can pass in ``callbacks``
-        ``early_stopping`` callback with ``first_metric_only=True``.
+        To check only the first metric, set the ``first_metric_only`` parameter to ``True`` in ``params``.
         The index of iteration that has the best performance will be saved in the ``best_iteration`` field
         if early stopping logic is enabled by setting ``early_stopping_rounds``.
     evals_result: dict or None, optional (default=None)
@@ -116,14 +115,15 @@ def train(params, train_set, num_boost_round=100,
     for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
                   "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
         if alias in params:
-            num_boost_round = int(params.pop(alias))
+            num_boost_round = params.pop(alias)
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             break
     for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping"]:
-        if alias in params and params[alias] is not None:
-            early_stopping_rounds = int(params.pop(alias))
+        if alias in params:
+            early_stopping_rounds = params.pop(alias)
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             break
+    first_metric_only = params.pop('first_metric_only', False)
     if num_boost_round <= 0:
         raise ValueError("num_boost_round should be greater than zero.")
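The hunk above follows a single pattern: the first matching alias found in ``params`` wins, is removed with ``pop`` so it is not forwarded to the booster twice, and overrides the keyword argument. A condensed sketch of that pattern; ``resolve_alias`` is a hypothetical helper for illustration, not a LightGBM function:

```python
import warnings

def resolve_alias(params, aliases, default):
    # First alias present in params wins; it is popped out and
    # a warning mirrors the one emitted by lgb.train().
    for alias in aliases:
        if alias in params:
            warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
            return params.pop(alias)
    return default

params = {'early_stopping_rounds': 10, 'first_metric_only': True}
early_stopping_rounds = resolve_alias(
    params, ["early_stopping_round", "early_stopping_rounds", "early_stopping"], None)
first_metric_only = params.pop('first_metric_only', False)  # no aliases, plain pop
assert early_stopping_rounds == 10 and first_metric_only is True
```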
@@ -181,7 +181,7 @@ def train(params, train_set, num_boost_round=100,
         callbacks.add(callback.print_evaluation(verbose_eval))

     if early_stopping_rounds is not None:
-        callbacks.add(callback.early_stopping(early_stopping_rounds, verbose=bool(verbose_eval)))
+        callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=bool(verbose_eval)))

     if learning_rates is not None:
         callbacks.add(callback.reset_parameter(learning_rate=learning_rates))
@@ -400,8 +400,7 @@ def cv(params, train_set, num_boost_round=100,
         CV score needs to improve at least every ``early_stopping_rounds`` round(s)
         to continue.
         Requires at least one metric. If there's more than one, will check all of them.
-        To check only the first metric you can pass in ``callbacks``
-        ``early_stopping`` callback with ``first_metric_only=True``.
+        To check only the first metric, set the ``first_metric_only`` parameter to ``True`` in ``params``.
         Last entry in evaluation history is the one from the best iteration.
     fpreproc : callable or None, optional (default=None)
         Preprocessing function that takes (dtrain, dtest, params)
@@ -449,6 +448,7 @@ def cv(params, train_set, num_boost_round=100,
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             early_stopping_rounds = params.pop(alias)
             break
+    first_metric_only = params.pop('first_metric_only', False)

     if num_boost_round <= 0:
         raise ValueError("num_boost_round should be greater than zero.")
@@ -480,7 +480,7 @@ def cv(params, train_set, num_boost_round=100,
             cb.__dict__.setdefault('order', i - len(callbacks))
     callbacks = set(callbacks)
     if early_stopping_rounds is not None:
-        callbacks.add(callback.early_stopping(early_stopping_rounds, verbose=False))
+        callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=False))
     if verbose_eval is True:
         callbacks.add(callback.print_evaluation(show_stdv=show_stdv))
     elif isinstance(verbose_eval, integer_types):
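Because ``cv()`` pops the flag the same way, the params route also works for cross-validation; a hypothetical call with placeholder data:

```python
import numpy as np
import lightgbm as lgb

X, y = np.random.rand(100, 5), np.random.rand(100)
train_data = lgb.Dataset(X, label=y)

# 'first_metric_only' is popped from params and forwarded to the
# early_stopping callback that cv() creates internally.
cv_results = lgb.cv({'objective': 'regression', 'metric': ['l2', 'l1'],
                     'first_metric_only': True},
                    train_data, num_boost_round=100, nfold=3,
                    stratified=False, early_stopping_rounds=5)
```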
@@ -376,8 +376,8 @@ class LGBMModel(_LGBMModelBase):
             to continue training.
             Requires at least one validation data and one metric.
             If there's more than one, will check all of them. But the training data is ignored anyway.
-            To check only the first metric you can pass in ``callbacks``
-            ``early_stopping`` callback with ``first_metric_only=True``.
+            To check only the first metric, set the ``first_metric_only`` parameter to ``True``
+            in the additional parameters ``**kwargs`` of the model constructor.
         verbose : bool or int, optional (default=True)
             Requires at least one evaluation data.
             If True, the eval metric on the eval set is printed at each boosting stage.
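In the scikit-learn wrapper the flag is not a named constructor argument; it travels through the estimator's ``**kwargs`` into ``params`` and is then popped by ``train()``. A hypothetical example with placeholder data:

```python
import numpy as np
import lightgbm as lgb

X, y = np.random.rand(100, 5), np.random.rand(100)

# first_metric_only is collected by **kwargs of the constructor and
# ends up in the params dict handed to the underlying train() call.
model = lgb.LGBMRegressor(n_estimators=100, first_metric_only=True)
model.fit(X[:80], y[:80], eval_set=[(X[80:], y[80:])],
          eval_metric=['l2', 'l1'], early_stopping_rounds=5, verbose=False)
```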
@@ -21,7 +21,7 @@ class FileLoader(object):
                 if line and not line.startswith('#'):
                     key, value = [token.strip() for token in line.split('=')]
                     if 'early_stopping' not in key:  # disable early_stopping
-                        self.params[key] = value
+                        self.params[key] = value if key != 'num_trees' else int(value)

     def load_dataset(self, suffix, is_sparse=False):
         filename = self.path(suffix)
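The ``int()`` cast compensates for the engine.py change above: values parsed from the config file are strings, and ``train()`` no longer coerces the ``num_boost_round`` aliases itself. A small sketch of the parsing step:

```python
# A config line arrives as text, so the loader converts num_trees itself.
line = 'num_trees = 100'
key, value = [token.strip() for token in line.split('=')]
params = {key: value if key != 'num_trees' else int(value)}
assert params['num_trees'] == 100  # int, not the string '100'
```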
@@ -1379,24 +1379,23 @@ class TestEngine(unittest.TestCase):
             return ('constant_metric', 0.0, False)
         # test that all metrics are checked (default behaviour)
-        early_stop_callback = lgb.early_stopping(5, verbose=False)
         gbm = lgb.train(params, lgb_train, num_boost_round=20, valid_sets=[lgb_eval],
                         feval=lambda preds, train_data: [decreasing_metric(preds, train_data),
                                                          constant_metric(preds, train_data)],
-                        callbacks=[early_stop_callback])
+                        early_stopping_rounds=5, verbose_eval=False)
         self.assertEqual(gbm.best_iteration, 1)

         # test that only the first metric is checked
-        early_stop_callback = lgb.early_stopping(5, first_metric_only=True, verbose=False)
-        gbm = lgb.train(params, lgb_train, num_boost_round=20, valid_sets=[lgb_eval],
+        gbm = lgb.train(dict(params, first_metric_only=True), lgb_train,
+                        num_boost_round=20, valid_sets=[lgb_eval],
                         feval=lambda preds, train_data: [decreasing_metric(preds, train_data),
                                                          constant_metric(preds, train_data)],
-                        callbacks=[early_stop_callback])
+                        early_stopping_rounds=5, verbose_eval=False)
         self.assertEqual(gbm.best_iteration, 20)

         # ... change the order of metrics
-        early_stop_callback = lgb.early_stopping(5, first_metric_only=True, verbose=False)
-        gbm = lgb.train(params, lgb_train, num_boost_round=20, valid_sets=[lgb_eval],
+        gbm = lgb.train(dict(params, first_metric_only=True), lgb_train,
+                        num_boost_round=20, valid_sets=[lgb_eval],
                         feval=lambda preds, train_data: [constant_metric(preds, train_data),
                                                          decreasing_metric(preds, train_data)],
-                        callbacks=[early_stop_callback])
+                        early_stopping_rounds=5, verbose_eval=False)
         self.assertEqual(gbm.best_iteration, 1)
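A note on the ``dict(params, first_metric_only=True)`` idiom in these tests: since ``train()`` now pops ``first_metric_only`` out of the dict it receives, passing a shallow copy keeps the shared ``params`` fixture unchanged between test cases. Sketch:

```python
params = {'objective': 'regression', 'metric': 'l2'}
run_params = dict(params, first_metric_only=True)  # shallow copy + override
run_params.pop('first_metric_only', False)         # what train() does internally
assert 'first_metric_only' not in params           # original left untouched
```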