engine.py 32.2 KB
Newer Older
wxchan's avatar
wxchan committed
1
# coding: utf-8
2
"""Library with training routines of LightGBM."""
wxchan's avatar
wxchan committed
3
import collections
4
import copy
wxchan's avatar
wxchan committed
5
from operator import attrgetter
6
from pathlib import Path
7
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
8

wxchan's avatar
wxchan committed
9
import numpy as np
10

wxchan's avatar
wxchan committed
11
from . import callback
12
from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor, _log_warning
13
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold
wxchan's avatar
wxchan committed
14

15
16
17
18
19
20
21
22
# Signature of a user-supplied objective: (preds, train_data) -> (grad, hess).
_LGBM_CustomObjectiveFunction = Callable[
    [Union[List, np.ndarray], Dataset],
    Tuple[Union[List, np.ndarray], Union[List, np.ndarray]]
]
# Signature of a user-supplied metric: (preds, train_data) -> (eval_name, eval_result, is_higher_better).
_LGBM_CustomMetricFunction = Callable[
    [Union[List, np.ndarray], Dataset],
    Tuple[str, float, bool]
]
wxchan's avatar
wxchan committed
23

24
25
26
27
28
29
30
31
32
33
34
35
36
37

def train(
    params: Dict[str, Any],
    train_set: Dataset,
    num_boost_round: int = 100,
    valid_sets: Optional[List[Dataset]] = None,
    valid_names: Optional[List[str]] = None,
    fobj: Optional[_LGBM_CustomObjectiveFunction] = None,
    feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None,
    init_model: Optional[Union[str, Path, Booster]] = None,
    feature_name: Union[List[str], str] = 'auto',
    categorical_feature: Union[List[str], List[int], str] = 'auto',
    early_stopping_rounds: Optional[int] = None,
    evals_result: Optional[Dict[str, Any]] = None,
    verbose_eval: Union[bool, int, str] = 'warn',
    learning_rates: Optional[Union[List[float], Callable[[int], float]]] = None,
    keep_training_booster: bool = False,
    callbacks: Optional[List[Callable]] = None
) -> Booster:
    """Perform the training with given parameters.

    Parameters
    ----------
    params : dict
        Parameters for training.
    train_set : Dataset
        Data to be trained on.
    num_boost_round : int, optional (default=100)
        Number of boosting iterations.
    valid_sets : list of Dataset, or None, optional (default=None)
        List of data to be evaluated on during training.
    valid_names : list of str, or None, optional (default=None)
        Names of ``valid_sets``.
    fobj : callable or None, optional (default=None)
        Customized objective function.
        Should accept two parameters: preds, train_data,
        and return (grad, hess).

            preds : list or numpy 1-D array
                The predicted values.
                Predicted values are returned before any transformation,
                e.g. they are raw margin instead of probability of positive class for binary task.
            train_data : Dataset
                The training dataset.
            grad : list or numpy 1-D array
                The value of the first order derivative (gradient) of the loss
                with respect to the elements of preds for each sample point.
            hess : list or numpy 1-D array
                The value of the second order derivative (Hessian) of the loss
                with respect to the elements of preds for each sample point.

        For multi-class task, the preds is group by class_id first, then group by row_id.
        If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
        and you should group grad and hess in this way as well.

    feval : callable, list of callable, or None, optional (default=None)
        Customized evaluation function.
        Each evaluation function should accept two parameters: preds, train_data,
        and return (eval_name, eval_result, is_higher_better) or list of such tuples.

            preds : list or numpy 1-D array
                The predicted values.
                If ``fobj`` is specified, predicted values are returned before any transformation,
                e.g. they are raw margin instead of probability of positive class for binary task in this case.
            train_data : Dataset
                The training dataset.
            eval_name : str
                The name of evaluation function (without whitespaces).
            eval_result : float
                The eval result.
            is_higher_better : bool
                Is eval result higher better, e.g. AUC is ``is_higher_better``.

        For multi-class task, the preds is group by class_id first, then group by row_id.
        If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
        To ignore the default metric corresponding to the used objective,
        set the ``metric`` parameter to the string ``"None"`` in ``params``.
    init_model : str, pathlib.Path, Booster or None, optional (default=None)
        Filename of LightGBM model or Booster instance used for continue training.
    feature_name : list of str, or 'auto', optional (default="auto")
        Feature names.
        If 'auto' and data is pandas DataFrame, data columns names are used.
    categorical_feature : list of str or int, or 'auto', optional (default="auto")
        Categorical features.
        If list of int, interpreted as indices.
        If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
        If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
        All values in categorical features should be less than int32 max value (2147483647).
        Large values could be memory consuming. Consider using consecutive integers starting from zero.
        All negative values in categorical features will be treated as missing values.
        The output cannot be monotonically constrained with respect to a categorical feature.
    early_stopping_rounds : int or None, optional (default=None)
        Activates early stopping. The model will train until the validation score stops improving.
        Validation score needs to improve at least every ``early_stopping_rounds`` round(s)
        to continue training.
        Requires at least one validation data and one metric.
        If there's more than one, will check all of them. But the training data is ignored anyway.
        To check only the first metric, set the ``first_metric_only`` parameter to ``True`` in ``params``.
        The index of iteration that has the best performance will be saved in the ``best_iteration`` field
        if early stopping logic is enabled by setting ``early_stopping_rounds``.
        This argument is deprecated; pass the ``early_stopping()`` callback via ``callbacks`` instead.
    evals_result : dict or None, optional (default=None)
        Dictionary used to store all evaluation results of all the items in ``valid_sets``.
        This should be initialized outside of your call to ``train()`` and should be empty.
        Any initial contents of the dictionary will be deleted.
        This argument is deprecated; pass the ``record_evaluation()`` callback via ``callbacks`` instead.

        .. rubric:: Example

        With a ``valid_sets`` = [valid_set, train_set],
        ``valid_names`` = ['eval', 'train']
        and a ``params`` = {'metric': 'logloss'}
        returns {'train': {'logloss': ['0.48253', '0.35953', ...]},
        'eval': {'logloss': ['0.480385', '0.357756', ...]}}.

    verbose_eval : bool, int, or str, optional (default='warn')
        Requires at least one validation data.
        If True, the eval metric on the valid set is printed at each boosting stage.
        If int, the eval metric on the valid set is printed at every ``verbose_eval`` boosting stage.
        The last boosting stage or the boosting stage found by using ``early_stopping_rounds`` is also printed.
        The default sentinel ``'warn'`` behaves like True (or like False when any ``callbacks``
        are given); passing any other value emits a deprecation warning.
        This argument is deprecated; pass the ``print_evaluation()`` callback via ``callbacks`` instead.

        .. rubric:: Example

        With ``verbose_eval`` = 4 and at least one item in ``valid_sets``,
        an evaluation metric is printed every 4 (instead of 1) boosting stages.

    learning_rates : list, callable or None, optional (default=None)
        List of learning rates for each boosting round
        or a callable that calculates ``learning_rate``
        in terms of current number of round (e.g. yields learning rate decay).
        This argument is deprecated; pass the ``reset_parameter()`` callback via ``callbacks`` instead.
    keep_training_booster : bool, optional (default=False)
        Whether the returned Booster will be used to keep training.
        If False, the returned value will be converted into _InnerPredictor before returning.
        This means you won't be able to use ``eval``, ``eval_train`` or ``eval_valid`` methods of the returned Booster.
        When your model is very large and cause the memory error,
        you can try to set this param to ``True`` to avoid the model conversion performed during the internal call of ``model_to_string``.
        You can still use _InnerPredictor as ``init_model`` for future continue training.
    callbacks : list of callable, or None, optional (default=None)
        List of callback functions that are applied at each iteration.
        See Callbacks in Python API for more information.

    Returns
    -------
    booster : Booster
        The trained Booster model.

    Raises
    ------
    ValueError
        If ``num_boost_round`` (after alias resolution) is not positive.
    TypeError
        If ``train_set`` or any validation data is not a ``Dataset``.
    """
    # create predictor first
    # Work on a private copy: aliases are popped and canonical keys injected below.
    params = copy.deepcopy(params)
    if fobj is not None:
        # A custom objective replaces any built-in one; drop every alias of 'objective'.
        for obj_alias in _ConfigAliases.get("objective"):
            params.pop(obj_alias, None)
        params['objective'] = 'none'
    # Params take priority over the num_boost_round argument.
    for alias in _ConfigAliases.get("num_iterations"):
        if alias in params:
            num_boost_round = params.pop(alias)
            _log_warning(f"Found `{alias}` in params. Will use it instead of argument")
    params["num_iterations"] = num_boost_round
    # show deprecation warning only for early stop argument, setting early stop via global params should still be possible
    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        _log_warning("'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'early_stopping()' callback via 'callbacks' argument instead.")
    for alias in _ConfigAliases.get("early_stopping_round"):
        if alias in params:
            early_stopping_rounds = params.pop(alias)
    params["early_stopping_round"] = early_stopping_rounds
    first_metric_only = params.get('first_metric_only', False)

    if num_boost_round <= 0:
        raise ValueError("num_boost_round should be greater than zero.")
    # Build the predictor used for continued training, if an initial model was given.
    if isinstance(init_model, (str, Path)):
        predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
    elif isinstance(init_model, Booster):
        predictor = init_model._to_predictor(dict(init_model.params, **params))
    else:
        predictor = None
    init_iteration = predictor.num_total_iteration if predictor is not None else 0
    # check dataset
    if not isinstance(train_set, Dataset):
        raise TypeError("Training only accepts Dataset object")

    train_set._update_params(params) \
             ._set_predictor(predictor) \
             .set_feature_name(feature_name) \
             .set_categorical_feature(categorical_feature)

    is_valid_contain_train = False
    train_data_name = "training"
    reduced_valid_sets = []
    name_valid_sets = []
    if valid_sets is not None:
        # Accept a bare Dataset / str as a convenience for one validation set.
        if isinstance(valid_sets, Dataset):
            valid_sets = [valid_sets]
        if isinstance(valid_names, str):
            valid_names = [valid_names]
        for i, valid_data in enumerate(valid_sets):
            # reduce cost for prediction training data
            if valid_data is train_set:
                is_valid_contain_train = True
                if valid_names is not None:
                    train_data_name = valid_names[i]
                continue
            if not isinstance(valid_data, Dataset):
                raise TypeError("Training only accepts Dataset object")
            reduced_valid_sets.append(valid_data._update_params(params).set_reference(train_set))
            if valid_names is not None and len(valid_names) > i:
                name_valid_sets.append(valid_names[i])
            else:
                name_valid_sets.append(f'valid_{i}')
    # process callbacks
    if callbacks is None:
        callbacks = set()
    else:
        # Assign increasing negative default 'order' values so the user-given
        # relative ordering of callbacks is preserved when sorting below.
        for i, cb in enumerate(callbacks):
            cb.__dict__.setdefault('order', i - len(callbacks))
        callbacks = set(callbacks)

    # Most of legacy advanced options becomes callbacks
    if verbose_eval != "warn":
        _log_warning("'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'print_evaluation()' callback via 'callbacks' argument instead.")
    else:
        if callbacks:  # assume user has already specified print_evaluation callback
            verbose_eval = False
        else:
            verbose_eval = True

    if verbose_eval is True:
        callbacks.add(callback.print_evaluation())
    elif isinstance(verbose_eval, int):
        callbacks.add(callback.print_evaluation(verbose_eval))

    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=bool(verbose_eval)))

    if learning_rates is not None:
        _log_warning("'learning_rates' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'reset_parameter()' callback via 'callbacks' argument instead.")
        callbacks.add(callback.reset_parameter(learning_rate=learning_rates))

    if evals_result is not None:
        _log_warning("'evals_result' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'record_evaluation()' callback via 'callbacks' argument instead.")
        callbacks.add(callback.record_evaluation(evals_result))

    # Split callbacks by when they fire; each group runs in 'order'-sorted sequence.
    callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)}
    callbacks_after_iter = callbacks - callbacks_before_iter
    callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order'))
    callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order'))

    # construct booster
    try:
        booster = Booster(params=params, train_set=train_set)
        if is_valid_contain_train:
            booster.set_train_data_name(train_data_name)
        for valid_set, name_valid_set in zip(reduced_valid_sets, name_valid_sets):
            booster.add_valid(valid_set, name_valid_set)
    finally:
        # Restore the datasets' original params even if booster construction failed.
        train_set._reverse_update_params()
        for valid_set in reduced_valid_sets:
            valid_set._reverse_update_params()
    booster.best_iteration = 0

    # start training
    for i in range(init_iteration, init_iteration + num_boost_round):
        for cb in callbacks_before_iter:
            cb(callback.CallbackEnv(model=booster,
                                    params=params,
                                    iteration=i,
                                    begin_iteration=init_iteration,
                                    end_iteration=init_iteration + num_boost_round,
                                    evaluation_result_list=None))

        booster.update(fobj=fobj)

        evaluation_result_list = []
        # check evaluation result.
        if valid_sets is not None:
            if is_valid_contain_train:
                evaluation_result_list.extend(booster.eval_train(feval))
            evaluation_result_list.extend(booster.eval_valid(feval))
        # After-iteration callbacks may raise EarlyStopException to end training.
        try:
            for cb in callbacks_after_iter:
                cb(callback.CallbackEnv(model=booster,
                                        params=params,
                                        iteration=i,
                                        begin_iteration=init_iteration,
                                        end_iteration=init_iteration + num_boost_round,
                                        evaluation_result_list=evaluation_result_list))
        except callback.EarlyStopException as earlyStopException:
            # best_iteration is stored 1-based; keep the best scores for reporting below.
            booster.best_iteration = earlyStopException.best_iteration + 1
            evaluation_result_list = earlyStopException.best_score
            break
    booster.best_score = collections.defaultdict(collections.OrderedDict)
    for dataset_name, eval_name, score, _ in evaluation_result_list:
        booster.best_score[dataset_name][eval_name] = score
    if not keep_training_booster:
        # Round-trip through the string representation and drop dataset references,
        # since this booster will not be trained further.
        booster.model_from_string(booster.model_to_string(), verbose='_silent_false').free_dataset()
    return booster


321
class CVBooster:
    """CVBooster in LightGBM.

    Auxiliary data structure holding all boosters built by the ``cv`` function.
    It exposes the same methods as the Booster class: every call is forwarded
    to each underlying Booster, and the per-booster results come back as a list.

    Attributes
    ----------
    boosters : list of Booster
        The list of underlying fitted models.
    best_iteration : int
        The best iteration of fitted model.
    """

    def __init__(self):
        """Initialize the CVBooster.

        Generally, no need to instantiate manually.
        """
        self.boosters = []
        self.best_iteration = -1

    def _append(self, booster):
        """Add a booster to CVBooster."""
        self.boosters.append(booster)

    def __getattr__(self, name):
        """Redirect methods call of CVBooster."""
        def _dispatch(*args, **kwargs):
            """Invoke the named method on every booster and collect the results."""
            return [getattr(model, name)(*args, **kwargs) for model in self.boosters]
        return _dispatch
wxchan's avatar
wxchan committed
357

358

359
360
def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True,
                  shuffle=True, eval_train_metric=False):
    """Make a n-fold list of Booster from random indices."""
    constructed = full_data.construct()
    n_rows = constructed.num_data()
    if folds is not None:
        # Caller supplied the folds: either an iterable of index pairs or a splitter.
        has_split = hasattr(folds, 'split')
        if not (hasattr(folds, '__iter__') or has_split):
            raise AttributeError("folds should be a generator or iterator of (train_idx, test_idx) tuples "
                                 "or scikit-learn splitter object with split method")
        if has_split:
            groups = constructed.get_group()
            if groups is None:
                flat_groups = np.zeros(n_rows, dtype=np.int32)
            else:
                # Expand per-group sizes into one group id per row for the splitter.
                groups = np.array(groups, dtype=np.int32, copy=False)
                flat_groups = np.repeat(range(len(groups)), repeats=groups)
            folds = folds.split(X=np.empty(n_rows), y=constructed.get_label(), groups=flat_groups)
    else:
        ranking_objectives = {"lambdarank", "rank_xendcg", "xendcg",
                              "xe_ndcg", "xe_ndcg_mart", "xendcg_mart"}
        is_ranking = any(params.get(obj_alias, "") in ranking_objectives
                         for obj_alias in _ConfigAliases.get("objective"))
        if is_ranking:
            if not SKLEARN_INSTALLED:
                raise LightGBMError('scikit-learn is required for ranking cv')
            # ranking task, split according to groups
            groups = np.array(constructed.get_group(), dtype=np.int32, copy=False)
            flat_groups = np.repeat(range(len(groups)), repeats=groups)
            folds = _LGBMGroupKFold(n_splits=nfold).split(X=np.empty(n_rows), groups=flat_groups)
        elif stratified:
            if not SKLEARN_INSTALLED:
                raise LightGBMError('scikit-learn is required for stratified cv')
            splitter = _LGBMStratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed)
            folds = splitter.split(X=np.empty(n_rows), y=constructed.get_label())
        else:
            # Plain k-fold on (optionally shuffled) row indices.
            if shuffle:
                order = np.random.RandomState(seed).permutation(n_rows)
            else:
                order = np.arange(n_rows)
            step = int(n_rows / nfold)
            test_chunks = [order[start: start + step] for start in range(0, n_rows, step)]
            train_chunks = [np.concatenate([test_chunks[i] for i in range(nfold) if k != i])
                            for k in range(nfold)]
            folds = zip(train_chunks, test_chunks)

    result = CVBooster()
    for train_idx, test_idx in folds:
        train_subset = constructed.subset(sorted(train_idx))
        valid_subset = constructed.subset(sorted(test_idx))
        # run preprocessing on the data set if needed
        if fpreproc is not None:
            train_subset, valid_subset, fold_params = fpreproc(train_subset, valid_subset, params.copy())
        else:
            fold_params = params
        fold_booster = Booster(fold_params, train_subset)
        if eval_train_metric:
            fold_booster.add_valid(train_subset, 'train')
        fold_booster.add_valid(valid_subset, 'valid')
        result._append(fold_booster)
    return result

wxchan's avatar
wxchan committed
418

419
def _agg_cv_result(raw_results, eval_train_metric=False):
420
    """Aggregate cross-validation results."""
421
    cvmap = collections.OrderedDict()
wxchan's avatar
wxchan committed
422
423
424
    metric_type = {}
    for one_result in raw_results:
        for one_line in one_result:
425
            if eval_train_metric:
426
                key = f"{one_line[0]} {one_line[1]}"
427
428
429
            else:
                key = one_line[1]
            metric_type[key] = one_line[3]
430
            cvmap.setdefault(key, [])
431
            cvmap[key].append(one_line[2])
wxchan's avatar
wxchan committed
432
    return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]
wxchan's avatar
wxchan committed
433

wxchan's avatar
wxchan committed
434

435
def cv(params, train_set, num_boost_round=100,
       folds=None, nfold=5, stratified=True, shuffle=True,
       metrics=None, fobj=None, feval=None, init_model=None,
       feature_name='auto', categorical_feature='auto',
       early_stopping_rounds=None, fpreproc=None,
       verbose_eval=None, show_stdv=True, seed=0,
       callbacks=None, eval_train_metric=False,
       return_cvbooster=False):
    """Perform the cross-validation with given parameters.

    Parameters
    ----------
    params : dict
        Parameters for Booster.
    train_set : Dataset
        Data to be trained on.
    num_boost_round : int, optional (default=100)
        Number of boosting iterations.
    folds : generator or iterator of (train_idx, test_idx) tuples, scikit-learn splitter object or None, optional (default=None)
        If generator or iterator, it should yield the train and test indices for each fold.
        If object, it should be one of the scikit-learn splitter classes
        (https://scikit-learn.org/stable/modules/classes.html#splitter-classes)
        and have ``split`` method.
        This argument has highest priority over other data split arguments.
    nfold : int, optional (default=5)
        Number of folds in CV.
    stratified : bool, optional (default=True)
        Whether to perform stratified sampling.
    shuffle : bool, optional (default=True)
        Whether to shuffle before splitting data.
    metrics : str, list of str, or None, optional (default=None)
        Evaluation metrics to be monitored while CV.
        If not None, the metric in ``params`` will be overridden.
    fobj : callable or None, optional (default=None)
        Customized objective function.
        Should accept two parameters: preds, train_data,
        and return (grad, hess).

            preds : list or numpy 1-D array
                The predicted values.
                Predicted values are returned before any transformation,
                e.g. they are raw margin instead of probability of positive class for binary task.
            train_data : Dataset
                The training dataset.
            grad : list or numpy 1-D array
                The value of the first order derivative (gradient) of the loss
                with respect to the elements of preds for each sample point.
            hess : list or numpy 1-D array
                The value of the second order derivative (Hessian) of the loss
                with respect to the elements of preds for each sample point.

        For multi-class task, the preds is group by class_id first, then group by row_id.
        If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
        and you should group grad and hess in this way as well.

    feval : callable, list of callable, or None, optional (default=None)
        Customized evaluation function.
        Each evaluation function should accept two parameters: preds, train_data,
        and return (eval_name, eval_result, is_higher_better) or list of such tuples.

            preds : list or numpy 1-D array
                The predicted values.
                If ``fobj`` is specified, predicted values are returned before any transformation,
                e.g. they are raw margin instead of probability of positive class for binary task in this case.
            train_data : Dataset
                The training dataset.
            eval_name : str
                The name of evaluation function (without whitespace).
            eval_result : float
                The eval result.
            is_higher_better : bool
                Is eval result higher better, e.g. AUC is ``is_higher_better``.

        For multi-class task, the preds is group by class_id first, then group by row_id.
        If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
        To ignore the default metric corresponding to the used objective,
        set ``metrics`` to the string ``"None"``.
    init_model : str, pathlib.Path, Booster or None, optional (default=None)
        Filename of LightGBM model or Booster instance used for continue training.
    feature_name : list of str, or 'auto', optional (default="auto")
        Feature names.
        If 'auto' and data is pandas DataFrame, data columns names are used.
    categorical_feature : list of str or int, or 'auto', optional (default="auto")
        Categorical features.
        If list of int, interpreted as indices.
        If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
        If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
        All values in categorical features should be less than int32 max value (2147483647).
        Large values could be memory consuming. Consider using consecutive integers starting from zero.
        All negative values in categorical features will be treated as missing values.
        The output cannot be monotonically constrained with respect to a categorical feature.
    early_stopping_rounds : int or None, optional (default=None)
        Activates early stopping.
        CV score needs to improve at least every ``early_stopping_rounds`` round(s)
        to continue.
        Requires at least one metric. If there's more than one, will check all of them.
        To check only the first metric, set the ``first_metric_only`` parameter to ``True`` in ``params``.
        Last entry in evaluation history is the one from the best iteration.
    fpreproc : callable or None, optional (default=None)
        Preprocessing function that takes (dtrain, dtest, params)
        and returns transformed versions of those.
    verbose_eval : bool, int, or None, optional (default=None)
        Whether to display the progress.
        If True, progress will be displayed at every boosting stage.
        If int, progress will be displayed at every given ``verbose_eval`` boosting stage.
    show_stdv : bool, optional (default=True)
        Whether to display the standard deviation in progress.
        Results are not affected by this parameter, and always contain std.
    seed : int, optional (default=0)
        Seed used to generate the folds (passed to numpy.random.seed).
    callbacks : list of callable, or None, optional (default=None)
        List of callback functions that are applied at each iteration.
        See Callbacks in Python API for more information.
    eval_train_metric : bool, optional (default=False)
        Whether to display the train metric in progress.
        The score of the metric is calculated again after each training step, so there is some impact on performance.
    return_cvbooster : bool, optional (default=False)
        Whether to return Booster models trained on each fold through ``CVBooster``.

    Returns
    -------
    eval_hist : dict
        Evaluation history.
        The dictionary has the following format:
        {'metric1-mean': [values], 'metric1-stdv': [values],
        'metric2-mean': [values], 'metric2-stdv': [values],
        ...}.
        If ``return_cvbooster=True``, also returns trained boosters via ``cvbooster`` key.
    """
    if not isinstance(train_set, Dataset):
        raise TypeError("Training only accepts Dataset object")

    # Work on a private copy so the caller's params dict is never mutated.
    params = copy.deepcopy(params)
    if fobj is not None:
        # A custom objective replaces any built-in objective set via an alias.
        for obj_alias in _ConfigAliases.get("objective"):
            params.pop(obj_alias, None)
        params['objective'] = 'none'
    # Aliases present in params take precedence over the keyword argument.
    for alias in _ConfigAliases.get("num_iterations"):
        if alias in params:
            _log_warning(f"Found `{alias}` in params. Will use it instead of argument")
            num_boost_round = params.pop(alias)
    params["num_iterations"] = num_boost_round
    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        _log_warning("'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'early_stopping()' callback via 'callbacks' argument instead.")
    # Likewise, an early-stopping alias in params overrides the keyword argument.
    for alias in _ConfigAliases.get("early_stopping_round"):
        if alias in params:
            early_stopping_rounds = params.pop(alias)
    params["early_stopping_round"] = early_stopping_rounds
    first_metric_only = params.get('first_metric_only', False)

    if num_boost_round <= 0:
        raise ValueError("num_boost_round should be greater than zero.")
    # Build a predictor for continued training when an initial model was given.
    if isinstance(init_model, (str, Path)):
        predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
    elif isinstance(init_model, Booster):
        predictor = init_model._to_predictor(dict(init_model.params, **params))
    else:
        predictor = None

    if metrics is not None:
        # Explicit `metrics` argument overrides any metric aliases in params.
        for metric_alias in _ConfigAliases.get("metric"):
            params.pop(metric_alias, None)
        params['metric'] = metrics

    train_set._update_params(params) \
             ._set_predictor(predictor) \
             .set_feature_name(feature_name) \
             .set_categorical_feature(categorical_feature)

    results = collections.defaultdict(list)
    cvfolds = _make_n_folds(train_set, folds=folds, nfold=nfold,
                            params=params, seed=seed, fpreproc=fpreproc,
                            stratified=stratified, shuffle=shuffle,
                            eval_train_metric=eval_train_metric)

    # setup callbacks
    if callbacks is None:
        callbacks = set()
    else:
        # NOTE(review): user callbacks get a negative 'order' (i - len) so they
        # sort ahead of built-in callbacks — confirm against the callback module.
        for i, cb in enumerate(callbacks):
            cb.__dict__.setdefault('order', i - len(callbacks))
        callbacks = set(callbacks)
    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=False))
    if verbose_eval is not None:
        _log_warning("'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'print_evaluation()' callback via 'callbacks' argument instead.")
    # `is True` (not truthiness) so an int verbose_eval falls through to the elif.
    if verbose_eval is True:
        callbacks.add(callback.print_evaluation(show_stdv=show_stdv))
    elif isinstance(verbose_eval, int):
        callbacks.add(callback.print_evaluation(verbose_eval, show_stdv=show_stdv))

    # Partition callbacks by phase, then run each phase in 'order' sequence.
    callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)}
    callbacks_after_iter = callbacks - callbacks_before_iter
    callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order'))
    callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order'))

    for i in range(num_boost_round):
        for cb in callbacks_before_iter:
            cb(callback.CallbackEnv(model=cvfolds,
                                    params=params,
                                    iteration=i,
                                    begin_iteration=0,
                                    end_iteration=num_boost_round,
                                    evaluation_result_list=None))
        # One boosting step on every fold, then aggregate fold metrics.
        cvfolds.update(fobj=fobj)
        res = _agg_cv_result(cvfolds.eval_valid(feval), eval_train_metric)
        for _, key, mean, _, std in res:
            results[f'{key}-mean'].append(mean)
            results[f'{key}-stdv'].append(std)
        try:
            for cb in callbacks_after_iter:
                cb(callback.CallbackEnv(model=cvfolds,
                                        params=params,
                                        iteration=i,
                                        begin_iteration=0,
                                        end_iteration=num_boost_round,
                                        evaluation_result_list=res))
        except callback.EarlyStopException as earlyStopException:
            cvfolds.best_iteration = earlyStopException.best_iteration + 1
            # Trim history so the last entry corresponds to the best iteration.
            for k in results:
                results[k] = results[k][:cvfolds.best_iteration]
            break

    if return_cvbooster:
        results['cvbooster'] = cvfolds

    # Plain dict so missing keys raise instead of silently creating entries.
    return dict(results)