engine.py 32.2 KB
Newer Older
wxchan's avatar
wxchan committed
1
# coding: utf-8
2
"""Library with training routines of LightGBM."""
wxchan's avatar
wxchan committed
3
import collections
4
import copy
wxchan's avatar
wxchan committed
5
from operator import attrgetter
6
from pathlib import Path
7
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
8

wxchan's avatar
wxchan committed
9
import numpy as np
10

wxchan's avatar
wxchan committed
11
from . import callback
12
from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor, _log_warning
13
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold
wxchan's avatar
wxchan committed
14

15
16
17
18
19
20
21
22
# Type of a user-supplied objective function (the ``fobj`` argument of ``train``):
# called as ``fobj(preds, train_data)`` and expected to return ``(grad, hess)``.
_LGBM_CustomObjectiveFunction = Callable[
    [Union[List, np.ndarray], Dataset],
    Tuple[Union[List, np.ndarray], Union[List, np.ndarray]]
]
# Type of a user-supplied evaluation function (the ``feval`` argument of ``train``):
# called as ``feval(preds, train_data)`` and expected to return
# ``(eval_name, eval_result, is_higher_better)``.
_LGBM_CustomMetricFunction = Callable[
    [Union[List, np.ndarray], Dataset],
    Tuple[str, float, bool]
]
wxchan's avatar
wxchan committed
23

24
25
26
27
28
29
30
31
32
33
34
35
36
37

def train(
    params: Dict[str, Any],
    train_set: Dataset,
    num_boost_round: int = 100,
    valid_sets: Optional[List[Dataset]] = None,
    valid_names: Optional[List[str]] = None,
    fobj: Optional[_LGBM_CustomObjectiveFunction] = None,
    feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None,
    init_model: Optional[Union[str, Path, Booster]] = None,
    feature_name: Union[List[str], str] = 'auto',
    categorical_feature: Union[List[str], List[int], str] = 'auto',
    early_stopping_rounds: Optional[int] = None,
    evals_result: Optional[Dict[str, Any]] = None,
    verbose_eval: Union[bool, int, str] = 'warn',
    learning_rates: Optional[Union[List[float], Callable[[int], float]]] = None,
    keep_training_booster: bool = False,
    callbacks: Optional[List[Callable]] = None
) -> Booster:
    """Perform the training with given parameters.

    Parameters
    ----------
    params : dict
        Parameters for training.
        The dictionary is deep-copied, so the caller's object is not modified.
    train_set : Dataset
        Data to be trained on.
    num_boost_round : int, optional (default=100)
        Number of boosting iterations.
        Overridden (with a warning) by any alias of ``num_iterations`` found in ``params``.
    valid_sets : list of Dataset, or None, optional (default=None)
        List of data to be evaluated on during training.
    valid_names : list of str, or None, optional (default=None)
        Names of ``valid_sets``.
        Validation sets without a corresponding name are called ``valid_<index>``;
        the training set, when it is part of ``valid_sets`` and has no corresponding name,
        is called ``training``.
    fobj : callable or None, optional (default=None)
        Customized objective function.
        Should accept two parameters: preds, train_data,
        and return (grad, hess).

            preds : list or numpy 1-D array
                The predicted values.
                Predicted values are returned before any transformation,
                e.g. they are raw margin instead of probability of positive class for binary task.
            train_data : Dataset
                The training dataset.
            grad : list or numpy 1-D array
                The value of the first order derivative (gradient) of the loss
                with respect to the elements of preds for each sample point.
            hess : list or numpy 1-D array
                The value of the second order derivative (Hessian) of the loss
                with respect to the elements of preds for each sample point.

        For multi-class task, the preds are grouped by class_id first, then by row_id.
        If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
        and you should group grad and hess in this way as well.

    feval : callable, list of callable, or None, optional (default=None)
        Customized evaluation function.
        Each evaluation function should accept two parameters: preds, train_data,
        and return (eval_name, eval_result, is_higher_better) or list of such tuples.

            preds : list or numpy 1-D array
                The predicted values.
                If ``fobj`` is specified, predicted values are returned before any transformation,
                e.g. they are raw margin instead of probability of positive class for binary task in this case.
            train_data : Dataset
                The training dataset.
            eval_name : str
                The name of evaluation function (without whitespaces).
            eval_result : float
                The eval result.
            is_higher_better : bool
                Is eval result higher better, e.g. AUC is ``is_higher_better``.

        For multi-class task, the preds are grouped by class_id first, then by row_id.
        If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
        To ignore the default metric corresponding to the used objective,
        set the ``metric`` parameter to the string ``"None"`` in ``params``.
    init_model : str, pathlib.Path, Booster or None, optional (default=None)
        Filename of LightGBM model or Booster instance used for continue training.
    feature_name : list of str, or 'auto', optional (default="auto")
        Feature names.
        If 'auto' and data is pandas DataFrame, data columns names are used.
    categorical_feature : list of str or int, or 'auto', optional (default="auto")
        Categorical features.
        If list of int, interpreted as indices.
        If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
        If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
        All values in categorical features should be less than int32 max value (2147483647).
        Large values could be memory consuming. Consider using consecutive integers starting from zero.
        All negative values in categorical features will be treated as missing values.
        The output cannot be monotonically constrained with respect to a categorical feature.
    early_stopping_rounds : int or None, optional (default=None)
        Activates early stopping. The model will train until the validation score stops improving.
        Validation score needs to improve at least every ``early_stopping_rounds`` round(s)
        to continue training.
        Requires at least one validation data and one metric.
        If there's more than one, will check all of them. But the training data is ignored anyway.
        To check only the first metric, set the ``first_metric_only`` parameter to ``True`` in ``params``.
        The index of iteration that has the best performance will be saved in the ``best_iteration`` field
        if early stopping logic is enabled by setting ``early_stopping_rounds``.
        Deprecated as an argument: pass the ``early_stopping()`` callback via ``callbacks`` instead.
    evals_result : dict or None, optional (default=None)
        Dictionary used to store all evaluation results of all the items in ``valid_sets``.
        This should be initialized outside of your call to ``train()`` and should be empty.
        Any initial contents of the dictionary will be deleted.
        Deprecated: pass the ``record_evaluation()`` callback via ``callbacks`` instead.

        .. rubric:: Example

        With a ``valid_sets`` = [valid_set, train_set],
        ``valid_names`` = ['eval', 'train']
        and a ``params`` = {'metric': 'logloss'}
        returns {'train': {'logloss': ['0.48253', '0.35953', ...]},
        'eval': {'logloss': ['0.480385', '0.357756', ...]}}.

    verbose_eval : bool, int or 'warn', optional (default='warn')
        Requires at least one validation data.
        If True, the eval metric on the valid set is printed at each boosting stage.
        If int, the eval metric on the valid set is printed at every ``verbose_eval`` boosting stage.
        The last boosting stage or the boosting stage found by using ``early_stopping_rounds`` is also printed.
        If 'warn' (the default), behaves like ``True`` when no ``callbacks`` are passed
        and like ``False`` otherwise.
        Any value other than 'warn' triggers a deprecation warning:
        pass the ``log_evaluation()`` callback via ``callbacks`` instead.

        .. rubric:: Example

        With ``verbose_eval`` = 4 and at least one item in ``valid_sets``,
        an evaluation metric is printed every 4 (instead of 1) boosting stages.

    learning_rates : list, callable or None, optional (default=None)
        List of learning rates for each boosting round
        or a callable that calculates ``learning_rate``
        in terms of current number of round (e.g. yields learning rate decay).
        Deprecated: pass the ``reset_parameter()`` callback via ``callbacks`` instead.
    keep_training_booster : bool, optional (default=False)
        Whether the returned Booster will be used to keep training.
        If False, the returned value will be converted into _InnerPredictor before returning.
        This means you won't be able to use ``eval``, ``eval_train`` or ``eval_valid`` methods of the returned Booster.
        When your model is very large and cause the memory error,
        you can try to set this param to ``True`` to avoid the model conversion performed during the internal call of ``model_to_string``.
        You can still use _InnerPredictor as ``init_model`` for future continue training.
    callbacks : list of callable, or None, optional (default=None)
        List of callback functions that are applied at each iteration.
        See Callbacks in Python API for more information.

    Returns
    -------
    booster : Booster
        The trained Booster model.

    Raises
    ------
    ValueError
        If the effective number of boosting rounds is not greater than zero.
    TypeError
        If ``train_set`` or any item of ``valid_sets`` is not a Dataset.
    """
    # create predictor first
    params = copy.deepcopy(params)
    if fobj is not None:
        # a custom objective replaces whatever objective was configured in params
        for obj_alias in _ConfigAliases.get("objective"):
            params.pop(obj_alias, None)
        params['objective'] = 'none'
    for alias in _ConfigAliases.get("num_iterations"):
        if alias in params:
            num_boost_round = params.pop(alias)
            _log_warning(f"Found `{alias}` in params. Will use it instead of argument")
    params["num_iterations"] = num_boost_round
    # show deprecation warning only for early stop argument, setting early stop via global params should still be possible
    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        _log_warning("'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'early_stopping()' callback via 'callbacks' argument instead.")
    for alias in _ConfigAliases.get("early_stopping_round"):
        if alias in params:
            early_stopping_rounds = params.pop(alias)
    params["early_stopping_round"] = early_stopping_rounds
    first_metric_only = params.get('first_metric_only', False)

    if num_boost_round <= 0:
        raise ValueError("num_boost_round should be greater than zero.")
    predictor: Optional[_InnerPredictor] = None
    if isinstance(init_model, (str, Path)):
        predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
    elif isinstance(init_model, Booster):
        predictor = init_model._to_predictor(dict(init_model.params, **params))
    # when continuing training, iteration numbering starts after the initial model's iterations
    init_iteration = predictor.num_total_iteration if predictor is not None else 0
    # check dataset
    if not isinstance(train_set, Dataset):
        raise TypeError("Training only accepts Dataset object")

    train_set._update_params(params) \
             ._set_predictor(predictor) \
             .set_feature_name(feature_name) \
             .set_categorical_feature(categorical_feature)

    is_valid_contain_train = False
    train_data_name = "training"
    reduced_valid_sets = []
    name_valid_sets = []
    if valid_sets is not None:
        if isinstance(valid_sets, Dataset):
            valid_sets = [valid_sets]
        if isinstance(valid_names, str):
            valid_names = [valid_names]
        for i, valid_data in enumerate(valid_sets):
            # reduce cost for prediction training data
            if valid_data is train_set:
                is_valid_contain_train = True
                # guard the index like for the other validation sets below, so that a
                # ``valid_names`` shorter than ``valid_sets`` falls back to the default
                # name instead of raising IndexError
                if valid_names is not None and len(valid_names) > i:
                    train_data_name = valid_names[i]
                continue
            if not isinstance(valid_data, Dataset):
                raise TypeError("Training only accepts Dataset object")
            reduced_valid_sets.append(valid_data._update_params(params).set_reference(train_set))
            if valid_names is not None and len(valid_names) > i:
                name_valid_sets.append(valid_names[i])
            else:
                name_valid_sets.append(f'valid_{i}')
    # process callbacks
    if callbacks is None:
        callbacks = set()
    else:
        # user callbacks without an explicit ``order`` get negative orders
        # that preserve the order in which they were passed
        for i, cb in enumerate(callbacks):
            cb.__dict__.setdefault('order', i - len(callbacks))
        callbacks = set(callbacks)

    # Most of legacy advanced options becomes callbacks
    if verbose_eval != "warn":
        _log_warning("'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'log_evaluation()' callback via 'callbacks' argument instead.")
    else:
        if callbacks:  # assume user has already specified log_evaluation callback
            verbose_eval = False
        else:
            verbose_eval = True
    if verbose_eval is True:
        callbacks.add(callback.log_evaluation())
    elif isinstance(verbose_eval, int):
        # NOTE: ``False`` is an int as well, so it also takes this branch
        callbacks.add(callback.log_evaluation(verbose_eval))

    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=bool(verbose_eval)))

    if learning_rates is not None:
        _log_warning("'learning_rates' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'reset_parameter()' callback via 'callbacks' argument instead.")
        callbacks.add(callback.reset_parameter(learning_rate=learning_rates))

    if evals_result is not None:
        _log_warning("'evals_result' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'record_evaluation()' callback via 'callbacks' argument instead.")
        callbacks.add(callback.record_evaluation(evals_result))

    # split callbacks into those run before and those run after each boosting update
    callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)}
    callbacks_after_iter = callbacks - callbacks_before_iter
    callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order'))
    callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order'))

    # construct booster
    try:
        booster = Booster(params=params, train_set=train_set)
        if is_valid_contain_train:
            booster.set_train_data_name(train_data_name)
        for valid_set, name_valid_set in zip(reduced_valid_sets, name_valid_sets):
            booster.add_valid(valid_set, name_valid_set)
    finally:
        # undo the parameter updates applied to the datasets above,
        # whether or not the Booster could be constructed
        train_set._reverse_update_params()
        for valid_set in reduced_valid_sets:
            valid_set._reverse_update_params()
    booster.best_iteration = 0

    # start training
    for i in range(init_iteration, init_iteration + num_boost_round):
        for cb in callbacks_before_iter:
            cb(callback.CallbackEnv(model=booster,
                                    params=params,
                                    iteration=i,
                                    begin_iteration=init_iteration,
                                    end_iteration=init_iteration + num_boost_round,
                                    evaluation_result_list=None))

        booster.update(fobj=fobj)

        evaluation_result_list = []
        # check evaluation result.
        if valid_sets is not None:
            if is_valid_contain_train:
                evaluation_result_list.extend(booster.eval_train(feval))
            evaluation_result_list.extend(booster.eval_valid(feval))
        try:
            for cb in callbacks_after_iter:
                cb(callback.CallbackEnv(model=booster,
                                        params=params,
                                        iteration=i,
                                        begin_iteration=init_iteration,
                                        end_iteration=init_iteration + num_boost_round,
                                        evaluation_result_list=evaluation_result_list))
        except callback.EarlyStopException as earlyStopException:
            # the exception carries a 0-based best iteration and the scores recorded for it
            booster.best_iteration = earlyStopException.best_iteration + 1
            evaluation_result_list = earlyStopException.best_score
            break
    booster.best_score = collections.defaultdict(collections.OrderedDict)
    for dataset_name, eval_name, score, _ in evaluation_result_list:
        booster.best_score[dataset_name][eval_name] = score
    if not keep_training_booster:
        booster.model_from_string(booster.model_to_string(), verbose='_silent_false').free_dataset()
    return booster


320
class CVBooster:
    """CVBooster in LightGBM.

    Auxiliary data structure to hold and redirect all boosters of ``cv`` function.
    This class has the same methods as Booster class.
    All method calls are actually performed for underlying Boosters and then all returned results are returned in a list.

    Attributes
    ----------
    boosters : list of Booster
        The list of underlying fitted models.
    best_iteration : int
        The best iteration of fitted model.
    """

    def __init__(self):
        """Initialize the CVBooster.

        Generally, no need to instantiate manually.
        """
        self.boosters = []
        self.best_iteration = -1

    def _append(self, booster):
        """Add a booster to CVBooster."""
        self.boosters.append(booster)

    def __getattr__(self, name):
        """Redirect methods call of CVBooster.

        Called only for attributes that are not found through normal lookup.
        Returns a function that calls method ``name`` on every underlying booster
        with the given arguments and collects the results in a list.

        Raises
        ------
        AttributeError
            If ``name`` is a special (double-underscore) name.
        """
        # Special names are probed on instances by the standard library
        # (e.g. ``__deepcopy__`` by ``copy``, ``__getstate__`` / ``__setstate__`` by ``pickle``).
        # Answering those probes with a redirecting handler makes copying and pickling
        # of a CVBooster misbehave, so report them as missing instead.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

        def handler_function(*args, **kwargs):
            """Call methods with each booster, and concatenate their results."""
            return [getattr(booster, name)(*args, **kwargs) for booster in self.boosters]
        return handler_function
wxchan's avatar
wxchan committed
356

357

358
359
def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True,
                  shuffle=True, eval_train_metric=False):
    """Make a n-fold list of Booster from random indices.

    Parameters
    ----------
    full_data : Dataset
        The whole data; it is constructed here and subsets of it are used for every fold.
    folds : iterable of (train_idx, test_idx) tuples, object with ``split`` method, or None
        Explicit fold definition. If None, folds are generated from
        ``nfold``, ``stratified``, ``shuffle`` and ``seed``.
    nfold : int
        Number of folds to generate when ``folds`` is None.
    params : dict
        Parameters passed to each Booster (or to ``fpreproc``, as a copy).
    seed : int
        Seed used when shuffling generated folds.
    fpreproc : callable or None, optional (default=None)
        Called as ``fpreproc(train_set, valid_set, params_copy)`` for each fold;
        should return ``(train_set, valid_set, params)`` to be used for that fold.
    stratified : bool, optional (default=True)
        Whether generated folds are stratified by label (requires scikit-learn).
    shuffle : bool, optional (default=True)
        Whether to shuffle before splitting when folds are generated.
    eval_train_metric : bool, optional (default=False)
        Whether the training part of each fold is also added as validation data named 'train'.

    Returns
    -------
    cvbooster : CVBooster
        Container with one Booster per fold.

    Raises
    ------
    AttributeError
        If ``folds`` is neither iterable nor has a ``split`` method.
    LightGBMError
        If scikit-learn is needed to generate the folds but is not installed.
    """
    full_data = full_data.construct()
    num_data = full_data.num_data()
    if folds is not None:
        if not hasattr(folds, '__iter__') and not hasattr(folds, 'split'):
            raise AttributeError("folds should be a generator or iterator of (train_idx, test_idx) tuples "
                                 "or scikit-learn splitter object with split method")
        if hasattr(folds, 'split'):
            group_info = full_data.get_group()
            if group_info is not None:
                # ``np.asarray`` copies only when needed; ``np.array(..., copy=False)``
                # raises under NumPy 2 whenever a copy cannot be avoided
                group_info = np.asarray(group_info, dtype=np.int32)
                # expand group sizes into one group id per row
                flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
            else:
                flatted_group = np.zeros(num_data, dtype=np.int32)
            # only the number of rows matters for X here, hence the uninitialized placeholder
            folds = folds.split(X=np.empty(num_data), y=full_data.get_label(), groups=flatted_group)
    else:
        if any(params.get(obj_alias, "") in {"lambdarank", "rank_xendcg", "xendcg",
                                             "xe_ndcg", "xe_ndcg_mart", "xendcg_mart"}
               for obj_alias in _ConfigAliases.get("objective")):
            if not SKLEARN_INSTALLED:
                raise LightGBMError('scikit-learn is required for ranking cv')
            # ranking task, split according to groups
            group_info = np.asarray(full_data.get_group(), dtype=np.int32)
            flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
            group_kfold = _LGBMGroupKFold(n_splits=nfold)
            folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group)
        elif stratified:
            if not SKLEARN_INSTALLED:
                raise LightGBMError('scikit-learn is required for stratified cv')
            skf = _LGBMStratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed)
            folds = skf.split(X=np.empty(num_data), y=full_data.get_label())
        else:
            if shuffle:
                randidx = np.random.RandomState(seed).permutation(num_data)
            else:
                randidx = np.arange(num_data)
            kstep = int(num_data / nfold)
            # NOTE(review): when ``num_data`` is not a multiple of ``nfold`` this produces
            # more than ``nfold`` chunks; only the first ``nfold`` are used below, so the
            # trailing rows end up in no fold at all. Kept as is to preserve fold composition.
            test_id = [randidx[i: i + kstep] for i in range(0, num_data, kstep)]
            train_id = [np.concatenate([test_id[i] for i in range(nfold) if k != i]) for k in range(nfold)]
            folds = zip(train_id, test_id)

    ret = CVBooster()
    for train_idx, test_idx in folds:
        train_set = full_data.subset(sorted(train_idx))
        valid_set = full_data.subset(sorted(test_idx))
        # run preprocessing on the data set if needed
        if fpreproc is not None:
            train_set, valid_set, tparam = fpreproc(train_set, valid_set, params.copy())
        else:
            tparam = params
        cvbooster = Booster(tparam, train_set)
        if eval_train_metric:
            cvbooster.add_valid(train_set, 'train')
        cvbooster.add_valid(valid_set, 'valid')
        ret._append(cvbooster)
    return ret

wxchan's avatar
wxchan committed
417

418
def _agg_cv_result(raw_results, eval_train_metric=False):
419
    """Aggregate cross-validation results."""
420
    cvmap = collections.OrderedDict()
wxchan's avatar
wxchan committed
421
422
423
    metric_type = {}
    for one_result in raw_results:
        for one_line in one_result:
424
            if eval_train_metric:
425
                key = f"{one_line[0]} {one_line[1]}"
426
427
428
            else:
                key = one_line[1]
            metric_type[key] = one_line[3]
429
            cvmap.setdefault(key, [])
430
            cvmap[key].append(one_line[2])
wxchan's avatar
wxchan committed
431
    return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]
wxchan's avatar
wxchan committed
432

wxchan's avatar
wxchan committed
433

434
def cv(params, train_set, num_boost_round=100,
       folds=None, nfold=5, stratified=True, shuffle=True,
       metrics=None, fobj=None, feval=None, init_model=None,
       feature_name='auto', categorical_feature='auto',
       early_stopping_rounds=None, fpreproc=None,
       verbose_eval=None, show_stdv=True, seed=0,
       callbacks=None, eval_train_metric=False,
       return_cvbooster=False):
    """Perform the cross-validation with given parameters.

    Parameters
    ----------
    params : dict
        Parameters for Booster.
    train_set : Dataset
        Data to be trained on.
    num_boost_round : int, optional (default=100)
        Number of boosting iterations.
    folds : generator or iterator of (train_idx, test_idx) tuples, scikit-learn splitter object or None, optional (default=None)
        If generator or iterator, it should yield the train and test indices for each fold.
        If object, it should be one of the scikit-learn splitter classes
        (https://scikit-learn.org/stable/modules/classes.html#splitter-classes)
        and have ``split`` method.
        This argument has highest priority over other data split arguments.
    nfold : int, optional (default=5)
        Number of folds in CV.
    stratified : bool, optional (default=True)
        Whether to perform stratified sampling.
    shuffle : bool, optional (default=True)
        Whether to shuffle before splitting data.
    metrics : str, list of str, or None, optional (default=None)
        Evaluation metrics to be monitored while CV.
        If not None, the metric in ``params`` will be overridden.
    fobj : callable or None, optional (default=None)
        Customized objective function.
        Should accept two parameters: preds, train_data,
        and return (grad, hess).

            preds : list or numpy 1-D array
                The predicted values.
                Predicted values are returned before any transformation,
                e.g. they are raw margin instead of probability of positive class for binary task.
            train_data : Dataset
                The training dataset.
            grad : list or numpy 1-D array
                The value of the first order derivative (gradient) of the loss
                with respect to the elements of preds for each sample point.
            hess : list or numpy 1-D array
                The value of the second order derivative (Hessian) of the loss
                with respect to the elements of preds for each sample point.

        For multi-class task, preds are grouped by class_id first, then by row_id.
        If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]
        and you should group grad and hess in this way as well.

    feval : callable, list of callable, or None, optional (default=None)
        Customized evaluation function.
        Each evaluation function should accept two parameters: preds, train_data,
        and return (eval_name, eval_result, is_higher_better) or list of such tuples.

            preds : list or numpy 1-D array
                The predicted values.
                If ``fobj`` is specified, predicted values are returned before any transformation,
                e.g. they are raw margin instead of probability of positive class for binary task in this case.
            train_data : Dataset
                The training dataset.
            eval_name : str
                The name of evaluation function (without whitespace).
            eval_result : float
                The eval result.
            is_higher_better : bool
                Is eval result higher better, e.g. AUC is ``is_higher_better``.

        For multi-class task, preds are grouped by class_id first, then by row_id.
        If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
        To ignore the default metric corresponding to the used objective,
        set ``metrics`` to the string ``"None"``.
    init_model : str, pathlib.Path, Booster or None, optional (default=None)
        Filename of LightGBM model or Booster instance used for continue training.
    feature_name : list of str, or 'auto', optional (default="auto")
        Feature names.
        If 'auto' and data is pandas DataFrame, data columns names are used.
    categorical_feature : list of str or int, or 'auto', optional (default="auto")
        Categorical features.
        If list of int, interpreted as indices.
        If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
        If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
        All values in categorical features should be less than int32 max value (2147483647).
        Large values could be memory consuming. Consider using consecutive integers starting from zero.
        All negative values in categorical features will be treated as missing values.
        The output cannot be monotonically constrained with respect to a categorical feature.
    early_stopping_rounds : int or None, optional (default=None)
        Activates early stopping.
        CV score needs to improve at least every ``early_stopping_rounds`` round(s)
        to continue.
        Requires at least one metric. If there's more than one, will check all of them.
        To check only the first metric, set the ``first_metric_only`` parameter to ``True`` in ``params``.
        Last entry in evaluation history is the one from the best iteration.
    fpreproc : callable or None, optional (default=None)
        Preprocessing function that takes (dtrain, dtest, params)
        and returns transformed versions of those.
    verbose_eval : bool, int, or None, optional (default=None)
        Whether to display the progress.
        If True, progress will be displayed at every boosting stage.
        If int, progress will be displayed at every given ``verbose_eval`` boosting stage.
    show_stdv : bool, optional (default=True)
        Whether to display the standard deviation in progress.
        Results are not affected by this parameter, and always contain std.
    seed : int, optional (default=0)
        Seed used to generate the folds (passed to numpy.random.seed).
    callbacks : list of callable, or None, optional (default=None)
        List of callback functions that are applied at each iteration.
        See Callbacks in Python API for more information.
    eval_train_metric : bool, optional (default=False)
        Whether to display the train metric in progress.
        The score of the metric is calculated again after each training step, so there is some impact on performance.
    return_cvbooster : bool, optional (default=False)
        Whether to return Booster models trained on each fold through ``CVBooster``.

    Returns
    -------
    eval_hist : dict
        Evaluation history.
        The dictionary has the following format:
        {'metric1-mean': [values], 'metric1-stdv': [values],
        'metric2-mean': [values], 'metric2-stdv': [values],
        ...}.
        If ``return_cvbooster=True``, also returns trained boosters via ``cvbooster`` key.

    Raises
    ------
    TypeError
        If ``train_set`` is not a ``Dataset``.
    ValueError
        If the effective number of boosting rounds is not greater than zero.
    """
    if not isinstance(train_set, Dataset):
        raise TypeError("Training only accepts Dataset object")

    # Work on a private copy so that the caller's dict is never modified.
    params = copy.deepcopy(params)
    if fobj is not None:
        # A custom objective replaces whatever objective was configured.
        for obj_alias in _ConfigAliases.get("objective"):
            params.pop(obj_alias, None)
        params['objective'] = 'none'
    # Any "num_iterations" alias found in params overrides the num_boost_round argument.
    for alias in _ConfigAliases.get("num_iterations"):
        if alias in params:
            _log_warning(f"Found '{alias}' in params. Will use it instead of 'num_boost_round' argument")
            num_boost_round = params.pop(alias)
    params["num_iterations"] = num_boost_round
    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        _log_warning("'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'early_stopping()' callback via 'callbacks' argument instead.")
    # Likewise, an "early_stopping_round" alias in params overrides the argument.
    for alias in _ConfigAliases.get("early_stopping_round"):
        if alias in params:
            early_stopping_rounds = params.pop(alias)
    params["early_stopping_round"] = early_stopping_rounds
    first_metric_only = params.get('first_metric_only', False)

    if num_boost_round <= 0:
        raise ValueError("num_boost_round should be greater than zero.")
    if isinstance(init_model, (str, Path)):
        predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
    elif isinstance(init_model, Booster):
        predictor = init_model._to_predictor(dict(init_model.params, **params))
    else:
        predictor = None

    if metrics is not None:
        # An explicit ``metrics`` argument replaces every metric alias in params.
        for metric_alias in _ConfigAliases.get("metric"):
            params.pop(metric_alias, None)
        params['metric'] = metrics

    # These calls are made on the Dataset object supplied by the caller.
    train_set._update_params(params) \
             ._set_predictor(predictor) \
             .set_feature_name(feature_name) \
             .set_categorical_feature(categorical_feature)

    results = collections.defaultdict(list)
    cvfolds = _make_n_folds(train_set, folds=folds, nfold=nfold,
                            params=params, seed=seed, fpreproc=fpreproc,
                            stratified=stratified, shuffle=shuffle,
                            eval_train_metric=eval_train_metric)

    # setup callbacks
    if callbacks is None:
        callbacks = set()
    else:
        # Callbacks without an explicit ``order`` get a negative one that
        # preserves their position in the list supplied by the caller.
        for i, cb in enumerate(callbacks):
            cb.__dict__.setdefault('order', i - len(callbacks))
        callbacks = set(callbacks)
    if early_stopping_rounds is not None and early_stopping_rounds > 0:
        callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=False))
    if verbose_eval is not None:
        _log_warning("'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. "
                     "Pass 'log_evaluation()' callback via 'callbacks' argument instead.")
    if verbose_eval is True:
        callbacks.add(callback.log_evaluation(show_stdv=show_stdv))
    elif isinstance(verbose_eval, int):
        callbacks.add(callback.log_evaluation(verbose_eval, show_stdv=show_stdv))

    # Split callbacks by the phase in which they run, then order each group.
    callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)}
    callbacks_after_iter = callbacks - callbacks_before_iter
    callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order'))
    callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order'))

    for i in range(num_boost_round):
        for cb in callbacks_before_iter:
            cb(callback.CallbackEnv(model=cvfolds,
                                    params=params,
                                    iteration=i,
                                    begin_iteration=0,
                                    end_iteration=num_boost_round,
                                    evaluation_result_list=None))
        cvfolds.update(fobj=fobj)
        res = _agg_cv_result(cvfolds.eval_valid(feval), eval_train_metric)
        for _, key, mean, _, std in res:
            results[f'{key}-mean'].append(mean)
            results[f'{key}-stdv'].append(std)
        try:
            for cb in callbacks_after_iter:
                cb(callback.CallbackEnv(model=cvfolds,
                                        params=params,
                                        iteration=i,
                                        begin_iteration=0,
                                        end_iteration=num_boost_round,
                                        evaluation_result_list=res))
        except callback.EarlyStopException as earlyStopException:
            # The exception carries a 0-based index; store it 1-based and drop
            # the history recorded after the best iteration.
            cvfolds.best_iteration = earlyStopException.best_iteration + 1
            for k in results:
                results[k] = results[k][:cvfolds.best_iteration]
            break

    if return_cvbooster:
        results['cvbooster'] = cvfolds

    return dict(results)