# sklearn.py

# coding: utf-8
# pylint: disable = invalid-name, W0105, C0111, C0301
"""Scikit-Learn Wrapper interface for LightGBM."""
from __future__ import absolute_import

import numpy as np
import warnings
try:
    import pandas as pd
    _IS_PANDAS_INSTALLED = True
except ImportError:
    _IS_PANDAS_INSTALLED = False

from .basic import Dataset, LightGBMError
from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
                     LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
                     _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength,
                     _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight,
                     argc_, range_, LGBMDeprecationWarning)
from .engine import train


def _objective_function_wrapper(func):
    """Decorate an objective function.

    Note: for multi-class task, the y_pred is group by class_id first, then group by row_id.
          If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i]
          and you should group grad and hess in this way as well.

    Parameters
    ----------
    func: callable
        Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group)``:
            y_true: array-like of shape = [n_samples]
                The target values.
            y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
                The predicted values.
            group: array-like
                Group/query data, used for ranking task.

    Returns
    -------
    new_func: callable
        The new objective function as expected by ``lightgbm.engine.train``.
        The signature is ``new_func(preds, dataset)``:

        preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
            The predicted values.
        dataset: ``dataset``
            The training set from which the labels will be extracted using
            ``dataset.get_label()``.

    Raises
    ------
    TypeError
        Raised by ``new_func`` when ``func`` does not take 2 or 3 arguments.
    ValueError
        Raised by ``new_func`` when weights are set and the length of grad is not
        a multiple of the number of weights.
    """
    def inner(preds, dataset):
        """Call ``func`` and scale the returned grad/hess by the sample weights, if any."""
        labels = dataset.get_label()
        argc = argc_(func)
        if argc == 2:
            grad, hess = func(labels, preds)
        elif argc == 3:
            grad, hess = func(labels, preds, dataset.get_group())
        else:
            raise TypeError("Self-defined objective function should have 2 or 3 arguments, got %d" % argc)
        # weighted for objective
        weight = dataset.get_weight()
        if weight is None:
            return grad, hess
        num_data = len(weight)
        if num_data != len(grad):
            # more than one class: grad/hess are laid out class by class
            # (index = class_id * num_data + row_id), so the per-row weights
            # are repeated once per class to line up with that layout
            num_class = len(grad) // num_data
            if num_class * num_data != len(grad):
                raise ValueError("Length of grad and hess should equal to num_class * num_data")
            weight = np.tile(weight, num_class)
        # np.multiply builds new arrays: the caller's grad/hess are left untouched and
        # integer inputs are promoted instead of being truncated by an in-place update
        return np.multiply(grad, weight), np.multiply(hess, weight)
    return inner


def _eval_function_wrapper(func):
    """Adapt a user-supplied evaluation metric to the ``(preds, dataset)`` calling form.

    Note: for multi-class task, the y_pred is group by class_id first, then group by row_id.
          If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i].

    Parameters
    ----------
    func: callable
        A metric taking 2, 3 or 4 positional arguments, i.e. one of
            ``func(y_true, y_pred)``,
            ``func(y_true, y_pred, weight)``
         or ``func(y_true, y_pred, weight, group)``
            and returning (eval_name->str, eval_result->float, is_bigger_better->Bool):

            y_true: array-like of shape = [n_samples]
                The target values.
            y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
                The predicted values.
            weight: array_like of shape = [n_samples]
                The weight of samples.
            group: array-like
                Group/query data, used for ranking task.

    Returns
    -------
    new_func: callable
        The new eval function as expected by ``lightgbm.engine.train``.
        The signature is ``new_func(preds, dataset)``:

        preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
            The predicted values.
        dataset: ``dataset``
            The data set whose label (and, depending on the arity of ``func``,
            weight and group) is read through ``get_label()``, ``get_weight()``
            and ``get_group()``.
    """
    def inner(preds, dataset):
        """Forward to ``func`` with as many dataset fields as it declares arguments for."""
        y_true = dataset.get_label()
        n_args = argc_(func)
        if n_args not in (2, 3, 4):
            raise TypeError("Self-defined eval function should have 2, 3 or 4 arguments, got %d" % n_args)
        call_args = [y_true, preds]
        if n_args >= 3:
            call_args.append(dataset.get_weight())
        if n_args == 4:
            call_args.append(dataset.get_group())
        return func(*call_args)
    return inner


class LGBMModel(_LGBMModelBase):
    """Implementation of the scikit-learn API for LightGBM."""

    def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
                 learning_rate=0.1, n_estimators=100,
                 subsample_for_bin=200000, objective=None, class_weight=None,
                 min_split_gain=0., min_child_weight=1e-3, min_child_samples=20,
                 subsample=1., subsample_freq=1, colsample_bytree=1.,
                 reg_alpha=0., reg_lambda=0., random_state=None,
                 n_jobs=-1, silent=True, **kwargs):
        """Construct a gradient boosting model.

        Parameters
        ----------
        boosting_type : string, optional (default="gbdt")
            'gbdt', traditional Gradient Boosting Decision Tree.
            'dart', Dropouts meet Multiple Additive Regression Trees.
            'goss', Gradient-based One-Side Sampling.
            'rf', Random Forest.
        num_leaves : int, optional (default=31)
            Maximum tree leaves for base learners.
        max_depth : int, optional (default=-1)
            Maximum tree depth for base learners, -1 means no limit.
        learning_rate : float, optional (default=0.1)
            Boosting learning rate.
        n_estimators : int, optional (default=100)
            Number of boosted trees to fit.
        subsample_for_bin : int, optional (default=200000)
            Number of samples for constructing bins.
        objective : string, callable or None, optional (default=None)
            Specify the learning task and the corresponding learning objective or
            a custom objective function to be used (see note below).
            default: 'regression' for LGBMRegressor, 'binary' or 'multiclass' for LGBMClassifier, 'lambdarank' for LGBMRanker.
        class_weight : dict, 'balanced' or None, optional (default=None)
            Weights associated with classes in the form ``{class_label: weight}``.
            Use this parameter only for multi-class classification task;
            for binary classification task you may use ``is_unbalance`` or ``scale_pos_weight`` parameters.
            The 'balanced' mode uses the values of y to automatically adjust weights
            inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.
            If None, all classes are supposed to have weight one.
            Note that these weights will be multiplied with ``sample_weight`` (passed through the fit method)
            if ``sample_weight`` is specified.
        min_split_gain : float, optional (default=0.)
            Minimum loss reduction required to make a further partition on a leaf node of the tree.
        min_child_weight : float, optional (default=1e-3)
            Minimum sum of instance weight (hessian) needed in a child (leaf).
        min_child_samples : int, optional (default=20)
            Minimum number of data needed in a child (leaf).
        subsample : float, optional (default=1.)
            Subsample ratio of the training instance.
        subsample_freq : int, optional (default=1)
            Frequency of subsample, <=0 means disabled.
        colsample_bytree : float, optional (default=1.)
            Subsample ratio of columns when constructing each tree.
        reg_alpha : float, optional (default=0.)
            L1 regularization term on weights.
        reg_lambda : float, optional (default=0.)
            L2 regularization term on weights.
        random_state : int or None, optional (default=None)
            Random number seed.
            Will use default seeds in c++ code if set to None.
        n_jobs : int, optional (default=-1)
            Number of parallel threads.
        silent : bool, optional (default=True)
            Whether to print messages while running boosting.
        **kwargs : other parameters
            Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters.

            Note
            ----
            \\*\\*kwargs is not supported in sklearn, it may cause unexpected issues.

        Raises
        ------
        LightGBMError
            If scikit-learn is not installed.

        Attributes
        ----------
        n_features_ : int
            The number of features of fitted model.
        classes_ : array of shape = [n_classes]
            The class label array (only for classification problem).
        n_classes_ : int
            The number of classes (only for classification problem).
        best_score_ : dict or None
            The best score of fitted model.
        best_iteration_ : int or None
            The best iteration of fitted model if ``early_stopping_rounds`` has been specified.
        objective_ : string or callable
            The concrete objective used while fitting this model.
        booster_ : Booster
            The underlying Booster of this model.
        evals_result_ : dict or None
            The evaluation results if ``early_stopping_rounds`` has been specified.
        feature_importances_ : array of shape = [n_features]
            The feature importances (the higher, the more important the feature).

        Note
        ----
        A custom objective function can be provided for the ``objective``
        parameter. In this case, it should have the signature
        ``objective(y_true, y_pred) -> grad, hess`` or
        ``objective(y_true, y_pred, group) -> grad, hess``:

            y_true: array-like of shape = [n_samples]
                The target values.
            y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                The predicted values.
            group: array-like
                Group/query data, used for ranking task.
            grad: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                The value of the gradient for each sample point.
            hess: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                The value of the second derivative for each sample point.

        For multi-class task, the y_pred is group by class_id first, then group by row_id.
        If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i]
        and you should group grad and hess in this way as well.
        """
        if not SKLEARN_INSTALLED:
            raise LightGBMError('Scikit-learn is required for this module')

        # public hyper-parameters, stored under the same names as the constructor arguments
        self.boosting_type = boosting_type
        self.objective = objective
        self.num_leaves = num_leaves
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.subsample_for_bin = subsample_for_bin
        self.min_split_gain = min_split_gain
        self.min_child_weight = min_child_weight
        self.min_child_samples = min_child_samples
        self.subsample = subsample
        self.subsample_freq = subsample_freq
        self.colsample_bytree = colsample_bytree
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.silent = silent
        # private state; the fitted-model fields below are filled in by fit()
        self._Booster = None
        self._evals_result = None
        self._best_score = None
        self._best_iteration = None
        # extra parameters recorded by set_params() and merged back in by get_params()
        self._other_params = {}
        # working copy of the objective; fit() substitutes a default when it is None
        self._objective = objective
        self.class_weight = class_weight
        self._n_features = None
        self._classes = None
        self._n_classes = None
        # must run after _other_params exists, because set_params() writes into it
        self.set_params(**kwargs)

    def get_params(self, deep=True):
        """Return the estimator parameters, including those recorded by ``set_params``.

        Parameters
        ----------
        deep : bool, optional (default=True)
            Forwarded unchanged to the parent class ``get_params``.

        Returns
        -------
        params : dict
            The parent class result, with every entry of ``self._other_params``
            written on top of it.
        """
        merged = super(LGBMModel, self).get_params(deep=deep)
        for name, value in self._other_params.items():
            merged[name] = value
        return merged

    # minor change to support `**kwargs`
    def set_params(self, **params):
        """Set parameters on this estimator, accepting arbitrary keyword names.

        Each value is stored as an attribute of the same name, mirrored onto the
        underscore-prefixed attribute when one already exists, and recorded in
        ``self._other_params`` so that ``get_params`` reports it.

        Parameters
        ----------
        **params
            Parameter names mapped to their new values.

        Returns
        -------
        self : object
            Returns self.
        """
        for name in params:
            new_value = params[name]
            setattr(self, name, new_value)
            shadow_name = '_' + name
            if hasattr(self, shadow_name):
                setattr(self, shadow_name, new_value)
            self._other_params[name] = new_value
        return self

    def fit(self, X, y,
            sample_weight=None, init_score=None, group=None,
            eval_set=None, eval_names=None, eval_sample_weight=None,
            eval_class_weight=None, eval_init_score=None, eval_group=None,
            eval_metric=None, early_stopping_rounds=None, verbose=True,
            feature_name='auto', categorical_feature='auto', callbacks=None):
        """Build a gradient boosting model from the training set (X, y).

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            Input feature matrix.
        y : array-like of shape = [n_samples]
            The target values (class labels in classification, real numbers in regression).
        sample_weight : array-like of shape = [n_samples] or None, optional (default=None)
            Weights of training data.
        init_score : array-like of shape = [n_samples] or None, optional (default=None)
            Init score of training data.
        group : array-like of shape = [n_samples] or None, optional (default=None)
            Group data of training data.
        eval_set : list or None, optional (default=None)
            A list of (X, y) tuple pairs to use as a validation sets for early-stopping.
        eval_names : list of strings or None, optional (default=None)
            Names of eval_set.
        eval_sample_weight : list of arrays or None, optional (default=None)
            Weights of eval data.
        eval_class_weight : list or None, optional (default=None)
            Class weights of eval data.
        eval_init_score : list of arrays or None, optional (default=None)
            Init score of eval data.
        eval_group : list of arrays or None, optional (default=None)
            Group data of eval data.
        eval_metric : string, list of strings, callable or None, optional (default=None)
            If string, it should be a built-in evaluation metric to use.
            If callable, it should be a custom evaluation metric, see note for more details.
        early_stopping_rounds : int or None, optional (default=None)
            Activates early stopping. The model will train until the validation score stops improving.
            Validation error needs to decrease at least every ``early_stopping_rounds`` round(s)
            to continue training.
        verbose : bool, optional (default=True)
            If True and an evaluation set is used, writes the evaluation progress.
        feature_name : list of strings or 'auto', optional (default="auto")
            Feature names.
            If 'auto' and data is pandas DataFrame, data columns names are used.
        categorical_feature : list of strings or int, or 'auto', optional (default="auto")
            Categorical features.
            If list of int, interpreted as indices.
            If list of strings, interpreted as feature names (need to specify ``feature_name`` as well).
            If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
        callbacks : list of callback functions or None, optional (default=None)
            List of callback functions that are applied at each iteration.
            See Callbacks in Python API for more information.

        Returns
        -------
        self : object
            Returns self.

        Note
        ----
        Custom eval function expects a callable with following functions:
        ``func(y_true, y_pred)``, ``func(y_true, y_pred, weight)`` or
        ``func(y_true, y_pred, weight, group)``.
        Returns (eval_name, eval_result, is_bigger_better) or
        list of (eval_name, eval_result, is_bigger_better)

            y_true: array-like of shape = [n_samples]
                The target values.
            y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
                The predicted values.
            weight: array-like of shape = [n_samples]
                The weight of samples.
            group: array-like
                Group/query data, used for ranking task.
            eval_name: str
                The name of evaluation.
            eval_result: float
                The eval result.
            is_bigger_better: bool
                Is eval result bigger better, e.g. AUC is bigger_better.

        For multi-class task, the y_pred is group by class_id first, then group by row_id.
        If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i].

        When an entry of the evaluation sets holds the very same ``X`` and ``y`` objects
        that were passed for training, the training Dataset is reused for that entry
        instead of constructing a second Dataset.
        """
        if self._objective is None:
            if isinstance(self, LGBMRegressor):
                self._objective = "regression"
            elif isinstance(self, LGBMClassifier):
                self._objective = "binary"
            elif isinstance(self, LGBMRanker):
                self._objective = "lambdarank"
            else:
                raise ValueError("Unknown LGBMModel type.")
        if callable(self._objective):
            self._fobj = _objective_function_wrapper(self._objective)
        else:
            self._fobj = None
        evals_result = {}
        params = self.get_params()
        # sklearn interface has another naming convention
        params.setdefault('seed', params.pop('random_state'))
        params.setdefault('nthread', params.pop('n_jobs'))
        # user can set verbose with kwargs, it has higher priority
        if 'verbose' not in params and self.silent:
            params['verbose'] = 0
        params.pop('silent', None)
        params.pop('n_estimators', None)
        params.pop('class_weight', None)
        if self._n_classes is not None and self._n_classes > 2:
            params['num_class'] = self._n_classes
        if hasattr(self, '_eval_at'):
            params['ndcg_eval_at'] = self._eval_at
        params['objective'] = self._objective
        if self._fobj:
            params['objective'] = 'None'  # objective = nullptr for unknown objective

        if callable(eval_metric):
            feval = _eval_function_wrapper(eval_metric)
        else:
            feval = None
            params['metric'] = eval_metric

        # Keep the caller's X and y bound to their original objects: the eval_set
        # loop below recognises "evaluate on the training data" by object identity,
        # and the validation helper may hand back different objects.
        if not _IS_PANDAS_INSTALLED or not isinstance(X, pd.DataFrame):
            _X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2)
            _LGBMCheckConsistentLength(_X, _y, sample_weight)
        else:
            _X, _y = X, y

        if self.class_weight is not None:
            class_sample_weight = _LGBMComputeSampleWeight(self.class_weight, _y)
            if sample_weight is None or len(sample_weight) == 0:
                sample_weight = class_sample_weight
            else:
                sample_weight = np.multiply(sample_weight, class_sample_weight)

        self._n_features = _X.shape[1]

        def _construct_dataset(X, y, sample_weight, init_score, group, params):
            """Build a Dataset and attach its init score."""
            ret = Dataset(X, label=y, weight=sample_weight, group=group, params=params)
            ret.set_init_score(init_score)
            return ret

        def _get_meta_data(collection, i):
            """Return the entry for the i-th eval set from a list or dict, or None when absent."""
            if collection is None:
                return None
            elif isinstance(collection, list):
                return collection[i] if len(collection) > i else None
            elif isinstance(collection, dict):
                return collection.get(i, None)
            else:
                raise TypeError('eval_sample_weight, eval_class_weight, eval_init_score, and eval_group should be dict or list')

        train_set = _construct_dataset(_X, _y, sample_weight, init_score, group, params)

        valid_sets = []
        if eval_set is not None:
            if isinstance(eval_set, tuple):
                eval_set = [eval_set]
            for i, valid_data in enumerate(eval_set):
                # reduce cost for prediction training data
                if valid_data[0] is X and valid_data[1] is y:
                    valid_set = train_set
                else:
                    valid_weight = _get_meta_data(eval_sample_weight, i)
                    valid_class_weight = _get_meta_data(eval_class_weight, i)
                    if valid_class_weight is not None:
                        valid_class_sample_weight = _LGBMComputeSampleWeight(valid_class_weight, valid_data[1])
                        if valid_weight is None or len(valid_weight) == 0:
                            valid_weight = valid_class_sample_weight
                        else:
                            valid_weight = np.multiply(valid_weight, valid_class_sample_weight)
                    valid_init_score = _get_meta_data(eval_init_score, i)
                    valid_group = _get_meta_data(eval_group, i)
                    valid_set = _construct_dataset(valid_data[0], valid_data[1], valid_weight, valid_init_score, valid_group, params)
                valid_sets.append(valid_set)

        self._Booster = train(params, train_set,
                              self.n_estimators, valid_sets=valid_sets, valid_names=eval_names,
                              early_stopping_rounds=early_stopping_rounds,
                              evals_result=evals_result, fobj=self._fobj, feval=feval,
                              verbose_eval=verbose, feature_name=feature_name,
                              categorical_feature=categorical_feature,
                              callbacks=callbacks)

        if evals_result:
            self._evals_result = evals_result

        if early_stopping_rounds is not None:
            self._best_iteration = self._Booster.best_iteration

        self._best_score = self._Booster.best_score

        # free dataset
        self.booster_.free_dataset()
        del train_set, valid_sets
        return self

    def predict(self, X, raw_score=False, num_iteration=0):
        """Predict a value for every row of ``X`` with the fitted booster.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            Input features matrix.
        raw_score : bool, optional (default=False)
            Whether to predict raw scores.
        num_iteration : int, optional (default=0)
            Limit number of iterations in the prediction; defaults to 0 (use all trees).

        Returns
        -------
        predicted_result : array-like of shape = [n_samples] or shape = [n_samples, n_classes]
            The predicted values.

        Raises
        ------
        LGBMNotFittedError
            If ``fit`` has not been called yet.
        ValueError
            If ``X`` does not have the number of features seen during ``fit``.
        """
        fitted_width = self._n_features
        if fitted_width is None:
            raise LGBMNotFittedError("Estimator not fitted, call `fit` before exploiting the model.")
        # pandas DataFrames are passed through untouched; anything else is validated first
        is_dataframe = _IS_PANDAS_INSTALLED and isinstance(X, pd.DataFrame)
        if not is_dataframe:
            X = _LGBMCheckArray(X, accept_sparse=True, force_all_finite=False)
        input_width = X.shape[1]
        if fitted_width != input_width:
            template = ("Number of features of the model must "
                        "match the input. Model n_features_ is %s and "
                        "input n_features is %s ")
            raise ValueError(template % (fitted_width, input_width))
        return self.booster_.predict(X, raw_score=raw_score, num_iteration=num_iteration)

    def apply(self, X, num_iteration=0):
        """Report, for every row of ``X``, the leaf it lands in for each tree.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            Input features matrix.
        num_iteration : int, optional (default=0)
            Limit number of iterations in the prediction; defaults to 0 (use all trees).

        Returns
        -------
        X_leaves : array-like of shape = [n_samples, n_trees]
            The predicted leaf every tree for each sample.

        Raises
        ------
        LGBMNotFittedError
            If ``fit`` has not been called yet.
        ValueError
            If ``X`` does not have the number of features seen during ``fit``.
        """
        fitted_width = self._n_features
        if fitted_width is None:
            raise LGBMNotFittedError("Estimator not fitted, call `fit` before exploiting the model.")
        # pandas DataFrames are passed through untouched; anything else is validated first
        is_dataframe = _IS_PANDAS_INSTALLED and isinstance(X, pd.DataFrame)
        if not is_dataframe:
            X = _LGBMCheckArray(X, accept_sparse=True, force_all_finite=False)
        input_width = X.shape[1]
        if fitted_width != input_width:
            template = ("Number of features of the model must "
                        "match the input. Model n_features_ is %s and "
                        "input n_features is %s ")
            raise ValueError(template % (fitted_width, input_width))
        return self.booster_.predict(X, pred_leaf=True, num_iteration=num_iteration)

    @property
    def n_features_(self):
        """Number of features recorded by ``fit``; raises LGBMNotFittedError before fitting."""
        if self._n_features is not None:
            return self._n_features
        raise LGBMNotFittedError('No n_features found. Need to call fit beforehand.')

    @property
    def best_score_(self):
        """Best score stored by ``fit``; raises LGBMNotFittedError before fitting."""
        if self._n_features is not None:
            return self._best_score
        raise LGBMNotFittedError('No best_score found. Need to call fit beforehand.')

    @property
    def best_iteration_(self):
        """Best iteration stored by ``fit`` (None unless early stopping was used).

        Raises LGBMNotFittedError before fitting.
        """
        if self._n_features is not None:
            return self._best_iteration
        raise LGBMNotFittedError('No best_iteration found. Need to call fit with early_stopping_rounds beforehand.')

    @property
    def objective_(self):
        """Objective actually used by ``fit``; raises LGBMNotFittedError before fitting."""
        if self._n_features is not None:
            return self._objective
        raise LGBMNotFittedError('No objective found. Need to call fit beforehand.')

    @property
    def booster_(self):
        """Underlying Booster produced by ``fit``; raises LGBMNotFittedError when there is none."""
        booster = self._Booster
        if booster is not None:
            return booster
        raise LGBMNotFittedError('No booster found. Need to call fit beforehand.')

    @property
    def evals_result_(self):
        """Evaluation results stored by ``fit`` (may be None); raises LGBMNotFittedError before fitting."""
        if self._n_features is not None:
            return self._evals_result
        raise LGBMNotFittedError('No results found. Need to call fit with eval_set beforehand.')

    @property
    def feature_importances_(self):
        """Feature importances as reported by the underlying Booster.

        Note
        ----
        Feature importance in sklearn interface used to normalize to 1,
        it's deprecated after 2.0.4 and same as Booster.feature_importance() now.

        Raises LGBMNotFittedError before fitting.
        """
        if self._n_features is not None:
            return self.booster_.feature_importance()
        raise LGBMNotFittedError('No feature_importances found. Need to call fit beforehand.')


class LGBMRegressor(LGBMModel, _LGBMRegressorBase):
    """LightGBM regressor: ``LGBMModel`` with a regression-oriented ``fit`` signature."""

    def fit(self, X, y,
            sample_weight=None, init_score=None,
            eval_set=None, eval_names=None, eval_sample_weight=None,
            eval_init_score=None, eval_metric="l2", early_stopping_rounds=None,
            verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None):
        # The docstring of this method is assembled from LGBMModel.fit right below
        # the definition, so none is written here.
        forwarded = dict(sample_weight=sample_weight,
                         init_score=init_score,
                         eval_set=eval_set,
                         eval_names=eval_names,
                         eval_sample_weight=eval_sample_weight,
                         eval_init_score=eval_init_score,
                         eval_metric=eval_metric,
                         early_stopping_rounds=early_stopping_rounds,
                         verbose=verbose,
                         feature_name=feature_name,
                         categorical_feature=categorical_feature,
                         callbacks=callbacks)
        super(LGBMRegressor, self).fit(X, y, **forwarded)
        return self

    # Derive the docstring from the base class one.
    # Step 1: drop the eval_class_weight entry, which this signature does not accept.
    base_doc = LGBMModel.fit.__doc__
    base_doc = (base_doc[:base_doc.find('eval_class_weight :')] +
                base_doc[base_doc.find('eval_init_score :'):])
    # Step 2: swap the eval_metric header line for one stating this class's default.
    fit.__doc__ = ''.join([
        base_doc[:base_doc.find('eval_metric :')],
        'eval_metric : string, list of strings, callable or None, optional (default="l2")\n',
        base_doc[base_doc.find('            If string, it should be a built-in evaluation metric to use.'):],
    ])


class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
    """LightGBM classifier."""

    def fit(self, X, y,
            sample_weight=None, init_score=None,
            eval_set=None, eval_names=None, eval_sample_weight=None,
            eval_class_weight=None, eval_init_score=None, eval_metric="logloss",
            early_stopping_rounds=None, verbose=True,
            feature_name='auto', categorical_feature='auto', callbacks=None):
        # No docstring literal here: ``fit.__doc__`` is assembled right after
        # this method from ``LGBMModel.fit.__doc__``.
        _LGBMCheckClassificationTargets(y)
        # Encode the labels once; the encoder is kept on the instance so that
        # ``predict`` can map class indices back to the original labels.
        self._le = _LGBMLabelEncoder().fit(y)
        _y = self._le.transform(y)

        self._classes = self._le.classes_
        self._n_classes = len(self._classes)
        if self._n_classes > 2:
            # Switch to using a multiclass objective in the underlying LGBM instance
            if self._objective != "multiclassova" and not callable(self._objective):
                self._objective = "multiclass"
            # Translate generic / binary metric aliases to their multiclass names.
            if eval_metric in ('logloss', 'binary_logloss'):
                eval_metric = "multi_logloss"
            elif eval_metric in ('error', 'binary_error'):
                eval_metric = "multi_error"
        else:
            # Translate generic / multiclass metric aliases to their binary names.
            if eval_metric in ('logloss', 'multi_logloss'):
                eval_metric = 'binary_logloss'
            elif eval_metric in ('error', 'multi_error'):
                eval_metric = 'binary_error'

        # Label-encode the validation targets into a *new* list so that the
        # caller's ``eval_set`` is never modified in place.
        valid_sets = None
        if eval_set is not None:
            if isinstance(eval_set, tuple):
                eval_set = [eval_set]
            valid_sets = []
            for valid_x, valid_y in eval_set:
                if valid_x is X and valid_y is y:
                    # Same objects as the training data: reuse the labels that
                    # were already encoded above.
                    valid_sets.append((valid_x, _y))
                else:
                    valid_sets.append((valid_x, self._le.transform(valid_y)))

        super(LGBMClassifier, self).fit(X, _y, sample_weight=sample_weight,
                                        init_score=init_score, eval_set=valid_sets,
                                        eval_names=eval_names,
                                        eval_sample_weight=eval_sample_weight,
                                        eval_class_weight=eval_class_weight,
                                        eval_init_score=eval_init_score,
                                        eval_metric=eval_metric,
                                        early_stopping_rounds=early_stopping_rounds,
                                        verbose=verbose, feature_name=feature_name,
                                        categorical_feature=categorical_feature,
                                        callbacks=callbacks)
        return self

    # Reuse the generic ``LGBMModel.fit`` docstring, replacing only the
    # 'eval_metric :' header line so that it advertises the "logloss" default.
    base_doc = LGBMModel.fit.__doc__
    fit.__doc__ = (base_doc[:base_doc.find('eval_metric :')] +
                   'eval_metric : string, list of strings, callable or None, optional (default="logloss")\n' +
                   base_doc[base_doc.find('            If string, it should be a built-in evaluation metric to use.'):])

    def predict(self, X, raw_score=False, num_iteration=0):
        """Return the predicted class label for each sample.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            Input features matrix.
        raw_score : bool, optional (default=False)
            Forwarded to ``predict_proba``.
        num_iteration : int, optional (default=0)
            Forwarded to ``predict_proba``.

        Returns
        -------
        predicted_label : array-like of shape = [n_samples]
            For each sample, the original label of the class whose column in
            the ``predict_proba`` output holds the largest value.
        """
        class_probs = self.predict_proba(X, raw_score, num_iteration)
        class_index = np.argmax(class_probs, axis=1)
        return self._le.inverse_transform(class_index)

    def predict_proba(self, X, raw_score=False, num_iteration=0):
        """Return the predicted probability for each class for each sample.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            Input features matrix.
        raw_score : bool, optional (default=False)
            Whether to predict raw scores.
        num_iteration : int, optional (default=0)
            Limit number of iterations in the prediction; defaults to 0 (use all trees).

        Returns
        -------
        predicted_probability : array-like of shape = [n_samples, n_classes]
            The predicted probability for each class for each sample.

        Raises
        ------
        LGBMNotFittedError
            If the estimator has not been fitted yet.
        ValueError
            If the number of columns of ``X`` differs from the number of
            features recorded on the model.
        """
        if self._n_features is None:
            raise LGBMNotFittedError("Estimator not fitted, call `fit` before exploiting the model.")
        # pandas DataFrames are handed to the booster as-is; anything else is
        # validated / converted first.
        if not _IS_PANDAS_INSTALLED or not isinstance(X, pd.DataFrame):
            X = _LGBMCheckArray(X, accept_sparse=True, force_all_finite=False)
        n_features = X.shape[1]
        if self._n_features != n_features:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features_ is %s and "
                             "input n_features is %s "
                             % (self._n_features, n_features))
        class_probs = self.booster_.predict(X, raw_score=raw_score, num_iteration=num_iteration)
        if self._n_classes > 2:
            return class_probs
        else:
            # Binary case: the booster yields a single value per sample, so
            # expand it into two columns [1 - p, p].
            # NOTE(review): the complement is also taken when raw_score=True,
            # where the values are raw scores rather than probabilities --
            # confirm this is intended.
            return np.vstack((1. - class_probs, class_probs)).transpose()

    @property
    def classes_(self):
        """Get the class label array."""
        if self._classes is None:
            raise LGBMNotFittedError('No classes found. Need to call fit beforehand.')
        return self._classes

    @property
    def n_classes_(self):
        """Get the number of classes."""
        if self._n_classes is None:
            raise LGBMNotFittedError('No classes found. Need to call fit beforehand.')
        return self._n_classes


class LGBMRanker(LGBMModel):
    """LightGBM ranker."""

    def fit(self, X, y,
            sample_weight=None, init_score=None, group=None,
            eval_set=None, eval_names=None, eval_sample_weight=None,
            eval_init_score=None, eval_group=None, eval_metric='ndcg',
            eval_at=[1], early_stopping_rounds=None, verbose=True,
            feature_name='auto', categorical_feature='auto', callbacks=None):
        # No docstring literal here: ``fit.__doc__`` is assembled right after
        # this method from ``LGBMModel.fit.__doc__``.
        #
        # NOTE: the ``eval_at`` default is a single list object shared by every
        # call; it is only stored here, never mutated.

        # check group data
        if group is None:
            raise ValueError("Should set group for ranking task")

        if eval_set is not None:
            # A single ``(X, y)`` pair may be given as a bare tuple. Wrap it
            # before comparing lengths, otherwise ``len(eval_set)`` would be 2
            # (the pair's own length) instead of 1 (the number of eval sets).
            if isinstance(eval_set, tuple):
                eval_set = [eval_set]
            if eval_group is None:
                raise ValueError("Eval_group cannot be None when eval_set is not None")
            if len(eval_group) != len(eval_set):
                raise ValueError("Length of eval_group should be equal to eval_set")
            # Every eval set needs its own group data: a dict must be keyed
            # 0..n-1 with no None values, a list must contain no None entries.
            if isinstance(eval_group, dict):
                group_missing = any(i not in eval_group or eval_group[i] is None
                                    for i in range_(len(eval_group)))
            elif isinstance(eval_group, list):
                group_missing = any(one_group is None for one_group in eval_group)
            else:
                group_missing = False
            if group_missing:
                raise ValueError("Should set group for all eval datasets for ranking task; "
                                 "if you use dict, the index should start from 0")

        self._eval_at = eval_at
        super(LGBMRanker, self).fit(X, y, sample_weight=sample_weight,
                                    init_score=init_score, group=group,
                                    eval_set=eval_set, eval_names=eval_names,
                                    eval_sample_weight=eval_sample_weight,
                                    eval_init_score=eval_init_score, eval_group=eval_group,
                                    eval_metric=eval_metric,
                                    early_stopping_rounds=early_stopping_rounds,
                                    verbose=verbose, feature_name=feature_name,
                                    categorical_feature=categorical_feature,
                                    callbacks=callbacks)
        return self

    # Build the ``fit`` docstring out of the generic ``LGBMModel.fit`` one.
    # First remove the text from the 'eval_class_weight :' marker up to (but
    # not including) 'eval_init_score :', since this ``fit`` takes no
    # ``eval_class_weight`` argument.
    base_doc = LGBMModel.fit.__doc__
    fit.__doc__ = (base_doc[:base_doc.find('eval_class_weight :')] +
                   base_doc[base_doc.find('eval_init_score :'):])
    # Then replace the 'eval_metric :' header line (default "ndcg") and insert
    # an ``eval_at`` entry just before 'early_stopping_rounds :'. The middle
    # slice deliberately stops at the un-indented 'early_stopping_rounds :'
    # marker, so the whitespace that preceded it becomes the indentation of
    # the inserted 'eval_at :' line.
    base_doc = fit.__doc__
    fit.__doc__ = (base_doc[:base_doc.find('eval_metric :')] +
                   'eval_metric : string, list of strings, callable or None, optional (default="ndcg")\n' +
                   base_doc[base_doc.find('            If string, it should be a built-in evaluation metric to use.'):base_doc.find('early_stopping_rounds :')] +
                   'eval_at : list of int, optional (default=[1])\n'
                   '            The evaluation positions of NDCG.\n' +
                   base_doc[base_doc.find('        early_stopping_rounds :'):])