from __future__ import absolute_import

import collections

class EarlyStopException(Exception):
    """Raised internally to signal that boosting should stop early.

    Parameters
    ----------
    best_iteration : int
        The iteration with the best score, at which training stopped.
    """
    def __init__(self, best_iteration):
        super(EarlyStopException, self).__init__()
        self.best_iteration = best_iteration

# Environment handed to every callback on each boosting iteration.
CallbackEnv = collections.namedtuple(
    "LightGBMCallbackEnv",
    ["model", "cvfolds", "iteration", "begin_iteration",
     "end_iteration", "evaluation_result_list"])

def _format_eval_result(value, show_stdv=True):
    """format metric string"""
    if len(value) == 4:
        return '%s_%s:%g' % (value[0], value[1], value[2])
    elif len(value) == 5:
        if show_stdv:
            return '%s_%s:%g+%g' % (value[0], value[1], value[2], value[4])
        else:
            return '%s_%s:%g' % (value[0], value[1], value[2])
    else:
        raise ValueError("wrong metric value")


def print_evaluation(period=1, show_stdv=True):
    """Create a callback that prints the evaluation results.

    Parameters
    ----------
    period : int
        Print every ``period`` iterations.

    show_stdv : bool, optional
        Whether to show the stdv when it is provided.

    Returns
    -------
    callback : function
        A callback that prints the evaluation results periodically.
    """
    def callback(env):
        """internal function"""
        # Nothing to report, or printing explicitly disabled.
        if not env.evaluation_result_list or period is False:
            return
        if env.iteration % period == 0 or env.iteration + 1 == env.begin_iteration:
            formatted = [_format_eval_result(item, show_stdv)
                         for item in env.evaluation_result_list]
            print('[%d]\t%s\n' % (env.iteration, '\t'.join(formatted)))
    return callback


def record_evaluation(eval_result):
    """Create a callback that records the evaluation history into ``eval_result``.

    Parameters
    ----------
    eval_result : dict
        A dictionary to store the evaluation results in; it is cleared first.

    Returns
    -------
    callback : function
        The requested callback function.

    Raises
    ------
    TypeError
        If ``eval_result`` is not a dictionary.
    """
    if not isinstance(eval_result, dict):
        raise TypeError('eval_result has to be a dictionary')
    eval_result.clear()

    def init(env):
        """Create one empty history list per (dataset, metric) pair."""
        for data_name, eval_name, _, _ in env.evaluation_result_list:
            eval_result.setdefault(data_name, {}).setdefault(eval_name, [])

    def callback(env):
        """Append the current iteration's results to the history."""
        # eval_result was cleared above, so emptiness marks the first call.
        if not eval_result:
            init(env)
        for data_name, eval_name, result, _ in env.evaluation_result_list:
            eval_result[data_name][eval_name].append(result)
    return callback


def reset_learning_rate(learning_rates):
    """Reset learning rate after iteration 1.

    NOTE: the initial learning rate will still take in-effect on first iteration.

    Parameters
    ----------
    learning_rates : list or function
        List of learning rate for each boosting round,
        or a customized function that calculates learning_rate in terms of
        current number of round and the total number of boosting round (e.g.
        yields learning rate decay)
        - list l: learning_rate = l[current_round]
        - function f: learning_rate = f(current_round, total_boost_round)

    Returns
    -------
    callback : function
        The requested callback function (runs before each iteration).
    """
    def callback(env):
        """internal function"""
        round_idx = env.iteration
        if isinstance(learning_rates, list):
            if len(learning_rates) != env.end_iteration:
                raise ValueError("Length of list 'learning_rates' has to equal 'num_boost_round'.")
            new_rate = learning_rates[round_idx]
        else:
            new_rate = learning_rates(round_idx, env.end_iteration)
        env.model.reset_parameter({'learning_rate': new_rate})
    # Mark so the training loop invokes this before, not after, an iteration.
    callback.before_iteration = True
    return callback


def early_stop(stopping_rounds, verbose=True):
    """Create a callback that activates early stopping.

    Activates early stopping.
    Requires at least one validation data and one metric.
    If there's more than one, will check all of them.

    Parameters
    ----------
    stopping_rounds : int
        The number of rounds without improvement after which training stops.

    verbose : optional, bool
        Whether to print message about early stopping information.

    Returns
    -------
    callback : function
        The requested callback function; it raises EarlyStopException when
        a metric has not improved for ``stopping_rounds`` iterations.
    """
    # Per-metric state, keyed by index into env.evaluation_result_list.
    factor_to_bigger_better = {}
    best_score = {}
    best_iter = {}
    best_msg = {}

    def init(env):
        """Initialize per-metric state on the first callback invocation."""
        if len(env.evaluation_result_list) == 0:
            raise ValueError('For early stopping you need at least one set in evals.')

        if verbose:
            msg = "Will train until hasn't improved in {} rounds.\n"
            print(msg.format(stopping_rounds))

        for i in range(len(env.evaluation_result_list)):
            best_score[i] = float('-inf')
            best_iter[i] = 0
            if verbose:
                best_msg[i] = ""
            # Index 3 of a result tuple is truthy when a bigger score is
            # better; the factor maps every metric onto a "bigger is better"
            # scale so a single comparison works below.
            factor_to_bigger_better[i] = 1.0 if env.evaluation_result_list[i][3] else -1.0

    def callback(env):
        """Track best scores and raise EarlyStopException when one stalls."""
        if len(best_score) == 0:
            init(env)
        for i in range(len(env.evaluation_result_list)):
            score = env.evaluation_result_list[i][2] * factor_to_bigger_better[i]
            if score > best_score[i]:
                best_score[i] = score
                best_iter[i] = env.iteration
                if verbose:
                    best_msg[i] = '[%d]\t%s' % (
                        env.iteration,
                        '\t'.join([_format_eval_result(x) for x in env.evaluation_result_list]))
            else:
                if env.iteration - best_iter[i] >= stopping_rounds:
                    if env.model is not None:
                        env.model.set_attr(best_iteration=str(best_iter[i]))
                    if verbose:
                        print('early stopping, best message is:\n {} '.format(best_msg[i]))
                    raise EarlyStopException(best_iter[i])
    return callback