callback.py 6.69 KB
Newer Older
wxchan's avatar
wxchan committed
1
# coding: utf-8
wxchan's avatar
wxchan committed
2
# pylint: disable = invalid-name, W0105, C0301
wxchan's avatar
wxchan committed
3
4
from __future__ import absolute_import
import collections
wxchan's avatar
wxchan committed
5
import inspect
wxchan's avatar
wxchan committed
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

class EarlyStopException(Exception):
    """Raised internally to signal that training should stop early.

    Parameters
    ----------
    best_iteration : int
        The best iteration stopped.
    """

    def __init__(self, best_iteration):
        super(EarlyStopException, self).__init__()
        # Remember where the best score was seen so callers can report it.
        self.best_iteration = best_iteration

# Callback environment used by callbacks
CallbackEnv = collections.namedtuple(
    "LightGBMCallbackEnv",
    [
        "model",
        "cvfolds",
        "iteration",
        "begin_iteration",
        "end_iteration",
        "evaluation_result_list",
    ])

def _format_eval_result(value, show_stdv=True):
    """format metric string"""
    if len(value) == 4:
        return '%s\'s %s:%g' % (value[0], value[1], value[2])
    elif len(value) == 5:
        if show_stdv:
            return '%s\'s %s:%g+%g' % (value[0], value[1], value[2], value[4])
        else:
            return '%s\'s %s:%g' % (value[0], value[1], value[2])
    else:
        raise ValueError("wrong metric value")


def print_evaluation(period=1, show_stdv=True):
    """Create a callback that print evaluation result.

    Parameters
    ----------
    period : int
        The period to log the evaluation results

    show_stdv : bool, optional
        Whether show stdv if provided

    Returns
    -------
    callback : function
        A callback that print evaluation every period iterations.
    """
    def callback(env):
        """Print the evaluation results when the period is hit."""
        # Nothing to do when printing is disabled or no results exist.
        if period <= 0 or not env.evaluation_result_list:
            return
        if (env.iteration + 1) % period != 0:
            return
        formatted = [_format_eval_result(x, show_stdv)
                     for x in env.evaluation_result_list]
        print('[%d]\t%s' % (env.iteration + 1, '\t'.join(formatted)))
    callback.order = 10
    return callback


def record_evaluation(eval_result):
    """Create a call back that records the evaluation history into eval_result.

    Parameters
    ----------
    eval_result : dict
       A dictionary to store the evaluation results.

    Returns
    -------
    callback : function
        The requested callback function.
    """
    if not isinstance(eval_result, dict):
        raise TypeError('eval_result has to be a dictionary')
    eval_result.clear()

    def callback(env):
        """Append this iteration's results into eval_result."""
        if not eval_result:
            # First call: create one list-valued bucket per dataset name.
            for data_name, _, _, _ in env.evaluation_result_list:
                eval_result.setdefault(data_name, collections.defaultdict(list))
        for data_name, eval_name, result, _ in env.evaluation_result_list:
            eval_result[data_name][eval_name].append(result)
    callback.order = 20
    return callback


def reset_learning_rate(learning_rates):
    """Reset learning rate after first iteration

    NOTE: the initial learning rate will still take in-effect on first iteration.

    Parameters
    ----------
    learning_rates : list or function
        List of learning rate for each boosting round
        or a customized function that calculates learning_rate in terms of
        current number of round and the total number of boosting round
        (e.g. yields learning rate decay)
        - list l: learning_rate = l[current_round]
        - function f: learning_rate = f(current_round, total_boost_round)
                   or learning_rate = f(current_round)

    Returns
    -------
    callback : function
        The requested callback function.

    Raises
    ------
    ValueError
        If the list length does not match the number of boosting rounds,
        or the function does not take 1 or 2 arguments.
    """
    def callback(env):
        """Set this iteration's learning rate on the model."""
        if isinstance(learning_rates, list):
            if len(learning_rates) != env.end_iteration - env.begin_iteration:
                raise ValueError("Length of list 'learning_rates' has to equal to 'num_boost_round'.")
            env.model.reset_parameter({'learning_rate': learning_rates[env.iteration]})
        else:
            # inspect.getargspec was removed in Python 3.11; prefer
            # getfullargspec when it exists, fall back for old interpreters.
            spec_fn = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
            # was `argc is 1` / `argc is 2`: identity comparison on ints is a
            # CPython small-int-cache accident; use equality.
            argc = len(spec_fn(learning_rates).args)
            if argc == 1:
                env.model.reset_parameter({"learning_rate": learning_rates(env.iteration - env.begin_iteration)})
            elif argc == 2:
                env.model.reset_parameter(
                    {"learning_rate": learning_rates(env.iteration - env.begin_iteration,
                                                     env.end_iteration - env.begin_iteration)})
            else:
                raise ValueError("Self-defined function 'learning_rates' should have 1 or 2 arguments")
    # Run before the boosting step so the new rate applies to this iteration.
    callback.before_iteration = True
    callback.order = 10
    return callback


def early_stop(stopping_rounds, verbose=True):
    """Create a callback that activates early stopping.

    Activates early stopping.
    Requires at least one validation data and one metric.
    If there's more than one, will check all of them.

    Parameters
    ----------
    stopping_rounds : int
       The stopping rounds before the trend occur.

    verbose : optional, bool
        Whether to print message about early stopping information.

    Returns
    -------
    callback : function
        The requested callback function.

    Raises
    ------
    ValueError
        If no evaluation set is available on the first call.
    """
    # Per-metric state, keyed by the metric's index in evaluation_result_list.
    factor_to_bigger_better = {}
    best_score = {}
    best_iter = {}
    best_msg = {}

    def init(env):
        """Initialize per-metric state from the first evaluation results."""
        if not env.evaluation_result_list:
            raise ValueError('For early stopping you need at least one set in evals.')

        if verbose:
            msg = "Train until valid scores didn't improve in {} rounds."
            print(msg.format(stopping_rounds))

        for i, eval_ret in enumerate(env.evaluation_result_list):
            best_score[i] = float('-inf')
            best_iter[i] = 0
            if verbose:
                best_msg[i] = ""
            # eval_ret[3] is the is_higher_better flag; normalize every metric
            # so that "bigger is better" holds after multiplication.
            factor_to_bigger_better[i] = 1.0 if eval_ret[3] else -1.0

    def callback(env):
        """Track best scores; raise EarlyStopException when a metric stalls."""
        if not best_score:
            init(env)
        for i, eval_ret in enumerate(env.evaluation_result_list):
            score = eval_ret[2] * factor_to_bigger_better[i]
            if score > best_score[i]:
                best_score[i] = score
                best_iter[i] = env.iteration
                if verbose:
                    best_msg[i] = '[%d]\t%s' % (env.iteration + 1,
                                                '\t'.join([_format_eval_result(x) for x in env.evaluation_result_list]))
            else:
                if env.iteration - best_iter[i] >= stopping_rounds:
                    if env.model is not None:
                        env.model.set_attr(best_iteration=str(best_iter[i]))
                    if verbose:
                        print('early stopping, best iteration is:')
                        print(best_msg[i])
                    raise EarlyStopException(best_iter[i])
    callback.order = 30
    return callback