split_info.hpp 9 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_TREELEARNER_SPLIT_INFO_HPP_
#define LIGHTGBM_TREELEARNER_SPLIT_INFO_HPP_

8
9
#include <LightGBM/meta.h>

10
#include <limits>
Guolin Ke's avatar
Guolin Ke committed
11
12
13
14
#include <cmath>
#include <cstdint>
#include <cstring>
#include <functional>
15
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
16
17
18
19
20
21
22

namespace LightGBM {

/*!
* \brief Used to store some information for gain split point
*/
struct SplitInfo {
Nikita Titov's avatar
Nikita Titov committed
23
 public:
Guolin Ke's avatar
Guolin Ke committed
24
  /*! \brief Feature index */
25
  int feature = -1;
Guolin Ke's avatar
Guolin Ke committed
26
  /*! \brief Split threshold */
27
28
29
30
31
  uint32_t threshold = 0;
  /*! \brief Left number of data after split */
  data_size_t left_count = 0;
  /*! \brief Right number of data after split */
  data_size_t right_count = 0;
32
  int num_cat_threshold = 0;
Guolin Ke's avatar
Guolin Ke committed
33
  /*! \brief Left output after split */
34
  double left_output = 0.0;
Guolin Ke's avatar
Guolin Ke committed
35
  /*! \brief Right output after split */
36
  double right_output = 0.0;
Guolin Ke's avatar
Guolin Ke committed
37
  /*! \brief Split gain */
38
  double gain = kMinScore;
Guolin Ke's avatar
Guolin Ke committed
39
  /*! \brief Left sum gradient after split */
40
  double left_sum_gradient = 0;
Guolin Ke's avatar
Guolin Ke committed
41
  /*! \brief Left sum hessian after split */
42
  double left_sum_hessian = 0;
Guolin Ke's avatar
Guolin Ke committed
43
  /*! \brief Right sum gradient after split */
44
  double right_sum_gradient = 0;
Guolin Ke's avatar
Guolin Ke committed
45
  /*! \brief Right sum hessian after split */
46
  double right_sum_hessian = 0;
47
  std::vector<uint32_t> cat_threshold;
48
49
  /*! \brief True if default split is left */
  bool default_left = true;
Guolin Ke's avatar
Guolin Ke committed
50
  int8_t monotone_type = 0;
51
  inline static int Size(int max_cat_threshold) {
52
    return 2 * sizeof(int) + sizeof(uint32_t) + sizeof(bool) + sizeof(double) * 7 + sizeof(data_size_t) * 2 + max_cat_threshold * sizeof(uint32_t) + sizeof(int8_t);
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
  }

  inline void CopyTo(char* buffer) const {
    std::memcpy(buffer, &feature, sizeof(feature));
    buffer += sizeof(feature);
    std::memcpy(buffer, &left_count, sizeof(left_count));
    buffer += sizeof(left_count);
    std::memcpy(buffer, &right_count, sizeof(right_count));
    buffer += sizeof(right_count);
    std::memcpy(buffer, &gain, sizeof(gain));
    buffer += sizeof(gain);
    std::memcpy(buffer, &threshold, sizeof(threshold));
    buffer += sizeof(threshold);
    std::memcpy(buffer, &left_output, sizeof(left_output));
    buffer += sizeof(left_output);
    std::memcpy(buffer, &right_output, sizeof(right_output));
    buffer += sizeof(right_output);
    std::memcpy(buffer, &left_sum_gradient, sizeof(left_sum_gradient));
    buffer += sizeof(left_sum_gradient);
    std::memcpy(buffer, &left_sum_hessian, sizeof(left_sum_hessian));
    buffer += sizeof(left_sum_hessian);
    std::memcpy(buffer, &right_sum_gradient, sizeof(right_sum_gradient));
    buffer += sizeof(right_sum_gradient);
    std::memcpy(buffer, &right_sum_hessian, sizeof(right_sum_hessian));
    buffer += sizeof(right_sum_hessian);
    std::memcpy(buffer, &default_left, sizeof(default_left));
    buffer += sizeof(default_left);
Guolin Ke's avatar
Guolin Ke committed
80
81
    std::memcpy(buffer, &monotone_type, sizeof(monotone_type));
    buffer += sizeof(monotone_type);
Guolin Ke's avatar
Guolin Ke committed
82
83
84
    std::memcpy(buffer, &num_cat_threshold, sizeof(num_cat_threshold));
    buffer += sizeof(num_cat_threshold);
    std::memcpy(buffer, cat_threshold.data(), sizeof(uint32_t) * num_cat_threshold);
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
  }

  void CopyFrom(const char* buffer) {
    std::memcpy(&feature, buffer, sizeof(feature));
    buffer += sizeof(feature);
    std::memcpy(&left_count, buffer, sizeof(left_count));
    buffer += sizeof(left_count);
    std::memcpy(&right_count, buffer, sizeof(right_count));
    buffer += sizeof(right_count);
    std::memcpy(&gain, buffer, sizeof(gain));
    buffer += sizeof(gain);
    std::memcpy(&threshold, buffer, sizeof(threshold));
    buffer += sizeof(threshold);
    std::memcpy(&left_output, buffer, sizeof(left_output));
    buffer += sizeof(left_output);
    std::memcpy(&right_output, buffer, sizeof(right_output));
    buffer += sizeof(right_output);
    std::memcpy(&left_sum_gradient, buffer, sizeof(left_sum_gradient));
    buffer += sizeof(left_sum_gradient);
    std::memcpy(&left_sum_hessian, buffer, sizeof(left_sum_hessian));
    buffer += sizeof(left_sum_hessian);
    std::memcpy(&right_sum_gradient, buffer, sizeof(right_sum_gradient));
    buffer += sizeof(right_sum_gradient);
    std::memcpy(&right_sum_hessian, buffer, sizeof(right_sum_hessian));
    buffer += sizeof(right_sum_hessian);
    std::memcpy(&default_left, buffer, sizeof(default_left));
    buffer += sizeof(default_left);
Guolin Ke's avatar
Guolin Ke committed
112
113
    std::memcpy(&monotone_type, buffer, sizeof(monotone_type));
    buffer += sizeof(monotone_type);
Guolin Ke's avatar
Guolin Ke committed
114
115
    std::memcpy(&num_cat_threshold, buffer, sizeof(num_cat_threshold));
    buffer += sizeof(num_cat_threshold);
116
    cat_threshold.resize(num_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
117
    std::memcpy(cat_threshold.data(), buffer, sizeof(uint32_t) * num_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
118
119
120
  }

  inline void Reset() {
121
    // initialize with -1 and -inf gain
Guolin Ke's avatar
Guolin Ke committed
122
123
124
125
    feature = -1;
    gain = kMinScore;
  }

126
127
128
129
130
131
132
133
134
135
136
  inline bool operator > (const SplitInfo& si) const {
    double local_gain = this->gain;
    double other_gain = si.gain;
    // replace nan with -inf
    if (local_gain == NAN) {
      local_gain = kMinScore;
    }
    // replace nan with -inf
    if (other_gain == NAN) {
      other_gain = kMinScore;
    }
137
138
139
140
141
    if (local_gain != other_gain) {
      return local_gain > other_gain;
    }

    // if gains are identical, choose the feature with the smaller index
142
143
144
145
146
147
148
149
150
151
    int local_feature = this->feature;
    int other_feature = si.feature;
    // replace -1 with max int
    if (local_feature == -1) {
      local_feature = INT32_MAX;
    }
    // replace -1 with max int
    if (other_feature == -1) {
      other_feature = INT32_MAX;
    }
152
    return local_feature < other_feature;
Guolin Ke's avatar
Guolin Ke committed
153
154
  }

155
  /*! \brief test if a candidate SplitInfo is equivalent to this one */
156
157
158
159
160
161
162
163
164
165
166
  inline bool operator == (const SplitInfo& si) const {
    double local_gain = this->gain;
    double other_gain = si.gain;
    // replace nan with -inf
    if (local_gain == NAN) {
      local_gain = kMinScore;
    }
    // replace nan with -inf
    if (other_gain == NAN) {
      other_gain = kMinScore;
    }
167
168
169
170
171
    if (local_gain != other_gain) {
      return false;
    }

    // if same gain, splits are only equal if they also use the same feature
172
173
174
175
176
177
178
179
180
181
    int local_feature = this->feature;
    int other_feature = si.feature;
    // replace -1 with max int
    if (local_feature == -1) {
      local_feature = INT32_MAX;
    }
    // replace -1 with max int
    if (other_feature == -1) {
      other_feature = INT32_MAX;
    }
182
    return local_feature == other_feature;
183
184
  }
};
Guolin Ke's avatar
Guolin Ke committed
185

186
struct LightSplitInfo {
Nikita Titov's avatar
Nikita Titov committed
187
 public:
188
189
190
191
192
193
194
195
196
197
198
199
200
  /*! \brief Feature index */
  int feature = -1;
  /*! \brief Split gain */
  double gain = kMinScore;
  /*! \brief Left number of data after split */
  data_size_t left_count = 0;
  /*! \brief Right number of data after split */
  data_size_t right_count = 0;

  inline void Reset() {
    // initialize with -1 and -inf gain
    feature = -1;
    gain = kMinScore;
Guolin Ke's avatar
Guolin Ke committed
201
  }
202
203
204
205
206
207

  void CopyFrom(const SplitInfo& other) {
    feature = other.feature;
    gain = other.gain;
    left_count = other.left_count;
    right_count = other.right_count;
Guolin Ke's avatar
Guolin Ke committed
208
  }
209
210
211
212
213
214
215
216
217
218

  void CopyFrom(const char* buffer) {
    std::memcpy(&feature, buffer, sizeof(feature));
    buffer += sizeof(feature);
    std::memcpy(&left_count, buffer, sizeof(left_count));
    buffer += sizeof(left_count);
    std::memcpy(&right_count, buffer, sizeof(right_count));
    buffer += sizeof(right_count);
    std::memcpy(&gain, buffer, sizeof(gain));
    buffer += sizeof(gain);
Guolin Ke's avatar
Guolin Ke committed
219
  }
220
221
222
223
224
225
226
227
228
229
230
231

  inline bool operator > (const LightSplitInfo& si) const {
    double local_gain = this->gain;
    double other_gain = si.gain;
    // replace nan with -inf
    if (local_gain == NAN) {
      local_gain = kMinScore;
    }
    // replace nan with -inf
    if (other_gain == NAN) {
      other_gain = kMinScore;
    }
232
233
234
235
236
    if (local_gain != other_gain) {
      return local_gain > other_gain;
    }

    // if gains are identical, choose the feature with the smaller index
237
238
239
240
241
242
243
244
245
246
    int local_feature = this->feature;
    int other_feature = si.feature;
    // replace -1 with max int
    if (local_feature == -1) {
      local_feature = INT32_MAX;
    }
    // replace -1 with max int
    if (other_feature == -1) {
      other_feature = INT32_MAX;
    }
247
    return local_feature < other_feature;
Guolin Ke's avatar
Guolin Ke committed
248
  }
249

250
  /*! \brief test if a candidate LightSplitInfo is equivalent to this one */
251
252
253
254
255
256
257
258
259
260
261
  inline bool operator == (const LightSplitInfo& si) const {
    double local_gain = this->gain;
    double other_gain = si.gain;
    // replace nan with -inf
    if (local_gain == NAN) {
      local_gain = kMinScore;
    }
    // replace nan with -inf
    if (other_gain == NAN) {
      other_gain = kMinScore;
    }
262
263
264
265
266
    if (local_gain != other_gain) {
      return false;
    }

    // if same gain, splits are only equal if they also use the same feature
267
268
269
270
271
272
273
274
275
276
    int local_feature = this->feature;
    int other_feature = si.feature;
    // replace -1 with max int
    if (local_feature == -1) {
      local_feature = INT32_MAX;
    }
    // replace -1 with max int
    if (other_feature == -1) {
      other_feature = INT32_MAX;
    }
277
    return local_feature == other_feature;
Guolin Ke's avatar
Guolin Ke committed
278
  }
279
};
Guolin Ke's avatar
Guolin Ke committed
280

Guolin Ke's avatar
Guolin Ke committed
281
}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
282
#endif   // LightGBM_TREELEARNER_SPLIT_INFO_HPP_