"vscode:/vscode.git/clone" did not exist on "4971a06668df7eabeb7d4bb1987abb442f2970c9"
split_info.hpp 9.81 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_TREELEARNER_SPLIT_INFO_HPP_
#define LIGHTGBM_TREELEARNER_SPLIT_INFO_HPP_

8
9
#include <LightGBM/meta.h>

10
#include <limits>
Guolin Ke's avatar
Guolin Ke committed
11
12
13
14
#include <cmath>
#include <cstdint>
#include <cstring>
#include <functional>
15
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
16
17
18
19
20
21
22

namespace LightGBM {

/*!
* \brief Used to store some information for gain split point
*/
struct SplitInfo {
Nikita Titov's avatar
Nikita Titov committed
23
 public:
Guolin Ke's avatar
Guolin Ke committed
24
  /*! \brief Feature index */
25
  int feature = -1;
Guolin Ke's avatar
Guolin Ke committed
26
  /*! \brief Split threshold */
27
28
29
30
31
  uint32_t threshold = 0;
  /*! \brief Left number of data after split */
  data_size_t left_count = 0;
  /*! \brief Right number of data after split */
  data_size_t right_count = 0;
32
  int num_cat_threshold = 0;
Guolin Ke's avatar
Guolin Ke committed
33
  /*! \brief Left output after split */
34
  double left_output = 0.0;
Guolin Ke's avatar
Guolin Ke committed
35
  /*! \brief Right output after split */
36
  double right_output = 0.0;
Guolin Ke's avatar
Guolin Ke committed
37
  /*! \brief Split gain */
38
  double gain = kMinScore;
Guolin Ke's avatar
Guolin Ke committed
39
  /*! \brief Left sum gradient after split */
40
  double left_sum_gradient = 0;
Guolin Ke's avatar
Guolin Ke committed
41
  /*! \brief Left sum hessian after split */
42
  double left_sum_hessian = 0;
43
44
  /*! \brief Left sum discretized gradient and hessian after split */
  int64_t left_sum_gradient_and_hessian = 0;
Guolin Ke's avatar
Guolin Ke committed
45
  /*! \brief Right sum gradient after split */
46
  double right_sum_gradient = 0;
Guolin Ke's avatar
Guolin Ke committed
47
  /*! \brief Right sum hessian after split */
48
  double right_sum_hessian = 0;
49
50
  /*! \brief Right sum discretized gradient and hessian after split */
  int64_t right_sum_gradient_and_hessian = 0;
51
  std::vector<uint32_t> cat_threshold;
52
53
  /*! \brief True if default split is left */
  bool default_left = true;
Guolin Ke's avatar
Guolin Ke committed
54
  int8_t monotone_type = 0;
55
  inline static int Size(int max_cat_threshold) {
56
    return 2 * sizeof(int) + sizeof(uint32_t) + sizeof(bool) + sizeof(double) * 7 + sizeof(data_size_t) * 2 + max_cat_threshold * sizeof(uint32_t) + sizeof(int8_t);
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
  }

  inline void CopyTo(char* buffer) const {
    std::memcpy(buffer, &feature, sizeof(feature));
    buffer += sizeof(feature);
    std::memcpy(buffer, &left_count, sizeof(left_count));
    buffer += sizeof(left_count);
    std::memcpy(buffer, &right_count, sizeof(right_count));
    buffer += sizeof(right_count);
    std::memcpy(buffer, &gain, sizeof(gain));
    buffer += sizeof(gain);
    std::memcpy(buffer, &threshold, sizeof(threshold));
    buffer += sizeof(threshold);
    std::memcpy(buffer, &left_output, sizeof(left_output));
    buffer += sizeof(left_output);
    std::memcpy(buffer, &right_output, sizeof(right_output));
    buffer += sizeof(right_output);
    std::memcpy(buffer, &left_sum_gradient, sizeof(left_sum_gradient));
    buffer += sizeof(left_sum_gradient);
    std::memcpy(buffer, &left_sum_hessian, sizeof(left_sum_hessian));
    buffer += sizeof(left_sum_hessian);
78
79
    std::memcpy(buffer, &left_sum_gradient_and_hessian, sizeof(left_sum_gradient_and_hessian));
    buffer += sizeof(left_sum_gradient_and_hessian);
80
81
82
83
    std::memcpy(buffer, &right_sum_gradient, sizeof(right_sum_gradient));
    buffer += sizeof(right_sum_gradient);
    std::memcpy(buffer, &right_sum_hessian, sizeof(right_sum_hessian));
    buffer += sizeof(right_sum_hessian);
84
85
    std::memcpy(buffer, &right_sum_gradient_and_hessian, sizeof(right_sum_gradient_and_hessian));
    buffer += sizeof(right_sum_gradient_and_hessian);
86
87
    std::memcpy(buffer, &default_left, sizeof(default_left));
    buffer += sizeof(default_left);
Guolin Ke's avatar
Guolin Ke committed
88
89
    std::memcpy(buffer, &monotone_type, sizeof(monotone_type));
    buffer += sizeof(monotone_type);
Guolin Ke's avatar
Guolin Ke committed
90
91
92
    std::memcpy(buffer, &num_cat_threshold, sizeof(num_cat_threshold));
    buffer += sizeof(num_cat_threshold);
    std::memcpy(buffer, cat_threshold.data(), sizeof(uint32_t) * num_cat_threshold);
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
  }

  void CopyFrom(const char* buffer) {
    std::memcpy(&feature, buffer, sizeof(feature));
    buffer += sizeof(feature);
    std::memcpy(&left_count, buffer, sizeof(left_count));
    buffer += sizeof(left_count);
    std::memcpy(&right_count, buffer, sizeof(right_count));
    buffer += sizeof(right_count);
    std::memcpy(&gain, buffer, sizeof(gain));
    buffer += sizeof(gain);
    std::memcpy(&threshold, buffer, sizeof(threshold));
    buffer += sizeof(threshold);
    std::memcpy(&left_output, buffer, sizeof(left_output));
    buffer += sizeof(left_output);
    std::memcpy(&right_output, buffer, sizeof(right_output));
    buffer += sizeof(right_output);
    std::memcpy(&left_sum_gradient, buffer, sizeof(left_sum_gradient));
    buffer += sizeof(left_sum_gradient);
    std::memcpy(&left_sum_hessian, buffer, sizeof(left_sum_hessian));
    buffer += sizeof(left_sum_hessian);
114
115
    std::memcpy(&left_sum_gradient_and_hessian, buffer, sizeof(left_sum_gradient_and_hessian));
    buffer += sizeof(left_sum_gradient_and_hessian);
116
117
118
119
    std::memcpy(&right_sum_gradient, buffer, sizeof(right_sum_gradient));
    buffer += sizeof(right_sum_gradient);
    std::memcpy(&right_sum_hessian, buffer, sizeof(right_sum_hessian));
    buffer += sizeof(right_sum_hessian);
120
121
    std::memcpy(&right_sum_gradient_and_hessian, buffer, sizeof(right_sum_gradient_and_hessian));
    buffer += sizeof(right_sum_gradient_and_hessian);
122
123
    std::memcpy(&default_left, buffer, sizeof(default_left));
    buffer += sizeof(default_left);
Guolin Ke's avatar
Guolin Ke committed
124
125
    std::memcpy(&monotone_type, buffer, sizeof(monotone_type));
    buffer += sizeof(monotone_type);
Guolin Ke's avatar
Guolin Ke committed
126
127
    std::memcpy(&num_cat_threshold, buffer, sizeof(num_cat_threshold));
    buffer += sizeof(num_cat_threshold);
128
    cat_threshold.resize(num_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
129
    std::memcpy(cat_threshold.data(), buffer, sizeof(uint32_t) * num_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
130
131
132
  }

  inline void Reset() {
133
    // initialize with -1 and -inf gain
Guolin Ke's avatar
Guolin Ke committed
134
135
136
137
    feature = -1;
    gain = kMinScore;
  }

138
139
140
141
142
143
144
145
146
147
148
  inline bool operator > (const SplitInfo& si) const {
    double local_gain = this->gain;
    double other_gain = si.gain;
    // replace nan with -inf
    if (local_gain == NAN) {
      local_gain = kMinScore;
    }
    // replace nan with -inf
    if (other_gain == NAN) {
      other_gain = kMinScore;
    }
149
150
151
152
153
    if (local_gain != other_gain) {
      return local_gain > other_gain;
    }

    // if gains are identical, choose the feature with the smaller index
154
155
156
157
158
159
160
161
162
163
    int local_feature = this->feature;
    int other_feature = si.feature;
    // replace -1 with max int
    if (local_feature == -1) {
      local_feature = INT32_MAX;
    }
    // replace -1 with max int
    if (other_feature == -1) {
      other_feature = INT32_MAX;
    }
164
    return local_feature < other_feature;
Guolin Ke's avatar
Guolin Ke committed
165
166
  }

167
  /*! \brief test if a candidate SplitInfo is equivalent to this one */
168
169
170
171
172
173
174
175
176
177
178
  inline bool operator == (const SplitInfo& si) const {
    double local_gain = this->gain;
    double other_gain = si.gain;
    // replace nan with -inf
    if (local_gain == NAN) {
      local_gain = kMinScore;
    }
    // replace nan with -inf
    if (other_gain == NAN) {
      other_gain = kMinScore;
    }
179
180
181
182
183
    if (local_gain != other_gain) {
      return false;
    }

    // if same gain, splits are only equal if they also use the same feature
184
185
186
187
188
189
190
191
192
193
    int local_feature = this->feature;
    int other_feature = si.feature;
    // replace -1 with max int
    if (local_feature == -1) {
      local_feature = INT32_MAX;
    }
    // replace -1 with max int
    if (other_feature == -1) {
      other_feature = INT32_MAX;
    }
194
    return local_feature == other_feature;
195
196
  }
};
Guolin Ke's avatar
Guolin Ke committed
197

198
struct LightSplitInfo {
Nikita Titov's avatar
Nikita Titov committed
199
 public:
200
201
202
203
204
205
206
207
208
209
210
211
212
  /*! \brief Feature index */
  int feature = -1;
  /*! \brief Split gain */
  double gain = kMinScore;
  /*! \brief Left number of data after split */
  data_size_t left_count = 0;
  /*! \brief Right number of data after split */
  data_size_t right_count = 0;

  inline void Reset() {
    // initialize with -1 and -inf gain
    feature = -1;
    gain = kMinScore;
Guolin Ke's avatar
Guolin Ke committed
213
  }
214
215
216
217
218
219

  void CopyFrom(const SplitInfo& other) {
    feature = other.feature;
    gain = other.gain;
    left_count = other.left_count;
    right_count = other.right_count;
Guolin Ke's avatar
Guolin Ke committed
220
  }
221
222
223
224
225
226
227
228
229
230

  void CopyFrom(const char* buffer) {
    std::memcpy(&feature, buffer, sizeof(feature));
    buffer += sizeof(feature);
    std::memcpy(&left_count, buffer, sizeof(left_count));
    buffer += sizeof(left_count);
    std::memcpy(&right_count, buffer, sizeof(right_count));
    buffer += sizeof(right_count);
    std::memcpy(&gain, buffer, sizeof(gain));
    buffer += sizeof(gain);
Guolin Ke's avatar
Guolin Ke committed
231
  }
232
233
234
235
236
237
238
239
240
241
242
243

  inline bool operator > (const LightSplitInfo& si) const {
    double local_gain = this->gain;
    double other_gain = si.gain;
    // replace nan with -inf
    if (local_gain == NAN) {
      local_gain = kMinScore;
    }
    // replace nan with -inf
    if (other_gain == NAN) {
      other_gain = kMinScore;
    }
244
245
246
247
248
    if (local_gain != other_gain) {
      return local_gain > other_gain;
    }

    // if gains are identical, choose the feature with the smaller index
249
250
251
252
253
254
255
256
257
258
    int local_feature = this->feature;
    int other_feature = si.feature;
    // replace -1 with max int
    if (local_feature == -1) {
      local_feature = INT32_MAX;
    }
    // replace -1 with max int
    if (other_feature == -1) {
      other_feature = INT32_MAX;
    }
259
    return local_feature < other_feature;
Guolin Ke's avatar
Guolin Ke committed
260
  }
261

262
  /*! \brief test if a candidate LightSplitInfo is equivalent to this one */
263
264
265
266
267
268
269
270
271
272
273
  inline bool operator == (const LightSplitInfo& si) const {
    double local_gain = this->gain;
    double other_gain = si.gain;
    // replace nan with -inf
    if (local_gain == NAN) {
      local_gain = kMinScore;
    }
    // replace nan with -inf
    if (other_gain == NAN) {
      other_gain = kMinScore;
    }
274
275
276
277
278
    if (local_gain != other_gain) {
      return false;
    }

    // if same gain, splits are only equal if they also use the same feature
279
280
281
282
283
284
285
286
287
288
    int local_feature = this->feature;
    int other_feature = si.feature;
    // replace -1 with max int
    if (local_feature == -1) {
      local_feature = INT32_MAX;
    }
    // replace -1 with max int
    if (other_feature == -1) {
      other_feature = INT32_MAX;
    }
289
    return local_feature == other_feature;
Guolin Ke's avatar
Guolin Ke committed
290
  }
291
};
Guolin Ke's avatar
Guolin Ke committed
292

Guolin Ke's avatar
Guolin Ke committed
293
}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
294
#endif   // LightGBM_TREELEARNER_SPLIT_INFO_HPP_