/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for
 * license information.
 */
#ifndef LIGHTGBM_SRC_TREELEARNER_FEATURE_HISTOGRAM_HPP_
#define LIGHTGBM_SRC_TREELEARNER_FEATURE_HISTOGRAM_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/dataset.h>
#include <LightGBM/utils/array_args.h>

#include <algorithm>
#include <cmath>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>

#include "monotone_constraints.hpp"
#include "split_info.hpp"

namespace LightGBM {

class FeatureMetainfo {
 public:
  int num_bin;
  MissingType missing_type;
  int8_t offset = 0;
  uint32_t default_bin;
  int8_t monotone_type = 0;
  double penalty = 1.0;
  /*! \brief pointer to the tree config */
  const Config* config;
  BinType bin_type;
  /*! \brief random number generator for extremely randomized trees */
  mutable Random rand;
};

/*!
 * \brief FeatureHistogram is used to construct and store a histogram for a
 * feature.
 */
class FeatureHistogram {
 public:
  FeatureHistogram() { data_ = nullptr; }

  ~FeatureHistogram() {}

  /*! \brief Disable copy */
  FeatureHistogram& operator=(const FeatureHistogram&) = delete;
  /*! \brief Disable copy */
  FeatureHistogram(const FeatureHistogram&) = delete;

  /*!
   * \brief Initialize the feature histogram
   * \param data histogram memory for this feature
   * \param data_int16 16-bit histogram memory, used with quantized gradients
   * \param meta shared metainfo of this feature
   */
  void Init(hist_t* data, int16_t* data_int16, const FeatureMetainfo* meta) {
    meta_ = meta;
    data_ = data;
    data_int16_ = data_int16;
    ResetFunc();
  }

  /*!
   * \brief Initialize the feature histogram
   * \param data histogram memory for this feature
   * \param meta shared metainfo of this feature
   */
  void Init(hist_t* data, const FeatureMetainfo* meta) {
    meta_ = meta;
    data_ = data;
    data_int16_ = nullptr;
    ResetFunc();
  }

  void ResetFunc() {
    if (meta_->bin_type == BinType::NumericalBin) {
      FuncForNumrical();
    } else {
      FuncForCategorical();
    }
  }

  hist_t* RawData() { return data_; }

  int32_t* RawDataInt32() { return reinterpret_cast<int32_t*>(data_); }

  int16_t* RawDataInt16() { return data_int16_; }

  /*!
   * \brief Subtract another histogram from this one, bin by bin
   * \param other The histogram to subtract
   * \param buffer Optional buffer holding this histogram's 32-bit data when
   *        the source and result bit widths differ
   */
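  // Note on the quantized-gradient ("int") histogram layout assumed below:
  // each bin packs the gradient sum into the high half and the hessian sum
  // into the low half of a single integer (16+16 bits for 32-bit entries,
  // 32+32 bits for 64-bit entries). The shift/mask arithmetic widens or
  // narrows entries between these layouts, sign-extending the gradient half.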
  template <bool USE_DIST_GRAD = false,
    typename THIS_HIST_T = hist_t, typename OTHER_HIST_T = hist_t, typename RESULT_HIST_T = hist_t,
    int THIS_HIST_BITS = 0, int OTHER_HIST_BITS = 0, int RESULT_HIST_BITS = 0>
  void Subtract(const FeatureHistogram& other, const int32_t* buffer = nullptr) {
    if (USE_DIST_GRAD) {
      const THIS_HIST_T* this_int_data = THIS_HIST_BITS == 16 ?
        reinterpret_cast<const THIS_HIST_T*>(data_int16_) :
        (RESULT_HIST_BITS == 16 ?
          reinterpret_cast<const THIS_HIST_T*>(buffer) :
          reinterpret_cast<const THIS_HIST_T*>(data_));
      const OTHER_HIST_T* other_int_data = OTHER_HIST_BITS == 16 ?
        reinterpret_cast<OTHER_HIST_T*>(other.data_int16_) :
        reinterpret_cast<OTHER_HIST_T*>(other.data_);
      RESULT_HIST_T* result_int_data = RESULT_HIST_BITS == 16 ?
        reinterpret_cast<RESULT_HIST_T*>(data_int16_) :
        reinterpret_cast<RESULT_HIST_T*>(data_);
      if (THIS_HIST_BITS == 32 && OTHER_HIST_BITS == 16 && RESULT_HIST_BITS == 32) {
        for (int i = 0; i < meta_->num_bin - meta_->offset; ++i) {
          const int32_t other_grad_hess = static_cast<int32_t>(other_int_data[i]);
          const int64_t this_grad_hess = this_int_data[i];
          const int64_t other_grad_hess_int64 =
            (static_cast<int64_t>(static_cast<int16_t>(other_grad_hess >> 16)) << 32) |
            (static_cast<int64_t>(other_grad_hess & 0x0000ffff));
          const int64_t result_grad_hess = this_grad_hess - other_grad_hess_int64;
          result_int_data[i] = static_cast<RESULT_HIST_T>(result_grad_hess);
        }
      } else if (THIS_HIST_BITS == 32 && OTHER_HIST_BITS == 16 && RESULT_HIST_BITS == 16) {
        for (int i = 0; i < meta_->num_bin - meta_->offset; ++i) {
          const int32_t other_grad_hess = static_cast<int32_t>(other_int_data[i]);
          const int64_t this_grad_hess = this_int_data[i];
          const int64_t other_grad_hess_int64 =
            (static_cast<int64_t>(static_cast<int16_t>(other_grad_hess >> 16)) << 32) |
            (static_cast<int64_t>(other_grad_hess & 0x0000ffff));
          const int64_t result_grad_hess = this_grad_hess - other_grad_hess_int64;
          const int32_t result_grad_hess_int32 =
            (static_cast<int32_t>(result_grad_hess >> 32) << 16) |
            static_cast<int32_t>(result_grad_hess & 0x00000000ffffffff);
          result_int_data[i] = result_grad_hess_int32;
        }
      } else {
        for (int i = 0; i < meta_->num_bin - meta_->offset; ++i) {
          result_int_data[i] = this_int_data[i] - other_int_data[i];
        }
      }
    } else {
      for (int i = 0; i < (meta_->num_bin - meta_->offset) * 2; ++i) {
        data_[i] -= other.data_[i];
      }
    }
  }

  void CopyToBuffer(int32_t* buffer) {
    const int64_t* data_ptr = reinterpret_cast<const int64_t*>(data_);
    int64_t* buffer_ptr = reinterpret_cast<int64_t*>(buffer);
    for (int i = 0; i < meta_->num_bin - meta_->offset; ++i) {
      buffer_ptr[i] = data_ptr[i];
    }
  }

  void CopyFromInt16ToInt32(char* buffer) {
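    // Widen packed 16+16-bit histogram entries into packed 32+32-bit entries.
    // The locals are typed as int32_t/int64_t because each read and write
    // moves one whole packed (gradient, hessian) pair, not a single counter.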
    const int32_t* int16_data = reinterpret_cast<const int32_t*>(RawDataInt16());
    int64_t* int32_data = reinterpret_cast<int64_t*>(buffer);
    for (int i = 0; i < meta_->num_bin - meta_->offset; ++i) {
      const int32_t int16_val = int16_data[i];
      int32_data[i] = (static_cast<int64_t>(static_cast<int16_t>(int16_val >> 16)) << 32) |
        static_cast<int64_t>(int16_val & 0x0000ffff);
    }
  }

  void FindBestThreshold(double sum_gradient, double sum_hessian,
                         data_size_t num_data,
                         const FeatureConstraint* constraints,
                         double parent_output,
                         SplitInfo* output) {
    output->default_left = true;
    output->gain = kMinScore;
    find_best_threshold_fun_(sum_gradient, sum_hessian + 2 * kEpsilon, num_data,
                             constraints, parent_output, output);
    output->gain *= meta_->penalty;
  }

  void FindBestThresholdInt(int64_t sum_gradient_and_hessian,
                            double grad_scale, double hess_scale,
                            const uint8_t num_bits_bin,
                            const uint8_t num_bits_acc,
                            data_size_t num_data,
                            const FeatureConstraint* constraints,
                            double parent_output,
                            SplitInfo* output) {
    output->default_left = true;
    output->gain = kMinScore;
    int_find_best_threshold_fun_(sum_gradient_and_hessian, grad_scale, hess_scale, num_bits_bin, num_bits_acc, num_data,
                             constraints, parent_output, output);
    output->gain *= meta_->penalty;
  }

  template <bool USE_RAND, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING>
  double BeforeNumerical(double sum_gradient, double sum_hessian, double parent_output, data_size_t num_data,
                        SplitInfo* output, int* rand_threshold) {
    is_splittable_ = false;
    output->monotone_type = meta_->monotone_type;

    double gain_shift = GetLeafGain<USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
        sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2,
        meta_->config->max_delta_step, meta_->config->path_smooth, num_data, parent_output);
    *rand_threshold = 0;
    if (USE_RAND) {
      if (meta_->num_bin - 2 > 0) {
        *rand_threshold = meta_->rand.NextInt(0, meta_->num_bin - 2);
      }
    }
    return gain_shift + meta_->config->min_gain_to_split;
  }

  template <bool USE_RAND, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING>
  double BeforeNumericalInt(int64_t sum_gradient_and_hessian, double grad_scale, double hess_scale, double parent_output, data_size_t num_data,
                        SplitInfo* output, int* rand_threshold) {
    is_splittable_ = false;
    output->monotone_type = meta_->monotone_type;
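    // The packed int64 carries the leaf totals from the quantized-gradient
    // path: the signed gradient sum in the high 32 bits and the unsigned
    // hessian sum in the low 32 bits; the scales map them back to real sums.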
    const int32_t int_sum_gradient = static_cast<int32_t>(sum_gradient_and_hessian >> 32);
    const uint32_t int_sum_hessian = static_cast<uint32_t>(sum_gradient_and_hessian & 0x00000000ffffffff);
    const double sum_gradient = static_cast<double>(int_sum_gradient) * grad_scale;
    const double sum_hessian = static_cast<double>(int_sum_hessian) * hess_scale;
    double gain_shift = GetLeafGain<USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
        sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2,
        meta_->config->max_delta_step, meta_->config->path_smooth, num_data, parent_output);
    *rand_threshold = 0;
    if (USE_RAND) {
      if (meta_->num_bin - 2 > 0) {
        *rand_threshold = meta_->rand.NextInt(0, meta_->num_bin - 2);
      }
    }
    return gain_shift + meta_->config->min_gain_to_split;
  }

  void FuncForNumrical() {
    if (meta_->config->extra_trees) {
      if (meta_->config->monotone_constraints.empty()) {
        FuncForNumricalL1<true, false>();
      } else {
        FuncForNumricalL1<true, true>();
      }
    } else {
      if (meta_->config->monotone_constraints.empty()) {
        FuncForNumricalL1<false, false>();
      } else {
        FuncForNumricalL1<false, true>();
      }
    }
  }
  template <bool USE_RAND, bool USE_MC>
  void FuncForNumricalL1() {
    if (meta_->config->lambda_l1 > 0) {
      if (meta_->config->max_delta_step > 0) {
        FuncForNumricalL2<USE_RAND, USE_MC, true, true>();
      } else {
        FuncForNumricalL2<USE_RAND, USE_MC, true, false>();
      }
    } else {
      if (meta_->config->max_delta_step > 0) {
        FuncForNumricalL2<USE_RAND, USE_MC, false, true>();
      } else {
        FuncForNumricalL2<USE_RAND, USE_MC, false, false>();
      }
    }
  }

  template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT>
  void FuncForNumricalL2() {
    if (meta_->config->path_smooth > kEpsilon) {
      FuncForNumricalL3<USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT, true>();
    } else {
      FuncForNumricalL3<USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT, false>();
    }
  }

  template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING>
  void FuncForNumricalL3() {
    if (meta_->config->use_quantized_grad) {
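      // The macros below only abbreviate the long lambda parameter lists.
      // Each lambda dispatches, once per search, to a fully specialized
      // FindBestThresholdSequentiallyInt instantiation, so the feature flags
      // (USE_RAND, USE_MC, ...) and histogram bit widths cost nothing inside
      // the per-bin loop.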
#define TEMPLATE_PREFIX_INT USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING
#define LAMBDA_ARGUMENTS_INT                                         \
  int64_t sum_gradient_and_hessian, double grad_scale, double hess_scale, const uint8_t hist_bits_bin, const uint8_t hist_bits_acc, data_size_t num_data, \
      const FeatureConstraint* constraints, double parent_output, SplitInfo *output
#define BEFORE_ARGUMENTS_INT sum_gradient_and_hessian, grad_scale, hess_scale, parent_output, num_data, output, &rand_threshold
#define FUNC_ARGUMENTS_INT                                                      \
  sum_gradient_and_hessian, grad_scale, hess_scale, num_data, constraints, min_gain_shift, \
      output, rand_threshold, parent_output

      if (meta_->num_bin > 2 && meta_->missing_type != MissingType::None) {
        if (meta_->missing_type == MissingType::Zero) {
          int_find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS_INT) {
            int rand_threshold = 0;
            double min_gain_shift =
                BeforeNumericalInt<USE_RAND, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
                    BEFORE_ARGUMENTS_INT);
            if (hist_bits_acc <= 16) {
              CHECK_LE(hist_bits_bin, 16);
              FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, true, false, int32_t, int32_t, int16_t, int16_t, 16, 16>(
                  FUNC_ARGUMENTS_INT);
              FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, false, true, false, int32_t, int32_t, int16_t, int16_t, 16, 16>(
                  FUNC_ARGUMENTS_INT);
            } else {
              if (hist_bits_bin == 32) {
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, true, false, int64_t, int64_t, int32_t, int32_t, 32, 32>(
                    FUNC_ARGUMENTS_INT);
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, false, true, false, int64_t, int64_t, int32_t, int32_t, 32, 32>(
                    FUNC_ARGUMENTS_INT);
              } else {
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, true, false, int32_t, int64_t, int16_t, int32_t, 16, 32>(
                    FUNC_ARGUMENTS_INT);
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, false, true, false, int32_t, int64_t, int16_t, int32_t, 16, 32>(
                    FUNC_ARGUMENTS_INT);
              }
            }
          };
        } else {
          int_find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS_INT) {
            int rand_threshold = 0;
            double min_gain_shift =
                BeforeNumericalInt<USE_RAND, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
                    BEFORE_ARGUMENTS_INT);
            if (hist_bits_acc <= 16) {
              CHECK_LE(hist_bits_bin, 16);
              FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, true, int32_t, int32_t, int16_t, int16_t, 16, 16>(
                  FUNC_ARGUMENTS_INT);
              FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, false, false, true, int32_t, int32_t, int16_t, int16_t, 16, 16>(
                  FUNC_ARGUMENTS_INT);
            } else {
              if (hist_bits_bin == 32) {
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, true, int64_t, int64_t, int32_t, int32_t, 32, 32>(
                    FUNC_ARGUMENTS_INT);
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, false, false, true, int64_t, int64_t, int32_t, int32_t, 32, 32>(
                    FUNC_ARGUMENTS_INT);
              } else {
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, true, int32_t, int64_t, int16_t, int32_t, 16, 32>(
                    FUNC_ARGUMENTS_INT);
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, false, false, true, int32_t, int64_t, int16_t, int32_t, 16, 32>(
                    FUNC_ARGUMENTS_INT);
              }
            }
          };
        }
      } else {
        if (meta_->missing_type != MissingType::NaN) {
          int_find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS_INT) {
            int rand_threshold = 0;
            double min_gain_shift =
                BeforeNumericalInt<USE_RAND, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
                    BEFORE_ARGUMENTS_INT);
            if (hist_bits_acc <= 16) {
              CHECK_LE(hist_bits_bin, 16);
              FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, false, int32_t, int32_t, int16_t, int16_t, 16, 16>(
                  FUNC_ARGUMENTS_INT);
            } else {
              if (hist_bits_bin == 32) {
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, false, int64_t, int64_t, int32_t, int32_t, 32, 32>(
                    FUNC_ARGUMENTS_INT);
              } else {
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, false, int32_t, int64_t, int16_t, int32_t, 16, 32>(
                    FUNC_ARGUMENTS_INT);
              }
            }
          };
        } else {
          int_find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS_INT) {
            int rand_threshold = 0;
            double min_gain_shift =
                BeforeNumericalInt<USE_RAND, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
                    BEFORE_ARGUMENTS_INT);
            if (hist_bits_acc <= 16) {
              CHECK_LE(hist_bits_bin, 16);
              FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, false, int32_t, int32_t, int16_t, int16_t, 16, 16>(
                  FUNC_ARGUMENTS_INT);
            } else {
              if (hist_bits_bin == 32) {
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, false, int64_t, int64_t, int32_t, int32_t, 32, 32>(
                    FUNC_ARGUMENTS_INT);
              } else {
                FindBestThresholdSequentiallyInt<TEMPLATE_PREFIX_INT, true, false, false, int32_t, int64_t, int16_t, int32_t, 16, 32>(
                    FUNC_ARGUMENTS_INT);
              }
            }
            output->default_left = false;
          };
        }
      }
#undef TEMPLATE_PREFIX_INT
#undef LAMBDA_ARGUMENTS_INT
#undef BEFORE_ARGUMENTS_INT
#undef FUNC_ARGUMENTS_INT
    } else {
#define TEMPLATE_PREFIX USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING
#define LAMBDA_ARGUMENTS                                         \
  double sum_gradient, double sum_hessian, data_size_t num_data, \
      const FeatureConstraint* constraints, double parent_output, SplitInfo *output
#define BEFORE_ARGUMENTS sum_gradient, sum_hessian, parent_output, num_data, output, &rand_threshold
#define FUNC_ARGUMENTS                                                      \
394
  sum_gradient, sum_hessian, num_data, constraints, min_gain_shift, \
      output, rand_threshold, parent_output

      if (meta_->num_bin > 2 && meta_->missing_type != MissingType::None) {
        if (meta_->missing_type == MissingType::Zero) {
          find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
            int rand_threshold = 0;
            double min_gain_shift =
                BeforeNumerical<USE_RAND, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
                    BEFORE_ARGUMENTS);
            FindBestThresholdSequentially<TEMPLATE_PREFIX, true, true, false>(
                FUNC_ARGUMENTS);
            FindBestThresholdSequentially<TEMPLATE_PREFIX, false, true, false>(
                FUNC_ARGUMENTS);
          };
        } else {
          find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
            int rand_threshold = 0;
            double min_gain_shift =
                BeforeNumerical<USE_RAND, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
                    BEFORE_ARGUMENTS);
            FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, true>(
                FUNC_ARGUMENTS);
            FindBestThresholdSequentially<TEMPLATE_PREFIX, false, false, true>(
                FUNC_ARGUMENTS);
          };
        }
      } else {
        if (meta_->missing_type != MissingType::NaN) {
          find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
            int rand_threshold = 0;
            double min_gain_shift =
                BeforeNumerical<USE_RAND, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
                    BEFORE_ARGUMENTS);
            FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, false>(
                FUNC_ARGUMENTS);
          };
        } else {
          find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
            int rand_threshold = 0;
            double min_gain_shift =
                BeforeNumerical<USE_RAND, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
                    BEFORE_ARGUMENTS);
            FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, false>(
                FUNC_ARGUMENTS);
            output->default_left = false;
          };
        }
      }
#undef TEMPLATE_PREFIX
#undef LAMBDA_ARGUMENTS
#undef BEFORE_ARGUMENTS
#undef FUNC_ARGUMENTS
    }
  }

  void FuncForCategorical();

  template <bool USE_RAND, bool USE_MC>
  void FuncForCategoricalL1();

  template <bool USE_RAND, bool USE_MC, bool USE_SMOOTHING>
  void FuncForCategoricalL2();

  template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING>
  void FindBestThresholdCategoricalInner(double sum_gradient,
                                         double sum_hessian,
                                         data_size_t num_data,
                                         const FeatureConstraint* constraints,
                                         double parent_output,
                                         SplitInfo* output);

  template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING, typename PACKED_HIST_BIN_T, typename PACKED_HIST_ACC_T,
          typename HIST_BIN_T, typename HIST_ACC_T, int HIST_BITS_BIN, int HIST_BITS_ACC>
  void FindBestThresholdCategoricalIntInner(int64_t int_sum_gradient_and_hessian,
                                            const double grad_scale, const double hess_scale,
                                            data_size_t num_data,
                                            const FeatureConstraint* constraints,
                                            double parent_output,
                                            SplitInfo* output);

  void GatherInfoForThreshold(double sum_gradient, double sum_hessian,
                              uint32_t threshold, data_size_t num_data,
                              double parent_output, SplitInfo* output) {
    if (meta_->bin_type == BinType::NumericalBin) {
      GatherInfoForThresholdNumerical(sum_gradient, sum_hessian, threshold,
                                      num_data, parent_output, output);
    } else {
      GatherInfoForThresholdCategorical(sum_gradient, sum_hessian, threshold,
                                        num_data, parent_output, output);
    }
  }

  void GatherInfoForThresholdNumerical(double sum_gradient, double sum_hessian,
                                       uint32_t threshold, data_size_t num_data,
                                       double parent_output, SplitInfo* output) {
    bool use_smoothing = meta_->config->path_smooth > kEpsilon;
    if (use_smoothing) {
      GatherInfoForThresholdNumericalInner<true>(sum_gradient, sum_hessian,
                                                 threshold, num_data,
                                                 parent_output, output);
    } else {
      GatherInfoForThresholdNumericalInner<false>(sum_gradient, sum_hessian,
                                                  threshold, num_data,
                                                  parent_output, output);
    }
  }

  template<bool USE_SMOOTHING>
  void GatherInfoForThresholdNumericalInner(double sum_gradient, double sum_hessian,
                                            uint32_t threshold, data_size_t num_data,
                                            double parent_output, SplitInfo* output) {
    double gain_shift = GetLeafGainGivenOutput<true>(
        sum_gradient, sum_hessian, meta_->config->lambda_l1,
        meta_->config->lambda_l2, parent_output);
    double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;

    // walk the histogram from the right and accumulate the sums that fall on
    // the right side of the given threshold
    const int8_t offset = meta_->offset;

    double sum_right_gradient = 0.0f;
    double sum_right_hessian = kEpsilon;
    data_size_t right_count = 0;

    // decide how missing values are routed for this feature
    bool use_na_as_missing = false;
    bool skip_default_bin = false;
    if (meta_->missing_type == MissingType::Zero) {
      skip_default_bin = true;
    } else if (meta_->missing_type == MissingType::NaN) {
      use_na_as_missing = true;
    }

    int t = meta_->num_bin - 1 - offset - use_na_as_missing;
    const int t_end = 1 - offset;
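    // per-bin data counts are reconstructed from the hessian mass: each bin's
    // count is estimated as hess * (num_data / sum_hessian)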
    const double cnt_factor = num_data / sum_hessian;
    // from right to left, and we don't need data in bin0
    for (; t >= t_end; --t) {
      if (static_cast<uint32_t>(t + offset) <= threshold) {
        break;
      }

      // need to skip default bin
      if (skip_default_bin &&
          (t + offset) == static_cast<int>(meta_->default_bin)) {
        continue;
      }
      const auto grad = GET_GRAD(data_, t);
      const auto hess = GET_HESS(data_, t);
      data_size_t cnt =
          static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
      sum_right_gradient += grad;
      sum_right_hessian += hess;
      right_count += cnt;
    }
    double sum_left_gradient = sum_gradient - sum_right_gradient;
    double sum_left_hessian = sum_hessian - sum_right_hessian;
    data_size_t left_count = num_data - right_count;
    double current_gain =
        GetLeafGain<true, true, USE_SMOOTHING>(
            sum_left_gradient, sum_left_hessian, meta_->config->lambda_l1,
            meta_->config->lambda_l2, meta_->config->max_delta_step,
            meta_->config->path_smooth, left_count, parent_output) +
        GetLeafGain<true, true, USE_SMOOTHING>(
            sum_right_gradient, sum_right_hessian, meta_->config->lambda_l1,
            meta_->config->lambda_l2, meta_->config->max_delta_step,
            meta_->config->path_smooth, right_count, parent_output);

    // gain with split is worse than without split
    if (std::isnan(current_gain) || current_gain <= min_gain_shift) {
      output->gain = kMinScore;
      Log::Warning(
          "'Forced Split' will be ignored since the gain is getting worse.");
      return;
    }

    // update split information
    output->threshold = threshold;
    output->left_output = CalculateSplittedLeafOutput<true, true, USE_SMOOTHING>(
        sum_left_gradient, sum_left_hessian, meta_->config->lambda_l1,
        meta_->config->lambda_l2, meta_->config->max_delta_step,
        meta_->config->path_smooth, left_count, parent_output);
    output->left_count = left_count;
    output->left_sum_gradient = sum_left_gradient;
    output->left_sum_hessian = sum_left_hessian - kEpsilon;
    output->right_output = CalculateSplittedLeafOutput<true, true, USE_SMOOTHING>(
        sum_gradient - sum_left_gradient, sum_hessian - sum_left_hessian,
        meta_->config->lambda_l1, meta_->config->lambda_l2,
        meta_->config->max_delta_step, meta_->config->path_smooth,
        right_count, parent_output);
    output->right_count = num_data - left_count;
    output->right_sum_gradient = sum_gradient - sum_left_gradient;
    output->right_sum_hessian = sum_hessian - sum_left_hessian - kEpsilon;
    output->gain = current_gain - min_gain_shift;
    output->default_left = true;
  }

  void GatherInfoForThresholdCategorical(double sum_gradient,  double sum_hessian,
                                         uint32_t threshold, data_size_t num_data,
                                         double parent_output, SplitInfo* output) {
    bool use_smoothing = meta_->config->path_smooth > kEpsilon;
    if (use_smoothing) {
      GatherInfoForThresholdCategoricalInner<true>(sum_gradient, sum_hessian, threshold,
                                                   num_data, parent_output, output);
    } else {
      GatherInfoForThresholdCategoricalInner<false>(sum_gradient, sum_hessian, threshold,
                                                    num_data, parent_output, output);
    }
  }

  template<bool USE_SMOOTHING>
  void GatherInfoForThresholdCategoricalInner(double sum_gradient,
                                              double sum_hessian, uint32_t threshold,
                                              data_size_t num_data, double parent_output,
                                              SplitInfo* output) {
    // get SplitInfo for a given one-hot categorical split.
    output->default_left = false;
    double gain_shift = GetLeafGainGivenOutput<true>(
        sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, parent_output);
    double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
    if (threshold >= static_cast<uint32_t>(meta_->num_bin) || threshold == 0) {
      output->gain = kMinScore;
      Log::Warning("Invalid categorical threshold split");
      return;
    }
    const double cnt_factor = num_data / sum_hessian;
    const auto grad = GET_GRAD(data_, threshold - meta_->offset);
    const auto hess = GET_HESS(data_, threshold - meta_->offset);
    data_size_t cnt =
        static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));

    double l2 = meta_->config->lambda_l2;
    data_size_t left_count = cnt;
    data_size_t right_count = num_data - left_count;
    double sum_left_hessian = hess + kEpsilon;
    double sum_right_hessian = sum_hessian - sum_left_hessian;
    double sum_left_gradient = grad;
    double sum_right_gradient = sum_gradient - sum_left_gradient;
    // current split gain
    double current_gain =
        GetLeafGain<true, true, USE_SMOOTHING>(sum_right_gradient, sum_right_hessian,
                                      meta_->config->lambda_l1, l2,
                                      meta_->config->max_delta_step,
                                      meta_->config->path_smooth, right_count,
                                      parent_output) +
        GetLeafGain<true, true, USE_SMOOTHING>(sum_left_gradient, sum_left_hessian,
                                      meta_->config->lambda_l1, l2,
                                      meta_->config->max_delta_step,
                                      meta_->config->path_smooth, left_count,
                                      parent_output);
    if (std::isnan(current_gain) || current_gain <= min_gain_shift) {
      output->gain = kMinScore;
      Log::Warning(
          "'Forced Split' will be ignored since the gain is getting worse.");
      return;
    }
    output->left_output = CalculateSplittedLeafOutput<true, true, USE_SMOOTHING>(
        sum_left_gradient, sum_left_hessian, meta_->config->lambda_l1, l2,
        meta_->config->max_delta_step, meta_->config->path_smooth, left_count,
        parent_output);
    output->left_count = left_count;
    output->left_sum_gradient = sum_left_gradient;
    output->left_sum_hessian = sum_left_hessian - kEpsilon;
    output->right_output = CalculateSplittedLeafOutput<true, true, USE_SMOOTHING>(
        sum_right_gradient, sum_right_hessian, meta_->config->lambda_l1, l2,
        meta_->config->max_delta_step, meta_->config->path_smooth, right_count,
        parent_output);
    output->right_count = right_count;
    output->right_sum_gradient = sum_gradient - sum_left_gradient;
    output->right_sum_hessian = sum_right_hessian - kEpsilon;
    output->gain = current_gain - min_gain_shift;
    output->num_cat_threshold = 1;
    output->cat_threshold = std::vector<uint32_t>(1, threshold);
  }

  /*!
   * \brief Binary size of this histogram
   */
  int SizeOfHistogram() const {
    return (meta_->num_bin - meta_->offset) * kHistEntrySize;
  }

  int SizeOfInt32Histogram() const {
    return (meta_->num_bin - meta_->offset) * kInt32HistEntrySize;
  }

  int SizeOfInt16Histogram() const {
    return (meta_->num_bin - meta_->offset) * kInt16HistEntrySize;
  }

  /*!
   * \brief Restore histogram from memory
   */
  void FromMemory(char* memory_data) {
    std::memcpy(data_, memory_data,
                (meta_->num_bin - meta_->offset) * kHistEntrySize);
  }

  void FromMemoryInt32(char* memory_data) {
    std::memcpy(data_, memory_data,
                (meta_->num_bin - meta_->offset) * kInt32HistEntrySize);
  }

  void FromMemoryInt16(char* memory_data) {
    std::memcpy(data_int16_, memory_data,
                (meta_->num_bin - meta_->offset) * kInt16HistEntrySize);
  }

  /*!
   * \brief True if this histogram can be split
   */
  bool is_splittable() { return is_splittable_; }

  /*!
   * \brief Set whether this histogram is splittable
   */
  void set_is_splittable(bool val) { is_splittable_ = val; }

  static double ThresholdL1(double s, double l1) {
    const double reg_s = std::max(0.0, std::fabs(s) - l1);
    return Common::Sign(s) * reg_s;
  }

  template <bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING>
  static double CalculateSplittedLeafOutput(double sum_gradients,
                                            double sum_hessians, double l1,
                                            double l2, double max_delta_step,
                                            double smoothing, data_size_t num_data,
                                            double parent_output) {
    double ret;
    if (USE_L1) {
      ret = -ThresholdL1(sum_gradients, l1) / (sum_hessians + l2);
    } else {
      ret = -sum_gradients / (sum_hessians + l2);
    }
    if (USE_MAX_OUTPUT) {
      if (max_delta_step > 0 && std::fabs(ret) > max_delta_step) {
        ret = Common::Sign(ret) * max_delta_step;
      }
    }
    if (USE_SMOOTHING) {
      ret = ret * (num_data / smoothing) / (num_data / smoothing + 1) \
          + parent_output / (num_data / smoothing + 1);
    }
    return ret;
  }

  template <bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING>
  static double CalculateSplittedLeafOutput(
      double sum_gradients, double sum_hessians, double l1, double l2,
      double max_delta_step, const BasicConstraint& constraints,
      double smoothing, data_size_t num_data, double parent_output) {
    double ret = CalculateSplittedLeafOutput<USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
        sum_gradients, sum_hessians, l1, l2, max_delta_step, smoothing, num_data, parent_output);
    if (USE_MC) {
      if (ret < constraints.min) {
        ret = constraints.min;
      } else if (ret > constraints.max) {
        ret = constraints.max;
      }
    }
    return ret;
  }

 private:
  template <bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING>
  static double GetSplitGains(double sum_left_gradients,
                              double sum_left_hessians,
                              double sum_right_gradients,
                              double sum_right_hessians, double l1, double l2,
                              double max_delta_step,
                              const FeatureConstraint* constraints,
                              int8_t monotone_constraint,
                              double smoothing,
                              data_size_t left_count,
                              data_size_t right_count,
                              double parent_output) {
    if (!USE_MC) {
      return GetLeafGain<USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(sum_left_gradients,
                                                                sum_left_hessians, l1, l2,
                                                                max_delta_step, smoothing,
                                                                left_count, parent_output) +
             GetLeafGain<USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(sum_right_gradients,
                                                                sum_right_hessians, l1, l2,
                                                                max_delta_step, smoothing,
                                                                right_count, parent_output);
    } else {
      double left_output =
          CalculateSplittedLeafOutput<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
              sum_left_gradients, sum_left_hessians, l1, l2, max_delta_step,
              constraints->LeftToBasicConstraint(), smoothing, left_count, parent_output);
      double right_output =
          CalculateSplittedLeafOutput<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
              sum_right_gradients, sum_right_hessians, l1, l2, max_delta_step,
              constraints->RightToBasicConstraint(), smoothing, right_count, parent_output);
      if (((monotone_constraint > 0) && (left_output > right_output)) ||
          ((monotone_constraint < 0) && (left_output < right_output))) {
        return 0;
      }
      return GetLeafGainGivenOutput<USE_L1>(
                 sum_left_gradients, sum_left_hessians, l1, l2, left_output) +
             GetLeafGainGivenOutput<USE_L1>(
                 sum_right_gradients, sum_right_hessians, l1, l2, right_output);
    }
  }

  template <bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING>
  static double GetLeafGain(double sum_gradients, double sum_hessians,
                            double l1, double l2, double max_delta_step,
                            double smoothing, data_size_t num_data, double parent_output) {
    if (!USE_MAX_OUTPUT && !USE_SMOOTHING) {
      if (USE_L1) {
        const double sg_l1 = ThresholdL1(sum_gradients, l1);
        return (sg_l1 * sg_l1) / (sum_hessians + l2);
      } else {
        return (sum_gradients * sum_gradients) / (sum_hessians + l2);
      }
    } else {
      double output = CalculateSplittedLeafOutput<USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
          sum_gradients, sum_hessians, l1, l2, max_delta_step, smoothing, num_data, parent_output);
      return GetLeafGainGivenOutput<USE_L1>(sum_gradients, sum_hessians, l1, l2, output);
    }
  }

  template <bool USE_L1>
  static double GetLeafGainGivenOutput(double sum_gradients,
                                       double sum_hessians, double l1,
                                       double l2, double output) {
    if (USE_L1) {
      const double sg_l1 = ThresholdL1(sum_gradients, l1);
      return -(2.0 * sg_l1 * output + (sum_hessians + l2) * output * output);
    } else {
      return -(2.0 * sum_gradients * output +
               (sum_hessians + l2) * output * output);
    }
  }

  template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING,
            bool REVERSE, bool SKIP_DEFAULT_BIN, bool NA_AS_MISSING>
  void FindBestThresholdSequentially(double sum_gradient, double sum_hessian,
                                     data_size_t num_data,
                                     const FeatureConstraint* constraints,
                                     double min_gain_shift, SplitInfo* output,
                                     int rand_threshold, double parent_output) {
    const int8_t offset = meta_->offset;
    double best_sum_left_gradient = NAN;
    double best_sum_left_hessian = NAN;
    double best_gain = kMinScore;
    data_size_t best_left_count = 0;
    uint32_t best_threshold = static_cast<uint32_t>(meta_->num_bin);
    const double cnt_factor = num_data / sum_hessian;

    BasicConstraint best_right_constraints;
    BasicConstraint best_left_constraints;
    bool constraint_update_necessary =
        USE_MC && constraints->ConstraintDifferentDependingOnThreshold();

    if (USE_MC) {
      constraints->InitCumulativeConstraints(REVERSE);
    }

    if (REVERSE) {
      double sum_right_gradient = 0.0f;
      double sum_right_hessian = kEpsilon;
      data_size_t right_count = 0;

      int t = meta_->num_bin - 1 - offset - NA_AS_MISSING;
      const int t_end = 1 - offset;

      // from right to left, and we don't need data in bin0
      for (; t >= t_end; --t) {
        // need to skip default bin
        if (SKIP_DEFAULT_BIN) {
          if ((t + offset) == static_cast<int>(meta_->default_bin)) {
            continue;
          }
        }
        const auto grad = GET_GRAD(data_, t);
        const auto hess = GET_HESS(data_, t);
        data_size_t cnt =
            static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
        sum_right_gradient += grad;
        sum_right_hessian += hess;
        right_count += cnt;
        // if data not enough, or sum hessian too small
        if (right_count < meta_->config->min_data_in_leaf ||
            sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) {
          continue;
        }
        data_size_t left_count = num_data - right_count;
        // if data not enough
        if (left_count < meta_->config->min_data_in_leaf) {
          break;
        }

        double sum_left_hessian = sum_hessian - sum_right_hessian;
        // if sum hessian too small
        if (sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) {
          break;
        }

        double sum_left_gradient = sum_gradient - sum_right_gradient;
        if (USE_RAND) {
          if (t - 1 + offset != rand_threshold) {
            continue;
          }
        }

        if (USE_MC && constraint_update_necessary) {
          constraints->Update(t + offset);
        }

        // current split gain
        double current_gain = GetSplitGains<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
            sum_left_gradient, sum_left_hessian, sum_right_gradient,
            sum_right_hessian, meta_->config->lambda_l1,
            meta_->config->lambda_l2, meta_->config->max_delta_step,
            constraints, meta_->monotone_type, meta_->config->path_smooth,
            left_count, right_count, parent_output);
        // gain with split is worse than without split
        if (current_gain <= min_gain_shift) {
          continue;
        }

        // mark as able to be split
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {
          if (USE_MC) {
            best_right_constraints = constraints->RightToBasicConstraint();
            best_left_constraints = constraints->LeftToBasicConstraint();
            if (best_right_constraints.min > best_right_constraints.max ||
                best_left_constraints.min > best_left_constraints.max) {
              continue;
            }
          }
          best_left_count = left_count;
          best_sum_left_gradient = sum_left_gradient;
          best_sum_left_hessian = sum_left_hessian;
          // left is <= threshold, right is > threshold.  so this is t-1
          best_threshold = static_cast<uint32_t>(t - 1 + offset);
          best_gain = current_gain;
        }
      }
    } else {
      double sum_left_gradient = 0.0f;
      double sum_left_hessian = kEpsilon;
      data_size_t left_count = 0;

      int t = 0;
      const int t_end = meta_->num_bin - 2 - offset;

      if (NA_AS_MISSING) {
        if (offset == 1) {
          sum_left_gradient = sum_gradient;
          sum_left_hessian = sum_hessian - kEpsilon;
          left_count = num_data;
          for (int i = 0; i < meta_->num_bin - offset; ++i) {
            const auto grad = GET_GRAD(data_, i);
            const auto hess = GET_HESS(data_, i);
            data_size_t cnt =
                static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
            sum_left_gradient -= grad;
            sum_left_hessian -= hess;
            left_count -= cnt;
          }
          t = -1;
        }
      }

      for (; t <= t_end; ++t) {
        if (SKIP_DEFAULT_BIN) {
          if ((t + offset) == static_cast<int>(meta_->default_bin)) {
            continue;
          }
        }
        if (t >= 0) {
          sum_left_gradient += GET_GRAD(data_, t);
          sum_left_hessian += GET_HESS(data_, t);
          left_count += static_cast<data_size_t>(
              Common::RoundInt(GET_HESS(data_, t) * cnt_factor));
        }
        // if data not enough, or sum hessian too small
        if (left_count < meta_->config->min_data_in_leaf ||
            sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) {
          continue;
        }
        data_size_t right_count = num_data - left_count;
        // if data not enough
        if (right_count < meta_->config->min_data_in_leaf) {
          break;
        }

        double sum_right_hessian = sum_hessian - sum_left_hessian;
        // if sum Hessian too small
        if (sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) {
          break;
        }

        double sum_right_gradient = sum_gradient - sum_left_gradient;
        if (USE_RAND) {
          if (t + offset != rand_threshold) {
            continue;
          }
        }
        // current split gain
        double current_gain = GetSplitGains<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
            sum_left_gradient, sum_left_hessian, sum_right_gradient,
            sum_right_hessian, meta_->config->lambda_l1,
            meta_->config->lambda_l2, meta_->config->max_delta_step,
            constraints, meta_->monotone_type, meta_->config->path_smooth, left_count,
            right_count, parent_output);
        // gain with split is worse than without split
        if (current_gain <= min_gain_shift) {
          continue;
        }

        // mark as able to be split
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {
          if (USE_MC) {
            best_right_constraints = constraints->RightToBasicConstraint();
            best_left_constraints = constraints->LeftToBasicConstraint();
            if (best_right_constraints.min > best_right_constraints.max ||
                best_left_constraints.min > best_left_constraints.max) {
              continue;
            }
          }
          best_left_count = left_count;
          best_sum_left_gradient = sum_left_gradient;
          best_sum_left_hessian = sum_left_hessian;
          best_threshold = static_cast<uint32_t>(t + offset);
          best_gain = current_gain;
        }
      }
    }

    if (is_splittable_ && best_gain > output->gain + min_gain_shift) {
      // update split information
      output->threshold = best_threshold;
      output->left_output =
          CalculateSplittedLeafOutput<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
              best_sum_left_gradient, best_sum_left_hessian,
              meta_->config->lambda_l1, meta_->config->lambda_l2,
              meta_->config->max_delta_step, best_left_constraints, meta_->config->path_smooth,
              best_left_count, parent_output);
      output->left_count = best_left_count;
      output->left_sum_gradient = best_sum_left_gradient;
      output->left_sum_hessian = best_sum_left_hessian - kEpsilon;
      output->right_output =
          CalculateSplittedLeafOutput<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
              sum_gradient - best_sum_left_gradient,
              sum_hessian - best_sum_left_hessian, meta_->config->lambda_l1,
              meta_->config->lambda_l2, meta_->config->max_delta_step,
              best_right_constraints, meta_->config->path_smooth, num_data - best_left_count,
              parent_output);
      output->right_count = num_data - best_left_count;
      output->right_sum_gradient = sum_gradient - best_sum_left_gradient;
      output->right_sum_hessian =
          sum_hessian - best_sum_left_hessian - kEpsilon;
      output->gain = best_gain - min_gain_shift;
      output->default_left = REVERSE;
    }
  }

  template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT, bool USE_SMOOTHING,
          bool REVERSE, bool SKIP_DEFAULT_BIN, bool NA_AS_MISSING, typename PACKED_HIST_BIN_T, typename PACKED_HIST_ACC_T,
          typename HIST_BIN_T, typename HIST_ACC_T, int HIST_BITS_BIN, int HIST_BITS_ACC>
  void FindBestThresholdSequentiallyInt(int64_t int_sum_gradient_and_hessian,
                                        const double grad_scale, const double hess_scale,
                                        data_size_t num_data,
                                        const FeatureConstraint* constraints,
                                        double min_gain_shift, SplitInfo* output,
                                        int rand_threshold, double parent_output) {
    const int8_t offset = meta_->offset;
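    // Packed layout (as implied by the shifts and masks below): a packed word
    // of 2 * HIST_BITS_ACC bits keeps the signed gradient sum in its high
    // HIST_BITS_ACC bits and the unsigned hessian sum in its low
    // HIST_BITS_ACC bits; the 64-bit int_sum_gradient_and_hessian follows the
    // same convention with 32-bit halves.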
    PACKED_HIST_ACC_T best_sum_left_gradient_and_hessian = 0;
    PACKED_HIST_ACC_T local_int_sum_gradient_and_hessian =
      HIST_BITS_ACC == 16 ?
      ((static_cast<int32_t>(int_sum_gradient_and_hessian >> 32) << 16) | static_cast<int32_t>(int_sum_gradient_and_hessian & 0x0000ffff)) :
      static_cast<PACKED_HIST_ACC_T>(int_sum_gradient_and_hessian);
    double best_gain = kMinScore;
    uint32_t best_threshold = static_cast<uint32_t>(meta_->num_bin);
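    // With quantized gradients, the low 32 bits of int_sum_gradient_and_hessian
    // hold the total integer hessian over all num_data rows, so cnt_factor
    // converts a partial integer hessian sum back into an approximate row count.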
    const double cnt_factor = static_cast<double>(num_data) /
      static_cast<double>(static_cast<uint32_t>(int_sum_gradient_and_hessian & 0x00000000ffffffff));

    BasicConstraint best_right_constraints;
    BasicConstraint best_left_constraints;
    bool constraint_update_necessary =
        USE_MC && constraints->ConstraintDifferentDependingOnThreshold();

    if (USE_MC) {
      constraints->InitCumulativeConstraints(REVERSE);
    }

    const PACKED_HIST_BIN_T* data_ptr = nullptr;
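    // 16-bit packed bins live in the dedicated int16 buffer; wider bins reuse
    // the hist_t buffer reinterpreted as packed integers.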
    if (HIST_BITS_BIN == 16) {
      data_ptr = reinterpret_cast<const PACKED_HIST_BIN_T*>(data_int16_);
    } else {
      data_ptr = reinterpret_cast<const PACKED_HIST_BIN_T*>(data_);
    }
    if (REVERSE) {
      PACKED_HIST_ACC_T sum_right_gradient_and_hessian = 0;

      int t = meta_->num_bin - 1 - offset - NA_AS_MISSING;
      const int t_end = 1 - offset;

      // iterate from right to left; the data in bin 0 is not needed
      for (; t >= t_end; --t) {
        // need to skip default bin
        if (SKIP_DEFAULT_BIN) {
          if ((t + offset) == static_cast<int>(meta_->default_bin)) {
            continue;
          }
        }
        const PACKED_HIST_BIN_T grad_and_hess = data_ptr[t];
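        // When the bin and accumulator widths differ, widen the packed bin
        // entry first: the gradient half is sign-extended (via HIST_BIN_T) and
        // the hessian half zero-extended, then both are re-packed at the
        // accumulator width.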
        if (HIST_BITS_ACC != HIST_BITS_BIN) {
          const PACKED_HIST_ACC_T grad_and_hess_acc = HIST_BITS_BIN == 16 ?
            ((static_cast<PACKED_HIST_ACC_T>(static_cast<HIST_BIN_T>(grad_and_hess >> HIST_BITS_BIN)) << HIST_BITS_ACC) |
            (static_cast<PACKED_HIST_ACC_T>(grad_and_hess & 0x0000ffff))) :
            ((static_cast<PACKED_HIST_ACC_T>(static_cast<HIST_BIN_T>(grad_and_hess >> HIST_BITS_BIN)) << HIST_BITS_ACC) |
            (static_cast<PACKED_HIST_ACC_T>(grad_and_hess & 0x00000000ffffffff)));
          sum_right_gradient_and_hessian += grad_and_hess_acc;
        } else {
          sum_right_gradient_and_hessian += grad_and_hess;
        }
        const uint32_t int_sum_right_hessian = HIST_BITS_ACC == 16 ?
          static_cast<uint32_t>(sum_right_gradient_and_hessian & 0x0000ffff) :
          static_cast<uint32_t>(sum_right_gradient_and_hessian & 0x00000000ffffffff);
        data_size_t right_count = Common::RoundInt(int_sum_right_hessian * cnt_factor);
        double sum_right_hessian = int_sum_right_hessian * hess_scale;
        // if data not enough, or sum hessian too small
        if (right_count < meta_->config->min_data_in_leaf ||
            sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) {
          continue;
        }
        data_size_t left_count = num_data - right_count;
        // if data not enough
        if (left_count < meta_->config->min_data_in_leaf) {
          break;
        }

        const PACKED_HIST_ACC_T sum_left_gradient_and_hessian = local_int_sum_gradient_and_hessian - sum_right_gradient_and_hessian;
        const uint32_t int_sum_left_hessian = HIST_BITS_ACC == 16 ?
          static_cast<uint32_t>(sum_left_gradient_and_hessian & 0x0000ffff) :
          static_cast<uint32_t>(sum_left_gradient_and_hessian & 0x00000000ffffffff);
        double sum_left_hessian = int_sum_left_hessian * hess_scale;
        // if sum hessian too small
        if (sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) {
          break;
        }

        double sum_right_gradient = HIST_BITS_ACC == 16 ?
          static_cast<double>(static_cast<int16_t>(sum_right_gradient_and_hessian >> 16)) * grad_scale :
          static_cast<double>(static_cast<int32_t>(static_cast<int64_t>(sum_right_gradient_and_hessian) >> 32)) * grad_scale;
        double sum_left_gradient = HIST_BITS_ACC == 16 ?
          static_cast<double>(static_cast<int16_t>(sum_left_gradient_and_hessian >> 16)) * grad_scale :
          static_cast<double>(static_cast<int32_t>(static_cast<int64_t>(sum_left_gradient_and_hessian) >> 32)) * grad_scale;
        if (USE_RAND) {
          if (t - 1 + offset != rand_threshold) {
            continue;
          }
        }

        if (USE_MC && constraint_update_necessary) {
          constraints->Update(t + offset);
        }

        // current split gain
        double current_gain = GetSplitGains<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
            sum_left_gradient, sum_left_hessian + kEpsilon, sum_right_gradient,
            sum_right_hessian + kEpsilon, meta_->config->lambda_l1,
            meta_->config->lambda_l2, meta_->config->max_delta_step,
            constraints, meta_->monotone_type, meta_->config->path_smooth,
            left_count, right_count, parent_output);
        // gain with split is worse than without split
        if (current_gain <= min_gain_shift) {
          continue;
        }

        // mark as able to be split
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {
          if (USE_MC) {
            best_right_constraints = constraints->RightToBasicConstraint();
            best_left_constraints = constraints->LeftToBasicConstraint();
            if (best_right_constraints.min > best_right_constraints.max ||
                best_left_constraints.min > best_left_constraints.max) {
              continue;
            }
          }
          best_sum_left_gradient_and_hessian = sum_left_gradient_and_hessian;
          // left is <= threshold, right is > threshold, so the stored threshold is t - 1
          best_threshold = static_cast<uint32_t>(t - 1 + offset);
          best_gain = current_gain;
        }
      }
    } else {
      PACKED_HIST_ACC_T sum_left_gradient_and_hessian = 0;

      int t = 0;
      const int t_end = meta_->num_bin - 2 - offset;

      if (NA_AS_MISSING) {
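        // When bin 0 is the NA bin (offset == 1), pre-compute its contribution
        // by subtracting every stored bin from the total, and start at t = -1
        // so the first evaluated split sends only the missing values left.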
        if (offset == 1) {
          sum_left_gradient_and_hessian = local_int_sum_gradient_and_hessian;
          for (int i = 0; i < meta_->num_bin - offset; ++i) {
            const PACKED_HIST_BIN_T grad_and_hess = data_ptr[i];
            if (HIST_BITS_ACC != HIST_BITS_BIN) {
              const PACKED_HIST_ACC_T grad_and_hess_acc = HIST_BITS_BIN == 16 ?
                ((static_cast<PACKED_HIST_ACC_T>(static_cast<HIST_BIN_T>(grad_and_hess >> HIST_BITS_BIN)) << HIST_BITS_ACC) |
                (static_cast<PACKED_HIST_ACC_T>(grad_and_hess & 0x0000ffff))) :
                ((static_cast<PACKED_HIST_ACC_T>(static_cast<HIST_BIN_T>(grad_and_hess >> HIST_BITS_BIN)) << HIST_BITS_ACC) |
                (static_cast<PACKED_HIST_ACC_T>(grad_and_hess & 0x00000000ffffffff)));
              sum_left_gradient_and_hessian -= grad_and_hess_acc;
            } else {
              sum_left_gradient_and_hessian -= grad_and_hess;
            }
          }
          t = -1;
        }
      }

      for (; t <= t_end; ++t) {
        if (SKIP_DEFAULT_BIN) {
          if ((t + offset) == static_cast<int>(meta_->default_bin)) {
            continue;
          }
        }
        if (t >= 0) {
          const PACKED_HIST_BIN_T grad_and_hess = data_ptr[t];
          if (HIST_BITS_ACC != HIST_BITS_BIN) {
            const PACKED_HIST_ACC_T grad_and_hess_acc = HIST_BITS_BIN == 16 ?
              ((static_cast<PACKED_HIST_ACC_T>(static_cast<HIST_BIN_T>(grad_and_hess >> HIST_BITS_BIN)) << HIST_BITS_ACC) |
              (static_cast<PACKED_HIST_ACC_T>(grad_and_hess & 0x0000ffff))) :
              ((static_cast<PACKED_HIST_ACC_T>(static_cast<HIST_BIN_T>(grad_and_hess >> HIST_BITS_BIN)) << HIST_BITS_ACC) |
              (static_cast<PACKED_HIST_ACC_T>(grad_and_hess & 0x00000000ffffffff)));
            sum_left_gradient_and_hessian += grad_and_hess_acc;
          } else {
            sum_left_gradient_and_hessian += grad_and_hess;
          }
        }
        // if data not enough, or sum hessian too small
        const uint32_t int_sum_left_hessian = HIST_BITS_ACC == 16 ?
          static_cast<uint32_t>(sum_left_gradient_and_hessian & 0x0000ffff) :
          static_cast<uint32_t>(sum_left_gradient_and_hessian & 0x00000000ffffffff);
        const data_size_t left_count = Common::RoundInt(static_cast<double>(int_sum_left_hessian) * cnt_factor);
        const double sum_left_hessian = static_cast<double>(int_sum_left_hessian) * hess_scale;
        if (left_count < meta_->config->min_data_in_leaf ||
            sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) {
          continue;
        }
        data_size_t right_count = num_data - left_count;
        // if data not enough
        if (right_count < meta_->config->min_data_in_leaf) {
          break;
        }

        const PACKED_HIST_ACC_T sum_right_gradient_and_hessian = local_int_sum_gradient_and_hessian - sum_left_gradient_and_hessian;
        const uint32_t int_sum_right_hessian = HIST_BITS_ACC == 16 ?
          static_cast<uint32_t>(sum_right_gradient_and_hessian & 0x0000ffff) :
          static_cast<uint32_t>(sum_right_gradient_and_hessian & 0x00000000ffffffff);
        const double sum_right_hessian = static_cast<double>(int_sum_right_hessian) * hess_scale;
        // if sum hessian too small
        if (sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) {
          break;
        }

        double sum_right_gradient = HIST_BITS_ACC == 16 ?
          static_cast<double>(static_cast<int16_t>(sum_right_gradient_and_hessian >> 16)) * grad_scale :
          static_cast<double>(static_cast<int32_t>(static_cast<int64_t>(sum_right_gradient_and_hessian) >> 32)) * grad_scale;
        double sum_left_gradient = HIST_BITS_ACC == 16 ?
          static_cast<double>(static_cast<int16_t>(sum_left_gradient_and_hessian >> 16)) * grad_scale :
          static_cast<double>(static_cast<int32_t>(static_cast<int64_t>(sum_left_gradient_and_hessian) >> 32)) * grad_scale;
        if (USE_RAND) {
          if (t + offset != rand_threshold) {
            continue;
          }
        }
        // current split gain
        double current_gain = GetSplitGains<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
            sum_left_gradient, sum_left_hessian + kEpsilon, sum_right_gradient,
            sum_right_hessian + kEpsilon, meta_->config->lambda_l1,
            meta_->config->lambda_l2, meta_->config->max_delta_step,
            constraints, meta_->monotone_type, meta_->config->path_smooth, left_count,
            right_count, parent_output);
        // gain with split is worse than without split
        if (current_gain <= min_gain_shift) {
          continue;
        }

        // mark as able to be split
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {
          if (USE_MC) {
            best_right_constraints = constraints->RightToBasicConstraint();
            best_left_constraints = constraints->LeftToBasicConstraint();
            if (best_right_constraints.min > best_right_constraints.max ||
                best_left_constraints.min > best_left_constraints.max) {
              continue;
            }
          }
          best_sum_left_gradient_and_hessian = sum_left_gradient_and_hessian;
          best_threshold = static_cast<uint32_t>(t + offset);
          best_gain = current_gain;
        }
      }
    }

    if (is_splittable_ && best_gain > output->gain + min_gain_shift) {
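      // Unpack the best packed sums, rescale them to doubles with grad_scale
      // and hess_scale, and recover the left/right data counts before filling
      // the split output.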
      const int32_t int_best_sum_left_gradient = HIST_BITS_ACC == 16 ?
        static_cast<int32_t>(static_cast<int16_t>(best_sum_left_gradient_and_hessian >> 16)) :
        static_cast<int32_t>(static_cast<int64_t>(best_sum_left_gradient_and_hessian) >> 32);
      const uint32_t int_best_sum_left_hessian = HIST_BITS_ACC == 16 ?
        static_cast<uint32_t>(best_sum_left_gradient_and_hessian & 0x0000ffff) :
        static_cast<uint32_t>(best_sum_left_gradient_and_hessian & 0x00000000ffffffff);
      const double best_sum_left_gradient = static_cast<double>(int_best_sum_left_gradient) * grad_scale;
      const double best_sum_left_hessian = static_cast<double>(int_best_sum_left_hessian) * hess_scale;
      const int64_t best_sum_left_gradient_and_hessian_int64 = HIST_BITS_ACC == 16 ?
          ((static_cast<int64_t>(static_cast<int16_t>(best_sum_left_gradient_and_hessian >> 16)) << 32) |
          static_cast<int64_t>(best_sum_left_gradient_and_hessian & 0x0000ffff)) :
          best_sum_left_gradient_and_hessian;
      const int64_t best_sum_right_gradient_and_hessian = int_sum_gradient_and_hessian - best_sum_left_gradient_and_hessian_int64;
      const int32_t int_best_sum_right_gradient = static_cast<int32_t>(best_sum_right_gradient_and_hessian >> 32);
      const uint32_t int_best_sum_right_hessian = static_cast<uint32_t>(best_sum_right_gradient_and_hessian & 0x00000000ffffffff);
      const double best_sum_right_gradient = static_cast<double>(int_best_sum_right_gradient) * grad_scale;
      const double best_sum_right_hessian = static_cast<double>(int_best_sum_right_hessian) * hess_scale;
      const data_size_t best_left_count = Common::RoundInt(static_cast<double>(int_best_sum_left_hessian) * cnt_factor);
      const data_size_t best_right_count = Common::RoundInt(static_cast<double>(int_best_sum_right_hessian) * cnt_factor);
      // update split information
      output->threshold = best_threshold;
      output->left_output =
          CalculateSplittedLeafOutput<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
              best_sum_left_gradient, best_sum_left_hessian,
              meta_->config->lambda_l1, meta_->config->lambda_l2,
              meta_->config->max_delta_step, best_left_constraints, meta_->config->path_smooth,
              best_left_count, parent_output);
      output->left_count = best_left_count;
      output->left_sum_gradient = best_sum_left_gradient;
      output->left_sum_hessian = best_sum_left_hessian;
      output->left_sum_gradient_and_hessian = best_sum_left_gradient_and_hessian_int64;
      output->right_output =
          CalculateSplittedLeafOutput<USE_MC, USE_L1, USE_MAX_OUTPUT, USE_SMOOTHING>(
              best_sum_right_gradient,
              best_sum_right_hessian, meta_->config->lambda_l1,
              meta_->config->lambda_l2, meta_->config->max_delta_step,
              best_right_constraints, meta_->config->path_smooth, best_right_count,
              parent_output);
      output->right_count = best_right_count;
      output->right_sum_gradient = best_sum_right_gradient;
      output->right_sum_hessian = best_sum_right_hessian;
      output->right_sum_gradient_and_hessian = best_sum_right_gradient_and_hessian;
      output->gain = best_gain - min_gain_shift;
      output->default_left = REVERSE;
    }
  }

  const FeatureMetainfo* meta_;
  /*! \brief histogram data: per-bin sums of gradients and hessians */
  hist_t* data_;
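  /*! \brief histogram data in packed 16-bit form, used when gradients are quantized */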
  int16_t* data_int16_;
  bool is_splittable_ = true;

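  /*! \brief function for finding the best threshold, selected according to
   *  the feature's bin type and the current config */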
  std::function<void(double, double, data_size_t, const FeatureConstraint*,
                     double, SplitInfo*)>
      find_best_threshold_fun_;

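  /*! \brief integer-histogram variant of the best-threshold function, used
   *  when gradients are quantized */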
  std::function<void(int64_t, double, double, const uint8_t, const uint8_t, data_size_t, const FeatureConstraint*,
                     double, SplitInfo*)>
      int_find_best_threshold_fun_;
};

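/*!
 * \brief HistogramPool caches the FeatureHistogram arrays used by tree
 * leaves. When the cache cannot hold all required entries, slots are
 * recycled with a least-recently-used policy (see Get and Move).
 */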
class HistogramPool {
 public:
  /*!
   * \brief Constructor
   */
  HistogramPool() {
    cache_size_ = 0;
    total_size_ = 0;
  }

  /*!
   * \brief Destructor
   */
  ~HistogramPool() {}

  /*!
   * \brief Reset pool size
   * \param cache_size Max cache size
   * \param total_size Total number of histogram slots that will be used
   */
  void Reset(int cache_size, int total_size) {
    cache_size_ = cache_size;
    // need at least 2 buckets to store the smaller and the larger leaf
    CHECK_GE(cache_size_, 2);
    total_size_ = total_size;
    if (cache_size_ > total_size_) {
      cache_size_ = total_size_;
    }
    is_enough_ = (cache_size_ == total_size_);
    if (!is_enough_) {
      mapper_.resize(total_size_);
      inverse_mapper_.resize(cache_size_);
      last_used_time_.resize(cache_size_);
      ResetMap();
    }
  }

  /*!
   * \brief Reset mapper
   */
  void ResetMap() {
    if (!is_enough_) {
      cur_time_ = 0;
      std::fill(mapper_.begin(), mapper_.end(), -1);
      std::fill(inverse_mapper_.begin(), inverse_mapper_.end(), -1);
      std::fill(last_used_time_.begin(), last_used_time_.end(), 0);
    }
  }
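  /*!
   * \brief Fill the per-feature meta information
   * \param train_data The training dataset; read when USE_DATA is true to set
   * bin counts, default bins, missing types and bin types
   * \param config The config; read when USE_CONFIG is true to set monotone
   * constraints, per-feature penalties and extra-trees random seeds
   * \param feature_meta Output vector of per-feature meta information
   */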
  template <bool USE_DATA, bool USE_CONFIG>
  static void SetFeatureInfo(const Dataset* train_data, const Config* config,
                             std::vector<FeatureMetainfo>* feature_meta) {
    auto& ref_feature_meta = *feature_meta;
    const int num_feature = train_data->num_features();
    ref_feature_meta.resize(num_feature);
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static, 512) if (num_feature >= 1024)
    for (int i = 0; i < num_feature; ++i) {
      if (USE_DATA) {
        ref_feature_meta[i].num_bin = train_data->FeatureNumBin(i);
        ref_feature_meta[i].default_bin =
            train_data->FeatureBinMapper(i)->GetDefaultBin();
        ref_feature_meta[i].missing_type =
            train_data->FeatureBinMapper(i)->missing_type();
        if (train_data->FeatureBinMapper(i)->GetMostFreqBin() == 0) {
          ref_feature_meta[i].offset = 1;
        } else {
          ref_feature_meta[i].offset = 0;
        }
        ref_feature_meta[i].bin_type =
            train_data->FeatureBinMapper(i)->bin_type();
      }
      if (USE_CONFIG) {
        const int real_fidx = train_data->RealFeatureIndex(i);
        if (!config->monotone_constraints.empty()) {
          ref_feature_meta[i].monotone_type =
              config->monotone_constraints[real_fidx];
        } else {
          ref_feature_meta[i].monotone_type = 0;
        }
        if (!config->feature_contri.empty()) {
          ref_feature_meta[i].penalty = config->feature_contri[real_fidx];
        } else {
          ref_feature_meta[i].penalty = 1.0;
        }
        ref_feature_meta[i].rand = Random(config->extra_seed + i);
      }
      ref_feature_meta[i].config = config;
    }
  }

  void DynamicChangeSize(const Dataset* train_data, int num_total_bin,
                         const std::vector<uint32_t>& offsets, const Config* config,
                         int cache_size, int total_size) {
    if (feature_metas_.empty()) {
      SetFeatureInfo<true, true>(train_data, config, &feature_metas_);
      uint64_t bin_cnt_over_features = 0;
      for (int i = 0; i < train_data->num_features(); ++i) {
        bin_cnt_over_features +=
            static_cast<uint64_t>(feature_metas_[i].num_bin);
      }
      // bin_cnt_over_features is uint64_t; cast so the argument matches %d
      Log::Info("Total Bins %d", static_cast<int>(bin_cnt_over_features));
    }
    int old_cache_size = static_cast<int>(pool_.size());
    Reset(cache_size, total_size);

    if (cache_size > old_cache_size) {
      pool_.resize(cache_size);
      data_.resize(cache_size);
    }

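    // Buffer sizing (as implied by the Init calls below): with quantized
    // gradients one hist_t slot per bin backs both the hist_t view and the
    // packed 16-bit view (hence the reinterpret_cast and the 2 * offsets[j]
    // element offset); otherwise each bin stores separate gradient and
    // hessian entries, hence num_total_bin * 2.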
    if (config->use_quantized_grad) {
      OMP_INIT_EX();
      #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
      for (int i = old_cache_size; i < cache_size; ++i) {
        OMP_LOOP_EX_BEGIN();
        pool_[i].reset(new FeatureHistogram[train_data->num_features()]);
        data_[i].resize(num_total_bin);
        for (int j = 0; j < train_data->num_features(); ++j) {
          int16_t* data_ptr = reinterpret_cast<int16_t*>(data_[i].data());
          pool_[i][j].Init(data_[i].data() + offsets[j], data_ptr + 2 * offsets[j], &feature_metas_[j]);
        }
        OMP_LOOP_EX_END();
      }
      OMP_THROW_EX();
    } else {
      OMP_INIT_EX();
      #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
      for (int i = old_cache_size; i < cache_size; ++i) {
        OMP_LOOP_EX_BEGIN();
        pool_[i].reset(new FeatureHistogram[train_data->num_features()]);
        data_[i].resize(num_total_bin * 2);
        for (int j = 0; j < train_data->num_features(); ++j) {
          pool_[i][j].Init(data_[i].data() + offsets[j] * 2, &feature_metas_[j]);
        }
        OMP_LOOP_EX_END();
      }
      OMP_THROW_EX();
    }
  }

  void ResetConfig(const Dataset* train_data, const Config* config) {
    CHECK_GT(train_data->num_features(), 0);
    const Config* old_config = feature_metas_[0].config;
    SetFeatureInfo<false, true>(train_data, config, &feature_metas_);
    // reset the split-function pointers if any parameter that affects them has changed
    if (old_config->lambda_l1 != config->lambda_l1 ||
        old_config->monotone_constraints != config->monotone_constraints ||
        old_config->extra_trees != config->extra_trees ||
        old_config->max_delta_step != config->max_delta_step ||
        old_config->path_smooth != config->path_smooth) {
#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
      for (int i = 0; i < cache_size_; ++i) {
        for (int j = 0; j < train_data->num_features(); ++j) {
          pool_[i][j].ResetFunc();
        }
      }
    }
  }

  /*!
   * \brief Get data for the specific index
   * \param idx which index to get
   * \param out output pointer that the data will be stored into
   * \return True if this index is in the pool, False otherwise
   */
  bool Get(int idx, FeatureHistogram** out) {
    if (is_enough_) {
      *out = pool_[idx].get();
      return true;
    } else if (mapper_[idx] >= 0) {
      int slot = mapper_[idx];
      *out = pool_[slot].get();
      last_used_time_[slot] = ++cur_time_;
      return true;
    } else {
      // choose the least used slot
      int slot = static_cast<int>(ArrayArgs<int>::ArgMin(last_used_time_));
      *out = pool_[slot].get();
      last_used_time_[slot] = ++cur_time_;

      // reset previous mapper
      if (inverse_mapper_[slot] >= 0) mapper_[inverse_mapper_[slot]] = -1;

      // update current mapper
      mapper_[idx] = slot;
      inverse_mapper_[slot] = idx;
      return false;
    }
  }

  /*!
   * \brief Move data from one index to another index
   * \param src_idx Source index
   * \param dst_idx Destination index
   */
  void Move(int src_idx, int dst_idx) {
    if (is_enough_) {
      std::swap(pool_[src_idx], pool_[dst_idx]);
      return;
    }
    if (mapper_[src_idx] < 0) {
      return;
    }
    // get slot of src idx
    int slot = mapper_[src_idx];
    // reset src_idx
    mapper_[src_idx] = -1;

    // move to dst idx
    mapper_[dst_idx] = slot;
    last_used_time_[slot] = ++cur_time_;
    inverse_mapper_[slot] = dst_idx;
  }

 private:
  std::vector<std::unique_ptr<FeatureHistogram[]>> pool_;
  std::vector<
      std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>>
      data_;
  std::vector<FeatureMetainfo> feature_metas_;
  int cache_size_;
  int total_size_;
  bool is_enough_ = false;
  std::vector<int> mapper_;
  std::vector<int> inverse_mapper_;
  std::vector<int> last_used_time_;
  int cur_time_ = 0;
};

}  // namespace LightGBM
#endif  // LIGHTGBM_SRC_TREELEARNER_FEATURE_HISTOGRAM_HPP_