"vscode:/vscode.git/clone" did not exist on "f7ad945717d90a47f68dcac78ebc508a1fe00c40"
feature_histogram.hpp 33.6 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_TREELEARNER_FEATURE_HISTOGRAM_HPP_
#define LIGHTGBM_TREELEARNER_FEATURE_HISTOGRAM_HPP_

8
#include <LightGBM/bin.h>
Guolin Ke's avatar
Guolin Ke committed
9
#include <LightGBM/dataset.h>
10
#include <LightGBM/utils/array_args.h>
Guolin Ke's avatar
Guolin Ke committed
11

12
#include <algorithm>
13
#include <cmath>
14
15
16
17
18
19
#include <cstring>
#include <memory>
#include <utility>
#include <vector>

#include "split_info.hpp"
Guolin Ke's avatar
Guolin Ke committed
20

21
namespace LightGBM {
Guolin Ke's avatar
Guolin Ke committed
22

Guolin Ke's avatar
Guolin Ke committed
23
class FeatureMetainfo {
24
public:
Guolin Ke's avatar
Guolin Ke committed
25
  int num_bin;
Guolin Ke's avatar
Guolin Ke committed
26
  MissingType missing_type;
27
  int8_t offset = 0;
Guolin Ke's avatar
Guolin Ke committed
28
  uint32_t default_bin;
Guolin Ke's avatar
Guolin Ke committed
29
  int8_t monotone_type;
Guolin Ke's avatar
Guolin Ke committed
30
  double penalty;
Guolin Ke's avatar
Guolin Ke committed
31
  /*! \brief pointer of tree config */
Guolin Ke's avatar
Guolin Ke committed
32
  const Config* config;
33
  BinType bin_type;
Guolin Ke's avatar
Guolin Ke committed
34
};
Guolin Ke's avatar
Guolin Ke committed
35
36
37
38
/*!
* \brief FeatureHistogram is used to construct and store a histogram for a feature.
*/
class FeatureHistogram {
39
public:
Guolin Ke's avatar
Guolin Ke committed
40
  FeatureHistogram() {
Guolin Ke's avatar
Guolin Ke committed
41
    data_ = nullptr;
Guolin Ke's avatar
Guolin Ke committed
42
  }
Guolin Ke's avatar
Guolin Ke committed
43

Guolin Ke's avatar
Guolin Ke committed
44
45
46
  ~FeatureHistogram() {
  }

Guolin Ke's avatar
Guolin Ke committed
47
48
49
50
51
  /*! \brief Disable copy */
  FeatureHistogram& operator=(const FeatureHistogram&) = delete;
  /*! \brief Disable copy */
  FeatureHistogram(const FeatureHistogram&) = delete;

Guolin Ke's avatar
Guolin Ke committed
52
53
54
55
56
  /*!
  * \brief Init the feature histogram
  * \param feature the feature data for this histogram
  * \param min_num_data_one_leaf minimal number of data in one leaf
  */
57
  void Init(hist_t* data, const FeatureMetainfo* meta) {
Guolin Ke's avatar
Guolin Ke committed
58
59
    meta_ = meta;
    data_ = data;
60
    if (meta_->bin_type == BinType::NumericalBin) {
61
      find_best_threshold_fun_ = std::bind(&FeatureHistogram::FindBestThresholdNumerical, this, std::placeholders::_1
62
        , std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, std::placeholders::_6);
63
64
    } else {
      find_best_threshold_fun_ = std::bind(&FeatureHistogram::FindBestThresholdCategorical, this, std::placeholders::_1
65
        , std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, std::placeholders::_6);
66
    }
Guolin Ke's avatar
Guolin Ke committed
67
68
  }

69
  hist_t* RawData() {
Guolin Ke's avatar
Guolin Ke committed
70
    return data_;
Guolin Ke's avatar
Guolin Ke committed
71
72
73
74
75
76
  }
  /*!
  * \brief Subtract current histograms with other
  * \param other The histogram that want to subtract
  */
  void Subtract(const FeatureHistogram& other) {
77
78
    for (int i = 0; i < (meta_->num_bin - meta_->offset) * 2; ++i) {
      data_[i] -= other.data_[i];
Guolin Ke's avatar
Guolin Ke committed
79
80
    }
  }
81

Guolin Ke's avatar
Guolin Ke committed
82
  void FindBestThreshold(double sum_gradient, double sum_hessian, data_size_t num_data, double min_constraint, double max_constraint,
83
    SplitInfo* output) {
Guolin Ke's avatar
Guolin Ke committed
84
    output->default_left = true;
Guolin Ke's avatar
Guolin Ke committed
85
    output->gain = kMinScore;
Guolin Ke's avatar
Guolin Ke committed
86
    find_best_threshold_fun_(sum_gradient, sum_hessian + 2 * kEpsilon, num_data, min_constraint, max_constraint, output);
Guolin Ke's avatar
Guolin Ke committed
87
    output->gain *= meta_->penalty;
88
89
  }

Guolin Ke's avatar
Guolin Ke committed
90
  void FindBestThresholdNumerical(double sum_gradient, double sum_hessian, data_size_t num_data, double min_constraint, double max_constraint,
91
    SplitInfo* output) {
Guolin Ke's avatar
Guolin Ke committed
92
    is_splittable_ = false;
Guolin Ke's avatar
Guolin Ke committed
93
    double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian,
94
      meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
Guolin Ke's avatar
Guolin Ke committed
95
    double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
Guolin Ke's avatar
Guolin Ke committed
96
97
    if (meta_->num_bin > 2 && meta_->missing_type != MissingType::None) {
      if (meta_->missing_type == MissingType::Zero) {
Guolin Ke's avatar
Guolin Ke committed
98
99
        FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, -1, true, false);
        FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, 1, true, false);
Guolin Ke's avatar
Guolin Ke committed
100
      } else {
Guolin Ke's avatar
Guolin Ke committed
101
102
        FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, -1, false, true);
        FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, 1, false, true);
Guolin Ke's avatar
Guolin Ke committed
103
      }
104
    } else {
Guolin Ke's avatar
Guolin Ke committed
105
      FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, -1, false, false);
Guolin Ke's avatar
Guolin Ke committed
106
107
108
109
      // fix the direction error when only have 2 bins
      if (meta_->missing_type == MissingType::NaN) {
        output->default_left = false;
      }
Guolin Ke's avatar
Guolin Ke committed
110
    }
Guolin Ke's avatar
Guolin Ke committed
111
    output->gain -= min_gain_shift;
Guolin Ke's avatar
Guolin Ke committed
112
113
114
    output->monotone_type = meta_->monotone_type;
    output->min_constraint = min_constraint;
    output->max_constraint = max_constraint;
Guolin Ke's avatar
Guolin Ke committed
115
  }
116

117
  void FindBestThresholdCategorical(double sum_gradient, double sum_hessian, data_size_t num_data,
118
119
    double min_constraint, double max_constraint,
    SplitInfo* output) {
Guolin Ke's avatar
Guolin Ke committed
120
    output->default_left = false;
121
    double best_gain = kMinScore;
122
    data_size_t best_left_count = 0;
ChenZhiyong's avatar
ChenZhiyong committed
123
124
    double best_sum_left_gradient = 0;
    double best_sum_left_hessian = 0;
Guolin Ke's avatar
Guolin Ke committed
125
    double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
126

Guolin Ke's avatar
Guolin Ke committed
127
    double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
ChenZhiyong's avatar
ChenZhiyong committed
128
    bool is_full_categorical = meta_->missing_type == MissingType::None;
129
    int used_bin = meta_->num_bin - 1 + is_full_categorical;
ChenZhiyong's avatar
ChenZhiyong committed
130

Guolin Ke's avatar
Guolin Ke committed
131
    std::vector<int> sorted_idx;
Guolin Ke's avatar
Guolin Ke committed
132
133
    double l2 = meta_->config->lambda_l2;
    bool use_onehot = meta_->num_bin <= meta_->config->max_cat_to_onehot;
134
135
    int best_threshold = -1;
    int best_dir = 1;
136
    const double cnt_factor = num_data / sum_hessian;
137
138
    if (use_onehot) {
      for (int t = 0; t < used_bin; ++t) {
139
140
141
        const auto grad = GET_GRAD(data_, t);
        const auto hess = GET_HESS(data_, t);
        data_size_t cnt = static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
142
        // if data not enough, or sum hessian too small
143
144
145
        if (cnt < meta_->config->min_data_in_leaf
          || hess < meta_->config->min_sum_hessian_in_leaf) continue;
        data_size_t other_count = num_data - cnt;
146
        // if data not enough
Guolin Ke's avatar
Guolin Ke committed
147
        if (other_count < meta_->config->min_data_in_leaf) continue;
ChenZhiyong's avatar
ChenZhiyong committed
148

149
        double sum_other_hessian = sum_hessian - hess - kEpsilon;
150
        // if sum hessian too small
Guolin Ke's avatar
Guolin Ke committed
151
        if (sum_other_hessian < meta_->config->min_sum_hessian_in_leaf) continue;
ChenZhiyong's avatar
ChenZhiyong committed
152

153
        double sum_other_gradient = sum_gradient - grad;
154
        // current split gain
155
156
157
        double current_gain = GetSplitGains(sum_other_gradient, sum_other_hessian, grad, hess + kEpsilon,
          meta_->config->lambda_l1, l2, meta_->config->max_delta_step,
          min_constraint, max_constraint, 0);
158
        // gain with split is worse than without split
ChenZhiyong's avatar
ChenZhiyong committed
159
        if (current_gain <= min_gain_shift) continue;
160
161

        // mark to is splittable
ChenZhiyong's avatar
ChenZhiyong committed
162
        is_splittable_ = true;
163
        // better split point
ChenZhiyong's avatar
ChenZhiyong committed
164
        if (current_gain > best_gain) {
165
          best_threshold = t;
166
167
168
          best_sum_left_gradient = grad;
          best_sum_left_hessian = hess + kEpsilon;
          best_left_count = cnt;
ChenZhiyong's avatar
ChenZhiyong committed
169
          best_gain = current_gain;
170
171
172
173
        }
      }
    } else {
      for (int i = 0; i < used_bin; ++i) {
174
        if (Common::RoundInt(GET_HESS(data_, i) * cnt_factor) >= meta_->config->cat_smooth) {
175
176
177
178
179
          sorted_idx.push_back(i);
        }
      }
      used_bin = static_cast<int>(sorted_idx.size());

Guolin Ke's avatar
Guolin Ke committed
180
      l2 += meta_->config->cat_l2;
181
182

      auto ctr_fun = [this](double sum_grad, double sum_hess) {
Guolin Ke's avatar
Guolin Ke committed
183
        return (sum_grad) / (sum_hess + meta_->config->cat_smooth);
184
185
      };
      std::sort(sorted_idx.begin(), sorted_idx.end(),
186
187
188
        [this, &ctr_fun](int i, int j) {
          return ctr_fun(GET_GRAD(data_, i), GET_HESS(data_, i)) < ctr_fun(GET_GRAD(data_, j), GET_HESS(data_, j));
        });
189
190
191
192
193

      std::vector<int> find_direction(1, 1);
      std::vector<int> start_position(1, 0);
      find_direction.push_back(-1);
      start_position.push_back(used_bin - 1);
Guolin Ke's avatar
Guolin Ke committed
194
      const int max_num_cat = std::min(meta_->config->max_cat_threshold, (used_bin + 1) / 2);
195
196
197
198
199

      is_splittable_ = false;
      for (size_t out_i = 0; out_i < find_direction.size(); ++out_i) {
        auto dir = find_direction[out_i];
        auto start_pos = start_position[out_i];
Guolin Ke's avatar
Guolin Ke committed
200
        data_size_t min_data_per_group = meta_->config->min_data_per_group;
201
202
203
204
205
206
207
        data_size_t cnt_cur_group = 0;
        double sum_left_gradient = 0.0f;
        double sum_left_hessian = kEpsilon;
        data_size_t left_count = 0;
        for (int i = 0; i < used_bin && i < max_num_cat; ++i) {
          auto t = sorted_idx[start_pos];
          start_pos += dir;
208
209
210
          const auto grad = GET_GRAD(data_, t);
          const auto hess = GET_HESS(data_, t);
          data_size_t cnt = static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
211

212
213
214
215
          sum_left_gradient += grad;
          sum_left_hessian += hess;
          left_count += cnt;
          cnt_cur_group += cnt;
216

Guolin Ke's avatar
Guolin Ke committed
217
          if (left_count < meta_->config->min_data_in_leaf
218
            || sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) continue;
219
          data_size_t right_count = num_data - left_count;
Guolin Ke's avatar
Guolin Ke committed
220
          if (right_count < meta_->config->min_data_in_leaf || right_count < min_data_per_group) break;
221
222

          double sum_right_hessian = sum_hessian - sum_left_hessian;
Guolin Ke's avatar
Guolin Ke committed
223
          if (sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) break;
224
225
226
227
228
229

          if (cnt_cur_group < min_data_per_group) continue;

          cnt_cur_group = 0;

          double sum_right_gradient = sum_gradient - sum_left_gradient;
Guolin Ke's avatar
Guolin Ke committed
230
          double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
231
232
            meta_->config->lambda_l1, l2, meta_->config->max_delta_step,
            min_constraint, max_constraint, 0);
233
234
235
236
237
238
239
240
241
242
          if (current_gain <= min_gain_shift) continue;
          is_splittable_ = true;
          if (current_gain > best_gain) {
            best_left_count = left_count;
            best_sum_left_gradient = sum_left_gradient;
            best_sum_left_hessian = sum_left_hessian;
            best_threshold = i;
            best_gain = current_gain;
            best_dir = dir;
          }
ChenZhiyong's avatar
ChenZhiyong committed
243
        }
244
245
      }
    }
246

247
    if (is_splittable_) {
Guolin Ke's avatar
Guolin Ke committed
248
      output->left_output = CalculateSplittedLeafOutput(best_sum_left_gradient, best_sum_left_hessian,
249
250
        meta_->config->lambda_l1, l2, meta_->config->max_delta_step,
        min_constraint, max_constraint);
251
252
253
254
      output->left_count = best_left_count;
      output->left_sum_gradient = best_sum_left_gradient;
      output->left_sum_hessian = best_sum_left_hessian - kEpsilon;
      output->right_output = CalculateSplittedLeafOutput(sum_gradient - best_sum_left_gradient,
255
256
257
        sum_hessian - best_sum_left_hessian,
        meta_->config->lambda_l1, l2, meta_->config->max_delta_step,
        min_constraint, max_constraint);
258
259
260
      output->right_count = num_data - best_left_count;
      output->right_sum_gradient = sum_gradient - best_sum_left_gradient;
      output->right_sum_hessian = sum_hessian - best_sum_left_hessian - kEpsilon;
Guolin Ke's avatar
Guolin Ke committed
261
      output->gain = best_gain - min_gain_shift;
262
263
264
      if (use_onehot) {
        output->num_cat_threshold = 1;
        output->cat_threshold = std::vector<uint32_t>(1, static_cast<uint32_t>(best_threshold));
ChenZhiyong's avatar
ChenZhiyong committed
265
      } else {
266
267
268
269
270
271
272
273
274
275
276
277
        output->num_cat_threshold = best_threshold + 1;
        output->cat_threshold = std::vector<uint32_t>(output->num_cat_threshold);
        if (best_dir == 1) {
          for (int i = 0; i < output->num_cat_threshold; ++i) {
            auto t = sorted_idx[i];
            output->cat_threshold[i] = t;
          }
        } else {
          for (int i = 0; i < output->num_cat_threshold; ++i) {
            auto t = sorted_idx[used_bin - 1 - i];
            output->cat_threshold[i] = t;
          }
ChenZhiyong's avatar
ChenZhiyong committed
278
279
        }
      }
Guolin Ke's avatar
Guolin Ke committed
280
281
282
      output->monotone_type = 0;
      output->min_constraint = min_constraint;
      output->max_constraint = max_constraint;
283
    }
284
285
  }

286
  void GatherInfoForThreshold(double sum_gradient, double sum_hessian,
287
    uint32_t threshold, data_size_t num_data, SplitInfo* output) {
288
289
    if (meta_->bin_type == BinType::NumericalBin) {
      GatherInfoForThresholdNumerical(sum_gradient, sum_hessian, threshold,
290
        num_data, output);
291
292
    } else {
      GatherInfoForThresholdCategorical(sum_gradient, sum_hessian, threshold,
293
        num_data, output);
294
295
296
297
    }
  }

  void GatherInfoForThresholdNumerical(double sum_gradient, double sum_hessian,
298
299
    uint32_t threshold, data_size_t num_data,
    SplitInfo* output) {
300
    double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian,
301
302
      meta_->config->lambda_l1, meta_->config->lambda_l2,
      meta_->config->max_delta_step);
Guolin Ke's avatar
Guolin Ke committed
303
    double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
304
305

    // do stuff here
306
    const int8_t offset = meta_->offset;
307
308
309
310
311
312

    double sum_right_gradient = 0.0f;
    double sum_right_hessian = kEpsilon;
    data_size_t right_count = 0;

    // set values
313
314
    bool use_na_as_missing = false;
    bool skip_default_bin = false;
315
316
    if (meta_->missing_type == MissingType::Zero) {
      skip_default_bin = true;
317
    } else if (meta_->missing_type == MissingType::NaN) {
318
319
320
      use_na_as_missing = true;
    }

321
322
    int t = meta_->num_bin - 1 - offset - use_na_as_missing;
    const int t_end = 1 - offset;
323
    const double cnt_factor = num_data / sum_hessian;
324
325
    // from right to left, and we don't need data in bin0
    for (; t >= t_end; --t) {
326
      if (static_cast<uint32_t>(t + offset) < threshold) { break; }
327
328

      // need to skip default bin
329
      if (skip_default_bin && (t + offset) == static_cast<int>(meta_->default_bin)) { continue; }
330
331
332
333
334
335
      const auto grad = GET_GRAD(data_, t);
      const auto hess = GET_HESS(data_, t);
      data_size_t cnt = static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
      sum_right_gradient += grad;
      sum_right_hessian += hess;
      right_count += cnt;
336
337
338
339
340
    }
    double sum_left_gradient = sum_gradient - sum_right_gradient;
    double sum_left_hessian = sum_hessian - sum_right_hessian;
    data_size_t left_count = num_data - right_count;
    double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian,
341
342
343
344
345
      meta_->config->lambda_l1, meta_->config->lambda_l2,
      meta_->config->max_delta_step)
      + GetLeafSplitGain(sum_right_gradient, sum_right_hessian,
        meta_->config->lambda_l1, meta_->config->lambda_l2,
        meta_->config->max_delta_step);
346
347
348
349

    // gain with split is worse than without split
    if (std::isnan(current_gain) || current_gain <= min_gain_shift) {
      output->gain = kMinScore;
350
      Log::Warning("'Forced Split' will be ignored since the gain getting worse. ");
351
      return;
352
    }
353
354
355
356

    // update split information
    output->threshold = threshold;
    output->left_output = CalculateSplittedLeafOutput(sum_left_gradient, sum_left_hessian,
357
358
      meta_->config->lambda_l1, meta_->config->lambda_l2,
      meta_->config->max_delta_step);
359
360
361
362
    output->left_count = left_count;
    output->left_sum_gradient = sum_left_gradient;
    output->left_sum_hessian = sum_left_hessian - kEpsilon;
    output->right_output = CalculateSplittedLeafOutput(sum_gradient - sum_left_gradient,
363
364
365
      sum_hessian - sum_left_hessian,
      meta_->config->lambda_l1, meta_->config->lambda_l2,
      meta_->config->max_delta_step);
366
367
368
369
370
371
372
373
374
    output->right_count = num_data - left_count;
    output->right_sum_gradient = sum_gradient - sum_left_gradient;
    output->right_sum_hessian = sum_hessian - sum_left_hessian - kEpsilon;
    output->gain = current_gain;
    output->gain -= min_gain_shift;
    output->default_left = true;
  }

  void GatherInfoForThresholdCategorical(double sum_gradient, double sum_hessian,
375
    uint32_t threshold, data_size_t num_data, SplitInfo* output) {
376
377
378
    // get SplitInfo for a given one-hot categorical split.
    output->default_left = false;
    double gain_shift = GetLeafSplitGain(
379
380
381
      sum_gradient, sum_hessian,
      meta_->config->lambda_l1, meta_->config->lambda_l2,
      meta_->config->max_delta_step);
Guolin Ke's avatar
Guolin Ke committed
382
    double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
383
384
385
386
387
388
389
    bool is_full_categorical = meta_->missing_type == MissingType::None;
    int used_bin = meta_->num_bin - 1 + is_full_categorical;
    if (threshold >= static_cast<uint32_t>(used_bin)) {
      output->gain = kMinScore;
      Log::Warning("Invalid categorical threshold split");
      return;
    }
390
391
392
393
    const double cnt_factor = num_data / sum_hessian;
    const auto grad = GET_GRAD(data_, threshold);
    const auto hess = GET_HESS(data_, threshold);
    data_size_t cnt = static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
394

Guolin Ke's avatar
Guolin Ke committed
395
    double l2 = meta_->config->lambda_l2;
396
    data_size_t left_count = cnt;
397
    data_size_t right_count = num_data - left_count;
398
    double sum_left_hessian = hess + kEpsilon;
399
    double sum_right_hessian = sum_hessian - sum_left_hessian;
400
    double sum_left_gradient = grad;
401
402
403
    double sum_right_gradient = sum_gradient - sum_left_gradient;
    // current split gain
    double current_gain = GetLeafSplitGain(sum_right_gradient, sum_right_hessian,
404
405
406
407
408
      meta_->config->lambda_l1, l2,
      meta_->config->max_delta_step)
      + GetLeafSplitGain(sum_left_gradient, sum_left_hessian,
        meta_->config->lambda_l1, l2,
        meta_->config->max_delta_step);
409
410
    if (std::isnan(current_gain) || current_gain <= min_gain_shift) {
      output->gain = kMinScore;
411
      Log::Warning("'Forced Split' will be ignored since the gain getting worse. ");
412
413
414
415
      return;
    }

    output->left_output = CalculateSplittedLeafOutput(sum_left_gradient, sum_left_hessian,
416
417
      meta_->config->lambda_l1, l2,
      meta_->config->max_delta_step);
418
419
420
421
    output->left_count = left_count;
    output->left_sum_gradient = sum_left_gradient;
    output->left_sum_hessian = sum_left_hessian - kEpsilon;
    output->right_output = CalculateSplittedLeafOutput(sum_right_gradient, sum_right_hessian,
422
423
      meta_->config->lambda_l1, l2,
      meta_->config->max_delta_step);
424
425
426
427
428
429
430
431
432
    output->right_count = right_count;
    output->right_sum_gradient = sum_gradient - sum_left_gradient;
    output->right_sum_hessian = sum_right_hessian - kEpsilon;
    output->gain = current_gain - min_gain_shift;
    output->num_cat_threshold = 1;
    output->cat_threshold = std::vector<uint32_t>(1, threshold);
  }


Guolin Ke's avatar
Guolin Ke committed
433
434
435
436
  /*!
  * \brief Binary size of this histogram
  */
  int SizeOfHistgram() const {
437
    return (meta_->num_bin - meta_->offset) * KHistEntrySize;
Guolin Ke's avatar
Guolin Ke committed
438
439
440
441
442
  }

  /*!
  * \brief Restore histogram from memory
  */
Guolin Ke's avatar
Guolin Ke committed
443
  void FromMemory(char* memory_data) {
444
    std::memcpy(data_, memory_data, (meta_->num_bin - meta_->offset) * KHistEntrySize);
Guolin Ke's avatar
Guolin Ke committed
445
446
447
448
449
450
451
452
453
454
455
456
  }

  /*!
  * \brief True if this histogram can be splitted
  */
  bool is_splittable() { return is_splittable_; }

  /*!
  * \brief Set splittable to this histogram
  */
  void set_is_splittable(bool val) { is_splittable_ = val; }

457
458
459
460
461
462
463
464
465
466
467
468
  static double ThresholdL1(double s, double l1) {
    const double reg_s = std::max(0.0, std::fabs(s) - l1);
    return Common::Sign(s) * reg_s;
  }

  static double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians, double l1, double l2, double max_delta_step) {
    double ret = -ThresholdL1(sum_gradients, l1) / (sum_hessians + l2);
    if (max_delta_step <= 0.0f || std::fabs(ret) <= max_delta_step) {
      return ret;
    } else {
      return Common::Sign(ret) * max_delta_step;
    }
Guolin Ke's avatar
Guolin Ke committed
469
470
  }

471
private:
Guolin Ke's avatar
Guolin Ke committed
472
  static double GetSplitGains(double sum_left_gradients, double sum_left_hessians,
473
474
475
    double sum_right_gradients, double sum_right_hessians,
    double l1, double l2, double max_delta_step,
    double min_constraint, double max_constraint, int8_t monotone_constraint) {
476
477
    double left_output = CalculateSplittedLeafOutput(sum_left_gradients, sum_left_hessians, l1, l2, max_delta_step, min_constraint, max_constraint);
    double right_output = CalculateSplittedLeafOutput(sum_right_gradients, sum_right_hessians, l1, l2, max_delta_step, min_constraint, max_constraint);
Guolin Ke's avatar
Guolin Ke committed
478
479
480
481
482
483
484
485
    if (((monotone_constraint > 0) && (left_output > right_output)) ||
      ((monotone_constraint < 0) && (left_output < right_output))) {
      return 0;
    }
    return GetLeafSplitGainGivenOutput(sum_left_gradients, sum_left_hessians, l1, l2, left_output)
      + GetLeafSplitGainGivenOutput(sum_right_gradients, sum_right_hessians, l1, l2, right_output);
  }

Guolin Ke's avatar
Guolin Ke committed
486
  /*!
487
  * \brief Calculate the output of a leaf based on regularized sum_gradients and sum_hessians
Guolin Ke's avatar
Guolin Ke committed
488
489
490
491
  * \param sum_gradients
  * \param sum_hessians
  * \return leaf output
  */
492
  static double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians, double l1, double l2, double max_delta_step,
493
    double min_constraint, double max_constraint) {
494
    double ret = CalculateSplittedLeafOutput(sum_gradients, sum_hessians, l1, l2, max_delta_step);
Guolin Ke's avatar
Guolin Ke committed
495
496
497
498
499
500
    if (ret < min_constraint) {
      ret = min_constraint;
    } else if (ret > max_constraint) {
      ret = max_constraint;
    }
    return ret;
Guolin Ke's avatar
Guolin Ke committed
501
  }
Guolin Ke's avatar
Guolin Ke committed
502

Guolin Ke's avatar
Guolin Ke committed
503
504
505
506
507
508
  /*!
  * \brief Calculate the split gain based on regularized sum_gradients and sum_hessians
  * \param sum_gradients
  * \param sum_hessians
  * \return split gain
  */
509
510
511
  static double GetLeafSplitGain(double sum_gradients, double sum_hessians, double l1, double l2, double max_delta_step) {
    double output = CalculateSplittedLeafOutput(sum_gradients, sum_hessians, l1, l2, max_delta_step);
    return GetLeafSplitGainGivenOutput(sum_gradients, sum_hessians, l1, l2, output);
Guolin Ke's avatar
Guolin Ke committed
512
513
514
515
516
517
  }

  static double GetLeafSplitGainGivenOutput(double sum_gradients, double sum_hessians, double l1, double l2, double output) {
    const double sg_l1 = ThresholdL1(sum_gradients, l1);
    return -(2.0 * sg_l1 * output + (sum_hessians + l2) * output * output);
  }
Guolin Ke's avatar
Guolin Ke committed
518

Guolin Ke's avatar
Guolin Ke committed
519
  void FindBestThresholdSequence(double sum_gradient, double sum_hessian, data_size_t num_data, double min_constraint, double max_constraint,
520
    double min_gain_shift, SplitInfo* output, int dir, bool skip_default_bin, bool use_na_as_missing) {
521
    const int8_t offset = meta_->offset;
Guolin Ke's avatar
Guolin Ke committed
522
523
524
525
526
527

    double best_sum_left_gradient = NAN;
    double best_sum_left_hessian = NAN;
    double best_gain = kMinScore;
    data_size_t best_left_count = 0;
    uint32_t best_threshold = static_cast<uint32_t>(meta_->num_bin);
528
    const double cnt_factor = num_data / sum_hessian;
Guolin Ke's avatar
Guolin Ke committed
529
530
531
532
533
    if (dir == -1) {
      double sum_right_gradient = 0.0f;
      double sum_right_hessian = kEpsilon;
      data_size_t right_count = 0;

534
535
      int t = meta_->num_bin - 1 - offset - use_na_as_missing;
      const int t_end = 1 - offset;
Guolin Ke's avatar
Guolin Ke committed
536
537
538
539

      // from right to left, and we don't need data in bin0
      for (; t >= t_end; --t) {
        // need to skip default bin
540
        if (skip_default_bin && (t + offset) == static_cast<int>(meta_->default_bin)) { continue; }
Guolin Ke's avatar
Guolin Ke committed
541

542
543
544
545
546
547
        const auto grad = GET_GRAD(data_, t);
        const auto hess = GET_HESS(data_, t);
        data_size_t cnt = static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
        sum_right_gradient += grad;
        sum_right_hessian += hess;
        right_count += cnt;
Guolin Ke's avatar
Guolin Ke committed
548
        // if data not enough, or sum hessian too small
Guolin Ke's avatar
Guolin Ke committed
549
        if (right_count < meta_->config->min_data_in_leaf
550
          || sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) continue;
Guolin Ke's avatar
Guolin Ke committed
551
552
        data_size_t left_count = num_data - right_count;
        // if data not enough
Guolin Ke's avatar
Guolin Ke committed
553
        if (left_count < meta_->config->min_data_in_leaf) break;
Guolin Ke's avatar
Guolin Ke committed
554
555
556

        double sum_left_hessian = sum_hessian - sum_right_hessian;
        // if sum hessian too small
Guolin Ke's avatar
Guolin Ke committed
557
        if (sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) break;
Guolin Ke's avatar
Guolin Ke committed
558
559
560

        double sum_left_gradient = sum_gradient - sum_right_gradient;
        // current split gain
Guolin Ke's avatar
Guolin Ke committed
561
        double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
562
563
          meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
          min_constraint, max_constraint, meta_->monotone_type);
Guolin Ke's avatar
Guolin Ke committed
564
565
566
567
568
569
570
571
572
573
574
        // gain with split is worse than without split
        if (current_gain <= min_gain_shift) continue;

        // mark to is splittable
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {
          best_left_count = left_count;
          best_sum_left_gradient = sum_left_gradient;
          best_sum_left_hessian = sum_left_hessian;
          // left is <= threshold, right is > threshold.  so this is t-1
575
          best_threshold = static_cast<uint32_t>(t - 1 + offset);
Guolin Ke's avatar
Guolin Ke committed
576
577
578
          best_gain = current_gain;
        }
      }
ChenZhiyong's avatar
ChenZhiyong committed
579
    } else {
Guolin Ke's avatar
Guolin Ke committed
580
581
582
583
584
      double sum_left_gradient = 0.0f;
      double sum_left_hessian = kEpsilon;
      data_size_t left_count = 0;

      int t = 0;
585
      const int t_end = meta_->num_bin - 2 - offset;
Guolin Ke's avatar
Guolin Ke committed
586

587
      if (use_na_as_missing && offset == 1) {
Guolin Ke's avatar
Guolin Ke committed
588
589
590
        sum_left_gradient = sum_gradient;
        sum_left_hessian = sum_hessian - kEpsilon;
        left_count = num_data;
591
        for (int i = 0; i < meta_->num_bin - offset; ++i) {
592
593
594
595
596
597
          const auto grad = GET_GRAD(data_, i);
          const auto hess = GET_HESS(data_, i);
          data_size_t cnt = static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
          sum_left_gradient -= grad;
          sum_left_hessian -= hess;
          left_count -= cnt;
Guolin Ke's avatar
Guolin Ke committed
598
599
600
601
        }
        t = -1;
      }

Guolin Ke's avatar
Guolin Ke committed
602
603
      for (; t <= t_end; ++t) {
        // need to skip default bin
604
        if (skip_default_bin && (t + offset) == static_cast<int>(meta_->default_bin)) { continue; }
Guolin Ke's avatar
Guolin Ke committed
605
        if (t >= 0) {
606
607
608
          sum_left_gradient += GET_GRAD(data_, t);
          sum_left_hessian += GET_HESS(data_, t);
          left_count += static_cast<data_size_t>(Common::RoundInt(GET_HESS(data_, t) * cnt_factor));
Guolin Ke's avatar
Guolin Ke committed
609
        }
Guolin Ke's avatar
Guolin Ke committed
610
        // if data not enough, or sum hessian too small
Guolin Ke's avatar
Guolin Ke committed
611
        if (left_count < meta_->config->min_data_in_leaf
612
          || sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) continue;
Guolin Ke's avatar
Guolin Ke committed
613
614
        data_size_t right_count = num_data - left_count;
        // if data not enough
Guolin Ke's avatar
Guolin Ke committed
615
        if (right_count < meta_->config->min_data_in_leaf) break;
Guolin Ke's avatar
Guolin Ke committed
616
617
618

        double sum_right_hessian = sum_hessian - sum_left_hessian;
        // if sum hessian too small
Guolin Ke's avatar
Guolin Ke committed
619
        if (sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) break;
Guolin Ke's avatar
Guolin Ke committed
620
621
622

        double sum_right_gradient = sum_gradient - sum_left_gradient;
        // current split gain
Guolin Ke's avatar
Guolin Ke committed
623
        double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
624
625
          meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
          min_constraint, max_constraint, meta_->monotone_type);
Guolin Ke's avatar
Guolin Ke committed
626
627
628
629
630
631
632
633
634
635
        // gain with split is worse than without split
        if (current_gain <= min_gain_shift) continue;

        // mark to is splittable
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {
          best_left_count = left_count;
          best_sum_left_gradient = sum_left_gradient;
          best_sum_left_hessian = sum_left_hessian;
636
          best_threshold = static_cast<uint32_t>(t + offset);
Guolin Ke's avatar
Guolin Ke committed
637
638
639
640
641
642
643
644
645
          best_gain = current_gain;
        }
      }
    }

    if (is_splittable_ && best_gain > output->gain) {
      // update split information
      output->threshold = best_threshold;
      output->left_output = CalculateSplittedLeafOutput(best_sum_left_gradient, best_sum_left_hessian,
646
647
        meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
        min_constraint, max_constraint);
Guolin Ke's avatar
Guolin Ke committed
648
649
650
651
      output->left_count = best_left_count;
      output->left_sum_gradient = best_sum_left_gradient;
      output->left_sum_hessian = best_sum_left_hessian - kEpsilon;
      output->right_output = CalculateSplittedLeafOutput(sum_gradient - best_sum_left_gradient,
652
653
654
        sum_hessian - best_sum_left_hessian,
        meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
        min_constraint, max_constraint);
Guolin Ke's avatar
Guolin Ke committed
655
656
657
658
      output->right_count = num_data - best_left_count;
      output->right_sum_gradient = sum_gradient - best_sum_left_gradient;
      output->right_sum_hessian = sum_hessian - best_sum_left_hessian - kEpsilon;
      output->gain = best_gain;
Guolin Ke's avatar
Guolin Ke committed
659
      output->default_left = dir == -1;
Guolin Ke's avatar
Guolin Ke committed
660
661
662
    }
  }

Guolin Ke's avatar
Guolin Ke committed
663
  const FeatureMetainfo* meta_;
Guolin Ke's avatar
Guolin Ke committed
664
  /*! \brief sum of gradient of each bin */
665
  hist_t* data_;
Guolin Ke's avatar
Guolin Ke committed
666
  bool is_splittable_ = true;
667

Guolin Ke's avatar
Guolin Ke committed
668
  std::function<void(double, double, data_size_t, double, double, SplitInfo*)> find_best_threshold_fun_;
Guolin Ke's avatar
Guolin Ke committed
669
};
Guolin Ke's avatar
Guolin Ke committed
670
class HistogramPool {
671
public:
Guolin Ke's avatar
Guolin Ke committed
672
673
674
675
  /*!
  * \brief Constructor
  */
  HistogramPool() {
Guolin Ke's avatar
Guolin Ke committed
676
677
    cache_size_ = 0;
    total_size_ = 0;
Guolin Ke's avatar
Guolin Ke committed
678
679
680
681
682
683
684
685
686
687
688
  }
  /*!
  * \brief Destructor
  */
  ~HistogramPool() {
  }
  /*!
  * \brief Reset pool size
  * \param cache_size Max cache size
  * \param total_size Total size will be used
  */
Guolin Ke's avatar
Guolin Ke committed
689
  void Reset(int cache_size, int total_size) {
Guolin Ke's avatar
Guolin Ke committed
690
691
692
693
694
695
696
697
698
    cache_size_ = cache_size;
    // at least need 2 bucket to store smaller leaf and larger leaf
    CHECK(cache_size_ >= 2);
    total_size_ = total_size;
    if (cache_size_ > total_size_) {
      cache_size_ = total_size_;
    }
    is_enough_ = (cache_size_ == total_size_);
    if (!is_enough_) {
699
700
701
      mapper_.resize(total_size_);
      inverse_mapper_.resize(cache_size_);
      last_used_time_.resize(cache_size_);
Guolin Ke's avatar
Guolin Ke committed
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
      ResetMap();
    }
  }
  /*!
  * \brief Reset mapper
  */
  void ResetMap() {
    if (!is_enough_) {
      cur_time_ = 0;
      std::fill(mapper_.begin(), mapper_.end(), -1);
      std::fill(inverse_mapper_.begin(), inverse_mapper_.end(), -1);
      std::fill(last_used_time_.begin(), last_used_time_.end(), 0);
    }
  }

717
  void DynamicChangeSize(const Dataset* train_data, bool is_hist_colwise, const Config* config, int cache_size, int total_size) {
Guolin Ke's avatar
Guolin Ke committed
718
    if (feature_metas_.empty()) {
719
      uint64_t bin_cnt_over_features = 0;
Guolin Ke's avatar
Guolin Ke committed
720
721
722
      int num_feature = train_data->num_features();
      feature_metas_.resize(num_feature);
      for (int i = 0; i < num_feature; ++i) {
Guolin Ke's avatar
Guolin Ke committed
723
        feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
724
        bin_cnt_over_features += static_cast<uint64_t>(feature_metas_[i].num_bin);
Guolin Ke's avatar
Guolin Ke committed
725
        feature_metas_[i].default_bin = train_data->FeatureBinMapper(i)->GetDefaultBin();
Guolin Ke's avatar
Guolin Ke committed
726
        feature_metas_[i].missing_type = train_data->FeatureBinMapper(i)->missing_type();
Guolin Ke's avatar
Guolin Ke committed
727
        feature_metas_[i].monotone_type = train_data->FeatureMonotone(i);
Guolin Ke's avatar
Guolin Ke committed
728
        feature_metas_[i].penalty = train_data->FeaturePenalte(i);
Guolin Ke's avatar
Guolin Ke committed
729
        if (train_data->FeatureBinMapper(i)->GetMostFreqBin() == 0) {
730
          feature_metas_[i].offset = 1;
Guolin Ke's avatar
Guolin Ke committed
731
        } else {
732
          feature_metas_[i].offset = 0;
Guolin Ke's avatar
Guolin Ke committed
733
        }
Guolin Ke's avatar
Guolin Ke committed
734
        feature_metas_[i].config = config;
735
        feature_metas_[i].bin_type = train_data->FeatureBinMapper(i)->bin_type();
Guolin Ke's avatar
Guolin Ke committed
736
      }
737
      Log::Info("Total Bins %d", bin_cnt_over_features);
Guolin Ke's avatar
Guolin Ke committed
738
    }
Guolin Ke's avatar
Guolin Ke committed
739
    int old_cache_size = static_cast<int>(pool_.size());
Guolin Ke's avatar
Guolin Ke committed
740
    Reset(cache_size, total_size);
Guolin Ke's avatar
Guolin Ke committed
741
742
743
744
745

    if (cache_size > old_cache_size) {
      pool_.resize(cache_size);
      data_.resize(cache_size);
    }
746
    int num_total_bin = static_cast<int>(train_data->NumTotalBin());
Guolin Ke's avatar
Guolin Ke committed
747

748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
    std::vector<int> offsets;
    if (is_hist_colwise) {
      int offset = 0;
      for (int j = 0; j < train_data->num_features(); ++j) {
        offset += train_data->SubFeatureBinOffset(j);
        offsets.push_back(offset);
        auto num_bin = train_data->FeatureNumBin(j);
        if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) {
          num_bin -= 1;
        }
        offset += num_bin;
      }
    } else {
      num_total_bin = 1;
      for (int j = 0; j < train_data->num_features(); ++j) {
        offsets.push_back(num_total_bin);
        num_total_bin += train_data->FeatureBinMapper(j)->num_bin();
        if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) {
          num_total_bin -= 1;
        }
      }
    }
770
    OMP_INIT_EX();
Guolin Ke's avatar
Guolin Ke committed
771
    #pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
772
    for (int i = old_cache_size; i < cache_size; ++i) {
773
      OMP_LOOP_EX_BEGIN();
Guolin Ke's avatar
Guolin Ke committed
774
      pool_[i].reset(new FeatureHistogram[train_data->num_features()]);
775
      data_[i].resize(num_total_bin * 2);
Guolin Ke's avatar
Guolin Ke committed
776
      for (int j = 0; j < train_data->num_features(); ++j) {
777
        pool_[i][j].Init(data_[i].data() + offsets[j] * 2, &feature_metas_[j]);
Guolin Ke's avatar
Guolin Ke committed
778
      }
779
      OMP_LOOP_EX_END();
Guolin Ke's avatar
Guolin Ke committed
780
    }
781
    OMP_THROW_EX();
782
    train_data_ = train_data;
Guolin Ke's avatar
Guolin Ke committed
783
784
  }

Guolin Ke's avatar
Guolin Ke committed
785
  void ResetConfig(const Config* config) {
Guolin Ke's avatar
Guolin Ke committed
786
    int size = static_cast<int>(feature_metas_.size());
787
    #pragma omp parallel for schedule(static, 512) if (size >= 1024)
Guolin Ke's avatar
Guolin Ke committed
788
    for (int i = 0; i < size; ++i) {
Guolin Ke's avatar
Guolin Ke committed
789
      feature_metas_[i].config = config;
790
791
      feature_metas_[i].monotone_type = train_data_->FeatureMonotone(i);
      feature_metas_[i].penalty = train_data_->FeaturePenalte(i);
Guolin Ke's avatar
Guolin Ke committed
792
793
    }
  }
Guolin Ke's avatar
Guolin Ke committed
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
  /*!
  * \brief Get data for the specific index
  * \param idx which index want to get
  * \param out output data will store into this
  * \return True if this index is in the pool, False if this index is not in the pool
  */
  bool Get(int idx, FeatureHistogram** out) {
    if (is_enough_) {
      *out = pool_[idx].get();
      return true;
    } else if (mapper_[idx] >= 0) {
      int slot = mapper_[idx];
      *out = pool_[slot].get();
      last_used_time_[slot] = ++cur_time_;
      return true;
    } else {
810
      // choose the least used slot
Guolin Ke's avatar
Guolin Ke committed
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
      int slot = static_cast<int>(ArrayArgs<int>::ArgMin(last_used_time_));
      *out = pool_[slot].get();
      last_used_time_[slot] = ++cur_time_;

      // reset previous mapper
      if (inverse_mapper_[slot] >= 0) mapper_[inverse_mapper_[slot]] = -1;

      // update current mapper
      mapper_[idx] = slot;
      inverse_mapper_[slot] = idx;
      return false;
    }
  }

  /*!
  * \brief Move data from one index to another index
  * \param src_idx
  * \param dst_idx
  */
  void Move(int src_idx, int dst_idx) {
    if (is_enough_) {
      std::swap(pool_[src_idx], pool_[dst_idx]);
      return;
    }
    if (mapper_[src_idx] < 0) {
      return;
    }
    // get slot of src idx
    int slot = mapper_[src_idx];
    // reset src_idx
    mapper_[src_idx] = -1;

    // move to dst idx
    mapper_[dst_idx] = slot;
    last_used_time_[slot] = ++cur_time_;
    inverse_mapper_[slot] = dst_idx;
  }
848

849
private:
Guolin Ke's avatar
Guolin Ke committed
850
  std::vector<std::unique_ptr<FeatureHistogram[]>> pool_;
851
  std::vector<std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>> data_;
Guolin Ke's avatar
Guolin Ke committed
852
  std::vector<FeatureMetainfo> feature_metas_;
Guolin Ke's avatar
Guolin Ke committed
853
854
855
856
857
858
859
  int cache_size_;
  int total_size_;
  bool is_enough_ = false;
  std::vector<int> mapper_;
  std::vector<int> inverse_mapper_;
  std::vector<int> last_used_time_;
  int cur_time_ = 0;
860
  const Dataset* train_data_;
Guolin Ke's avatar
Guolin Ke committed
861
862
};

Guolin Ke's avatar
Guolin Ke committed
863
}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
864
#endif   // LightGBM_TREELEARNER_FEATURE_HISTOGRAM_HPP_