leaf_splits.hpp 4.73 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
#ifndef LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
#define LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_

Guolin Ke's avatar
Guolin Ke committed
4
5
#include <limits>

Guolin Ke's avatar
Guolin Ke committed
6
#include <LightGBM/meta.h>
Guolin Ke's avatar
Guolin Ke committed
7
#include "data_partition.hpp"
Guolin Ke's avatar
Guolin Ke committed
8
9
10
11
12
13

#include <vector>

namespace LightGBM {

/*!
Qiwei Ye's avatar
Qiwei Ye committed
14
* \brief used to find split candidates for a leaf
Guolin Ke's avatar
Guolin Ke committed
15
16
*/
class LeafSplits {
Nikita Titov's avatar
Nikita Titov committed
17
 public:
Guolin Ke's avatar
Guolin Ke committed
18
19
  LeafSplits(data_size_t num_data)
    :num_data_in_leaf_(num_data), num_data_(num_data),
Guolin Ke's avatar
Guolin Ke committed
20
21
    data_indices_(nullptr) {
  }
Guolin Ke's avatar
Guolin Ke committed
22
23
24
25
  void ResetNumData(data_size_t num_data) {
    num_data_ = num_data;
    num_data_in_leaf_ = num_data;
  }
Guolin Ke's avatar
Guolin Ke committed
26
27
28
29
  ~LeafSplits() {
  }

  /*!
30

Qiwei Ye's avatar
Qiwei Ye committed
31
  * \brief Init split on current leaf on partial data. 
Guolin Ke's avatar
Guolin Ke committed
32
33
34
35
36
  * \param leaf Index of current leaf
  * \param data_partition current data partition
  * \param sum_gradients
  * \param sum_hessians
  */
Guolin Ke's avatar
Guolin Ke committed
37
  void Init(int leaf, const DataPartition* data_partition, double sum_gradients, double sum_hessians) {
Guolin Ke's avatar
Guolin Ke committed
38
    leaf_index_ = leaf;
Guolin Ke's avatar
Guolin Ke committed
39
    data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
Guolin Ke's avatar
Guolin Ke committed
40
41
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians;
Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
46
47
48
    min_val_ = -std::numeric_limits<double>::max();
    max_val_ = std::numeric_limits<double>::max();
  }

  void SetValueConstraint(double min, double max) {
    min_val_ = min;
    max_val_ = max;
Guolin Ke's avatar
Guolin Ke committed
49
50
  }

Guolin Ke's avatar
Guolin Ke committed
51

Guolin Ke's avatar
Guolin Ke committed
52
  /*!
53
  * \brief Init splits on current leaf, it will traverse all data to sum up the results
Guolin Ke's avatar
Guolin Ke committed
54
55
56
  * \param gradients
  * \param hessians
  */
57
  void Init(const score_t* gradients, const score_t* hessians) {
Guolin Ke's avatar
Guolin Ke committed
58
59
60
    num_data_in_leaf_ = num_data_;
    leaf_index_ = 0;
    data_indices_ = nullptr;
Guolin Ke's avatar
Guolin Ke committed
61
62
    double tmp_sum_gradients = 0.0f;
    double tmp_sum_hessians = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
67
68
69
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
    for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
      tmp_sum_gradients += gradients[i];
      tmp_sum_hessians += hessians[i];
    }
    sum_gradients_ = tmp_sum_gradients;
    sum_hessians_ = tmp_sum_hessians;
Guolin Ke's avatar
Guolin Ke committed
70
71
    min_val_ = -std::numeric_limits<double>::max();
    max_val_ = std::numeric_limits<double>::max();
Guolin Ke's avatar
Guolin Ke committed
72
73
74
  }

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
75
  * \brief Init splits on current leaf of partial data.
Guolin Ke's avatar
Guolin Ke committed
76
77
78
79
80
  * \param leaf Index of current leaf
  * \param data_partition current data partition
  * \param gradients
  * \param hessians
  */
81
  void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t* hessians) {
Guolin Ke's avatar
Guolin Ke committed
82
    leaf_index_ = leaf;
Guolin Ke's avatar
Guolin Ke committed
83
    data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
Guolin Ke's avatar
Guolin Ke committed
84
85
    double tmp_sum_gradients = 0.0f;
    double tmp_sum_hessians = 0.0f;
Guolin Ke's avatar
Guolin Ke committed
86
87
88
89
90
91
92
93
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
    for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
      data_size_t idx = data_indices_[i];
      tmp_sum_gradients += gradients[idx];
      tmp_sum_hessians += hessians[idx];
    }
    sum_gradients_ = tmp_sum_gradients;
    sum_hessians_ = tmp_sum_hessians;
Guolin Ke's avatar
Guolin Ke committed
94
95
    min_val_ = -std::numeric_limits<double>::max();
    max_val_ = std::numeric_limits<double>::max();
Guolin Ke's avatar
Guolin Ke committed
96
97
98
99
100
101
102
103
  }


  /*!
  * \brief Init splits on current leaf, only update sum_gradients and sum_hessians
  * \param sum_gradients
  * \param sum_hessians
  */
Guolin Ke's avatar
Guolin Ke committed
104
  void Init(double sum_gradients, double sum_hessians) {
Guolin Ke's avatar
Guolin Ke committed
105
106
107
    leaf_index_ = 0;
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians;
Guolin Ke's avatar
Guolin Ke committed
108
109
    min_val_ = -std::numeric_limits<double>::max();
    max_val_ = std::numeric_limits<double>::max();
Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
114
115
116
  }

  /*!
  * \brief Init splits on current leaf
  */
  void Init() {
    leaf_index_ = -1;
Guolin Ke's avatar
Guolin Ke committed
117
118
    data_indices_ = nullptr;
    num_data_in_leaf_ = 0;
Guolin Ke's avatar
Guolin Ke committed
119
120
    min_val_ = -std::numeric_limits<double>::max();
    max_val_ = std::numeric_limits<double>::max();
Guolin Ke's avatar
Guolin Ke committed
121
122
123
124
125
126
127
128
129
130
  }


  /*! \brief Get current leaf index */
  int LeafIndex() const { return leaf_index_; }

  /*! \brief Get numer of data in current leaf */
  data_size_t num_data_in_leaf() const { return num_data_in_leaf_; }

  /*! \brief Get sum of gradients of current leaf */
Guolin Ke's avatar
Guolin Ke committed
131
  double sum_gradients() const { return sum_gradients_; }
132

Guolin Ke's avatar
Guolin Ke committed
133
  /*! \brief Get sum of hessians of current leaf */
Guolin Ke's avatar
Guolin Ke committed
134
  double sum_hessians() const { return sum_hessians_; }
Guolin Ke's avatar
Guolin Ke committed
135

Guolin Ke's avatar
Guolin Ke committed
136
137
138
139
  double max_constraint() const { return max_val_; }

  double min_constraint() const { return min_val_; }

Guolin Ke's avatar
Guolin Ke committed
140
  /*! \brief Get indices of data of current leaf */
Guolin Ke's avatar
Guolin Ke committed
141
  const data_size_t* data_indices() const { return data_indices_; }
Guolin Ke's avatar
Guolin Ke committed
142
143


Nikita Titov's avatar
Nikita Titov committed
144
 private:
Guolin Ke's avatar
Guolin Ke committed
145
146
147
148
149
150
151
  /*! \brief current leaf index */
  int leaf_index_;
  /*! \brief number of data on current leaf */
  data_size_t num_data_in_leaf_;
  /*! \brief number of all training data */
  data_size_t num_data_;
  /*! \brief sum of gradients of current leaf */
Guolin Ke's avatar
Guolin Ke committed
152
  double sum_gradients_;
Guolin Ke's avatar
Guolin Ke committed
153
  /*! \brief sum of hessians of current leaf */
Guolin Ke's avatar
Guolin Ke committed
154
  double sum_hessians_;
Guolin Ke's avatar
Guolin Ke committed
155
  /*! \brief indices of data of current leaf */
Guolin Ke's avatar
Guolin Ke committed
156
  const data_size_t* data_indices_;
Guolin Ke's avatar
Guolin Ke committed
157
158
  double min_val_;
  double max_val_;
Guolin Ke's avatar
Guolin Ke committed
159
160
161
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
162
#endif   // LightGBM_TREELEARNER_LEAF_SPLITS_HPP_