tree.h 6.59 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/dataset.h>

#include <string>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
9
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

namespace LightGBM {


/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Construtor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
34
35
36
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
Guolin Ke's avatar
Guolin Ke committed
37
38
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
39
  * \param threshold_double Threshold on feature value
Guolin Ke's avatar
Guolin Ke committed
40
41
  * \param left_value Model Left child output
  * \param right_value Model Right child output
Guolin Ke's avatar
Guolin Ke committed
42
43
  * \param left_cnt Count of left child
  * \param right_cnt Count of right child
Guolin Ke's avatar
Guolin Ke committed
44
45
46
  * \param gain Split gain
  * \return The index of new leaf.
  */
Guolin Ke's avatar
Guolin Ke committed
47
  int Split(int leaf, int feature, uint32_t threshold, int real_feature,
48
    double threshold_double, double left_value,
Guolin Ke's avatar
Guolin Ke committed
49
    double right_value, data_size_t left_cnt, data_size_t right_cnt, double gain);
Guolin Ke's avatar
Guolin Ke committed
50

Guolin Ke's avatar
Guolin Ke committed
51
  /*! \brief Get the output of one leaf */
52
  inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
Guolin Ke's avatar
Guolin Ke committed
53

Guolin Ke's avatar
Guolin Ke committed
54
55
56
57
58
  /*! \brief Set the output of one leaf */
  inline void SetLeafOutput(int leaf, double output) {
    leaf_value_[leaf] = output;
  }

Guolin Ke's avatar
Guolin Ke committed
59
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
60
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
61
62
63
64
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
Guolin Ke's avatar
Guolin Ke committed
65
66
67
  void AddPredictionToScore(const Dataset* data, 
    data_size_t num_data,
    double* score) const;
Guolin Ke's avatar
Guolin Ke committed
68
69

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
70
  * \brief Adding prediction value of this tree model to scorese
Guolin Ke's avatar
Guolin Ke committed
71
72
73
74
75
76
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
77
                            const data_size_t* used_data_indices,
78
                            data_size_t num_data, double* score) const;
Guolin Ke's avatar
Guolin Ke committed
79
80

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
81
  * \brief Prediction on one record 
Guolin Ke's avatar
Guolin Ke committed
82
83
84
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
85
86
  inline double Predict(const double* feature_values) const;
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
87
88
89
90

  /*! \brief Get Number of leaves*/
  inline int num_leaves() const { return num_leaves_; }

Guolin Ke's avatar
Guolin Ke committed
91
92
93
  /*! \brief Get depth of specific leaf*/
  inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }

wxchan's avatar
wxchan committed
94
  /*! \brief Get feature of specific split*/
Guolin Ke's avatar
Guolin Ke committed
95
  inline int split_feature(int split_idx) const { return split_feature_[split_idx]; }
wxchan's avatar
wxchan committed
96

Guolin Ke's avatar
Guolin Ke committed
97
98
  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
99
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
100
101
  * \param rate The factor of shrinkage
  */
102
  inline void Shrinkage(double rate) {
Guolin Ke's avatar
Guolin Ke committed
103
#pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
104
    for (int i = 0; i < num_leaves_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
105
      leaf_value_[i] *= rate;
Guolin Ke's avatar
Guolin Ke committed
106
    }
Guolin Ke's avatar
Guolin Ke committed
107
    shrinkage_ *= rate;
Guolin Ke's avatar
Guolin Ke committed
108
109
  }

wxchan's avatar
wxchan committed
110
  /*! \brief Serialize this object to string*/
Guolin Ke's avatar
Guolin Ke committed
111
112
  std::string ToString();

wxchan's avatar
wxchan committed
113
114
115
  /*! \brief Serialize this object to json*/
  std::string ToJSON();

Guolin Ke's avatar
Guolin Ke committed
116
117
118
119
120
121
122
123
124
  template<typename T>
  static bool NumericalDecision(T fval, T threshold) {
    if (fval <= threshold) {
      return true;
    } else {
      return false;
    }
  }

Guolin Ke's avatar
Guolin Ke committed
125
private:
Guolin Ke's avatar
Guolin Ke committed
126

Guolin Ke's avatar
Guolin Ke committed
127
128
  inline int GetLeaf(std::vector<std::unique_ptr<BinIterator>>& iterators, 
    data_size_t data_idx) const;
Guolin Ke's avatar
Guolin Ke committed
129

Guolin Ke's avatar
Guolin Ke committed
130
131
  inline int GetLeafRaw(std::vector<std::unique_ptr<BinIterator>>& iterators,
    data_size_t data_idx) const;
Guolin Ke's avatar
Guolin Ke committed
132
133

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
134
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
135
136
137
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
138
  inline int GetLeaf(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
139

wxchan's avatar
wxchan committed
140
141
142
  /*! \brief Serialize one node to json*/
  inline std::string NodeToJSON(int index);

Guolin Ke's avatar
Guolin Ke committed
143
144
145
146
147
148
  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current levas*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
Guolin Ke's avatar
Guolin Ke committed
149
  std::vector<int> left_child_;
Guolin Ke's avatar
Guolin Ke committed
150
  /*! \brief A non-leaf node's right child */
Guolin Ke's avatar
Guolin Ke committed
151
  std::vector<int> right_child_;
Guolin Ke's avatar
Guolin Ke committed
152
  /*! \brief A non-leaf node's split feature */
Guolin Ke's avatar
Guolin Ke committed
153
  std::vector<int> split_feature_inner;
Guolin Ke's avatar
Guolin Ke committed
154
  /*! \brief A non-leaf node's split feature, the original index */
Guolin Ke's avatar
Guolin Ke committed
155
  std::vector<int> split_feature_;
Guolin Ke's avatar
Guolin Ke committed
156
  /*! \brief A non-leaf node's split threshold in bin */
Guolin Ke's avatar
Guolin Ke committed
157
  std::vector<uint32_t> threshold_in_bin_;
Guolin Ke's avatar
Guolin Ke committed
158
  /*! \brief A non-leaf node's split threshold in feature value */
Guolin Ke's avatar
Guolin Ke committed
159
  std::vector<double> threshold_;
Guolin Ke's avatar
Guolin Ke committed
160
  /*! \brief A non-leaf node's split gain */
Guolin Ke's avatar
Guolin Ke committed
161
  std::vector<double> split_gain_;
Guolin Ke's avatar
Guolin Ke committed
162
163
  // used for leaf node
  /*! \brief The parent of leaf */
Guolin Ke's avatar
Guolin Ke committed
164
  std::vector<int> leaf_parent_;
Guolin Ke's avatar
Guolin Ke committed
165
  /*! \brief Output of leaves */
Guolin Ke's avatar
Guolin Ke committed
166
  std::vector<double> leaf_value_;
Guolin Ke's avatar
Guolin Ke committed
167
168
169
170
171
172
  /*! \brief DataCount of leaves */
  std::vector<data_size_t> leaf_count_;
  /*! \brief Output of non-leaf nodes */
  std::vector<double> internal_value_;
  /*! \brief DataCount of non-leaf nodes */
  std::vector<data_size_t> internal_count_;
Guolin Ke's avatar
Guolin Ke committed
173
  /*! \brief Depth for leaves */
Guolin Ke's avatar
Guolin Ke committed
174
  std::vector<int> leaf_depth_;
Guolin Ke's avatar
Guolin Ke committed
175
  double shrinkage_;
Guolin Ke's avatar
Guolin Ke committed
176
177
178
};


179
inline double Tree::Predict(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
180
181
182
183
  int leaf = GetLeaf(feature_values);
  return LeafOutput(leaf);
}

184
inline int Tree::PredictLeafIndex(const double* feature_values) const {
wxchan's avatar
wxchan committed
185
186
187
188
  int leaf = GetLeaf(feature_values);
  return leaf;
}

Guolin Ke's avatar
Guolin Ke committed
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/*!
* \brief Walk the tree for one data row using binned values.
*        The iterator for a split is taken at the split node's own index.
*        NOTE(review): always reads node 0, so this presumably expects a tree
*        with at least one split - confirm against callers.
*/
inline int Tree::GetLeaf(std::vector<std::unique_ptr<BinIterator>>& iterators,
  data_size_t data_idx) const {
  int cur = 0;
  // Non-negative values are split nodes; a negative value is an encoded leaf.
  while (cur >= 0) {
    const uint32_t bin = iterators[cur]->Get(data_idx);
    cur = NumericalDecision<uint32_t>(bin, threshold_in_bin_[cur])
      ? left_child_[cur]
      : right_child_[cur];
  }
  // Leaves are stored as the bitwise complement of their index.
  return ~cur;
}

inline int Tree::GetLeafRaw(std::vector<std::unique_ptr<BinIterator>>& iterators,
  data_size_t data_idx) const {
Guolin Ke's avatar
Guolin Ke committed
206
207
  int node = 0;
  while (node >= 0) {
Guolin Ke's avatar
Guolin Ke committed
208
209
210
    if (NumericalDecision<uint32_t>(
      iterators[split_feature_inner[node]]->Get(data_idx),
      threshold_in_bin_[node])) {
Guolin Ke's avatar
Guolin Ke committed
211
212
213
214
215
216
217
218
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

219
inline int Tree::GetLeaf(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
220
221
  int node = 0;
  while (node >= 0) {
Guolin Ke's avatar
Guolin Ke committed
222
223
    if (NumericalDecision<double>(
        feature_values[split_feature_[node]],
Guolin Ke's avatar
Guolin Ke committed
224
        threshold_[node])) {
Guolin Ke's avatar
Guolin Ke committed
225
226
227
228
229
230
231
232
233
234
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
235
#endif   // LIGHTGBM_TREE_H_