tree.h 7 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/feature.h>
#include <LightGBM/dataset.h>

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

namespace LightGBM {


/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Constructor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
35
36
37
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
Guolin Ke's avatar
Guolin Ke committed
38
  * \param bin_type type of this feature, numerical or categorical
Guolin Ke's avatar
Guolin Ke committed
39
40
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
41
  * \param threshold_double Threshold on feature value
Guolin Ke's avatar
Guolin Ke committed
42
43
  * \param left_value Model Left child output
  * \param right_value Model Right child output
Guolin Ke's avatar
Guolin Ke committed
44
45
  * \param left_cnt Count of left child
  * \param right_cnt Count of right child
Guolin Ke's avatar
Guolin Ke committed
46
47
48
  * \param gain Split gain
  * \return The index of new leaf.
  */
Guolin Ke's avatar
Guolin Ke committed
49
  int Split(int leaf, int feature, BinType bin_type, unsigned int threshold, int real_feature,
50
    double threshold_double, double left_value,
Guolin Ke's avatar
Guolin Ke committed
51
    double right_value, data_size_t left_cnt, data_size_t right_cnt, double gain);
Guolin Ke's avatar
Guolin Ke committed
52
53

  /*! \brief Get the output of one leave */
54
  inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
Guolin Ke's avatar
Guolin Ke committed
55
56

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
57
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
58
59
60
61
62
63
64
65
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data, data_size_t num_data,
                                                       score_t* score) const;

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
66
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
67
68
69
70
71
72
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
73
74
                            const data_size_t* used_data_indices,
                            data_size_t num_data, score_t* score) const;
Guolin Ke's avatar
Guolin Ke committed
75
76

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
77
  * \brief Prediction on one record 
Guolin Ke's avatar
Guolin Ke committed
78
79
80
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
81
82
  inline double Predict(const double* feature_values) const;
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
83
84
85
86

  /*!
  * \brief Get the current number of leaves
  * \return Value of num_leaves_
  */
  inline int num_leaves() const {
    return num_leaves_;
  }

Guolin Ke's avatar
Guolin Ke committed
87
88
89
  /*!
  * \brief Get the depth of a specific leaf
  * \param leaf_idx Index of the leaf
  * \return The entry of leaf_depth_ at this index
  */
  inline int leaf_depth(int leaf_idx) const {
    return leaf_depth_[leaf_idx];
  }

wxchan's avatar
wxchan committed
90
  /*! \brief Get feature of specific split*/
91
  inline int split_feature_real(int split_idx) const { return split_feature_real_[split_idx]; }
wxchan's avatar
wxchan committed
92

Guolin Ke's avatar
Guolin Ke committed
93
94
  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
95
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
96
97
  * \param rate The factor of shrinkage
  */
98
  inline void Shrinkage(double rate) {
Guolin Ke's avatar
Guolin Ke committed
99
    for (int i = 0; i < num_leaves_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
100
      leaf_value_[i] = leaf_value_[i] * rate;
Guolin Ke's avatar
Guolin Ke committed
101
102
103
    }
  }

wxchan's avatar
wxchan committed
104
  /*! \brief Serialize this object to string*/
Guolin Ke's avatar
Guolin Ke committed
105
106
  std::string ToString();

wxchan's avatar
wxchan committed
107
108
109
  /*! \brief Serialize this object to json*/
  std::string ToJSON();

Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
  /*!
  * \brief Decision rule of the "is" (categorical) split type
  * \param fval Value of the feature
  * \param threshold Threshold of the split
  * \return true when both arguments are equal after conversion to int
  */
  template<typename T>
  static bool CategoricalDecision(T fval, T threshold) {
    const int category = static_cast<int>(fval);
    const int target = static_cast<int>(threshold);
    return category == target;
  }

  /*!
  * \brief Decision rule of the "no greater" (numerical) split type
  * \param fval Value of the feature
  * \param threshold Threshold of the split
  * \return true when fval <= threshold
  */
  template<typename T>
  static bool NumericalDecision(T fval, T threshold) {
    return fval <= threshold;
  }

  /*!
  * \brief Get the textual name of a decision type
  * \param type Decision type code
  * \return "no_greater" for type 0, "is" for any other value
  */
  static const char* GetDecisionTypeName(int8_t type) {
    return type == 0 ? "no_greater" : "is";
  }

  /*!
  * \brief Decision functions on bin values, indexed by decision type.
  *        GetLeaf on bin iterators calls them with (bin of the record, threshold_in_bin_)
  */
  static std::vector<std::function<bool(unsigned int, unsigned int)>> inner_decision_funs;
  /*!
  * \brief Decision functions on feature values, indexed by decision type.
  *        GetLeaf on feature values calls them with (feature value, threshold_)
  */
  static std::vector<std::function<bool(double, double)>> decision_funs;

Guolin Ke's avatar
Guolin Ke committed
139
140
private:
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
141
  * \brief Find leaf index of which record belongs by data
Guolin Ke's avatar
Guolin Ke committed
142
143
144
145
  * \param iterators Bin iterators of the features, indexed by the split feature index
  * \param data_idx Index of record
  * \return Leaf index
  */
Guolin Ke's avatar
Guolin Ke committed
146
  inline int GetLeaf(const std::vector<std::unique_ptr<BinIterator>>& iterators,
Guolin Ke's avatar
Guolin Ke committed
147
148
149
                                           data_size_t data_idx) const;

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
150
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
151
152
153
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
154
  inline int GetLeaf(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
155

wxchan's avatar
wxchan committed
156
157
158
  /*! \brief Serialize one node to json*/
  inline std::string NodeToJSON(int index);

Guolin Ke's avatar
Guolin Ke committed
159
160
161
162
163
164
  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current leaves*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
Guolin Ke's avatar
Guolin Ke committed
165
  std::vector<int> left_child_;
Guolin Ke's avatar
Guolin Ke committed
166
  /*! \brief A non-leaf node's right child */
Guolin Ke's avatar
Guolin Ke committed
167
  std::vector<int> right_child_;
Guolin Ke's avatar
Guolin Ke committed
168
  /*! \brief A non-leaf node's split feature */
Guolin Ke's avatar
Guolin Ke committed
169
  std::vector<int> split_feature_;
Guolin Ke's avatar
Guolin Ke committed
170
  /*! \brief A non-leaf node's split feature, the original index */
Guolin Ke's avatar
Guolin Ke committed
171
  std::vector<int> split_feature_real_;
Guolin Ke's avatar
Guolin Ke committed
172
  /*! \brief A non-leaf node's split threshold in bin */
Guolin Ke's avatar
Guolin Ke committed
173
  std::vector<unsigned int> threshold_in_bin_;
Guolin Ke's avatar
Guolin Ke committed
174
  /*! \brief A non-leaf node's split threshold in feature value */
Guolin Ke's avatar
Guolin Ke committed
175
  std::vector<double> threshold_;
Guolin Ke's avatar
Guolin Ke committed
176
177
  /*! \brief Decision type, 0 for '<='(numerical feature), 1 for 'is'(categorical feature) */
  std::vector<int8_t> decision_type_;
Guolin Ke's avatar
Guolin Ke committed
178
  /*! \brief A non-leaf node's split gain */
Guolin Ke's avatar
Guolin Ke committed
179
  std::vector<double> split_gain_;
Guolin Ke's avatar
Guolin Ke committed
180
181
  // used for leaf node
  /*! \brief The parent of leaf */
Guolin Ke's avatar
Guolin Ke committed
182
  std::vector<int> leaf_parent_;
Guolin Ke's avatar
Guolin Ke committed
183
  /*! \brief Output of leaves */
Guolin Ke's avatar
Guolin Ke committed
184
  std::vector<double> leaf_value_;
Guolin Ke's avatar
Guolin Ke committed
185
186
187
188
189
190
  /*! \brief DataCount of leaves */
  std::vector<data_size_t> leaf_count_;
  /*! \brief Output of non-leaf nodes */
  std::vector<double> internal_value_;
  /*! \brief DataCount of non-leaf nodes */
  std::vector<data_size_t> internal_count_;
Guolin Ke's avatar
Guolin Ke committed
191
  /*! \brief Depth for leaves */
Guolin Ke's avatar
Guolin Ke committed
192
  std::vector<int> leaf_depth_;
Guolin Ke's avatar
Guolin Ke committed
193
194
195
};


196
inline double Tree::Predict(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
197
198
199
200
  int leaf = GetLeaf(feature_values);
  return LeafOutput(leaf);
}

201
inline int Tree::PredictLeafIndex(const double* feature_values) const {
wxchan's avatar
wxchan committed
202
203
204
205
  int leaf = GetLeaf(feature_values);
  return leaf;
}

Guolin Ke's avatar
Guolin Ke committed
206
inline int Tree::GetLeaf(const std::vector<std::unique_ptr<BinIterator>>& iterators,
Guolin Ke's avatar
Guolin Ke committed
207
208
209
                                       data_size_t data_idx) const {
  int node = 0;
  while (node >= 0) {
Guolin Ke's avatar
Guolin Ke committed
210
211
212
    if (inner_decision_funs[decision_type_[node]](
        iterators[split_feature_[node]]->Get(data_idx),
        threshold_in_bin_[node])) {
Guolin Ke's avatar
Guolin Ke committed
213
214
215
216
217
218
219
220
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

221
inline int Tree::GetLeaf(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
222
223
  int node = 0;
  while (node >= 0) {
Guolin Ke's avatar
Guolin Ke committed
224
225
226
    if (decision_funs[decision_type_[node]](
        feature_values[split_feature_real_[node]],
        threshold_[node])) {
Guolin Ke's avatar
Guolin Ke committed
227
228
229
230
231
232
233
234
235
236
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
237
#endif   // LIGHTGBM_TREE_H_