tree.h 7.37 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/dataset.h>

#include <string>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
9
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
10
11
12

namespace LightGBM {

13
#define kMaxTreeOutput (100)
Guolin Ke's avatar
Guolin Ke committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Construtor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
35
36
37
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
38
  * \param bin_type type of this feature, numerical or categorical
Guolin Ke's avatar
Guolin Ke committed
39
40
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
41
  * \param threshold_double Threshold on feature value
Guolin Ke's avatar
Guolin Ke committed
42
43
  * \param left_value Model Left child output
  * \param right_value Model Right child output
Guolin Ke's avatar
Guolin Ke committed
44
45
  * \param left_cnt Count of left child
  * \param right_cnt Count of right child
Guolin Ke's avatar
Guolin Ke committed
46
47
48
  * \param gain Split gain
  * \return The index of new leaf.
  */
49
  int Split(int leaf, int feature, BinType bin_type, uint32_t threshold, int real_feature,
50
51
            double threshold_double, double left_value,
            double right_value, data_size_t left_cnt, data_size_t right_cnt, double gain);
Guolin Ke's avatar
Guolin Ke committed
52

Guolin Ke's avatar
Guolin Ke committed
53
  /*! \brief Get the output of one leaf */
54
  inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
Guolin Ke's avatar
Guolin Ke committed
55

Guolin Ke's avatar
Guolin Ke committed
56
57
58
59
60
  /*! \brief Set the output of one leaf */
  inline void SetLeafOutput(int leaf, double output) {
    leaf_value_[leaf] = output;
  }

Guolin Ke's avatar
Guolin Ke committed
61
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
62
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
67
68
69
  void AddPredictionToScore(const Dataset* data,
                            data_size_t num_data,
                            double* score) const;
Guolin Ke's avatar
Guolin Ke committed
70
71

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
72
  * \brief Adding prediction value of this tree model to scorese
Guolin Ke's avatar
Guolin Ke committed
73
74
75
76
77
78
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
79
                            const data_size_t* used_data_indices,
80
                            data_size_t num_data, double* score) const;
Guolin Ke's avatar
Guolin Ke committed
81
82

  /*!
83
  * \brief Prediction on one record
Guolin Ke's avatar
Guolin Ke committed
84
85
86
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
87
88
  inline double Predict(const double* feature_values) const;
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
89
90
91
92

  /*! \brief Get Number of leaves*/
  inline int num_leaves() const { return num_leaves_; }

Guolin Ke's avatar
Guolin Ke committed
93
94
95
  /*! \brief Get depth of specific leaf*/
  inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }

wxchan's avatar
wxchan committed
96
  /*! \brief Get feature of specific split*/
Guolin Ke's avatar
Guolin Ke committed
97
  inline int split_feature(int split_idx) const { return split_feature_[split_idx]; }
wxchan's avatar
wxchan committed
98

Guolin Ke's avatar
Guolin Ke committed
99
100
  inline double split_gain(int split_idx) const { return split_gain_[split_idx]; }

Guolin Ke's avatar
Guolin Ke committed
101
102
  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
103
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
104
105
  * \param rate The factor of shrinkage
  */
106
  inline void Shrinkage(double rate) {
Guolin Ke's avatar
Guolin Ke committed
107
    #pragma omp parallel for schedule(static, 512) if (num_leaves_ >= 1024)
Guolin Ke's avatar
Guolin Ke committed
108
    for (int i = 0; i < num_leaves_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
109
      leaf_value_[i] *= rate;
110
111
      if (leaf_value_[i] > kMaxTreeOutput) { leaf_value_[i] = kMaxTreeOutput; } 
      else if (leaf_value_[i] < -kMaxTreeOutput) { leaf_value_[i] = -kMaxTreeOutput; }
Guolin Ke's avatar
Guolin Ke committed
112
    }
Guolin Ke's avatar
Guolin Ke committed
113
    shrinkage_ *= rate;
Guolin Ke's avatar
Guolin Ke committed
114
115
  }

wxchan's avatar
wxchan committed
116
  /*! \brief Serialize this object to string*/
Guolin Ke's avatar
Guolin Ke committed
117
118
  std::string ToString();

wxchan's avatar
wxchan committed
119
120
121
  /*! \brief Serialize this object to json*/
  std::string ToJSON();

122
123
124
  /*! \brief Serialize this object to if-else statement*/
  std::string ToIfElse(int index, bool is_predict_leaf_index);

125
126
127
128
129
130
131
132
133
  template<typename T>
  static bool CategoricalDecision(T fval, T threshold) {
    if (static_cast<int>(fval) == static_cast<int>(threshold)) {
      return true;
    } else {
      return false;
    }
  }

Guolin Ke's avatar
Guolin Ke committed
134
135
136
137
138
139
140
141
142
  template<typename T>
  static bool NumericalDecision(T fval, T threshold) {
    if (fval <= threshold) {
      return true;
    } else {
      return false;
    }
  }

143
144
145
146
147
148
149
  static const char* GetDecisionTypeName(int8_t type) {
    if (type == 0) {
      return "no_greater";
    } else {
      return "is";
    }
  }
Guolin Ke's avatar
Guolin Ke committed
150

151
152
  static std::vector<bool(*)(uint32_t, uint32_t)> inner_decision_funs;
  static std::vector<bool(*)(double, double)> decision_funs;
Guolin Ke's avatar
Guolin Ke committed
153

154
private:
Guolin Ke's avatar
Guolin Ke committed
155
156

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
157
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
158
159
160
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
161
  inline int GetLeaf(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
162

wxchan's avatar
wxchan committed
163
164
165
  /*! \brief Serialize one node to json*/
  inline std::string NodeToJSON(int index);

166
167
168
  /*! \brief Serialize one node to if-else statement*/
  inline std::string NodeToIfElse(int index, bool is_predict_leaf_index);

Guolin Ke's avatar
Guolin Ke committed
169
170
171
172
173
174
  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current levas*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
Guolin Ke's avatar
Guolin Ke committed
175
  std::vector<int> left_child_;
Guolin Ke's avatar
Guolin Ke committed
176
  /*! \brief A non-leaf node's right child */
Guolin Ke's avatar
Guolin Ke committed
177
  std::vector<int> right_child_;
Guolin Ke's avatar
Guolin Ke committed
178
  /*! \brief A non-leaf node's split feature */
Guolin Ke's avatar
Guolin Ke committed
179
  std::vector<int> split_feature_inner;
Guolin Ke's avatar
Guolin Ke committed
180
  /*! \brief A non-leaf node's split feature, the original index */
Guolin Ke's avatar
Guolin Ke committed
181
  std::vector<int> split_feature_;
Guolin Ke's avatar
Guolin Ke committed
182
  /*! \brief A non-leaf node's split threshold in bin */
Guolin Ke's avatar
Guolin Ke committed
183
  std::vector<uint32_t> threshold_in_bin_;
Guolin Ke's avatar
Guolin Ke committed
184
  /*! \brief A non-leaf node's split threshold in feature value */
Guolin Ke's avatar
Guolin Ke committed
185
  std::vector<double> threshold_;
186
187
  /*! \brief Decision type, 0 for '<='(numerical feature), 1 for 'is'(categorical feature) */
  std::vector<int8_t> decision_type_;
Guolin Ke's avatar
Guolin Ke committed
188
  /*! \brief A non-leaf node's split gain */
Guolin Ke's avatar
Guolin Ke committed
189
  std::vector<double> split_gain_;
Guolin Ke's avatar
Guolin Ke committed
190
191
  // used for leaf node
  /*! \brief The parent of leaf */
Guolin Ke's avatar
Guolin Ke committed
192
  std::vector<int> leaf_parent_;
Guolin Ke's avatar
Guolin Ke committed
193
  /*! \brief Output of leaves */
Guolin Ke's avatar
Guolin Ke committed
194
  std::vector<double> leaf_value_;
Guolin Ke's avatar
Guolin Ke committed
195
196
197
198
199
200
  /*! \brief DataCount of leaves */
  std::vector<data_size_t> leaf_count_;
  /*! \brief Output of non-leaf nodes */
  std::vector<double> internal_value_;
  /*! \brief DataCount of non-leaf nodes */
  std::vector<data_size_t> internal_count_;
Guolin Ke's avatar
Guolin Ke committed
201
  /*! \brief Depth for leaves */
Guolin Ke's avatar
Guolin Ke committed
202
  std::vector<int> leaf_depth_;
Guolin Ke's avatar
Guolin Ke committed
203
  double shrinkage_;
204
  bool has_categorical_;
Guolin Ke's avatar
Guolin Ke committed
205
206
};

207
inline double Tree::Predict(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
208
209
210
211
212
213
  if (num_leaves_ > 1) {
    int leaf = GetLeaf(feature_values);
    return LeafOutput(leaf);
  } else {
    return 0.0f;
  }
Guolin Ke's avatar
Guolin Ke committed
214
215
}

216
inline int Tree::PredictLeafIndex(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
217
218
219
220
221
222
  if (num_leaves_ > 1) {
    int leaf = GetLeaf(feature_values);
    return leaf;
  } else {
    return 0;
  }
wxchan's avatar
wxchan committed
223
224
}

225
inline int Tree::GetLeaf(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
226
  int node = 0;
Guolin Ke's avatar
Guolin Ke committed
227
228
229
  if (has_categorical_) {
    while (node >= 0) {
      if (decision_funs[decision_type_[node]](
230
        feature_values[split_feature_[node]],
Guolin Ke's avatar
Guolin Ke committed
231
232
233
234
235
236
237
238
239
        threshold_[node])) {
        node = left_child_[node];
      } else {
        node = right_child_[node];
      }
    }
  } else {
    while (node >= 0) {
      if (NumericalDecision<double>(
240
        feature_values[split_feature_[node]],
Guolin Ke's avatar
Guolin Ke committed
241
242
243
244
245
        threshold_[node])) {
        node = left_child_[node];
      } else {
        node = right_child_[node];
      }
Guolin Ke's avatar
Guolin Ke committed
246
247
248
249
250
251
252
    }
  }
  return ~node;
}

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
253
#endif   // LightGBM_TREE_H_