tree.h 6.75 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/dataset.h>

#include <string>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
9
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
10
11
12

namespace LightGBM {

13
#define kMaxTreeOutput (100)
Guolin Ke's avatar
Guolin Ke committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Construtor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
35
36
37
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
38
  * \param bin_type type of this feature, numerical or categorical
Guolin Ke's avatar
Guolin Ke committed
39
40
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
41
  * \param threshold_double Threshold on feature value
Guolin Ke's avatar
Guolin Ke committed
42
43
  * \param left_value Model Left child output
  * \param right_value Model Right child output
Guolin Ke's avatar
Guolin Ke committed
44
45
  * \param left_cnt Count of left child
  * \param right_cnt Count of right child
Guolin Ke's avatar
Guolin Ke committed
46
47
48
  * \param gain Split gain
  * \return The index of new leaf.
  */
49
  int Split(int leaf, int feature, BinType bin_type, uint32_t threshold, int real_feature,
50
51
            double threshold_double, double left_value,
            double right_value, data_size_t left_cnt, data_size_t right_cnt, double gain);
Guolin Ke's avatar
Guolin Ke committed
52

Guolin Ke's avatar
Guolin Ke committed
53
  /*! \brief Get the output of one leaf */
54
  inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
Guolin Ke's avatar
Guolin Ke committed
55

Guolin Ke's avatar
Guolin Ke committed
56
57
58
59
60
  /*! \brief Set the output of one leaf */
  inline void SetLeafOutput(int leaf, double output) {
    leaf_value_[leaf] = output;
  }

Guolin Ke's avatar
Guolin Ke committed
61
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
62
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
67
68
69
  void AddPredictionToScore(const Dataset* data,
                            data_size_t num_data,
                            double* score) const;
Guolin Ke's avatar
Guolin Ke committed
70
71

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
72
  * \brief Adding prediction value of this tree model to scorese
Guolin Ke's avatar
Guolin Ke committed
73
74
75
76
77
78
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
79
                            const data_size_t* used_data_indices,
80
                            data_size_t num_data, double* score) const;
Guolin Ke's avatar
Guolin Ke committed
81
82

  /*!
83
  * \brief Prediction on one record
Guolin Ke's avatar
Guolin Ke committed
84
85
86
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
87
88
  inline double Predict(const double* feature_values) const;
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
89
90
91
92

  /*! \brief Get Number of leaves*/
  inline int num_leaves() const { return num_leaves_; }

Guolin Ke's avatar
Guolin Ke committed
93
94
95
  /*! \brief Get depth of specific leaf*/
  inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }

wxchan's avatar
wxchan committed
96
  /*! \brief Get feature of specific split*/
Guolin Ke's avatar
Guolin Ke committed
97
  inline int split_feature(int split_idx) const { return split_feature_[split_idx]; }
wxchan's avatar
wxchan committed
98

Guolin Ke's avatar
Guolin Ke committed
99
100
  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
101
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
102
103
  * \param rate The factor of shrinkage
  */
104
  inline void Shrinkage(double rate) {
Guolin Ke's avatar
Guolin Ke committed
105
    #pragma omp parallel for schedule(static, 512) if (num_leaves_ >= 1024)
Guolin Ke's avatar
Guolin Ke committed
106
    for (int i = 0; i < num_leaves_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
107
      leaf_value_[i] *= rate;
108
109
      if (leaf_value_[i] > kMaxTreeOutput) { leaf_value_[i] = kMaxTreeOutput; } 
      else if (leaf_value_[i] < -kMaxTreeOutput) { leaf_value_[i] = -kMaxTreeOutput; }
Guolin Ke's avatar
Guolin Ke committed
110
    }
Guolin Ke's avatar
Guolin Ke committed
111
    shrinkage_ *= rate;
Guolin Ke's avatar
Guolin Ke committed
112
113
  }

wxchan's avatar
wxchan committed
114
  /*! \brief Serialize this object to string*/
Guolin Ke's avatar
Guolin Ke committed
115
116
  std::string ToString();

wxchan's avatar
wxchan committed
117
118
119
  /*! \brief Serialize this object to json*/
  std::string ToJSON();

120
121
122
123
124
125
126
127
128
  template<typename T>
  static bool CategoricalDecision(T fval, T threshold) {
    if (static_cast<int>(fval) == static_cast<int>(threshold)) {
      return true;
    } else {
      return false;
    }
  }

Guolin Ke's avatar
Guolin Ke committed
129
130
131
132
133
134
135
136
137
  template<typename T>
  static bool NumericalDecision(T fval, T threshold) {
    if (fval <= threshold) {
      return true;
    } else {
      return false;
    }
  }

138
139
140
141
142
143
144
  static const char* GetDecisionTypeName(int8_t type) {
    if (type == 0) {
      return "no_greater";
    } else {
      return "is";
    }
  }
Guolin Ke's avatar
Guolin Ke committed
145

146
147
  static std::vector<bool(*)(uint32_t, uint32_t)> inner_decision_funs;
  static std::vector<bool(*)(double, double)> decision_funs;
Guolin Ke's avatar
Guolin Ke committed
148

149
private:
Guolin Ke's avatar
Guolin Ke committed
150
151

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
152
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
153
154
155
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
156
  inline int GetLeaf(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
157

wxchan's avatar
wxchan committed
158
159
160
  /*! \brief Serialize one node to json*/
  inline std::string NodeToJSON(int index);

Guolin Ke's avatar
Guolin Ke committed
161
162
163
164
165
166
  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current levas*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
Guolin Ke's avatar
Guolin Ke committed
167
  std::vector<int> left_child_;
Guolin Ke's avatar
Guolin Ke committed
168
  /*! \brief A non-leaf node's right child */
Guolin Ke's avatar
Guolin Ke committed
169
  std::vector<int> right_child_;
Guolin Ke's avatar
Guolin Ke committed
170
  /*! \brief A non-leaf node's split feature */
Guolin Ke's avatar
Guolin Ke committed
171
  std::vector<int> split_feature_inner;
Guolin Ke's avatar
Guolin Ke committed
172
  /*! \brief A non-leaf node's split feature, the original index */
Guolin Ke's avatar
Guolin Ke committed
173
  std::vector<int> split_feature_;
Guolin Ke's avatar
Guolin Ke committed
174
  /*! \brief A non-leaf node's split threshold in bin */
Guolin Ke's avatar
Guolin Ke committed
175
  std::vector<uint32_t> threshold_in_bin_;
Guolin Ke's avatar
Guolin Ke committed
176
  /*! \brief A non-leaf node's split threshold in feature value */
Guolin Ke's avatar
Guolin Ke committed
177
  std::vector<double> threshold_;
178
179
  /*! \brief Decision type, 0 for '<='(numerical feature), 1 for 'is'(categorical feature) */
  std::vector<int8_t> decision_type_;
Guolin Ke's avatar
Guolin Ke committed
180
  /*! \brief A non-leaf node's split gain */
Guolin Ke's avatar
Guolin Ke committed
181
  std::vector<double> split_gain_;
Guolin Ke's avatar
Guolin Ke committed
182
183
  // used for leaf node
  /*! \brief The parent of leaf */
Guolin Ke's avatar
Guolin Ke committed
184
  std::vector<int> leaf_parent_;
Guolin Ke's avatar
Guolin Ke committed
185
  /*! \brief Output of leaves */
Guolin Ke's avatar
Guolin Ke committed
186
  std::vector<double> leaf_value_;
Guolin Ke's avatar
Guolin Ke committed
187
188
189
190
191
192
  /*! \brief DataCount of leaves */
  std::vector<data_size_t> leaf_count_;
  /*! \brief Output of non-leaf nodes */
  std::vector<double> internal_value_;
  /*! \brief DataCount of non-leaf nodes */
  std::vector<data_size_t> internal_count_;
Guolin Ke's avatar
Guolin Ke committed
193
  /*! \brief Depth for leaves */
Guolin Ke's avatar
Guolin Ke committed
194
  std::vector<int> leaf_depth_;
Guolin Ke's avatar
Guolin Ke committed
195
  double shrinkage_;
196
  bool has_categorical_;
Guolin Ke's avatar
Guolin Ke committed
197
198
199
};


200
inline double Tree::Predict(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
201
202
203
204
205
206
  if (num_leaves_ > 1) {
    int leaf = GetLeaf(feature_values);
    return LeafOutput(leaf);
  } else {
    return 0.0f;
  }
Guolin Ke's avatar
Guolin Ke committed
207
208
}

209
inline int Tree::PredictLeafIndex(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
210
211
212
213
214
215
  if (num_leaves_ > 1) {
    int leaf = GetLeaf(feature_values);
    return leaf;
  } else {
    return 0;
  }
wxchan's avatar
wxchan committed
216
217
}

218
inline int Tree::GetLeaf(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
219
220
  int node = 0;
  while (node >= 0) {
221
    if (decision_funs[decision_type_[node]](
222
223
      feature_values[split_feature_[node]],
      threshold_[node])) {
Guolin Ke's avatar
Guolin Ke committed
224
225
226
227
228
229
230
231
232
233
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
234
#endif   // LightGBM_TREE_H_