tree.h 7.35 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/dataset.h>

#include <string>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
9
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
10
11
12

namespace LightGBM {

13
#define kMaxTreeOutput (100)
Guolin Ke's avatar
Guolin Ke committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Construtor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
35
36
37
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
38
  * \param bin_type type of this feature, numerical or categorical
Guolin Ke's avatar
Guolin Ke committed
39
40
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
41
  * \param threshold_double Threshold on feature value
Guolin Ke's avatar
Guolin Ke committed
42
43
  * \param left_value Model Left child output
  * \param right_value Model Right child output
Guolin Ke's avatar
Guolin Ke committed
44
45
  * \param left_cnt Count of left child
  * \param right_cnt Count of right child
Guolin Ke's avatar
Guolin Ke committed
46
47
48
  * \param gain Split gain
  * \return The index of new leaf.
  */
49
  int Split(int leaf, int feature, BinType bin_type, uint32_t threshold, int real_feature,
50
51
            double threshold_double, double left_value,
            double right_value, data_size_t left_cnt, data_size_t right_cnt, double gain);
Guolin Ke's avatar
Guolin Ke committed
52

Guolin Ke's avatar
Guolin Ke committed
53
  /*! \brief Get the output of one leaf */
54
  inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
Guolin Ke's avatar
Guolin Ke committed
55

Guolin Ke's avatar
Guolin Ke committed
56
57
58
59
60
  /*! \brief Set the output of one leaf */
  inline void SetLeafOutput(int leaf, double output) {
    leaf_value_[leaf] = output;
  }

Guolin Ke's avatar
Guolin Ke committed
61
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
62
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
67
68
69
  void AddPredictionToScore(const Dataset* data,
                            data_size_t num_data,
                            double* score) const;
Guolin Ke's avatar
Guolin Ke committed
70
71

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
72
  * \brief Adding prediction value of this tree model to scorese
Guolin Ke's avatar
Guolin Ke committed
73
74
75
76
77
78
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
79
                            const data_size_t* used_data_indices,
80
                            data_size_t num_data, double* score) const;
Guolin Ke's avatar
Guolin Ke committed
81
82

  /*!
83
  * \brief Prediction on one record
Guolin Ke's avatar
Guolin Ke committed
84
85
86
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
87
88
  inline double Predict(const double* feature_values) const;
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
89
90
91
92

  /*! \brief Get Number of leaves*/
  inline int num_leaves() const { return num_leaves_; }

Guolin Ke's avatar
Guolin Ke committed
93
94
95
  /*! \brief Get depth of specific leaf*/
  inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }

wxchan's avatar
wxchan committed
96
  /*! \brief Get feature of specific split*/
Guolin Ke's avatar
Guolin Ke committed
97
  inline int split_feature(int split_idx) const { return split_feature_[split_idx]; }
wxchan's avatar
wxchan committed
98

Guolin Ke's avatar
Guolin Ke committed
99
100
  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
101
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
102
103
  * \param rate The factor of shrinkage
  */
104
  inline void Shrinkage(double rate) {
Guolin Ke's avatar
Guolin Ke committed
105
    #pragma omp parallel for schedule(static, 512) if (num_leaves_ >= 1024)
Guolin Ke's avatar
Guolin Ke committed
106
    for (int i = 0; i < num_leaves_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
107
      leaf_value_[i] *= rate;
108
109
      if (leaf_value_[i] > kMaxTreeOutput) { leaf_value_[i] = kMaxTreeOutput; } 
      else if (leaf_value_[i] < -kMaxTreeOutput) { leaf_value_[i] = -kMaxTreeOutput; }
Guolin Ke's avatar
Guolin Ke committed
110
    }
Guolin Ke's avatar
Guolin Ke committed
111
    shrinkage_ *= rate;
Guolin Ke's avatar
Guolin Ke committed
112
113
  }

Guolin Ke's avatar
Guolin Ke committed
114
115
116
117
118
119
120
  inline void ReMapFeature(const std::vector<int>& feature_mapper) {
    mapped_feature_ = split_feature_;
    for (int i = 0; i < num_leaves_ - 1; ++i) {
      mapped_feature_[i] = feature_mapper[split_feature_[i]];
    }
  }

wxchan's avatar
wxchan committed
121
  /*! \brief Serialize this object to string*/
Guolin Ke's avatar
Guolin Ke committed
122
123
  std::string ToString();

wxchan's avatar
wxchan committed
124
125
126
  /*! \brief Serialize this object to json*/
  std::string ToJSON();

127
128
129
130
131
132
133
134
135
  template<typename T>
  static bool CategoricalDecision(T fval, T threshold) {
    if (static_cast<int>(fval) == static_cast<int>(threshold)) {
      return true;
    } else {
      return false;
    }
  }

Guolin Ke's avatar
Guolin Ke committed
136
137
138
139
140
141
142
143
144
  template<typename T>
  static bool NumericalDecision(T fval, T threshold) {
    if (fval <= threshold) {
      return true;
    } else {
      return false;
    }
  }

145
146
147
148
149
150
151
  static const char* GetDecisionTypeName(int8_t type) {
    if (type == 0) {
      return "no_greater";
    } else {
      return "is";
    }
  }
Guolin Ke's avatar
Guolin Ke committed
152

153
154
  static std::vector<bool(*)(uint32_t, uint32_t)> inner_decision_funs;
  static std::vector<bool(*)(double, double)> decision_funs;
Guolin Ke's avatar
Guolin Ke committed
155

156
private:
Guolin Ke's avatar
Guolin Ke committed
157
158

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
159
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
160
161
162
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
163
  inline int GetLeaf(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
164

wxchan's avatar
wxchan committed
165
166
167
  /*! \brief Serialize one node to json*/
  inline std::string NodeToJSON(int index);

Guolin Ke's avatar
Guolin Ke committed
168
169
170
171
172
173
  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current levas*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
Guolin Ke's avatar
Guolin Ke committed
174
  std::vector<int> left_child_;
Guolin Ke's avatar
Guolin Ke committed
175
  /*! \brief A non-leaf node's right child */
Guolin Ke's avatar
Guolin Ke committed
176
  std::vector<int> right_child_;
Guolin Ke's avatar
Guolin Ke committed
177
  /*! \brief A non-leaf node's split feature */
Guolin Ke's avatar
Guolin Ke committed
178
  std::vector<int> split_feature_inner;
Guolin Ke's avatar
Guolin Ke committed
179
  /*! \brief A non-leaf node's split feature, the original index */
Guolin Ke's avatar
Guolin Ke committed
180
  std::vector<int> split_feature_;
Guolin Ke's avatar
Guolin Ke committed
181
  /*! \brief A non-leaf node's split threshold in bin */
Guolin Ke's avatar
Guolin Ke committed
182
  std::vector<uint32_t> threshold_in_bin_;
Guolin Ke's avatar
Guolin Ke committed
183
  /*! \brief A non-leaf node's split threshold in feature value */
Guolin Ke's avatar
Guolin Ke committed
184
  std::vector<double> threshold_;
185
186
  /*! \brief Decision type, 0 for '<='(numerical feature), 1 for 'is'(categorical feature) */
  std::vector<int8_t> decision_type_;
Guolin Ke's avatar
Guolin Ke committed
187
  /*! \brief A non-leaf node's split gain */
Guolin Ke's avatar
Guolin Ke committed
188
  std::vector<double> split_gain_;
Guolin Ke's avatar
Guolin Ke committed
189
190
  // used for leaf node
  /*! \brief The parent of leaf */
Guolin Ke's avatar
Guolin Ke committed
191
  std::vector<int> leaf_parent_;
Guolin Ke's avatar
Guolin Ke committed
192
  /*! \brief Output of leaves */
Guolin Ke's avatar
Guolin Ke committed
193
  std::vector<double> leaf_value_;
Guolin Ke's avatar
Guolin Ke committed
194
195
196
197
198
199
  /*! \brief DataCount of leaves */
  std::vector<data_size_t> leaf_count_;
  /*! \brief Output of non-leaf nodes */
  std::vector<double> internal_value_;
  /*! \brief DataCount of non-leaf nodes */
  std::vector<data_size_t> internal_count_;
Guolin Ke's avatar
Guolin Ke committed
200
  /*! \brief Depth for leaves */
Guolin Ke's avatar
Guolin Ke committed
201
  std::vector<int> leaf_depth_;
Guolin Ke's avatar
Guolin Ke committed
202
  double shrinkage_;
203
  bool has_categorical_;
Guolin Ke's avatar
Guolin Ke committed
204
205
  /*! \brief buffer of mapped split_feature_  */
  std::vector<int> mapped_feature_;
Guolin Ke's avatar
Guolin Ke committed
206
207
};

208
inline double Tree::Predict(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
209
210
211
212
213
214
  if (num_leaves_ > 1) {
    int leaf = GetLeaf(feature_values);
    return LeafOutput(leaf);
  } else {
    return 0.0f;
  }
Guolin Ke's avatar
Guolin Ke committed
215
216
}

217
inline int Tree::PredictLeafIndex(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
218
219
220
221
222
223
  if (num_leaves_ > 1) {
    int leaf = GetLeaf(feature_values);
    return leaf;
  } else {
    return 0;
  }
wxchan's avatar
wxchan committed
224
225
}

226
inline int Tree::GetLeaf(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
227
  int node = 0;
Guolin Ke's avatar
Guolin Ke committed
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
  if (has_categorical_) {
    while (node >= 0) {
      if (decision_funs[decision_type_[node]](
        feature_values[mapped_feature_[node]],
        threshold_[node])) {
        node = left_child_[node];
      } else {
        node = right_child_[node];
      }
    }
  } else {
    while (node >= 0) {
      if (NumericalDecision<double>(
        feature_values[mapped_feature_[node]],
        threshold_[node])) {
        node = left_child_[node];
      } else {
        node = right_child_[node];
      }
Guolin Ke's avatar
Guolin Ke committed
247
248
249
250
251
252
253
    }
  }
  return ~node;
}

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
254
#endif   // LightGBM_TREE_H_