tree.h 6.47 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/dataset.h>

#include <string>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
9
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

namespace LightGBM {


/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Construtor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
34
35
36
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
37
  * \param bin_type type of this feature, numerical or categorical
Guolin Ke's avatar
Guolin Ke committed
38
39
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
40
  * \param threshold_double Threshold on feature value
Guolin Ke's avatar
Guolin Ke committed
41
42
  * \param left_value Model Left child output
  * \param right_value Model Right child output
Guolin Ke's avatar
Guolin Ke committed
43
44
  * \param left_cnt Count of left child
  * \param right_cnt Count of right child
Guolin Ke's avatar
Guolin Ke committed
45
46
47
  * \param gain Split gain
  * \return The index of new leaf.
  */
48
  int Split(int leaf, int feature, BinType bin_type, uint32_t threshold, int real_feature,
49
    double threshold_double, double left_value,
Guolin Ke's avatar
Guolin Ke committed
50
    double right_value, data_size_t left_cnt, data_size_t right_cnt, double gain);
Guolin Ke's avatar
Guolin Ke committed
51

Guolin Ke's avatar
Guolin Ke committed
52
  /*! \brief Get the output of one leaf */
53
  inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
Guolin Ke's avatar
Guolin Ke committed
54

Guolin Ke's avatar
Guolin Ke committed
55
56
57
58
59
  /*! \brief Set the output of one leaf */
  inline void SetLeafOutput(int leaf, double output) {
    leaf_value_[leaf] = output;
  }

Guolin Ke's avatar
Guolin Ke committed
60
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
61
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
62
63
64
65
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
Guolin Ke's avatar
Guolin Ke committed
66
67
68
  void AddPredictionToScore(const Dataset* data, 
    data_size_t num_data,
    double* score) const;
Guolin Ke's avatar
Guolin Ke committed
69
70

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
71
  * \brief Adding prediction value of this tree model to scorese
Guolin Ke's avatar
Guolin Ke committed
72
73
74
75
76
77
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
78
                            const data_size_t* used_data_indices,
79
                            data_size_t num_data, double* score) const;
Guolin Ke's avatar
Guolin Ke committed
80
81

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
82
  * \brief Prediction on one record 
Guolin Ke's avatar
Guolin Ke committed
83
84
85
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
86
87
  inline double Predict(const double* feature_values) const;
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
88
89
90
91

  /*! \brief Get Number of leaves*/
  inline int num_leaves() const { return num_leaves_; }

Guolin Ke's avatar
Guolin Ke committed
92
93
94
  /*! \brief Get depth of specific leaf*/
  inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }

wxchan's avatar
wxchan committed
95
  /*! \brief Get feature of specific split*/
Guolin Ke's avatar
Guolin Ke committed
96
  inline int split_feature(int split_idx) const { return split_feature_[split_idx]; }
wxchan's avatar
wxchan committed
97

Guolin Ke's avatar
Guolin Ke committed
98
99
  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
100
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
101
102
  * \param rate The factor of shrinkage
  */
103
  inline void Shrinkage(double rate) {
Guolin Ke's avatar
Guolin Ke committed
104
#pragma omp parallel for schedule(static)
Guolin Ke's avatar
Guolin Ke committed
105
    for (int i = 0; i < num_leaves_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
106
      leaf_value_[i] *= rate;
Guolin Ke's avatar
Guolin Ke committed
107
    }
Guolin Ke's avatar
Guolin Ke committed
108
    shrinkage_ *= rate;
Guolin Ke's avatar
Guolin Ke committed
109
110
  }

wxchan's avatar
wxchan committed
111
  /*! \brief Serialize this object to string*/
Guolin Ke's avatar
Guolin Ke committed
112
113
  std::string ToString();

wxchan's avatar
wxchan committed
114
115
116
  /*! \brief Serialize this object to json*/
  std::string ToJSON();

117
118
119
120
121
122
123
124
125
  template<typename T>
  static bool CategoricalDecision(T fval, T threshold) {
    if (static_cast<int>(fval) == static_cast<int>(threshold)) {
      return true;
    } else {
      return false;
    }
  }

Guolin Ke's avatar
Guolin Ke committed
126
127
128
129
130
131
132
133
134
  template<typename T>
  static bool NumericalDecision(T fval, T threshold) {
    if (fval <= threshold) {
      return true;
    } else {
      return false;
    }
  }

135
136
137
138
139
140
141
  static const char* GetDecisionTypeName(int8_t type) {
    if (type == 0) {
      return "no_greater";
    } else {
      return "is";
    }
  }
Guolin Ke's avatar
Guolin Ke committed
142

143
144
  static std::vector<bool(*)(uint32_t, uint32_t)> inner_decision_funs;
  static std::vector<bool(*)(double, double)> decision_funs;
Guolin Ke's avatar
Guolin Ke committed
145

146
private:
Guolin Ke's avatar
Guolin Ke committed
147
148

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
149
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
150
151
152
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
153
  inline int GetLeaf(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
154

wxchan's avatar
wxchan committed
155
156
157
  /*! \brief Serialize one node to json*/
  inline std::string NodeToJSON(int index);

Guolin Ke's avatar
Guolin Ke committed
158
159
160
161
162
163
  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current levas*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
Guolin Ke's avatar
Guolin Ke committed
164
  std::vector<int> left_child_;
Guolin Ke's avatar
Guolin Ke committed
165
  /*! \brief A non-leaf node's right child */
Guolin Ke's avatar
Guolin Ke committed
166
  std::vector<int> right_child_;
Guolin Ke's avatar
Guolin Ke committed
167
  /*! \brief A non-leaf node's split feature */
Guolin Ke's avatar
Guolin Ke committed
168
  std::vector<int> split_feature_inner;
Guolin Ke's avatar
Guolin Ke committed
169
  /*! \brief A non-leaf node's split feature, the original index */
Guolin Ke's avatar
Guolin Ke committed
170
  std::vector<int> split_feature_;
Guolin Ke's avatar
Guolin Ke committed
171
  /*! \brief A non-leaf node's split threshold in bin */
Guolin Ke's avatar
Guolin Ke committed
172
  std::vector<uint32_t> threshold_in_bin_;
Guolin Ke's avatar
Guolin Ke committed
173
  /*! \brief A non-leaf node's split threshold in feature value */
Guolin Ke's avatar
Guolin Ke committed
174
  std::vector<double> threshold_;
175
176
  /*! \brief Decision type, 0 for '<='(numerical feature), 1 for 'is'(categorical feature) */
  std::vector<int8_t> decision_type_;
Guolin Ke's avatar
Guolin Ke committed
177
  /*! \brief A non-leaf node's split gain */
Guolin Ke's avatar
Guolin Ke committed
178
  std::vector<double> split_gain_;
Guolin Ke's avatar
Guolin Ke committed
179
180
  // used for leaf node
  /*! \brief The parent of leaf */
Guolin Ke's avatar
Guolin Ke committed
181
  std::vector<int> leaf_parent_;
Guolin Ke's avatar
Guolin Ke committed
182
  /*! \brief Output of leaves */
Guolin Ke's avatar
Guolin Ke committed
183
  std::vector<double> leaf_value_;
Guolin Ke's avatar
Guolin Ke committed
184
185
186
187
188
189
  /*! \brief DataCount of leaves */
  std::vector<data_size_t> leaf_count_;
  /*! \brief Output of non-leaf nodes */
  std::vector<double> internal_value_;
  /*! \brief DataCount of non-leaf nodes */
  std::vector<data_size_t> internal_count_;
Guolin Ke's avatar
Guolin Ke committed
190
  /*! \brief Depth for leaves */
Guolin Ke's avatar
Guolin Ke committed
191
  std::vector<int> leaf_depth_;
Guolin Ke's avatar
Guolin Ke committed
192
  double shrinkage_;
193
  bool has_categorical_;
Guolin Ke's avatar
Guolin Ke committed
194
195
196
};


197
inline double Tree::Predict(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
198
199
200
201
202
203
  if (num_leaves_ > 1) {
    int leaf = GetLeaf(feature_values);
    return LeafOutput(leaf);
  } else {
    return 0.0f;
  }
Guolin Ke's avatar
Guolin Ke committed
204
205
}

206
inline int Tree::PredictLeafIndex(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
207
208
209
210
211
212
  if (num_leaves_ > 1) {
    int leaf = GetLeaf(feature_values);
    return leaf;
  } else {
    return 0;
  }
wxchan's avatar
wxchan committed
213
214
}

215
inline int Tree::GetLeaf(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
216
217
  int node = 0;
  while (node >= 0) {
218
    if (decision_funs[decision_type_[node]](
Guolin Ke's avatar
Guolin Ke committed
219
        feature_values[split_feature_[node]],
Guolin Ke's avatar
Guolin Ke committed
220
        threshold_[node])) {
Guolin Ke's avatar
Guolin Ke committed
221
222
223
224
225
226
227
228
229
230
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
231
#endif   // LightGBM_TREE_H_