#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/feature.h>
#include <LightGBM/dataset.h>

#include <string>
#include <vector>
#include <memory>

namespace LightGBM {


/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Construtor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
35
36
37
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
Guolin Ke's avatar
Guolin Ke committed
38
39
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
40
  * \param threshold_double Threshold on feature value
Guolin Ke's avatar
Guolin Ke committed
41
42
43
44
45
46
  * \param left_value Model Left child output
  * \param right_value Model Right child output
  * \param gain Split gain
  * \return The index of new leaf.
  */
  int Split(int leaf, int feature, unsigned int threshold, int real_feature,
47
48
    double threshold_double, double left_value,
    double right_value, double gain);
Guolin Ke's avatar
Guolin Ke committed
49
50

  /*! \brief Get the output of one leave */
51
  inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
Guolin Ke's avatar
Guolin Ke committed
52
53

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
54
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
55
56
57
58
59
60
61
62
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data, data_size_t num_data,
                                                       score_t* score) const;

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
63
  * \brief Adding prediction value of this tree model to scorese
Guolin Ke's avatar
Guolin Ke committed
64
65
66
67
68
69
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
70
71
                            const data_size_t* used_data_indices,
                            data_size_t num_data, score_t* score) const;
Guolin Ke's avatar
Guolin Ke committed
72
73

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
74
  * \brief Prediction on one record 
Guolin Ke's avatar
Guolin Ke committed
75
76
77
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
78
79
  inline double Predict(const double* feature_values) const;
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
80
81
82
83

  /*! \brief Get Number of leaves*/
  inline int num_leaves() const { return num_leaves_; }

Guolin Ke's avatar
Guolin Ke committed
84
85
86
  /*! \brief Get depth of specific leaf*/
  inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }

wxchan's avatar
wxchan committed
87
  /*! \brief Get feature of specific split*/
88
  inline int split_feature_real(int split_idx) const { return split_feature_real_[split_idx]; }
wxchan's avatar
wxchan committed
89

Guolin Ke's avatar
Guolin Ke committed
90
91
  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
92
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
93
94
  * \param rate The factor of shrinkage
  */
95
  inline void Shrinkage(double rate) {
Guolin Ke's avatar
Guolin Ke committed
96
    for (int i = 0; i < num_leaves_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
97
      leaf_value_[i] = leaf_value_[i] * rate;
Guolin Ke's avatar
Guolin Ke committed
98
99
100
    }
  }

wxchan's avatar
wxchan committed
101
  /*! \brief Serialize this object to string*/
Guolin Ke's avatar
Guolin Ke committed
102
103
  std::string ToString();

wxchan's avatar
wxchan committed
104
105
106
  /*! \brief Serialize this object to json*/
  std::string ToJSON();

Guolin Ke's avatar
Guolin Ke committed
107
108
private:
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
109
  * \brief Find leaf index of which record belongs by data
Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
  * \param data The dataset
  * \param data_idx Index of record
  * \return Leaf index
  */
Guolin Ke's avatar
Guolin Ke committed
114
  inline int GetLeaf(const std::vector<std::unique_ptr<BinIterator>>& iterators,
Guolin Ke's avatar
Guolin Ke committed
115
116
117
                                           data_size_t data_idx) const;

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
118
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
119
120
121
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
122
  inline int GetLeaf(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
123

wxchan's avatar
wxchan committed
124
125
126
  /*! \brief Serialize one node to json*/
  inline std::string NodeToJSON(int index);

Guolin Ke's avatar
Guolin Ke committed
127
128
129
130
131
132
  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current levas*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
Guolin Ke's avatar
Guolin Ke committed
133
  std::vector<int> left_child_;
Guolin Ke's avatar
Guolin Ke committed
134
  /*! \brief A non-leaf node's right child */
Guolin Ke's avatar
Guolin Ke committed
135
  std::vector<int> right_child_;
Guolin Ke's avatar
Guolin Ke committed
136
  /*! \brief A non-leaf node's split feature */
Guolin Ke's avatar
Guolin Ke committed
137
  std::vector<int> split_feature_;
Guolin Ke's avatar
Guolin Ke committed
138
  /*! \brief A non-leaf node's split feature, the original index */
Guolin Ke's avatar
Guolin Ke committed
139
  std::vector<int> split_feature_real_;
Guolin Ke's avatar
Guolin Ke committed
140
  /*! \brief A non-leaf node's split threshold in bin */
Guolin Ke's avatar
Guolin Ke committed
141
  std::vector<unsigned int> threshold_in_bin_;
Guolin Ke's avatar
Guolin Ke committed
142
  /*! \brief A non-leaf node's split threshold in feature value */
Guolin Ke's avatar
Guolin Ke committed
143
  std::vector<double> threshold_;
Guolin Ke's avatar
Guolin Ke committed
144
  /*! \brief A non-leaf node's split gain */
Guolin Ke's avatar
Guolin Ke committed
145
  std::vector<double> split_gain_;
wxchan's avatar
wxchan committed
146
147
  /*! \brief Output of internal nodes(save internal output for per inference feature importance calc) */
  std::vector<double> internal_value_;
Guolin Ke's avatar
Guolin Ke committed
148
149
  // used for leaf node
  /*! \brief The parent of leaf */
Guolin Ke's avatar
Guolin Ke committed
150
  std::vector<int> leaf_parent_;
Guolin Ke's avatar
Guolin Ke committed
151
  /*! \brief Output of leaves */
Guolin Ke's avatar
Guolin Ke committed
152
  std::vector<double> leaf_value_;
Guolin Ke's avatar
Guolin Ke committed
153
  /*! \brief Depth for leaves */
Guolin Ke's avatar
Guolin Ke committed
154
  std::vector<int> leaf_depth_;
Guolin Ke's avatar
Guolin Ke committed
155
156
157
};


158
inline double Tree::Predict(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
159
160
161
162
  int leaf = GetLeaf(feature_values);
  return LeafOutput(leaf);
}

163
inline int Tree::PredictLeafIndex(const double* feature_values) const {
wxchan's avatar
wxchan committed
164
165
166
167
  int leaf = GetLeaf(feature_values);
  return leaf;
}

Guolin Ke's avatar
Guolin Ke committed
168
inline int Tree::GetLeaf(const std::vector<std::unique_ptr<BinIterator>>& iterators,
Guolin Ke's avatar
Guolin Ke committed
169
170
171
                                       data_size_t data_idx) const {
  int node = 0;
  while (node >= 0) {
Guolin Ke's avatar
Guolin Ke committed
172
    if (iterators[split_feature_[node]]->Get(data_idx) <= threshold_in_bin_[node]) {
Guolin Ke's avatar
Guolin Ke committed
173
174
175
176
177
178
179
180
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

181
inline int Tree::GetLeaf(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
182
183
184
185
186
187
188
189
190
191
192
193
194
  int node = 0;
  while (node >= 0) {
    if (feature_values[split_feature_real_[node]] <= threshold_[node]) {
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

}  // namespace LightGBM

#endif   // LIGHTGBM_TREE_H_