"vscode:/vscode.git/clone" did not exist on "e179c7c69d12824de096e62ec82953a3a312bc60"
tree.h 5.61 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/feature.h>
#include <LightGBM/dataset.h>

#include <string>
#include <vector>

namespace LightGBM {


/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Construtor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
34
35
36
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
Guolin Ke's avatar
Guolin Ke committed
37
38
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
39
  * \param threshold_double Threshold on feature value
Guolin Ke's avatar
Guolin Ke committed
40
41
42
43
44
45
  * \param left_value Model Left child output
  * \param right_value Model Right child output
  * \param gain Split gain
  * \return The index of new leaf.
  */
  int Split(int leaf, int feature, unsigned int threshold, int real_feature,
46
47
    double threshold_double, double left_value,
    double right_value, double gain);
Guolin Ke's avatar
Guolin Ke committed
48
49

  /*! \brief Get the output of one leave */
50
  inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
Guolin Ke's avatar
Guolin Ke committed
51
52

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
53
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
54
55
56
57
58
59
60
61
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data, data_size_t num_data,
                                                       score_t* score) const;

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
62
  * \brief Adding prediction value of this tree model to scorese
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
67
68
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
69
70
                            const data_size_t* used_data_indices,
                            data_size_t num_data, score_t* score) const;
Guolin Ke's avatar
Guolin Ke committed
71
72

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
73
  * \brief Prediction on one record 
Guolin Ke's avatar
Guolin Ke committed
74
75
76
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
77
78
  inline double Predict(const double* feature_values) const;
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
79
80
81
82

  /*! \brief Get Number of leaves*/
  inline int num_leaves() const { return num_leaves_; }

Guolin Ke's avatar
Guolin Ke committed
83
84
85
  /*! \brief Get depth of specific leaf*/
  inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }

wxchan's avatar
wxchan committed
86
  /*! \brief Get feature of specific split*/
87
  inline int split_feature_real(int split_idx) const { return split_feature_real_[split_idx]; }
wxchan's avatar
wxchan committed
88

Guolin Ke's avatar
Guolin Ke committed
89
90
  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
91
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
92
93
  * \param rate The factor of shrinkage
  */
94
  inline void Shrinkage(double rate) {
Guolin Ke's avatar
Guolin Ke committed
95
    for (int i = 0; i < num_leaves_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
96
      leaf_value_[i] = leaf_value_[i] * rate;
Guolin Ke's avatar
Guolin Ke committed
97
98
99
100
101
102
103
104
105
106
107
108
    }
  }

  /*! \brief Serialize this object by string*/
  std::string ToString();

  /*! \brief Disable copy */
  Tree& operator=(const Tree&) = delete;
  /*! \brief Disable copy */
  Tree(const Tree&) = delete;
private:
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
109
  * \brief Find leaf index of which record belongs by data
Guolin Ke's avatar
Guolin Ke committed
110
111
112
113
114
115
116
117
  * \param data The dataset
  * \param data_idx Index of record
  * \return Leaf index
  */
  inline int GetLeaf(const std::vector<BinIterator*>& iterators,
                                           data_size_t data_idx) const;

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
118
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
119
120
121
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
122
  inline int GetLeaf(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current levas*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
  int* left_child_;
  /*! \brief A non-leaf node's right child */
  int* right_child_;
  /*! \brief A non-leaf node's split feature */
  int* split_feature_;
  /*! \brief A non-leaf node's split feature, the original index */
  int* split_feature_real_;
  /*! \brief A non-leaf node's split threshold in bin */
  unsigned int* threshold_in_bin_;
  /*! \brief A non-leaf node's split threshold in feature value */
140
  double* threshold_;
Guolin Ke's avatar
Guolin Ke committed
141
  /*! \brief A non-leaf node's split gain */
142
  double* split_gain_;
Guolin Ke's avatar
Guolin Ke committed
143
144
145
146
  // used for leaf node
  /*! \brief The parent of leaf */
  int* leaf_parent_;
  /*! \brief Output of leaves */
147
  double* leaf_value_;
148
149
  /*! \brief Output of internal nodes(save internal output for per inference feature importance calc) */
  double* internal_value_;
Guolin Ke's avatar
Guolin Ke committed
150
151
  /*! \brief Depth for leaves */
  int* leaf_depth_;
Guolin Ke's avatar
Guolin Ke committed
152
153
154
};


155
inline double Tree::Predict(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
156
157
158
159
  int leaf = GetLeaf(feature_values);
  return LeafOutput(leaf);
}

160
inline int Tree::PredictLeafIndex(const double* feature_values) const {
wxchan's avatar
wxchan committed
161
162
163
164
  int leaf = GetLeaf(feature_values);
  return leaf;
}

Guolin Ke's avatar
Guolin Ke committed
165
166
167
168
169
170
171
172
173
174
175
176
177
178
inline int Tree::GetLeaf(const std::vector<BinIterator*>& iterators,
                                       data_size_t data_idx) const {
  int node = 0;
  while (node >= 0) {
    if (iterators[split_feature_[node]]->Get(data_idx) <=
                                  threshold_in_bin_[node]) {
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

179
inline int Tree::GetLeaf(const double* feature_values) const {
Guolin Ke's avatar
Guolin Ke committed
180
181
182
183
184
185
186
187
188
189
190
191
192
  int node = 0;
  while (node >= 0) {
    if (feature_values[split_feature_real_[node]] <= threshold_[node]) {
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;
}

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
193
#endif   // LightGBM_TREE_H_