tree.h 5.21 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#ifndef LIGHTGBM_TREE_H_
#define LIGHTGBM_TREE_H_

#include <LightGBM/meta.h>
#include <LightGBM/feature.h>
#include <LightGBM/dataset.h>

#include <string>
#include <vector>

namespace LightGBM {


/*!
* \brief Tree model
*/
class Tree {
public:
  /*!
  * \brief Constructor
  * \param max_leaves The number of max leaves
  */
  explicit Tree(int max_leaves);

  /*!
  * \brief Construtor, from a string
  * \param str Model string
  */
  explicit Tree(const std::string& str);

  ~Tree();

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
34
35
36
  * \brief Performing a split on tree leaves.
  * \param leaf Index of leaf to be split
  * \param feature Index of feature; the converted index after removing useless features
Guolin Ke's avatar
Guolin Ke committed
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
  * \param threshold Threshold(bin) of split
  * \param real_feature Index of feature, the original index on data
  * \param threshold_double Threshold on feature value
  * \param left_value Model Left child output
  * \param right_value Model Right child output
  * \param gain Split gain
  * \return The index of new leaf.
  */
  int Split(int leaf, int feature, unsigned int threshold, int real_feature,
            double threshold_double, score_t left_value,
            score_t right_value, double gain);

  /*! \brief Get the output of one leave */
  inline score_t LeafOutput(int leaf) const { return leaf_value_[leaf]; }

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
53
  * \brief Adding prediction value of this tree model to scores
Guolin Ke's avatar
Guolin Ke committed
54
55
56
57
58
59
60
61
  * \param data The dataset
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data, data_size_t num_data,
                                                       score_t* score) const;

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
62
  * \brief Adding prediction value of this tree model to scorese
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
67
68
  * \param data The dataset
  * \param used_data_indices Indices of used data
  * \param num_data Number of total data
  * \param score Will add prediction to score
  */
  void AddPredictionToScore(const Dataset* data,
Qiwei Ye's avatar
Qiwei Ye committed
69
70
                            const data_size_t* used_data_indices,
                            data_size_t num_data, score_t* score) const;
Guolin Ke's avatar
Guolin Ke committed
71
72

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
73
  * \brief Prediction on one record 
Guolin Ke's avatar
Guolin Ke committed
74
75
76
77
  * \param feature_values Feature value of this record
  * \return Prediction result
  */
  inline score_t Predict(const double* feature_values) const;
wxchan's avatar
wxchan committed
78
  inline int PredictLeafIndex(const double* feature_values) const;
Guolin Ke's avatar
Guolin Ke committed
79
80
81
82
83
84

  /*! \brief Get Number of leaves*/
  inline int num_leaves() const { return num_leaves_; }

  /*!
  * \brief Shrinkage for the tree's output
Qiwei Ye's avatar
Qiwei Ye committed
85
  *        shrinkage rate (a.k.a learning rate) is used to tune the traning process
Guolin Ke's avatar
Guolin Ke committed
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  * \param rate The factor of shrinkage
  */
  inline void Shrinkage(double rate) {
    for (int i = 0; i < num_leaves_; ++i) {
      leaf_value_[i] = static_cast<score_t>(leaf_value_[i] * rate);
    }
  }

  /*! \brief Serialize this object by string*/
  std::string ToString();

  /*! \brief Disable copy */
  Tree& operator=(const Tree&) = delete;
  /*! \brief Disable copy */
  Tree(const Tree&) = delete;
private:
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
103
  * \brief Find leaf index of which record belongs by data
Guolin Ke's avatar
Guolin Ke committed
104
105
106
107
108
109
110
111
  * \param data The dataset
  * \param data_idx Index of record
  * \return Leaf index
  */
  inline int GetLeaf(const std::vector<BinIterator*>& iterators,
                                           data_size_t data_idx) const;

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
112
  * \brief Find leaf index of which record belongs by features
Guolin Ke's avatar
Guolin Ke committed
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
  * \param feature_values Feature value of this record
  * \return Leaf index
  */
  inline int GetLeaf(const double* feature_values) const;

  /*! \brief Number of max leaves*/
  int max_leaves_;
  /*! \brief Number of current levas*/
  int num_leaves_;
  // following values used for non-leaf node
  /*! \brief A non-leaf node's left child */
  int* left_child_;
  /*! \brief A non-leaf node's right child */
  int* right_child_;
  /*! \brief A non-leaf node's split feature */
  int* split_feature_;
  /*! \brief A non-leaf node's split feature, the original index */
  int* split_feature_real_;
  /*! \brief A non-leaf node's split threshold in bin */
  unsigned int* threshold_in_bin_;
  /*! \brief A non-leaf node's split threshold in feature value */
  double* threshold_;
  /*! \brief A non-leaf node's split gain */
  double* split_gain_;
  // used for leaf node
  /*! \brief The parent of leaf */
  int* leaf_parent_;
  /*! \brief Output of leaves */
  score_t* leaf_value_;
};


wxchan's avatar
wxchan committed
145
/*!
* \brief Prediction on one record
* \param feature_values Feature value of this record
* \return Output of the leaf that GetLeaf selects for this record
*/
inline score_t Tree::Predict(const double* feature_values) const {
  // Route the record down the tree, then look up the stored leaf output.
  return LeafOutput(GetLeaf(feature_values));
}

wxchan's avatar
wxchan committed
150
151
152
153
154
/*! \brief Index of the leaf that GetLeaf selects for the given feature values */
inline int Tree::PredictLeafIndex(const double* feature_values) const {
  return GetLeaf(feature_values);
}

Guolin Ke's avatar
Guolin Ke committed
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/*!
* \brief Walk from the root to a leaf using binned feature values
* \param iterators Bin iterators, indexed by split_feature_
* \param data_idx Index of record
* \return Leaf index
*/
inline int Tree::GetLeaf(const std::vector<BinIterator*>& iterators,
                         data_size_t data_idx) const {
  // Non-negative values are non-leaf node indices; the walk ends once a
  // negative value (the complement of a leaf index) is reached.
  int current = 0;
  while (current >= 0) {
    const bool go_left =
        iterators[split_feature_[current]]->Get(data_idx) <= threshold_in_bin_[current];
    current = go_left ? left_child_[current] : right_child_[current];
  }
  // Undo the complement to recover the leaf index.
  return ~current;
}

/*!
* \brief Walk from the root to a leaf using raw feature values
* \param feature_values Feature value of this record, indexed by split_feature_real_
* \return Leaf index
*/
inline int Tree::GetLeaf(const double* feature_values) const {
  // Non-negative values are non-leaf node indices; the walk ends once a
  // negative value (the complement of a leaf index) is reached.
  int current = 0;
  while (current >= 0) {
    const bool go_left =
        feature_values[split_feature_real_[current]] <= threshold_[current];
    current = go_left ? left_child_[current] : right_child_[current];
  }
  // Undo the complement to recover the leaf index.
  return ~current;
}

}  // namespace LightGBM

Guolin Ke's avatar
Guolin Ke committed
183
#endif   // LIGHTGBM_TREE_H_