tree.cpp 7.33 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#include <LightGBM/tree.h>

#include <LightGBM/utils/threading.h>
#include <LightGBM/utils/common.h>

#include <LightGBM/dataset.h>
#include <LightGBM/feature.h>

#include <sstream>
#include <unordered_map>
#include <functional>
#include <vector>
#include <string>

namespace LightGBM {

Tree::Tree(int max_leaves)
  :max_leaves_(max_leaves) {
  num_leaves_ = 0;

  left_child_ = new int[max_leaves_ - 1];
  right_child_ = new int[max_leaves_ - 1];
  split_feature_ = new int[max_leaves_ - 1];
  split_feature_real_ = new int[max_leaves_ - 1];
  threshold_in_bin_ = new unsigned int[max_leaves_ - 1];
26
27
  threshold_ = new double[max_leaves_ - 1];
  split_gain_ = new double[max_leaves_ - 1];
Guolin Ke's avatar
Guolin Ke committed
28
29

  leaf_parent_ = new int[max_leaves_];
30
  leaf_value_ = new double[max_leaves_];
31
  internal_value_ = new double[max_leaves_ - 1];
Guolin Ke's avatar
Guolin Ke committed
32
33
34
  leaf_depth_ = new int[max_leaves_];
  // root is in the depth 1
  leaf_depth_[0] = 1;
Guolin Ke's avatar
Guolin Ke committed
35
36
37
38
39
40
41
42
43
44
45
46
47
  num_leaves_ = 1;
  leaf_parent_[0] = -1;
}
Tree::~Tree() {
  if (leaf_parent_ != nullptr) { delete[] leaf_parent_; }
  if (left_child_ != nullptr) { delete[] left_child_; }
  if (right_child_ != nullptr) { delete[] right_child_; }
  if (split_feature_ != nullptr) { delete[] split_feature_; }
  if (split_feature_real_ != nullptr) { delete[] split_feature_real_; }
  if (threshold_in_bin_ != nullptr) { delete[] threshold_in_bin_; }
  if (threshold_ != nullptr) { delete[] threshold_; }
  if (split_gain_ != nullptr) { delete[] split_gain_; }
  if (leaf_value_ != nullptr) { delete[] leaf_value_; }
48
  if (internal_value_ != nullptr) { delete[] internal_value_; }
Guolin Ke's avatar
Guolin Ke committed
49
  if (leaf_depth_ != nullptr) { delete[] leaf_depth_; }
Guolin Ke's avatar
Guolin Ke committed
50
51
52
}

int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feature,
53
  double threshold, double left_value, double right_value, double gain) {
Guolin Ke's avatar
Guolin Ke committed
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  int new_node_idx = num_leaves_ - 1;
  // update parent info
  int parent = leaf_parent_[leaf];
  if (parent >= 0) {
    // if cur node is left child
    if (left_child_[parent] == ~leaf) {
      left_child_[parent] = new_node_idx;
    } else {
      right_child_[parent] = new_node_idx;
    }
  }
  // add new node
  split_feature_[new_node_idx] = feature;
  split_feature_real_[new_node_idx] = real_feature;
  threshold_in_bin_[new_node_idx] = threshold_bin;
  threshold_[new_node_idx] = threshold;
  split_gain_[new_node_idx] = gain;
  // add two new leaves
  left_child_[new_node_idx] = ~leaf;
  right_child_[new_node_idx] = ~num_leaves_;
  // update new leaves
  leaf_parent_[leaf] = new_node_idx;
  leaf_parent_[num_leaves_] = new_node_idx;
77
78
  // save current leaf value to internal node before change
  internal_value_[new_node_idx] = leaf_value_[leaf];
Guolin Ke's avatar
Guolin Ke committed
79
80
  leaf_value_[leaf] = left_value;
  leaf_value_[num_leaves_] = right_value;
Guolin Ke's avatar
Guolin Ke committed
81
82
83
  // update leaf depth
  leaf_depth_[num_leaves_] = leaf_depth_[leaf] + 1;
  leaf_depth_[leaf]++;
Guolin Ke's avatar
Guolin Ke committed
84
85
86
87
88
89
90
91

  ++num_leaves_;
  return num_leaves_ - 1;
}

void Tree::AddPredictionToScore(const Dataset* data, data_size_t num_data, score_t* score) const {
  Threading::For<data_size_t>(0, num_data, [this, data, score](int, data_size_t start, data_size_t end) {
    std::vector<BinIterator*> iterators;
92
    for (int i = 0; i < data->num_features(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
93
94
      iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(start));
    }
95
    for (data_size_t i = start; i < end; ++i) {
96
      score[i] += static_cast<score_t>(leaf_value_[GetLeaf(iterators, i)]);
Guolin Ke's avatar
Guolin Ke committed
97
98
99
100
101
102
103
104
105
    }
  });
}

void Tree::AddPredictionToScore(const Dataset* data, const data_size_t* used_data_indices,
                                             data_size_t num_data, score_t* score) const {
  Threading::For<data_size_t>(0, num_data,
      [this, data, used_data_indices, score](int, data_size_t start, data_size_t end) {
    std::vector<BinIterator*> iterators;
106
    for (int i = 0; i < data->num_features(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
107
108
      iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(used_data_indices[start]));
    }
109
    for (data_size_t i = start; i < end; ++i) {
110
      score[used_data_indices[i]] += static_cast<score_t>(leaf_value_[GetLeaf(iterators, used_data_indices[i])]);
Guolin Ke's avatar
Guolin Ke committed
111
112
113
114
115
116
117
118
119
120
    }
  });
}

std::string Tree::ToString() {
  std::stringstream ss;
  ss << "num_leaves=" << num_leaves_ << std::endl;
  ss << "split_feature="
    << Common::ArrayToString<int>(split_feature_real_, num_leaves_ - 1, ' ') << std::endl;
  ss << "split_gain="
121
    << Common::ArrayToString<double>(split_gain_, num_leaves_ - 1, ' ') << std::endl;
Guolin Ke's avatar
Guolin Ke committed
122
  ss << "threshold="
123
    << Common::ArrayToString<double>(threshold_, num_leaves_ - 1, ' ') << std::endl;
Guolin Ke's avatar
Guolin Ke committed
124
125
126
127
128
129
130
  ss << "left_child="
    << Common::ArrayToString<int>(left_child_, num_leaves_ - 1, ' ') << std::endl;
  ss << "right_child="
    << Common::ArrayToString<int>(right_child_, num_leaves_ - 1, ' ') << std::endl;
  ss << "leaf_parent="
    << Common::ArrayToString<int>(leaf_parent_, num_leaves_, ' ') << std::endl;
  ss << "leaf_value="
131
    << Common::ArrayToString<double>(leaf_value_, num_leaves_, ' ') << std::endl;
132
133
  ss << "internal_value="
    << Common::ArrayToString<double>(internal_value_, num_leaves_ - 1, ' ') << std::endl;
Guolin Ke's avatar
Guolin Ke committed
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
  ss << std::endl;
  return ss.str();
}

Tree::Tree(const std::string& str) {
  std::vector<std::string> lines = Common::Split(str.c_str(), '\n');
  std::unordered_map<std::string, std::string> key_vals;
  for (const std::string& line : lines) {
    std::vector<std::string> tmp_strs = Common::Split(line.c_str(), '=');
    if (tmp_strs.size() == 2) {
      std::string key = Common::Trim(tmp_strs[0]);
      std::string val = Common::Trim(tmp_strs[1]);
      if (key.size() > 0 && val.size() > 0) {
        key_vals[key] = val;
      }
    }
  }
  if (key_vals.count("num_leaves") <= 0 || key_vals.count("split_feature") <= 0
    || key_vals.count("split_gain") <= 0 || key_vals.count("threshold") <= 0
    || key_vals.count("left_child") <= 0 || key_vals.count("right_child") <= 0
154
155
    || key_vals.count("leaf_parent") <= 0 || key_vals.count("leaf_value") <= 0
    || key_vals.count("internal_value") <= 0) {
156
    Log::Fatal("Tree model string format error");
Guolin Ke's avatar
Guolin Ke committed
157
158
159
160
161
162
163
  }

  Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_);

  left_child_ = new int[num_leaves_ - 1];
  right_child_ = new int[num_leaves_ - 1];
  split_feature_real_ = new int[num_leaves_ - 1];
164
165
  threshold_ = new double[num_leaves_ - 1];
  split_gain_ = new double[num_leaves_ - 1];
Guolin Ke's avatar
Guolin Ke committed
166
  leaf_parent_ = new int[num_leaves_];
167
  leaf_value_ = new double[num_leaves_];
168
  internal_value_ = new double[num_leaves_ - 1];
Guolin Ke's avatar
Guolin Ke committed
169
170
171

  split_feature_ = nullptr;
  threshold_in_bin_ = nullptr;
Guolin Ke's avatar
Guolin Ke committed
172
  leaf_depth_ = nullptr;
Guolin Ke's avatar
Guolin Ke committed
173
174

  Common::StringToIntArray(key_vals["split_feature"], ' ',
175
                           num_leaves_ - 1, split_feature_real_);
176
  Common::StringToDoubleArray(key_vals["split_gain"], ' ',
177
                              num_leaves_ - 1, split_gain_);
178
  Common::StringToDoubleArray(key_vals["threshold"], ' ',
179
                              num_leaves_ - 1, threshold_);
Guolin Ke's avatar
Guolin Ke committed
180
  Common::StringToIntArray(key_vals["left_child"], ' ',
181
                           num_leaves_ - 1, left_child_);
Guolin Ke's avatar
Guolin Ke committed
182
  Common::StringToIntArray(key_vals["right_child"], ' ',
183
                           num_leaves_ - 1, right_child_);
Guolin Ke's avatar
Guolin Ke committed
184
  Common::StringToIntArray(key_vals["leaf_parent"], ' ',
185
                           num_leaves_ , leaf_parent_);
186
  Common::StringToDoubleArray(key_vals["leaf_value"], ' ',
187
                              num_leaves_ , leaf_value_);
188
189
  Common::StringToDoubleArray(key_vals["internal_value"], ' ',
                              num_leaves_ - 1 , internal_value_);
Guolin Ke's avatar
Guolin Ke committed
190
191
192
}

}  // namespace LightGBM