feature_parallel_tree_learner.cpp 3.14 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
#include "parallel_tree_learner.h"

#include <cstring>

#include <vector>

namespace LightGBM {

9
10
11
12

// Builds the feature-parallel learner by handing `tree_config` straight to the
// wrapped base learner TREELEARNER_T. No other state is set up in this body.
template <typename TREELEARNER_T>
FeatureParallelTreeLearner<TREELEARNER_T>::FeatureParallelTreeLearner(
    const TreeConfig* tree_config)
    : TREELEARNER_T(tree_config) {}

15
16
// Destructor: this body performs no explicit cleanup of its own.
template <typename TREELEARNER_T>
FeatureParallelTreeLearner<TREELEARNER_T>::~FeatureParallelTreeLearner() {}
19
20
21
22

// Initializes the base learner with the given arguments, then records this
// process's rank and the machine count reported by Network, and sizes both
// exchange buffers so each can hold two SplitInfo records.
//
// train_data           forwarded unchanged to TREELEARNER_T::Init.
// is_constant_hessian  forwarded unchanged to TREELEARNER_T::Init.
template <typename TREELEARNER_T>
void FeatureParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, bool is_constant_hessian) {
  TREELEARNER_T::Init(train_data, is_constant_hessian);

  rank_ = Network::rank();
  num_machines_ = Network::num_machines();

  // Two records per buffer: FindBestSplitsForLeaves exchanges the smaller-leaf
  // and the larger-leaf candidate together.
  const auto exchange_bytes = sizeof(SplitInfo) * 2;
  input_buffer_.resize(exchange_bytes);
  output_buffer_.resize(exchange_bytes);
}


30
31
32
// Runs the base learner's BeforeTrain(), then restricts is_feature_used_ to the
// subset of features assigned to this machine (rank_).
//
// Every currently-used feature is assigned to the machine picked by
// ArrayArgs<int>::ArgMin over the running per-machine bin totals, and its
// "used" flag is cleared. Afterwards the flag is switched back on only for the
// features assigned to rank_.
// NOTE(review): presumably every machine computes the same partition because
// it runs the same loop over the same data -- confirm against the callers.
template <typename TREELEARNER_T>
void FeatureParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
  TREELEARNER_T::BeforeTrain();

  // Per-machine feature lists and the bin totals that drive the balancing.
  std::vector<std::vector<int>> features_per_machine(num_machines_);
  std::vector<int> bins_per_machine(num_machines_, 0);

  for (int raw_idx = 0; raw_idx < this->train_data_->num_total_features(); ++raw_idx) {
    const int feature = this->train_data_->InnerFeatureIndex(raw_idx);
    // Skip raw features without an inner index (-1) and features that are not
    // marked as used for this iteration.
    if (feature == -1 || !this->is_feature_used_[feature]) { continue; }

    const int target = static_cast<int>(ArrayArgs<int>::ArgMin(bins_per_machine));
    features_per_machine[target].push_back(feature);
    bins_per_machine[target] += this->train_data_->FeatureNumBin(feature);
    // Cleared for everyone here; re-enabled below for the local share only.
    this->is_feature_used_[feature] = false;
  }

  // Re-enable just the features owned by this machine.
  const std::vector<int>& local_features = features_per_machine[rank_];
  for (auto it = local_features.begin(); it != local_features.end(); ++it) {
    this->is_feature_used_[*it] = true;
  }
}

52
53
// Exchanges the locally best splits for the smaller and the larger leaf with
// the other machines and stores the reduced result back into
// best_split_per_leaf_.
//
// The two SplitInfo records are packed back-to-back into input_buffer_, reduced
// through Network::Allreduce with SplitInfo::MaxReducer, and unpacked from
// output_buffer_. The larger-leaf entry is only read and written back when its
// leaf index is non-negative; otherwise a default-constructed SplitInfo takes
// part in the exchange.
// NOTE(review): the raw memcpy assumes SplitInfo can be copied byte-wise --
// confirm against its definition.
template <typename TREELEARNER_T>
void FeatureParallelTreeLearner<TREELEARNER_T>::FindBestSplitsForLeaves() {
  const auto record_bytes = sizeof(SplitInfo);
  const auto smaller_leaf = this->smaller_leaf_splits_->LeafIndex();
  const auto larger_leaf = this->larger_leaf_splits_->LeafIndex();
  const bool has_larger_leaf = larger_leaf >= 0;

  // Local candidates.
  SplitInfo smaller_best = this->best_split_per_leaf_[smaller_leaf];
  SplitInfo larger_best;
  if (has_larger_leaf) {
    larger_best = this->best_split_per_leaf_[larger_leaf];
  }

  // Pack both records into the send buffer: [smaller | larger].
  auto* send = input_buffer_.data();
  auto* recv = output_buffer_.data();
  std::memcpy(send, &smaller_best, record_bytes);
  std::memcpy(send + record_bytes, &larger_best, record_bytes);

  // Reduce across machines, one SplitInfo-sized element at a time.
  Network::Allreduce(send, record_bytes * 2, record_bytes,
                     recv, &SplitInfo::MaxReducer);

  // Unpack the reduced records in the same order.
  std::memcpy(&smaller_best, recv, record_bytes);
  std::memcpy(&larger_best, recv + record_bytes, record_bytes);

  // Publish the reduced result.
  this->best_split_per_leaf_[smaller_leaf] = smaller_best;
  if (has_larger_leaf) {
    this->best_split_per_leaf_[larger_leaf] = larger_best;
  }
}

77
78
79
// Explicit instantiations. The member functions of FeatureParallelTreeLearner
// are defined in this .cpp file, not in the header, so other translation
// units cannot instantiate them; without these lines the linker cannot find
// the code. One instantiation is emitted per base learner listed here.
template class FeatureParallelTreeLearner<GPUTreeLearner>;
template class FeatureParallelTreeLearner<SerialTreeLearner>;
Guolin Ke's avatar
Guolin Ke committed
80
}  // namespace LightGBM