"...git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "41ba9e8f00c89d72e5cb71c964722ce1ed4d8445"
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#include <cstring>
#include <vector>

8
9
#include "parallel_tree_learner.h"

Guolin Ke's avatar
Guolin Ke committed
10
11
namespace LightGBM {

12
13

template <typename TREELEARNER_T>
14
FeatureParallelTreeLearner<TREELEARNER_T>::FeatureParallelTreeLearner(const Config* config):TREELEARNER_T(config) {
Guolin Ke's avatar
Guolin Ke committed
15
16
}

17
18
/*!
 * \brief Destructor; performs no work of its own beyond member and base clean-up.
 */
template <typename TREELEARNER_T>
FeatureParallelTreeLearner<TREELEARNER_T>::~FeatureParallelTreeLearner() = default;
20
21
22
23

/*!
 * \brief Initialize the base learner, then record the network position of this
 *        process and size the buffers used when synchronizing best splits.
 * \param train_data Training data, forwarded to TREELEARNER_T::Init
 * \param is_constant_hessian Forwarded to TREELEARNER_T::Init
 */
template <typename TREELEARNER_T>
void FeatureParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data,
                                                     bool is_constant_hessian) {
  // base learner goes first
  TREELEARNER_T::Init(train_data, is_constant_hessian);

  // position of this process among all machines
  rank_ = Network::rank();
  num_machines_ = Network::num_machines();

  // SyncUpGlobalBestSplit needs room for two split records at once:
  // the best split of the smaller leaf and the best split of the larger leaf
  int two_splits_size = SplitInfo::Size(this->config_->max_cat_threshold) * 2;
  input_buffer_.resize(two_splits_size);
  output_buffer_.resize(two_splits_size);
}


36
37
38
/*!
 * \brief Run the base learner's pre-training step, then restrict this machine
 *        to its own share of the features sampled for the current tree.
 *
 * Every feature currently marked as used by the tree is handed to the machine
 * with the smallest accumulated bin count so far and is unmarked. Afterwards
 * only the features handed to this rank are marked as used again.
 */
template <typename TREELEARNER_T>
void FeatureParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
  TREELEARNER_T::BeforeTrain();

  // features_of_machine[m]: inner feature indices handed to machine m
  std::vector<std::vector<int>> features_of_machine(num_machines_);
  // bins_of_machine[m]: total number of bins handed to machine m so far
  std::vector<int> bins_of_machine(num_machines_, 0);

  for (int raw_fid = 0; raw_fid < this->train_data_->num_total_features(); ++raw_fid) {
    const int fid = this->train_data_->InnerFeatureIndex(raw_fid);
    // no inner index for this feature
    if (fid == -1) {
      continue;
    }
    // not sampled for this tree
    if (!this->col_sampler_.is_feature_used_bytree()[fid]) {
      continue;
    }
    // hand the feature to the machine that holds the fewest bins right now
    const int target = static_cast<int>(ArrayArgs<int>::ArgMin(bins_of_machine));
    features_of_machine[target].push_back(fid);
    bins_of_machine[target] += this->train_data_->FeatureNumBin(fid);
    // unmark everything here; this machine's own share is re-marked below
    this->col_sampler_.SetIsFeatureUsedByTree(fid, false);
  }

  // re-mark the features that belong to this machine
  for (const int own_fid : features_of_machine[rank_]) {
    this->col_sampler_.SetIsFeatureUsedByTree(own_fid, true);
  }
}

60
template <typename TREELEARNER_T>
61
62
63
void FeatureParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(
      const std::vector<int8_t>& is_feature_used, bool use_subtract, const Tree* tree) {
  TREELEARNER_T::FindBestSplitsFromHistograms(is_feature_used, use_subtract, tree);
Guolin Ke's avatar
Guolin Ke committed
64
  SplitInfo smaller_best_split, larger_best_split;
Guolin Ke's avatar
Guolin Ke committed
65
  // get best split at smaller leaf
66
  smaller_best_split = this->best_split_per_leaf_[this->smaller_leaf_splits_->leaf_index()];
Guolin Ke's avatar
Guolin Ke committed
67
  // find local best split for larger leaf
68
69
  if (this->larger_leaf_splits_->leaf_index() >= 0) {
    larger_best_split = this->best_split_per_leaf_[this->larger_leaf_splits_->leaf_index()];
Guolin Ke's avatar
Guolin Ke committed
70
71
  }
  // sync global best info
Guolin Ke's avatar
Guolin Ke committed
72
  SyncUpGlobalBestSplit(input_buffer_.data(), input_buffer_.data(), &smaller_best_split, &larger_best_split, this->config_->max_cat_threshold);
Guolin Ke's avatar
Guolin Ke committed
73
  // update best split
74
75
76
  this->best_split_per_leaf_[this->smaller_leaf_splits_->leaf_index()] = smaller_best_split;
  if (this->larger_leaf_splits_->leaf_index() >= 0) {
    this->best_split_per_leaf_[this->larger_leaf_splits_->leaf_index()] = larger_best_split;
Guolin Ke's avatar
Guolin Ke committed
77
78
79
  }
}

80
81
82
// Explicit instantiations: the member definitions live in this .cpp file, so
// each supported base learner must be instantiated here for the linker to
// find the code.
template class FeatureParallelTreeLearner<SerialTreeLearner>;
template class FeatureParallelTreeLearner<GPUTreeLearner>;
Guolin Ke's avatar
Guolin Ke committed
83
}  // namespace LightGBM