parallel_tree_learner.h 7.99 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
#ifndef LIGHTGBM_TREELEARNER_PARALLEL_TREE_LEARNER_H_
#define LIGHTGBM_TREELEARNER_PARALLEL_TREE_LEARNER_H_

#include "serial_tree_learner.h"
5
#include "gpu_tree_learner.h"
Guolin Ke's avatar
Guolin Ke committed
6
#include <LightGBM/network.h>
Guolin Ke's avatar
Guolin Ke committed
7

Guolin Ke's avatar
Guolin Ke committed
8
#include <LightGBM/utils/array_args.h>
Guolin Ke's avatar
Guolin Ke committed
9

Guolin Ke's avatar
Guolin Ke committed
10
#include <cstring>
Guolin Ke's avatar
Guolin Ke committed
11
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
12
#include <memory>
Guolin Ke's avatar
Guolin Ke committed
13
14
15
16
17

namespace LightGBM {

/*!
* \brief Feature parallel learning algorithm.
*        Different machines find the best split on different features, then sync up the global best split.
*        It is recommended when #data is small or #feature is large.
*/
21
22
template <typename TREELEARNER_T>
class FeatureParallelTreeLearner: public TREELEARNER_T {
 public:
  explicit FeatureParallelTreeLearner(const Config* config);
  ~FeatureParallelTreeLearner();
  void Init(const Dataset* train_data, bool is_constant_hessian) override;

 protected:
  void BeforeTrain() override;
  /*! \brief Find the local best split per feature, then sync the global best across machines */
  void FindBestSplitsFromHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) override;

 private:
  /*! \brief Rank of local machine */
  int rank_;
  /*! \brief Number of machines of this parallel task */
  int num_machines_;
  /*! \brief Buffer for network send */
  std::vector<char> input_buffer_;
  /*! \brief Buffer for network receive */
  std::vector<char> output_buffer_;
};

/*!
* \brief Data parallel learning algorithm.
*        Workers use local data to construct histograms locally, then sync up global histograms.
*        It is recommended when #data is large or #feature is small.
*/
48
49
template <typename TREELEARNER_T>
class DataParallelTreeLearner: public TREELEARNER_T {
 public:
  explicit DataParallelTreeLearner(const Config* config);
  ~DataParallelTreeLearner();
  void Init(const Dataset* train_data, bool is_constant_hessian) override;
  void ResetConfig(const Config* config) override;

 protected:
  void BeforeTrain() override;
  /*! \brief Construct local histograms, then reduce-scatter them so each machine aggregates its assigned features */
  void FindBestSplits() override;
  void FindBestSplitsFromHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) override;
  void Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) override;

  /*! \brief Return the global (all-machine) data count of a leaf; 0 for an invalid (negative) leaf index */
  inline data_size_t GetGlobalDataCountInLeaf(int leaf_idx) const override {
    if (leaf_idx >= 0) {
      return global_data_count_in_leaf_[leaf_idx];
    } else {
      return 0;
    }
  }

 private:
  /*! \brief Rank of local machine */
  int rank_;
  /*! \brief Number of machines of this parallel task */
  int num_machines_;
  /*! \brief Buffer for network send */
  std::vector<char> input_buffer_;
  /*! \brief Buffer for network receive */
  std::vector<char> output_buffer_;
  /*! \brief Different machines will aggregate histograms for different features;
       use this to mark locally aggregated features */
  std::vector<bool> is_feature_aggregated_;
  /*! \brief Block start index for reduce scatter */
  std::vector<comm_size_t> block_start_;
  /*! \brief Block size for reduce scatter */
  std::vector<comm_size_t> block_len_;
  /*! \brief Write positions for feature histograms */
  std::vector<comm_size_t> buffer_write_start_pos_;
  /*! \brief Read positions for local feature histograms */
  std::vector<comm_size_t> buffer_read_start_pos_;
  /*! \brief Size for reduce scatter */
  comm_size_t reduce_scatter_size_;
  /*! \brief Store global number of data in leaves */
  std::vector<data_size_t> global_data_count_in_leaf_;
};

Guolin Ke's avatar
Guolin Ke committed
96
97
98
99
100
101
/*!
* \brief Voting-based data parallel learning algorithm.
* Like data parallel, but it does not aggregate histograms for all features.
* Voting is used to select a subset of features, and histograms are aggregated only for those.
* When #data is large and #feature is large, this gives a better speed-up.
*/
102
103
template <typename TREELEARNER_T>
class VotingParallelTreeLearner: public TREELEARNER_T {
 public:
  explicit VotingParallelTreeLearner(const Config* config);
  ~VotingParallelTreeLearner() { }
  void Init(const Dataset* train_data, bool is_constant_hessian) override;
  void ResetConfig(const Config* config) override;

 protected:
  void BeforeTrain() override;
  bool BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) override;
  /*! \brief Local voting on top-k features, then aggregate histograms only for globally voted features */
  void FindBestSplits() override;
  void FindBestSplitsFromHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) override;
  void Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) override;

  /*! \brief Return the global (all-machine) data count of a leaf; 0 for an invalid (negative) leaf index */
  inline data_size_t GetGlobalDataCountInLeaf(int leaf_idx) const override {
    if (leaf_idx >= 0) {
      return global_data_count_in_leaf_[leaf_idx];
    } else {
      return 0;
    }
  }
  /*!
  * \brief Perform global voting
  * \param leaf_idx Index of leaf
  * \param splits All splits from local voting
  * \param out Result of global voting; only stores feature indices
  */
  void GlobalVoting(int leaf_idx, const std::vector<LightSplitInfo>& splits,
    std::vector<int>* out);
  /*!
  * \brief Copy local histogram to buffer
  * \param smaller_top_features Selected features for smaller leaf
  * \param larger_top_features Selected features for larger leaf
  */
  void CopyLocalHistogram(const std::vector<int>& smaller_top_features,
    const std::vector<int>& larger_top_features);

 private:
  /*! \brief Tree config used in local mode */
  Config local_config_;
  /*! \brief Voting size */
  int top_k_;
  /*! \brief Rank of local machine */
  int rank_;
  /*! \brief Number of machines */
  int num_machines_;
  /*! \brief Buffer for network send */
  std::vector<char> input_buffer_;
  /*! \brief Buffer for network receive */
  std::vector<char> output_buffer_;
  /*! \brief Different machines will aggregate histograms for different features;
  use this to mark locally aggregated features (smaller leaf) */
  std::vector<bool> smaller_is_feature_aggregated_;
  /*! \brief Different machines will aggregate histograms for different features;
  use this to mark locally aggregated features (larger leaf) */
  std::vector<bool> larger_is_feature_aggregated_;
  /*! \brief Block start index for reduce scatter */
  std::vector<comm_size_t> block_start_;
  /*! \brief Block size for reduce scatter */
  std::vector<comm_size_t> block_len_;
  /*! \brief Read positions for feature histograms at smaller leaf */
  std::vector<comm_size_t> smaller_buffer_read_start_pos_;
  /*! \brief Read positions for feature histograms at larger leaf */
  std::vector<comm_size_t> larger_buffer_read_start_pos_;
  /*! \brief Size for reduce scatter */
  comm_size_t reduce_scatter_size_;
  /*! \brief Store global number of data in leaves */
  std::vector<data_size_t> global_data_count_in_leaf_;
  /*! \brief Store global split information for smaller leaf */
  std::unique_ptr<LeafSplits> smaller_leaf_splits_global_;
  /*! \brief Store global split information for larger leaf */
  std::unique_ptr<LeafSplits> larger_leaf_splits_global_;
  /*! \brief Store global histogram for smaller leaf */
  std::unique_ptr<FeatureHistogram[]> smaller_leaf_histogram_array_global_;
  /*! \brief Store global histogram for larger leaf */
  std::unique_ptr<FeatureHistogram[]> larger_leaf_histogram_array_global_;
  /*! \brief Backing storage for the global smaller-leaf histogram */
  std::vector<HistogramBinEntry> smaller_leaf_histogram_data_;
  /*! \brief Backing storage for the global larger-leaf histogram */
  std::vector<HistogramBinEntry> larger_leaf_histogram_data_;
  /*! \brief Per-feature metadata for the global histograms */
  std::vector<FeatureMetainfo> feature_metas_;
};
Guolin Ke's avatar
Guolin Ke committed
184

185
// To-do: reduce the communication cost by using bitset to communicate.
186
inline void SyncUpGlobalBestSplit(char* input_buffer_, char* output_buffer_, SplitInfo* smaller_best_split, SplitInfo* larger_best_split, int max_cat_threshold) {
187
  // sync global best info
188
  int size = SplitInfo::Size(max_cat_threshold);
189
190
  smaller_best_split->CopyTo(input_buffer_);
  larger_best_split->CopyTo(input_buffer_ + size);
191
  Network::Allreduce(input_buffer_, size * 2, size, output_buffer_,
Guolin Ke's avatar
Guolin Ke committed
192
193
                     [] (const char* src, char* dst, int size, comm_size_t len) {
    comm_size_t used_size = 0;
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
    LightSplitInfo p1, p2;
    while (used_size < len) {
      p1.CopyFrom(src);
      p2.CopyFrom(dst);
      if (p1 > p2) {
        std::memcpy(dst, src, size);
      }
      src += size;
      dst += size;
      used_size += size;
    }
  });
  // copy back
  smaller_best_split->CopyFrom(output_buffer_);
  larger_best_split->CopyFrom(output_buffer_ + size);
}

Guolin Ke's avatar
Guolin Ke committed
211
}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
212
#endif   // LIGHTGBM_TREELEARNER_PARALLEL_TREE_LEARNER_H_