"git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "4306b22c5f34b12004247c6a04add8f752498d48"
ordered_sparse_bin.hpp 11.8 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
10
11
12
#ifndef LIGHTGBM_IO_ORDERED_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_ORDERED_SPARSE_BIN_HPP_

#include <LightGBM/bin.h>

#include <cstring>
#include <cstdint>

#include <vector>
#include <mutex>
#include <algorithm>

13
14
#include "sparse_bin.hpp"

Guolin Ke's avatar
Guolin Ke committed
15
16
17
namespace LightGBM {

/*!
Qiwei Ye's avatar
Qiwei Ye committed
18
19
20
21
22
23
* \brief Interface for ordered bin data. efficient for construct histogram, especially for sparse bin
*        There are 2 advantages by using ordered bin.
*        1. group the data by leafs to improve the cache hit.
*        2. only store the non-zero bin, which can speed up the histogram consturction for sparse features.
*        However it brings additional cost: it need re-order the bins after every split, which will cost much for dense feature.
*        So we only using ordered bin for sparse situations.
Guolin Ke's avatar
Guolin Ke committed
24
25
*/
template <typename VAL_T>
26
class OrderedSparseBin: public OrderedBin {
Guolin Ke's avatar
Guolin Ke committed
27
28
29
30
31
public:
  /*! \brief Pair to store one bin entry */
  struct SparsePair {
    data_size_t ridx;  // data(row) index
    VAL_T bin;  // bin for this data
32
    SparsePair() : ridx(0), bin(0) {}
Guolin Ke's avatar
Guolin Ke committed
33
34
  };

35
36
  OrderedSparseBin(const SparseBin<VAL_T>* bin_data)
    :bin_data_(bin_data) {
Guolin Ke's avatar
Guolin Ke committed
37
    data_size_t cur_pos = 0;
38
    data_size_t i_delta = -1;
39
    int non_zero_cnt = 0;
40
    while (bin_data_->NextNonzero(&i_delta, &cur_pos)) {
41
      ++non_zero_cnt;
Guolin Ke's avatar
Guolin Ke committed
42
    }
43
    ordered_pair_.resize(non_zero_cnt);
Guolin Ke's avatar
Guolin Ke committed
44
    leaf_cnt_.push_back(non_zero_cnt);
Guolin Ke's avatar
Guolin Ke committed
45
46
47
48
49
50
51
52
53
54
55
56
  }

  ~OrderedSparseBin() {
  }

  void Init(const char* used_idices, int num_leaves) override {
    // initialize the leaf information
    leaf_start_ = std::vector<data_size_t>(num_leaves, 0);
    leaf_cnt_ = std::vector<data_size_t>(num_leaves, 0);
    if (used_idices == nullptr) {
      // if using all data, copy all non-zero pair
      data_size_t j = 0;
57
58
59
60
61
62
      data_size_t cur_pos = 0;
      data_size_t i_delta = -1;
      while (bin_data_->NextNonzero(&i_delta, &cur_pos)) {
        ordered_pair_[j].ridx = cur_pos;
        ordered_pair_[j].bin = bin_data_->vals_[i_delta];
        ++j;
Guolin Ke's avatar
Guolin Ke committed
63
      }
64
      leaf_cnt_[0] = static_cast<data_size_t>(j);
Guolin Ke's avatar
Guolin Ke committed
65
66
67
68
    } else {
      // if using part of data(bagging)
      data_size_t j = 0;
      data_size_t cur_pos = 0;
69
70
71
      data_size_t i_delta = -1;
      while (bin_data_->NextNonzero(&i_delta, &cur_pos)) {
        if (used_idices[cur_pos]) {
Guolin Ke's avatar
Guolin Ke committed
72
          ordered_pair_[j].ridx = cur_pos;
73
          ordered_pair_[j].bin = bin_data_->vals_[i_delta];
Guolin Ke's avatar
Guolin Ke committed
74
75
76
77
78
79
80
          ++j;
        }
      }
      leaf_cnt_[0] = j;
    }
  }

Guolin Ke's avatar
Guolin Ke committed
81
  void ConstructHistogram(int leaf, const float* gradient, const float* hessian, int num_bin,
82
                          HistogramBinEntry* out) const override {
Guolin Ke's avatar
Guolin Ke committed
83
84
85
    // get current leaf boundary
    const data_size_t start = leaf_start_[leaf];
    const data_size_t end = start + leaf_cnt_[leaf];
Guolin Ke's avatar
Guolin Ke committed
86
87
    const data_size_t group_rest = (end - start) & KNumSumupGroupMask;
    const data_size_t rest = (end - start) & 0x7;
88
    data_size_t i = start;
Guolin Ke's avatar
Guolin Ke committed
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
    for (; i < end - group_rest;) {
      std::vector<HistogramBinEntry> tmp_sumup_buf(num_bin);
      for (data_size_t j = 0; j < KNumSumupGroup; j += 8, i += 8) {
        const VAL_T bin0 = ordered_pair_[i].bin;
        const VAL_T bin1 = ordered_pair_[i + 1].bin;
        const VAL_T bin2 = ordered_pair_[i + 2].bin;
        const VAL_T bin3 = ordered_pair_[i + 3].bin;
        const VAL_T bin4 = ordered_pair_[i + 4].bin;
        const VAL_T bin5 = ordered_pair_[i + 5].bin;
        const VAL_T bin6 = ordered_pair_[i + 6].bin;
        const VAL_T bin7 = ordered_pair_[i + 7].bin;

        const auto g0 = gradient[ordered_pair_[i].ridx];
        const auto h0 = hessian[ordered_pair_[i].ridx];
        const auto g1 = gradient[ordered_pair_[i + 1].ridx];
        const auto h1 = hessian[ordered_pair_[i + 1].ridx];
        const auto g2 = gradient[ordered_pair_[i + 2].ridx];
        const auto h2 = hessian[ordered_pair_[i + 2].ridx];
        const auto g3 = gradient[ordered_pair_[i + 3].ridx];
        const auto h3 = hessian[ordered_pair_[i + 3].ridx];
        const auto g4 = gradient[ordered_pair_[i + 4].ridx];
        const auto h4 = hessian[ordered_pair_[i + 4].ridx];
        const auto g5 = gradient[ordered_pair_[i + 5].ridx];
        const auto h5 = hessian[ordered_pair_[i + 5].ridx];
        const auto g6 = gradient[ordered_pair_[i + 6].ridx];
        const auto h6 = hessian[ordered_pair_[i + 6].ridx];
        const auto g7 = gradient[ordered_pair_[i + 7].ridx];
        const auto h7 = hessian[ordered_pair_[i + 7].ridx];

        AddGradientToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
                               g0, g1, g2, g3, g4, g5, g6, g7);
        AddHessianToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
                              h0, h1, h2, h3, h4, h5, h6, h7);
        AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
      }
      for (int j = 0; j < num_bin; ++j) {
        out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
        out[j].sum_hessians += tmp_sumup_buf[j].sum_hessians;
        out[j].cnt += tmp_sumup_buf[j].cnt;
      }
    }
Guolin Ke's avatar
Guolin Ke committed
130
    // use data on current leaf to construct histogram
Guolin Ke's avatar
Guolin Ke committed
131
    for (; i < end - rest; i += 8) {
132
133
134
135
136

      const VAL_T bin0 = ordered_pair_[i].bin;
      const VAL_T bin1 = ordered_pair_[i + 1].bin;
      const VAL_T bin2 = ordered_pair_[i + 2].bin;
      const VAL_T bin3 = ordered_pair_[i + 3].bin;
Guolin Ke's avatar
Guolin Ke committed
137
138
139
140
      const VAL_T bin4 = ordered_pair_[i + 4].bin;
      const VAL_T bin5 = ordered_pair_[i + 5].bin;
      const VAL_T bin6 = ordered_pair_[i + 6].bin;
      const VAL_T bin7 = ordered_pair_[i + 7].bin;
141
142
143
144
145
146
147
148
149

      const auto g0 = gradient[ordered_pair_[i].ridx];
      const auto h0 = hessian[ordered_pair_[i].ridx];
      const auto g1 = gradient[ordered_pair_[i + 1].ridx];
      const auto h1 = hessian[ordered_pair_[i + 1].ridx];
      const auto g2 = gradient[ordered_pair_[i + 2].ridx];
      const auto h2 = hessian[ordered_pair_[i + 2].ridx];
      const auto g3 = gradient[ordered_pair_[i + 3].ridx];
      const auto h3 = hessian[ordered_pair_[i + 3].ridx];
Guolin Ke's avatar
Guolin Ke committed
150
151
152
153
154
155
156
157
      const auto g4 = gradient[ordered_pair_[i + 4].ridx];
      const auto h4 = hessian[ordered_pair_[i + 4].ridx];
      const auto g5 = gradient[ordered_pair_[i + 5].ridx];
      const auto h5 = hessian[ordered_pair_[i + 5].ridx];
      const auto g6 = gradient[ordered_pair_[i + 6].ridx];
      const auto h6 = hessian[ordered_pair_[i + 6].ridx];
      const auto g7 = gradient[ordered_pair_[i + 7].ridx];
      const auto h7 = hessian[ordered_pair_[i + 7].ridx];
158

Guolin Ke's avatar
Guolin Ke committed
159
160
161
162
163
      AddGradientToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
                             g0, g1, g2, g3, g4, g5, g6, g7);
      AddHessianToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
                            h0, h1, h2, h3, h4, h5, h6, h7);
      AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
164
165
166
167
168
169
170
171
172
173
174
175
    }

    for (; i < end; ++i) {

      const VAL_T bin0 = ordered_pair_[i].bin;

      const auto g0 = gradient[ordered_pair_[i].ridx];
      const auto h0 = hessian[ordered_pair_[i].ridx];

      out[bin0].sum_gradients += g0;
      out[bin0].sum_hessians += h0;
      ++out[bin0].cnt;
Guolin Ke's avatar
Guolin Ke committed
176
    }
177
178
  }

Guolin Ke's avatar
Guolin Ke committed
179
  void ConstructHistogram(int leaf, const float* gradient, int num_bin,
180
181
182
183
                          HistogramBinEntry* out) const override {
    // get current leaf boundary
    const data_size_t start = leaf_start_[leaf];
    const data_size_t end = start + leaf_cnt_[leaf];
Guolin Ke's avatar
Guolin Ke committed
184
185
    const data_size_t group_rest = (end - start) & KNumSumupGroupMask;
    const data_size_t rest = (end - start) & 0x7;
186
    data_size_t i = start;
Guolin Ke's avatar
Guolin Ke committed
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
    for (; i < end - group_rest;) {
      std::vector<TmpGradCntPair> tmp_sumup_buf(num_bin);
      for (data_size_t j = 0; j < KNumSumupGroup; j += 8, i += 8) {
        const VAL_T bin0 = ordered_pair_[i].bin;
        const VAL_T bin1 = ordered_pair_[i + 1].bin;
        const VAL_T bin2 = ordered_pair_[i + 2].bin;
        const VAL_T bin3 = ordered_pair_[i + 3].bin;
        const VAL_T bin4 = ordered_pair_[i + 4].bin;
        const VAL_T bin5 = ordered_pair_[i + 5].bin;
        const VAL_T bin6 = ordered_pair_[i + 6].bin;
        const VAL_T bin7 = ordered_pair_[i + 7].bin;

        const auto g0 = gradient[ordered_pair_[i].ridx];
        const auto g1 = gradient[ordered_pair_[i + 1].ridx];
        const auto g2 = gradient[ordered_pair_[i + 2].ridx];
        const auto g3 = gradient[ordered_pair_[i + 3].ridx];
        const auto g4 = gradient[ordered_pair_[i + 4].ridx];
        const auto g5 = gradient[ordered_pair_[i + 5].ridx];
        const auto g6 = gradient[ordered_pair_[i + 6].ridx];
        const auto g7 = gradient[ordered_pair_[i + 7].ridx];

        AddGradientToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
                               g0, g1, g2, g3, g4, g5, g6, g7);
        AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
      }
      for (int j = 0; j < num_bin; ++j) {
        out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
        out[j].cnt += tmp_sumup_buf[j].cnt;
      }
    }
217
    // use data on current leaf to construct histogram
Guolin Ke's avatar
Guolin Ke committed
218
    for (; i < end - rest; i += 8) {
219
220
221
222
223

      const VAL_T bin0 = ordered_pair_[i].bin;
      const VAL_T bin1 = ordered_pair_[i + 1].bin;
      const VAL_T bin2 = ordered_pair_[i + 2].bin;
      const VAL_T bin3 = ordered_pair_[i + 3].bin;
Guolin Ke's avatar
Guolin Ke committed
224
225
226
227
      const VAL_T bin4 = ordered_pair_[i + 4].bin;
      const VAL_T bin5 = ordered_pair_[i + 5].bin;
      const VAL_T bin6 = ordered_pair_[i + 6].bin;
      const VAL_T bin7 = ordered_pair_[i + 7].bin;
228
229
230
231
232

      const auto g0 = gradient[ordered_pair_[i].ridx];
      const auto g1 = gradient[ordered_pair_[i + 1].ridx];
      const auto g2 = gradient[ordered_pair_[i + 2].ridx];
      const auto g3 = gradient[ordered_pair_[i + 3].ridx];
Guolin Ke's avatar
Guolin Ke committed
233
234
235
236
      const auto g4 = gradient[ordered_pair_[i + 4].ridx];
      const auto g5 = gradient[ordered_pair_[i + 5].ridx];
      const auto g6 = gradient[ordered_pair_[i + 6].ridx];
      const auto g7 = gradient[ordered_pair_[i + 7].ridx];
237

Guolin Ke's avatar
Guolin Ke committed
238
239
240
      AddGradientToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
                             g0, g1, g2, g3, g4, g5, g6, g7);
      AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
241
    }
Guolin Ke's avatar
Guolin Ke committed
242

243
    for (; i < end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
244

245
      const VAL_T bin0 = ordered_pair_[i].bin;
Guolin Ke's avatar
Guolin Ke committed
246

247
      const auto g0 = gradient[ordered_pair_[i].ridx];
Guolin Ke's avatar
Guolin Ke committed
248

249
250
251
      out[bin0].sum_gradients += g0;
      ++out[bin0].cnt;
    }
Guolin Ke's avatar
Guolin Ke committed
252
253
  }

Guolin Ke's avatar
Guolin Ke committed
254
  void Split(int leaf, int right_leaf, const char* is_in_leaf, char mark) override {
Guolin Ke's avatar
Guolin Ke committed
255
256
257
258
259
260
261
    // get current leaf boundary
    const data_size_t l_start = leaf_start_[leaf];
    const data_size_t l_end = l_start + leaf_cnt_[leaf];
    // new left leaf end after split
    data_size_t new_left_end = l_start;

    for (data_size_t i = l_start; i < l_end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
262
      if (is_in_leaf[ordered_pair_[i].ridx] == mark) {
Guolin Ke's avatar
Guolin Ke committed
263
264
265
266
267
268
269
270
271
        std::swap(ordered_pair_[new_left_end], ordered_pair_[i]);
        ++new_left_end;
      }
    }

    leaf_start_[right_leaf] = new_left_end;
    leaf_cnt_[leaf] = new_left_end - l_start;
    leaf_cnt_[right_leaf] = l_end - new_left_end;
  }
Guolin Ke's avatar
Guolin Ke committed
272
273
274
  data_size_t NonZeroCount(int leaf) const override {
    return static_cast<data_size_t>(leaf_cnt_[leaf]);
  }
Guolin Ke's avatar
Guolin Ke committed
275
276
277
278
279
280
  /*! \brief Disable copy */
  OrderedSparseBin<VAL_T>& operator=(const OrderedSparseBin<VAL_T>&) = delete;
  /*! \brief Disable copy */
  OrderedSparseBin<VAL_T>(const OrderedSparseBin<VAL_T>&) = delete;

private:
281
  const SparseBin<VAL_T>* bin_data_;
Guolin Ke's avatar
Guolin Ke committed
282
283
284
285
286
287
288
  /*! \brief Store non-zero pair , group by leaf */
  std::vector<SparsePair> ordered_pair_;
  /*! \brief leaf_start_[i] means data in i-th leaf start from */
  std::vector<data_size_t> leaf_start_;
  /*! \brief leaf_cnt_[i] means number of data in i-th leaf */
  std::vector<data_size_t> leaf_cnt_;
};
289
290
291
292
293
294

template <typename VAL_T>
OrderedBin* SparseBin<VAL_T>::CreateOrderedBin() const {
  return new OrderedSparseBin<VAL_T>(this);
}

Guolin Ke's avatar
Guolin Ke committed
295
}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
296
#endif   // LightGBM_IO_ORDERED_SPARSE_BIN_HPP_