/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

18
template <typename INDEX_T, typename VAL_T>
19
class MultiValSparseBin : public MultiValBin {
20
 public:
21
22
23
24
25
  explicit MultiValSparseBin(data_size_t num_data, int num_bin,
                             double estimate_element_per_row)
      : num_data_(num_data),
        num_bin_(num_bin),
        estimate_element_per_row_(estimate_element_per_row) {
26
    row_ptr_.resize(num_data_ + 1, 0);
27
    INDEX_T estimate_num_data = static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
28
    int num_threads = OMP_NUM_THREADS();
29
30
    if (num_threads > 1) {
      t_data_.resize(num_threads - 1);
31
32
33
      for (size_t i = 0; i < t_data_.size(); ++i) {
        t_data_[i].resize(estimate_num_data / num_threads);
      }
34
    }
35
36
    t_size_.resize(num_threads, 0);
    data_.resize(estimate_num_data / num_threads);
37
38
  }

39
  ~MultiValSparseBin() {}
40

41
  data_size_t num_data() const override { return num_data_; }
42

43
  int num_bin() const override { return num_bin_; }
44

45
46
47
48
  double num_element_per_row() const override {
    return estimate_element_per_row_;
  }

49
50
51
  void PushOneRow(int tid, data_size_t idx,
                  const std::vector<uint32_t>& values) override {
    const int pre_alloc_size = 50;
52
    row_ptr_[idx + 1] = static_cast<INDEX_T>(values.size());
53
    if (tid == 0) {
54
55
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(data_.size())) {
56
57
        data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
      }
58
      for (auto val : values) {
59
        data_[t_size_[tid]++] = static_cast<VAL_T>(val);
60
61
      }
    } else {
62
63
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(t_data_[tid - 1].size())) {
64
65
66
        t_data_[tid - 1].resize(t_size_[tid] +
                                row_ptr_[idx + 1] * pre_alloc_size);
      }
67
      for (auto val : values) {
68
        t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
69
70
71
72
      }
    }
  }

73
  void MergeData(const INDEX_T* sizes) {
74
    Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
75
    for (data_size_t i = 0; i < num_data_; ++i) {
76
77
78
      row_ptr_[i + 1] += row_ptr_[i];
    }
    if (t_data_.size() > 0) {
79
      std::vector<INDEX_T> offsets(1 + t_data_.size());
80
      offsets[0] = sizes[0];
81
      for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
82
        offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
83
      }
84
      data_.resize(row_ptr_[num_data_]);
85
#pragma omp parallel for schedule(static, 1)
86
      for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
87
        std::copy_n(t_data_[tid].data(), sizes[tid + 1],
88
                    data_.data() + offsets[tid]);
89
      }
90
91
    } else {
      data_.resize(row_ptr_[num_data_]);
92
    }
93
94
95
96
97
  }

  void FinishLoad() override {
    MergeData(t_size_.data());
    t_size_.clear();
98
99
100
101
    row_ptr_.shrink_to_fit();
    data_.shrink_to_fit();
    t_data_.clear();
    t_data_.shrink_to_fit();
102
103
104
    // update estimate_element_per_row_ by all data
    estimate_element_per_row_ =
        static_cast<double>(row_ptr_[num_data_]) / num_data_;
105
106
  }

107
  bool IsSparse() override { return true; }
108

109
#define ACC_GH(hist, i, g, h)               \
110
  const auto ti = static_cast<int>(i) << 1; \
111
112
  hist[ti] += g;                            \
  hist[ti + 1] += h;
113

114
115
116
117
118
  template <bool use_indices, bool use_prefetch, bool use_hessians>
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* gradients,
                               const score_t* hessians, hist_t* out) const {
119
120
121
122
123
124
125
    data_size_t i = start;
    if (use_prefetch) {
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
        const auto idx = use_indices ? data_indices[i] : i;
126
127
        const auto pf_idx =
            use_indices ? data_indices[i + pf_offset] : i + pf_offset;
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
        PREFETCH_T0(gradients + pf_idx);
        if (use_hessians) {
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(row_ptr_.data() + pf_idx);
        PREFETCH_T0(data_.data() + row_ptr_[pf_idx]);
        const auto j_start = RowPtr(idx);
        const auto j_end = RowPtr(idx + 1);
        for (auto j = j_start; j < j_end; ++j) {
          const VAL_T bin = data_[j];
          if (use_hessians) {
            ACC_GH(out, bin, gradients[idx], hessians[idx]);
          } else {
            ACC_GH(out, bin, gradients[idx], 1.0f);
          }
        }
      }
    }
    for (; i < end; ++i) {
      const auto idx = use_indices ? data_indices[i] : i;
      const auto j_start = RowPtr(idx);
      const auto j_end = RowPtr(idx + 1);
      for (auto j = j_start; j < j_end; ++j) {
        const VAL_T bin = data_[j];
        if (use_hessians) {
          ACC_GH(out, bin, gradients[idx], hessians[idx]);
        } else {
          ACC_GH(out, bin, gradients[idx], 1.0f);
        }
      }
    }
  }
160
#undef ACC_GH
161

162
163
164
165
166
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
                                              gradients, hessians, out);
167
168
169
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
170
171
172
173
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, true>(nullptr, start, end, gradients,
                                                hessians, out);
174
175
  }

176
177
178
179
180
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
                                               gradients, nullptr, out);
181
182
183
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
184
185
186
187
                          const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(nullptr, start, end, gradients,
                                                 nullptr, out);
188
189
  }

190
  MultiValBin* CreateLike(data_size_t num_data, int num_bin, int,
191
                          double estimate_element_per_row) const override {
192
193
    return new MultiValSparseBin<INDEX_T, VAL_T>(num_data, num_bin,
                                                 estimate_element_per_row);
194
195
  }

196
197
198
  void ReSize(data_size_t num_data, int num_bin, int,
              double estimate_element_per_row) override {
    num_data_ = num_data;
199
200
    num_bin_ = num_bin;
    estimate_element_per_row_ = estimate_element_per_row;
201
    INDEX_T estimate_num_data =
202
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
203
    size_t npart = 1 + t_data_.size();
204
205
    INDEX_T avg_num_data = static_cast<INDEX_T>(estimate_num_data / npart);
    if (static_cast<INDEX_T>(data_.size()) < avg_num_data) {
206
207
208
      data_.resize(avg_num_data, 0);
    }
    for (size_t i = 0; i < t_data_.size(); ++i) {
209
      if (static_cast<INDEX_T>(t_data_[i].size()) < avg_num_data) {
210
211
        t_data_[i].resize(avg_num_data, 0);
      }
212
    }
213
214
215
    if (num_data_ + 1 > static_cast<data_size_t>(row_ptr_.size())) {
      row_ptr_.resize(num_data_ + 1);
    }
216
217
  }

218
219
220
221
222
223
  template <bool SUBROW, bool SUBCOL>
  void CopyInner(const MultiValBin* full_bin, const data_size_t* used_indices,
                 data_size_t num_used_indices,
                 const std::vector<uint32_t>& lower,
                 const std::vector<uint32_t>& upper,
                 const std::vector<uint32_t>& delta) {
224
    const auto other =
225
        reinterpret_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
226
227
228
    if (SUBROW) {
      CHECK(num_data_ == num_used_indices);
    }
Guolin Ke's avatar
Guolin Ke committed
229
230
231
232
    int n_block = 1;
    data_size_t block_size = num_data_;
    Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
                                      num_data_, 1024, &n_block, &block_size);
233
    std::vector<INDEX_T> sizes(t_data_.size() + 1, 0);
234
235
236
237
238
239
    const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
240
      INDEX_T size = 0;
241
      for (data_size_t i = start; i < end; ++i) {
242
243
244
245
        const auto j_start =
            SUBROW ? other->RowPtr(used_indices[i]) : other->RowPtr(i);
        const auto j_end =
            SUBROW ? other->RowPtr(used_indices[i] + 1) : other->RowPtr(i + 1);
246
        if (size + (j_end - j_start) > static_cast<INDEX_T>(buf.size())) {
247
248
249
          buf.resize(size + (j_end - j_start) * pre_alloc_size);
        }
        int k = 0;
250
        const auto pre_size = size;
251
        for (auto j = j_start; j < j_end; ++j) {
252
253
254
255
256
257
258
259
260
261
          const auto val = other->data_[j];
          if (SUBCOL) {
            while (val >= upper[k]) {
              ++k;
            }
            if (val >= lower[k]) {
              buf[size++] = static_cast<VAL_T>(val - delta[k]);
            }
          } else {
            buf[size++] = val;
262
263
264
265
266
267
268
          }
        }
        row_ptr_[i + 1] = size - pre_size;
      }
      sizes[tid] = size;
    }
    MergeData(sizes.data());
269
270
  }

271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
  void CopySubrow(const MultiValBin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
    CopyInner<true, false>(full_bin, used_indices, num_used_indices,
                           std::vector<uint32_t>(), std::vector<uint32_t>(),
                           std::vector<uint32_t>());
  }

  void CopySubcol(const MultiValBin* full_bin, const std::vector<int>&,
                  const std::vector<uint32_t>& lower,
                  const std::vector<uint32_t>& upper,
                  const std::vector<uint32_t>& delta) override {
    CopyInner<false, true>(full_bin, nullptr, num_data_, lower, upper, delta);
  }

  void CopySubrowAndSubcol(const MultiValBin* full_bin,
                           const data_size_t* used_indices,
                           data_size_t num_used_indices,
                           const std::vector<int>&,
                           const std::vector<uint32_t>& lower,
                           const std::vector<uint32_t>& upper,
                           const std::vector<uint32_t>& delta) override {
    CopyInner<true, true>(full_bin, used_indices, num_used_indices, lower,
                          upper, delta);
  }

296
  inline INDEX_T RowPtr(data_size_t idx) const { return row_ptr_[idx]; }
297

298
  MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;
299

300
 private:
301
302
  data_size_t num_data_;
  int num_bin_;
303
  double estimate_element_per_row_;
304
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
305
  std::vector<INDEX_T, Common::AlignmentAllocator<INDEX_T, 32>>
306
307
308
      row_ptr_;
  std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
      t_data_;
309
  std::vector<INDEX_T> t_size_;
310

311
312
  MultiValSparseBin<INDEX_T, VAL_T>(
      const MultiValSparseBin<INDEX_T, VAL_T>& other)
313
314
315
316
317
      : num_data_(other.num_data_),
        num_bin_(other.num_bin_),
        estimate_element_per_row_(other.estimate_element_per_row_),
        data_(other.data_),
        row_ptr_(other.row_ptr_) {}
318
319
};

320
321
322
template <typename INDEX_T, typename VAL_T>
MultiValSparseBin<INDEX_T, VAL_T>* MultiValSparseBin<INDEX_T, VAL_T>::Clone() {
  return new MultiValSparseBin<INDEX_T, VAL_T>(*this);
323
324
325
}

}  // namespace LightGBM
#endif  // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_