/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

template <typename INDEX_T, typename VAL_T>
19
class MultiValSparseBin : public MultiValBin {
20
 public:
21
22
23
24
25
  explicit MultiValSparseBin(data_size_t num_data, int num_bin,
                             double estimate_element_per_row)
      : num_data_(num_data),
        num_bin_(num_bin),
        estimate_element_per_row_(estimate_element_per_row) {
26
    row_ptr_.resize(num_data_ + 1, 0);
27
    INDEX_T estimate_num_data = static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
28
    int num_threads = OMP_NUM_THREADS();
29
30
    if (num_threads > 1) {
      t_data_.resize(num_threads - 1);
31
32
33
      for (size_t i = 0; i < t_data_.size(); ++i) {
        t_data_[i].resize(estimate_num_data / num_threads);
      }
34
    }
35
36
    t_size_.resize(num_threads, 0);
    data_.resize(estimate_num_data / num_threads);
37
38
  }

39
  ~MultiValSparseBin() {}
40

41
  data_size_t num_data() const override { return num_data_; }
42

43
  int num_bin() const override { return num_bin_; }
44

45
46
47
48
  double num_element_per_row() const override {
    return estimate_element_per_row_;
  }

49
50
  const std::vector<uint32_t>& offsets() const override { return offsets_; }

51
52
53
  void PushOneRow(int tid, data_size_t idx,
                  const std::vector<uint32_t>& values) override {
    const int pre_alloc_size = 50;
54
    row_ptr_[idx + 1] = static_cast<INDEX_T>(values.size());
55
    if (tid == 0) {
56
57
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(data_.size())) {
58
59
        data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
      }
60
      for (auto val : values) {
61
        data_[t_size_[tid]++] = static_cast<VAL_T>(val);
62
63
      }
    } else {
64
65
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(t_data_[tid - 1].size())) {
66
67
68
        t_data_[tid - 1].resize(t_size_[tid] +
                                row_ptr_[idx + 1] * pre_alloc_size);
      }
69
      for (auto val : values) {
70
        t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
71
72
73
74
      }
    }
  }

75
  void MergeData(const INDEX_T* sizes) {
76
    Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
77
    for (data_size_t i = 0; i < num_data_; ++i) {
78
79
80
      row_ptr_[i + 1] += row_ptr_[i];
    }
    if (t_data_.size() > 0) {
81
      std::vector<INDEX_T> offsets(1 + t_data_.size());
82
      offsets[0] = sizes[0];
83
      for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
84
        offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
85
      }
86
      data_.resize(row_ptr_[num_data_]);
87
#pragma omp parallel for schedule(static, 1)
88
      for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
89
        std::copy_n(t_data_[tid].data(), sizes[tid + 1],
90
                    data_.data() + offsets[tid]);
91
      }
92
93
    } else {
      data_.resize(row_ptr_[num_data_]);
94
    }
95
96
97
98
99
  }

  void FinishLoad() override {
    MergeData(t_size_.data());
    t_size_.clear();
100
101
102
103
    row_ptr_.shrink_to_fit();
    data_.shrink_to_fit();
    t_data_.clear();
    t_data_.shrink_to_fit();
104
105
106
    // update estimate_element_per_row_ by all data
    estimate_element_per_row_ =
        static_cast<double>(row_ptr_[num_data_]) / num_data_;
107
108
  }

109
  bool IsSparse() override { return true; }
110

Guolin Ke's avatar
Guolin Ke committed
111
  template <bool USE_INDICES, bool USE_PREFETCH, bool ORDERED>
112
113
114
115
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* gradients,
                               const score_t* hessians, hist_t* out) const {
116
    data_size_t i = start;
Guolin Ke's avatar
Guolin Ke committed
117
118
    hist_t* grad = out;
    hist_t* hess = out + 1;
119
    const VAL_T* data_ptr = data_.data();
Guolin Ke's avatar
Guolin Ke committed
120
    if (USE_PREFETCH) {
121
122
123
124
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
125
        const auto idx = USE_INDICES ? data_indices[i] : i;
126
        const auto pf_idx =
Guolin Ke's avatar
Guolin Ke committed
127
128
129
            USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
        if (!ORDERED) {
          PREFETCH_T0(gradients + pf_idx);
130
131
132
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(row_ptr_.data() + pf_idx);
133
        PREFETCH_T0(data_ptr + row_ptr_[pf_idx]);
134
135
        const auto j_start = RowPtr(idx);
        const auto j_end = RowPtr(idx + 1);
136
137
        const score_t gradient = ORDERED ? gradients[i] : gradients[idx];
        const score_t hessian = ORDERED ? hessians[i] : hessians[idx];
138
        for (auto j = j_start; j < j_end; ++j) {
139
140
141
          const auto ti = static_cast<uint32_t>(data_ptr[j]) << 1;
          grad[ti] += gradient;
          hess[ti] += hessian;
142
143
144
145
        }
      }
    }
    for (; i < end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
146
      const auto idx = USE_INDICES ? data_indices[i] : i;
147
148
      const auto j_start = RowPtr(idx);
      const auto j_end = RowPtr(idx + 1);
149
150
      const score_t gradient = ORDERED ? gradients[i] : gradients[idx];
      const score_t hessian = ORDERED ? hessians[i] : hessians[idx];
151
      for (auto j = j_start; j < j_end; ++j) {
152
153
154
        const auto ti = static_cast<uint32_t>(data_ptr[j]) << 1;
        grad[ti] += gradient;
        hess[ti] += hessian;
155
156
157
158
      }
    }
  }

159
160
161
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
Guolin Ke's avatar
Guolin Ke committed
162
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
Nikita Titov's avatar
Nikita Titov committed
163
                                               gradients, hessians, out);
164
165
166
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
167
168
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
Guolin Ke's avatar
Guolin Ke committed
169
170
    ConstructHistogramInner<false, false, false>(
        nullptr, start, end, gradients, hessians, out);
171
172
  }

Guolin Ke's avatar
Guolin Ke committed
173
174
175
176
177
178
  void ConstructHistogramOrdered(const data_size_t* data_indices,
                                 data_size_t start, data_size_t end,
                                 const score_t* gradients,
                                 const score_t* hessians,
                                 hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
Nikita Titov's avatar
Nikita Titov committed
179
                                              gradients, hessians, out);
180
181
  }

182
  MultiValBin* CreateLike(data_size_t num_data, int num_bin, int,
183
184
                          double estimate_element_per_row,
                          const std::vector<uint32_t>& /*offsets*/) const override {
185
186
    return new MultiValSparseBin<INDEX_T, VAL_T>(num_data, num_bin,
                                                 estimate_element_per_row);
187
188
  }

189
  void ReSize(data_size_t num_data, int num_bin, int,
190
              double estimate_element_per_row, const std::vector<uint32_t>& /*offsets*/) override {
191
    num_data_ = num_data;
192
193
    num_bin_ = num_bin;
    estimate_element_per_row_ = estimate_element_per_row;
194
    INDEX_T estimate_num_data =
195
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
196
    size_t npart = 1 + t_data_.size();
197
198
    INDEX_T avg_num_data = static_cast<INDEX_T>(estimate_num_data / npart);
    if (static_cast<INDEX_T>(data_.size()) < avg_num_data) {
199
200
201
      data_.resize(avg_num_data, 0);
    }
    for (size_t i = 0; i < t_data_.size(); ++i) {
202
      if (static_cast<INDEX_T>(t_data_[i].size()) < avg_num_data) {
203
204
        t_data_[i].resize(avg_num_data, 0);
      }
205
    }
206
207
208
    if (num_data_ + 1 > static_cast<data_size_t>(row_ptr_.size())) {
      row_ptr_.resize(num_data_ + 1);
    }
209
210
  }

211
212
213
214
215
216
  template <bool SUBROW, bool SUBCOL>
  void CopyInner(const MultiValBin* full_bin, const data_size_t* used_indices,
                 data_size_t num_used_indices,
                 const std::vector<uint32_t>& lower,
                 const std::vector<uint32_t>& upper,
                 const std::vector<uint32_t>& delta) {
217
    const auto other =
218
        reinterpret_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
219
    if (SUBROW) {
Nikita Titov's avatar
Nikita Titov committed
220
      CHECK_EQ(num_data_, num_used_indices);
221
    }
Guolin Ke's avatar
Guolin Ke committed
222
223
224
225
    int n_block = 1;
    data_size_t block_size = num_data_;
    Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
                                      num_data_, 1024, &n_block, &block_size);
226
    std::vector<INDEX_T> sizes(t_data_.size() + 1, 0);
227
228
229
230
231
232
    const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
233
      INDEX_T size = 0;
234
      for (data_size_t i = start; i < end; ++i) {
235
236
237
238
        const auto j_start =
            SUBROW ? other->RowPtr(used_indices[i]) : other->RowPtr(i);
        const auto j_end =
            SUBROW ? other->RowPtr(used_indices[i] + 1) : other->RowPtr(i + 1);
239
        if (size + (j_end - j_start) > static_cast<INDEX_T>(buf.size())) {
240
241
242
          buf.resize(size + (j_end - j_start) * pre_alloc_size);
        }
        int k = 0;
243
        const auto pre_size = size;
244
        for (auto j = j_start; j < j_end; ++j) {
245
246
247
248
249
250
251
252
253
254
          const auto val = other->data_[j];
          if (SUBCOL) {
            while (val >= upper[k]) {
              ++k;
            }
            if (val >= lower[k]) {
              buf[size++] = static_cast<VAL_T>(val - delta[k]);
            }
          } else {
            buf[size++] = val;
255
256
257
258
259
260
261
          }
        }
        row_ptr_[i + 1] = size - pre_size;
      }
      sizes[tid] = size;
    }
    MergeData(sizes.data());
262
263
  }

264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
  void CopySubrow(const MultiValBin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
    CopyInner<true, false>(full_bin, used_indices, num_used_indices,
                           std::vector<uint32_t>(), std::vector<uint32_t>(),
                           std::vector<uint32_t>());
  }

  void CopySubcol(const MultiValBin* full_bin, const std::vector<int>&,
                  const std::vector<uint32_t>& lower,
                  const std::vector<uint32_t>& upper,
                  const std::vector<uint32_t>& delta) override {
    CopyInner<false, true>(full_bin, nullptr, num_data_, lower, upper, delta);
  }

  void CopySubrowAndSubcol(const MultiValBin* full_bin,
                           const data_size_t* used_indices,
                           data_size_t num_used_indices,
                           const std::vector<int>&,
                           const std::vector<uint32_t>& lower,
                           const std::vector<uint32_t>& upper,
                           const std::vector<uint32_t>& delta) override {
    CopyInner<true, true>(full_bin, used_indices, num_used_indices, lower,
                          upper, delta);
  }

289
  inline INDEX_T RowPtr(data_size_t idx) const { return row_ptr_[idx]; }
290

291
  MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;
292

293
 private:
294
295
  data_size_t num_data_;
  int num_bin_;
296
  double estimate_element_per_row_;
297
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
298
  std::vector<INDEX_T, Common::AlignmentAllocator<INDEX_T, 32>>
299
300
301
      row_ptr_;
  std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
      t_data_;
302
  std::vector<INDEX_T> t_size_;
303
  std::vector<uint32_t> offsets_;
304

305
306
  MultiValSparseBin<INDEX_T, VAL_T>(
      const MultiValSparseBin<INDEX_T, VAL_T>& other)
307
308
309
310
311
      : num_data_(other.num_data_),
        num_bin_(other.num_bin_),
        estimate_element_per_row_(other.estimate_element_per_row_),
        data_(other.data_),
        row_ptr_(other.row_ptr_) {}
312
313
};

/*! \brief Deep-copies the finalized CSR data via the private copy constructor. */
template <typename INDEX_T, typename VAL_T>
MultiValSparseBin<INDEX_T, VAL_T>* MultiValSparseBin<INDEX_T, VAL_T>::Clone() {
  return new MultiValSparseBin<INDEX_T, VAL_T>(*this);
}

}  // namespace LightGBM

#endif  // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_