/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/threading.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

/*!
 * \brief Multi-value bin stored in a sparse, CSR-like layout.
 *
 * data_ holds the bin values of all rows back to back; row_ptr_ holds
 * num_data_ + 1 offsets so row i occupies [row_ptr_[i], row_ptr_[i + 1]).
 * \tparam INDEX_T integer type of the row offsets (e.g. uint16_t..uint64_t)
 * \tparam VAL_T   integer type of the stored bin values
 */
template <typename INDEX_T, typename VAL_T>
class MultiValSparseBin : public MultiValBin {
 public:
  /*!
   * \param num_data number of rows
   * \param num_bin total number of bins over all features
   * \param estimate_element_per_row expected average number of stored
   *        elements per row; used (with 10% head room) to pre-size buffers
   */
  explicit MultiValSparseBin(data_size_t num_data, int num_bin,
                             double estimate_element_per_row)
      : num_data_(num_data),
        num_bin_(num_bin),
        estimate_element_per_row_(estimate_element_per_row) {
    row_ptr_.resize(num_data_ + 1, 0);
    // 1.1 factor: 10% slack over the estimate to reduce reallocations
    INDEX_T estimate_num_data = static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
    int num_threads = OMP_NUM_THREADS();
    if (num_threads > 1) {
      // Thread 0 pushes directly into data_; thread tid > 0 pushes into
      // t_data_[tid - 1]. Buffers are concatenated later in MergeData().
      t_data_.resize(num_threads - 1);
      for (size_t i = 0; i < t_data_.size(); ++i) {
        t_data_[i].resize(estimate_num_data / num_threads);
      }
    }
    // t_size_[tid] tracks how many elements thread tid has written so far.
    t_size_.resize(num_threads, 0);
    data_.resize(estimate_num_data / num_threads);
  }

  ~MultiValSparseBin() {}

  data_size_t num_data() const override { return num_data_; }

  int num_bin() const override { return num_bin_; }

  // Average stored elements per row; an estimate until FinishLoad(),
  // exact afterwards.
  double num_element_per_row() const override {
    return estimate_element_per_row_;
  }

  // Sparse bins carry no per-feature offsets, so this is always empty.
  const std::vector<uint32_t>& offsets() const override { return offsets_; }

  /*!
   * \brief Append one row's bin values from thread tid (lock-free: each
   *        thread writes only to its own buffer and t_size_ slot).
   * \param tid caller's OpenMP thread index
   * \param idx row index
   * \param values bin values of this row
   */
  void PushOneRow(int tid, data_size_t idx,
                  const std::vector<uint32_t>& values) override {
    const int pre_alloc_size = 50;
    // Store the row LENGTH here for now; MergeData() converts row_ptr_
    // into a prefix sum of offsets.
    row_ptr_[idx + 1] = static_cast<INDEX_T>(values.size());
    if (tid == 0) {
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(data_.size())) {
        // Grow by ~50 rows' worth to amortize future resizes.
        data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
      }
      for (auto val : values) {
        data_[t_size_[tid]++] = static_cast<VAL_T>(val);
      }
    } else {
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(t_data_[tid - 1].size())) {
        t_data_[tid - 1].resize(t_size_[tid] +
                                row_ptr_[idx + 1] * pre_alloc_size);
      }
      for (auto val : values) {
        t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
      }
    }
  }

  /*!
   * \brief Finalize the CSR structure: turn per-row lengths in row_ptr_ into
   *        a prefix sum, then concatenate the per-thread buffers into data_
   *        in thread order.
   * \param sizes sizes[tid] = number of elements written by thread tid
   *        (thread 0's data already lives at the front of data_)
   */
  void MergeData(const INDEX_T* sizes) {
    Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
    for (data_size_t i = 0; i < num_data_; ++i) {
      row_ptr_[i + 1] += row_ptr_[i];
    }
    if (t_data_.size() > 0) {
      // offsets[tid] = destination start in data_ for t_data_[tid].
      std::vector<INDEX_T> offsets(1 + t_data_.size());
      offsets[0] = sizes[0];
      for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
        offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
      }
      data_.resize(row_ptr_[num_data_]);
#pragma omp parallel for schedule(static, 1)
      for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
        std::copy_n(t_data_[tid].data(), sizes[tid + 1],
                    data_.data() + offsets[tid]);
      }
    } else {
      data_.resize(row_ptr_[num_data_]);
    }
  }

  // Merge per-thread buffers, release the loading scratch space, and
  // replace the per-row estimate with the exact average.
  void FinishLoad() override {
    MergeData(t_size_.data());
    t_size_.clear();
    row_ptr_.shrink_to_fit();
    data_.shrink_to_fit();
    t_data_.clear();
    t_data_.shrink_to_fit();
    // update estimate_element_per_row_ by all data
    estimate_element_per_row_ =
        static_cast<double>(row_ptr_[num_data_]) / num_data_;
  }

  bool IsSparse() override { return true; }

  /*!
   * \brief Histogram construction kernel shared by the public overloads.
   * \tparam USE_INDICES  rows are taken from data_indices instead of [start, end)
   * \tparam USE_PREFETCH issue software prefetches a fixed distance ahead
   * \tparam ORDERED      gradients/hessians are indexed by position i rather
   *                      than by row index
   * \param out interleaved (gradient, hessian) histogram, 2 doubles per bin
   */
  template <bool USE_INDICES, bool USE_PREFETCH, bool ORDERED>
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* gradients,
                               const score_t* hessians, hist_t* out) const {
    data_size_t i = start;
    hist_t* grad = out;
    hist_t* hess = out + 1;
    const VAL_T* data_ptr = data_.data();
    if (USE_PREFETCH) {
      // Prefetch distance in rows; last pf_offset rows fall through to the
      // plain tail loop below.
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
        const auto idx = USE_INDICES ? data_indices[i] : i;
        const auto pf_idx =
            USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
        if (!ORDERED) {
          // ORDERED mode reads gradients sequentially, so no prefetch needed.
          PREFETCH_T0(gradients + pf_idx);
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(row_ptr_.data() + pf_idx);
        PREFETCH_T0(data_ptr + row_ptr_[pf_idx]);
        const auto j_start = RowPtr(idx);
        const auto j_end = RowPtr(idx + 1);
        const score_t gradient = ORDERED ? gradients[i] : gradients[idx];
        const score_t hessian = ORDERED ? hessians[i] : hessians[idx];
        for (auto j = j_start; j < j_end; ++j) {
          // << 1: histogram interleaves gradient/hessian pairs per bin.
          const auto ti = static_cast<uint32_t>(data_ptr[j]) << 1;
          grad[ti] += gradient;
          hess[ti] += hessian;
        }
      }
    }
    // Tail (and non-prefetch) path: same accumulation without prefetching.
    for (; i < end; ++i) {
      const auto idx = USE_INDICES ? data_indices[i] : i;
      const auto j_start = RowPtr(idx);
      const auto j_end = RowPtr(idx + 1);
      const score_t gradient = ORDERED ? gradients[i] : gradients[idx];
      const score_t hessian = ORDERED ? hessians[i] : hessians[idx];
      for (auto j = j_start; j < j_end; ++j) {
        const auto ti = static_cast<uint32_t>(data_ptr[j]) << 1;
        grad[ti] += gradient;
        hess[ti] += hessian;
      }
    }
  }

  // Histogram over an explicit row subset, unordered gradients.
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
                                               gradients, hessians, out);
  }

  // Histogram over the contiguous row range [start, end).
  void ConstructHistogram(data_size_t start, data_size_t end,
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(
        nullptr, start, end, gradients, hessians, out);
  }

  // Histogram over a row subset with gradients/hessians already ordered to
  // match data_indices.
  void ConstructHistogramOrdered(const data_size_t* data_indices,
                                 data_size_t start, data_size_t end,
                                 const score_t* gradients,
                                 const score_t* hessians,
                                 hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
                                              gradients, hessians, out);
  }

  // Create an empty bin of the same concrete type (offsets are unused for
  // the sparse representation).
  MultiValBin* CreateLike(data_size_t num_data, int num_bin, int,
                          double estimate_element_per_row,
                          const std::vector<uint32_t>& /*offsets*/) const override {
    return new MultiValSparseBin<INDEX_T, VAL_T>(num_data, num_bin,
                                                 estimate_element_per_row);
  }

  // Re-dimension for reuse; only grows buffers, never shrinks them.
  void ReSize(data_size_t num_data, int num_bin, int,
              double estimate_element_per_row, const std::vector<uint32_t>& /*offsets*/) override {
    num_data_ = num_data;
    num_bin_ = num_bin;
    estimate_element_per_row_ = estimate_element_per_row;
    INDEX_T estimate_num_data =
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
    // npart = thread-0 buffer (data_) plus one buffer per extra thread.
    size_t npart = 1 + t_data_.size();
    INDEX_T avg_num_data = static_cast<INDEX_T>(estimate_num_data / npart);
    if (static_cast<INDEX_T>(data_.size()) < avg_num_data) {
      data_.resize(avg_num_data, 0);
    }
    for (size_t i = 0; i < t_data_.size(); ++i) {
      if (static_cast<INDEX_T>(t_data_[i].size()) < avg_num_data) {
        t_data_[i].resize(avg_num_data, 0);
      }
    }
    if (num_data_ + 1 > static_cast<data_size_t>(row_ptr_.size())) {
      row_ptr_.resize(num_data_ + 1);
    }
  }

  /*!
   * \brief Copy rows and/or columns from another MultiValSparseBin in
   *        parallel blocks, then MergeData() the per-thread results.
   * \tparam SUBROW take only the rows listed in used_indices
   * \tparam SUBCOL keep only values falling in [lower[k], upper[k]) for some
   *         feature group k, re-based by delta[k]
   * \note lower/upper must be sorted by bin range for the SUBCOL scan
   *       (the `while (val >= upper[k]) ++k;` walk relies on it).
   */
  template <bool SUBROW, bool SUBCOL>
  void CopyInner(const MultiValBin* full_bin, const data_size_t* used_indices,
                 data_size_t num_used_indices,
                 const std::vector<uint32_t>& lower,
                 const std::vector<uint32_t>& upper,
                 const std::vector<uint32_t>& delta) {
    const auto other =
        reinterpret_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
    if (SUBROW) {
      CHECK_EQ(num_data_, num_used_indices);
    }
    int n_block = 1;
    data_size_t block_size = num_data_;
    Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
                                      num_data_, 1024, &n_block, &block_size);
    std::vector<INDEX_T> sizes(t_data_.size() + 1, 0);
    const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      // Block 0 writes into data_ directly; others into their scratch buffer.
      auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
      INDEX_T size = 0;
      for (data_size_t i = start; i < end; ++i) {
        const auto j_start =
            SUBROW ? other->RowPtr(used_indices[i]) : other->RowPtr(i);
        const auto j_end =
            SUBROW ? other->RowPtr(used_indices[i] + 1) : other->RowPtr(i + 1);
        if (size + (j_end - j_start) > static_cast<INDEX_T>(buf.size())) {
          buf.resize(size + (j_end - j_start) * pre_alloc_size);
        }
        int k = 0;
        const auto pre_size = size;
        for (auto j = j_start; j < j_end; ++j) {
          const auto val = other->data_[j];
          if (SUBCOL) {
            // Advance to the feature-group range containing val, then keep
            // it (shifted by delta[k]) only if it is inside [lower[k], upper[k]).
            while (val >= upper[k]) {
              ++k;
            }
            if (val >= lower[k]) {
              buf[size++] = static_cast<VAL_T>(val - delta[k]);
            }
          } else {
            buf[size++] = val;
          }
        }
        // Row length for now; MergeData() turns row_ptr_ into a prefix sum.
        row_ptr_[i + 1] = size - pre_size;
      }
      sizes[tid] = size;
    }
    MergeData(sizes.data());
  }

  void CopySubrow(const MultiValBin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
    CopyInner<true, false>(full_bin, used_indices, num_used_indices,
                           std::vector<uint32_t>(), std::vector<uint32_t>(),
                           std::vector<uint32_t>());
  }

  void CopySubcol(const MultiValBin* full_bin, const std::vector<int>&,
                  const std::vector<uint32_t>& lower,
                  const std::vector<uint32_t>& upper,
                  const std::vector<uint32_t>& delta) override {
    CopyInner<false, true>(full_bin, nullptr, num_data_, lower, upper, delta);
  }

  void CopySubrowAndSubcol(const MultiValBin* full_bin,
                           const data_size_t* used_indices,
                           data_size_t num_used_indices,
                           const std::vector<int>&,
                           const std::vector<uint32_t>& lower,
                           const std::vector<uint32_t>& upper,
                           const std::vector<uint32_t>& delta) override {
    CopyInner<true, true>(full_bin, used_indices, num_used_indices, lower,
                          upper, delta);
  }

  // Offset of row idx's first element in data_ (valid for idx in [0, num_data_]).
  inline INDEX_T RowPtr(data_size_t idx) const { return row_ptr_[idx]; }

  MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;

  #ifdef USE_CUDA_EXP
  const void* GetRowWiseData(uint8_t* bit_type,
    size_t* total_size,
    bool* is_sparse,
    const void** out_data_ptr,
    uint8_t* data_ptr_bit_type) const override;
  #endif  // USE_CUDA_EXP

 private:
  data_size_t num_data_;
  int num_bin_;
  double estimate_element_per_row_;
  // Concatenated bin values of all rows (32-byte aligned for SIMD access).
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
  // Row offsets into data_; size num_data_ + 1 after FinishLoad().
  std::vector<INDEX_T, Common::AlignmentAllocator<INDEX_T, 32>>
      row_ptr_;
  // Per-thread scratch buffers used only during loading/copying.
  std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
      t_data_;
  // Per-thread element counts during loading.
  std::vector<INDEX_T> t_size_;
  // Always empty for the sparse representation (see offsets()).
  std::vector<uint32_t> offsets_;

  // Copy only the finalized state (data_ and row_ptr_); the loading scratch
  // members are intentionally not copied. Used by Clone().
  MultiValSparseBin<INDEX_T, VAL_T>(
      const MultiValSparseBin<INDEX_T, VAL_T>& other)
      : num_data_(other.num_data_),
        num_bin_(other.num_bin_),
        estimate_element_per_row_(other.estimate_element_per_row_),
        data_(other.data_),
        row_ptr_(other.row_ptr_) {}
};

324
325
326
// Deep copy via the private copy constructor (copies finalized data only).
template <typename INDEX_T, typename VAL_T>
MultiValSparseBin<INDEX_T, VAL_T>* MultiValSparseBin<INDEX_T, VAL_T>::Clone() {
  return new MultiValSparseBin<INDEX_T, VAL_T>(*this);
}

}  // namespace LightGBM

#endif  // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_