multi_val_sparse_bin.hpp 11.7 KB
Newer Older
1
2
/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
Nikita Titov's avatar
Nikita Titov committed
3
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
4
5
6
7
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_

8
9
10
#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>

Nikita Titov's avatar
Nikita Titov committed
11
#include <algorithm>
12
13
14
15
16
17
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

18
template <typename INDEX_T, typename VAL_T>
19
class MultiValSparseBin : public MultiValBin {
20
 public:
21
22
23
24
25
  explicit MultiValSparseBin(data_size_t num_data, int num_bin,
                             double estimate_element_per_row)
      : num_data_(num_data),
        num_bin_(num_bin),
        estimate_element_per_row_(estimate_element_per_row) {
26
    row_ptr_.resize(num_data_ + 1, 0);
27
    INDEX_T estimate_num_data = static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
28
    int num_threads = OMP_NUM_THREADS();
29
30
    if (num_threads > 1) {
      t_data_.resize(num_threads - 1);
31
32
33
      for (size_t i = 0; i < t_data_.size(); ++i) {
        t_data_[i].resize(estimate_num_data / num_threads);
      }
34
    }
35
36
    t_size_.resize(num_threads, 0);
    data_.resize(estimate_num_data / num_threads);
37
38
  }

39
  ~MultiValSparseBin() {}
40

41
  data_size_t num_data() const override { return num_data_; }
42

43
  int num_bin() const override { return num_bin_; }
44

45
46
47
48
  double num_element_per_row() const override {
    return estimate_element_per_row_;
  }

49
50
51
  void PushOneRow(int tid, data_size_t idx,
                  const std::vector<uint32_t>& values) override {
    const int pre_alloc_size = 50;
52
    row_ptr_[idx + 1] = static_cast<INDEX_T>(values.size());
53
    if (tid == 0) {
54
55
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(data_.size())) {
56
57
        data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
      }
58
      for (auto val : values) {
59
        data_[t_size_[tid]++] = static_cast<VAL_T>(val);
60
61
      }
    } else {
62
63
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(t_data_[tid - 1].size())) {
64
65
66
        t_data_[tid - 1].resize(t_size_[tid] +
                                row_ptr_[idx + 1] * pre_alloc_size);
      }
67
      for (auto val : values) {
68
        t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
69
70
71
72
      }
    }
  }

73
  void MergeData(const INDEX_T* sizes) {
74
    Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
75
    for (data_size_t i = 0; i < num_data_; ++i) {
76
77
78
      row_ptr_[i + 1] += row_ptr_[i];
    }
    if (t_data_.size() > 0) {
79
      std::vector<INDEX_T> offsets(1 + t_data_.size());
80
      offsets[0] = sizes[0];
81
      for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
82
        offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
83
      }
84
      data_.resize(row_ptr_[num_data_]);
85
#pragma omp parallel for schedule(static, 1)
86
      for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
87
        std::copy_n(t_data_[tid].data(), sizes[tid + 1],
88
                    data_.data() + offsets[tid]);
89
      }
90
91
    } else {
      data_.resize(row_ptr_[num_data_]);
92
    }
93
94
95
96
97
  }

  void FinishLoad() override {
    MergeData(t_size_.data());
    t_size_.clear();
98
99
100
101
    row_ptr_.shrink_to_fit();
    data_.shrink_to_fit();
    t_data_.clear();
    t_data_.shrink_to_fit();
102
103
104
    // update estimate_element_per_row_ by all data
    estimate_element_per_row_ =
        static_cast<double>(row_ptr_[num_data_]) / num_data_;
105
106
  }

107
  bool IsSparse() override { return true; }
108

Guolin Ke's avatar
Guolin Ke committed
109
  template <bool USE_INDICES, bool USE_PREFETCH, bool ORDERED>
110
111
112
113
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* gradients,
                               const score_t* hessians, hist_t* out) const {
114
    data_size_t i = start;
Guolin Ke's avatar
Guolin Ke committed
115
116
117
    hist_t* grad = out;
    hist_t* hess = out + 1;
    if (USE_PREFETCH) {
118
119
120
121
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
122
        const auto idx = USE_INDICES ? data_indices[i] : i;
123
        const auto pf_idx =
Guolin Ke's avatar
Guolin Ke committed
124
125
126
            USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
        if (!ORDERED) {
          PREFETCH_T0(gradients + pf_idx);
127
128
129
130
131
132
133
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(row_ptr_.data() + pf_idx);
        PREFETCH_T0(data_.data() + row_ptr_[pf_idx]);
        const auto j_start = RowPtr(idx);
        const auto j_end = RowPtr(idx + 1);
        for (auto j = j_start; j < j_end; ++j) {
Guolin Ke's avatar
Guolin Ke committed
134
135
136
137
          const auto ti = static_cast<uint32_t>(data_[j]) << 1;
          if (ORDERED) {
            grad[ti] += gradients[i];
            hess[ti] += hessians[i];
138
          } else {
Guolin Ke's avatar
Guolin Ke committed
139
140
            grad[ti] += gradients[idx];
            hess[ti] += hessians[idx];
141
142
143
144
145
          }
        }
      }
    }
    for (; i < end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
146
      const auto idx = USE_INDICES ? data_indices[i] : i;
147
148
149
      const auto j_start = RowPtr(idx);
      const auto j_end = RowPtr(idx + 1);
      for (auto j = j_start; j < j_end; ++j) {
Guolin Ke's avatar
Guolin Ke committed
150
151
152
153
        const auto ti = static_cast<uint32_t>(data_[j]) << 1;
        if (ORDERED) {
          grad[ti] += gradients[i];
          hess[ti] += hessians[i];
154
        } else {
Guolin Ke's avatar
Guolin Ke committed
155
156
          grad[ti] += gradients[idx];
          hess[ti] += hessians[idx];
157
158
159
160
161
        }
      }
    }
  }

162
163
164
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
Guolin Ke's avatar
Guolin Ke committed
165
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
Nikita Titov's avatar
Nikita Titov committed
166
                                               gradients, hessians, out);
167
168
169
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
170
171
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
Guolin Ke's avatar
Guolin Ke committed
172
173
    ConstructHistogramInner<false, false, false>(
        nullptr, start, end, gradients, hessians, out);
174
175
  }

Guolin Ke's avatar
Guolin Ke committed
176
177
178
179
180
181
  void ConstructHistogramOrdered(const data_size_t* data_indices,
                                 data_size_t start, data_size_t end,
                                 const score_t* gradients,
                                 const score_t* hessians,
                                 hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
Nikita Titov's avatar
Nikita Titov committed
182
                                              gradients, hessians, out);
183
184
  }

185
  MultiValBin* CreateLike(data_size_t num_data, int num_bin, int,
186
                          double estimate_element_per_row) const override {
187
188
    return new MultiValSparseBin<INDEX_T, VAL_T>(num_data, num_bin,
                                                 estimate_element_per_row);
189
190
  }

191
192
193
  void ReSize(data_size_t num_data, int num_bin, int,
              double estimate_element_per_row) override {
    num_data_ = num_data;
194
195
    num_bin_ = num_bin;
    estimate_element_per_row_ = estimate_element_per_row;
196
    INDEX_T estimate_num_data =
197
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
198
    size_t npart = 1 + t_data_.size();
199
200
    INDEX_T avg_num_data = static_cast<INDEX_T>(estimate_num_data / npart);
    if (static_cast<INDEX_T>(data_.size()) < avg_num_data) {
201
202
203
      data_.resize(avg_num_data, 0);
    }
    for (size_t i = 0; i < t_data_.size(); ++i) {
204
      if (static_cast<INDEX_T>(t_data_[i].size()) < avg_num_data) {
205
206
        t_data_[i].resize(avg_num_data, 0);
      }
207
    }
208
209
210
    if (num_data_ + 1 > static_cast<data_size_t>(row_ptr_.size())) {
      row_ptr_.resize(num_data_ + 1);
    }
211
212
  }

213
214
215
216
217
218
  template <bool SUBROW, bool SUBCOL>
  void CopyInner(const MultiValBin* full_bin, const data_size_t* used_indices,
                 data_size_t num_used_indices,
                 const std::vector<uint32_t>& lower,
                 const std::vector<uint32_t>& upper,
                 const std::vector<uint32_t>& delta) {
219
    const auto other =
220
        reinterpret_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
221
    if (SUBROW) {
Nikita Titov's avatar
Nikita Titov committed
222
      CHECK_EQ(num_data_, num_used_indices);
223
    }
Guolin Ke's avatar
Guolin Ke committed
224
225
226
227
    int n_block = 1;
    data_size_t block_size = num_data_;
    Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
                                      num_data_, 1024, &n_block, &block_size);
228
    std::vector<INDEX_T> sizes(t_data_.size() + 1, 0);
229
230
231
232
233
234
    const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
235
      INDEX_T size = 0;
236
      for (data_size_t i = start; i < end; ++i) {
237
238
239
240
        const auto j_start =
            SUBROW ? other->RowPtr(used_indices[i]) : other->RowPtr(i);
        const auto j_end =
            SUBROW ? other->RowPtr(used_indices[i] + 1) : other->RowPtr(i + 1);
241
        if (size + (j_end - j_start) > static_cast<INDEX_T>(buf.size())) {
242
243
244
          buf.resize(size + (j_end - j_start) * pre_alloc_size);
        }
        int k = 0;
245
        const auto pre_size = size;
246
        for (auto j = j_start; j < j_end; ++j) {
247
248
249
250
251
252
253
254
255
256
          const auto val = other->data_[j];
          if (SUBCOL) {
            while (val >= upper[k]) {
              ++k;
            }
            if (val >= lower[k]) {
              buf[size++] = static_cast<VAL_T>(val - delta[k]);
            }
          } else {
            buf[size++] = val;
257
258
259
260
261
262
263
          }
        }
        row_ptr_[i + 1] = size - pre_size;
      }
      sizes[tid] = size;
    }
    MergeData(sizes.data());
264
265
  }

266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
  void CopySubrow(const MultiValBin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
    CopyInner<true, false>(full_bin, used_indices, num_used_indices,
                           std::vector<uint32_t>(), std::vector<uint32_t>(),
                           std::vector<uint32_t>());
  }

  void CopySubcol(const MultiValBin* full_bin, const std::vector<int>&,
                  const std::vector<uint32_t>& lower,
                  const std::vector<uint32_t>& upper,
                  const std::vector<uint32_t>& delta) override {
    CopyInner<false, true>(full_bin, nullptr, num_data_, lower, upper, delta);
  }

  void CopySubrowAndSubcol(const MultiValBin* full_bin,
                           const data_size_t* used_indices,
                           data_size_t num_used_indices,
                           const std::vector<int>&,
                           const std::vector<uint32_t>& lower,
                           const std::vector<uint32_t>& upper,
                           const std::vector<uint32_t>& delta) override {
    CopyInner<true, true>(full_bin, used_indices, num_used_indices, lower,
                          upper, delta);
  }

291
  inline INDEX_T RowPtr(data_size_t idx) const { return row_ptr_[idx]; }
292

293
  MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;
294

295
 private:
296
297
  data_size_t num_data_;
  int num_bin_;
298
  double estimate_element_per_row_;
299
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
300
  std::vector<INDEX_T, Common::AlignmentAllocator<INDEX_T, 32>>
301
302
303
      row_ptr_;
  std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
      t_data_;
304
  std::vector<INDEX_T> t_size_;
305

306
307
  MultiValSparseBin<INDEX_T, VAL_T>(
      const MultiValSparseBin<INDEX_T, VAL_T>& other)
308
309
310
311
312
      : num_data_(other.num_data_),
        num_bin_(other.num_bin_),
        estimate_element_per_row_(other.estimate_element_per_row_),
        data_(other.data_),
        row_ptr_(other.row_ptr_) {}
313
314
};

315
316
317
template <typename INDEX_T, typename VAL_T>
MultiValSparseBin<INDEX_T, VAL_T>* MultiValSparseBin<INDEX_T, VAL_T>::Clone() {
  return new MultiValSparseBin<INDEX_T, VAL_T>(*this);
318
319
320
}

}  // namespace LightGBM
321
#endif  // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_