multi_val_sparse_bin.hpp 11.2 KB
Newer Older
1
2
/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
Nikita Titov's avatar
Nikita Titov committed
3
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
4
5
6
7
8
9
10
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>

Nikita Titov's avatar
Nikita Titov committed
11
#include <algorithm>
12
13
14
15
16
17
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

18
template <typename INDEX_T, typename VAL_T>
19
class MultiValSparseBin : public MultiValBin {
20
 public:
21
22
23
24
25
  explicit MultiValSparseBin(data_size_t num_data, int num_bin,
                             double estimate_element_per_row)
      : num_data_(num_data),
        num_bin_(num_bin),
        estimate_element_per_row_(estimate_element_per_row) {
26
    row_ptr_.resize(num_data_ + 1, 0);
27
28
29
    INDEX_T estimate_num_data =
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1) *
        static_cast<INDEX_T>(num_data_);
30
    int num_threads = 1;
31
32
33
#pragma omp parallel
#pragma omp master
    { num_threads = omp_get_num_threads(); }
34
35
    if (num_threads > 1) {
      t_data_.resize(num_threads - 1);
36
37
38
      for (size_t i = 0; i < t_data_.size(); ++i) {
        t_data_[i].resize(estimate_num_data / num_threads);
      }
39
    }
40
41
    t_size_.resize(num_threads, 0);
    data_.resize(estimate_num_data / num_threads);
42
43
  }

44
  ~MultiValSparseBin() {}
45

46
  data_size_t num_data() const override { return num_data_; }
47

48
  int num_bin() const override { return num_bin_; }
49

50
51
52
  void PushOneRow(int tid, data_size_t idx,
                  const std::vector<uint32_t>& values) override {
    const int pre_alloc_size = 50;
53
    row_ptr_[idx + 1] = static_cast<INDEX_T>(values.size());
54
    if (tid == 0) {
55
56
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(data_.size())) {
57
58
        data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
      }
59
      for (auto val : values) {
60
        data_[t_size_[tid]++] = static_cast<VAL_T>(val);
61
62
      }
    } else {
63
64
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(t_data_[tid - 1].size())) {
65
66
67
        t_data_[tid - 1].resize(t_size_[tid] +
                                row_ptr_[idx + 1] * pre_alloc_size);
      }
68
      for (auto val : values) {
69
        t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
70
71
72
73
      }
    }
  }

74
  void MergeData(const INDEX_T* sizes) {
75
    Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
76
    for (INDEX_T i = 0; i < static_cast<INDEX_T>(num_data_); ++i) {
77
78
79
      row_ptr_[i + 1] += row_ptr_[i];
    }
    if (t_data_.size() > 0) {
80
      std::vector<INDEX_T> offsets(1 + t_data_.size());
81
      offsets[0] = sizes[0];
82
      for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
83
        offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
84
      }
85
      data_.resize(row_ptr_[num_data_]);
86
#pragma omp parallel for schedule(static)
87
      for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
88
        std::copy_n(t_data_[tid].data(), sizes[tid + 1],
89
                    data_.data() + offsets[tid]);
90
      }
91
92
    } else {
      data_.resize(row_ptr_[num_data_]);
93
    }
94
95
96
97
98
  }

  void FinishLoad() override {
    MergeData(t_size_.data());
    t_size_.clear();
99
100
101
102
    row_ptr_.shrink_to_fit();
    data_.shrink_to_fit();
    t_data_.clear();
    t_data_.shrink_to_fit();
103
104
105
    // update estimate_element_per_row_ by all data
    estimate_element_per_row_ =
        static_cast<double>(row_ptr_[num_data_]) / num_data_;
106
107
  }

108
  bool IsSparse() override { return true; }
109
110
111
112
113
114
115

  void ReSize(data_size_t num_data) override {
    if (num_data_ != num_data) {
      num_data_ = num_data;
    }
  }

116
#define ACC_GH(hist, i, g, h)               \
117
  const auto ti = static_cast<int>(i) << 1; \
118
119
  hist[ti] += g;                            \
  hist[ti + 1] += h;
120

121
122
123
124
125
  template <bool use_indices, bool use_prefetch, bool use_hessians>
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* gradients,
                               const score_t* hessians, hist_t* out) const {
126
127
128
129
130
131
132
    data_size_t i = start;
    if (use_prefetch) {
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
        const auto idx = use_indices ? data_indices[i] : i;
133
134
        const auto pf_idx =
            use_indices ? data_indices[i + pf_offset] : i + pf_offset;
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
        PREFETCH_T0(gradients + pf_idx);
        if (use_hessians) {
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(row_ptr_.data() + pf_idx);
        PREFETCH_T0(data_.data() + row_ptr_[pf_idx]);
        const auto j_start = RowPtr(idx);
        const auto j_end = RowPtr(idx + 1);
        for (auto j = j_start; j < j_end; ++j) {
          const VAL_T bin = data_[j];
          if (use_hessians) {
            ACC_GH(out, bin, gradients[idx], hessians[idx]);
          } else {
            ACC_GH(out, bin, gradients[idx], 1.0f);
          }
        }
      }
    }
    for (; i < end; ++i) {
      const auto idx = use_indices ? data_indices[i] : i;
      const auto j_start = RowPtr(idx);
      const auto j_end = RowPtr(idx + 1);
      for (auto j = j_start; j < j_end; ++j) {
        const VAL_T bin = data_[j];
        if (use_hessians) {
          ACC_GH(out, bin, gradients[idx], hessians[idx]);
        } else {
          ACC_GH(out, bin, gradients[idx], 1.0f);
        }
      }
    }
  }
167
#undef ACC_GH
168

169
170
171
172
173
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
                                              gradients, hessians, out);
174
175
176
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
177
178
179
180
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, true>(nullptr, start, end, gradients,
                                                hessians, out);
181
182
  }

183
184
185
186
187
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
                                               gradients, nullptr, out);
188
189
190
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
191
192
193
194
                          const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(nullptr, start, end, gradients,
                                                 nullptr, out);
195
196
  }

197
198
  void CopySubset(const Bin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
199
    auto other_bin = dynamic_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
200
    row_ptr_.resize(num_data_ + 1, 0);
201
202
203
    INDEX_T estimate_num_data =
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1) *
        static_cast<INDEX_T>(num_data_);
204
    data_.clear();
205
    data_.reserve(estimate_num_data);
206
    for (data_size_t i = 0; i < num_used_indices; ++i) {
207
      for (auto j = other_bin->row_ptr_[used_indices[i]];
208
           j < other_bin->row_ptr_[used_indices[i] + 1]; ++j) {
209
210
        data_.push_back(other_bin->data_[j]);
      }
211
212
213
214
215
216
217
      row_ptr_[i + 1] = row_ptr_[i] + other_bin->row_ptr_[used_indices[i] + 1] -
                        other_bin->row_ptr_[used_indices[i]];
    }
  }

  MultiValBin* CreateLike(int num_bin, int,
                          double estimate_element_per_row) const override {
218
    return new MultiValSparseBin<INDEX_T, VAL_T>(num_data_, num_bin,
219
220
221
222
223
224
225
                                        estimate_element_per_row);
  }

  void ReSizeForSubFeature(int num_bin, int,
                           double estimate_element_per_row) override {
    num_bin_ = num_bin;
    estimate_element_per_row_ = estimate_element_per_row;
226
227
228
    INDEX_T estimate_num_data =
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1) *
        static_cast<INDEX_T>(num_data_);
229
    size_t npart = 1 + t_data_.size();
230
231
    INDEX_T avg_num_data = static_cast<INDEX_T>(estimate_num_data / npart);
    if (static_cast<INDEX_T>(data_.size()) < avg_num_data) {
232
233
234
      data_.resize(avg_num_data, 0);
    }
    for (size_t i = 0; i < t_data_.size(); ++i) {
235
      if (static_cast<INDEX_T>(t_data_[i].size()) < avg_num_data) {
236
237
        t_data_[i].resize(avg_num_data, 0);
      }
238
239
240
    }
  }

241
242
243
244
245
  void CopySubFeature(const MultiValBin* full_bin, const std::vector<int>&,
                      const std::vector<uint32_t>& lower,
                      const std::vector<uint32_t>& upper,
                      const std::vector<uint32_t>& delta) override {
    const auto other =
246
        reinterpret_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
Guolin Ke's avatar
Guolin Ke committed
247
248
249
250
    int n_block = 1;
    data_size_t block_size = num_data_;
    Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
                                      num_data_, 1024, &n_block, &block_size);
251
    std::vector<INDEX_T> sizes(t_data_.size() + 1, 0);
252
253
254
255
256
257
    const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
258
      INDEX_T size = 0;
259
260
261
      for (data_size_t i = start; i < end; ++i) {
        const auto j_start = other->RowPtr(i);
        const auto j_end = other->RowPtr(i + 1);
262
        if (size + (j_end - j_start) > static_cast<INDEX_T>(buf.size())) {
263
264
265
          buf.resize(size + (j_end - j_start) * pre_alloc_size);
        }
        int k = 0;
266
        const auto pre_size = size;
267
268
269
270
271
272
273
274
275
276
277
278
279
280
        for (auto j = j_start; j < j_end; ++j) {
          auto val = other->data_[j];
          while (val >= upper[k]) {
            ++k;
          }
          if (val >= lower[k]) {
            buf[size++] = static_cast<VAL_T>(val - delta[k]);
          }
        }
        row_ptr_[i + 1] = size - pre_size;
      }
      sizes[tid] = size;
    }
    MergeData(sizes.data());
281
282
  }

283
  inline INDEX_T RowPtr(data_size_t idx) const { return row_ptr_[idx]; }
284

285
  MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;
286

287
 private:
288
289
  data_size_t num_data_;
  int num_bin_;
290
  double estimate_element_per_row_;
291
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
292
  std::vector<INDEX_T, Common::AlignmentAllocator<INDEX_T, 32>>
293
294
295
      row_ptr_;
  std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
      t_data_;
296
  std::vector<INDEX_T> t_size_;
297

298
299
  MultiValSparseBin<INDEX_T, VAL_T>(
      const MultiValSparseBin<INDEX_T, VAL_T>& other)
300
301
302
303
304
      : num_data_(other.num_data_),
        num_bin_(other.num_bin_),
        estimate_element_per_row_(other.estimate_element_per_row_),
        data_(other.data_),
        row_ptr_(other.row_ptr_) {}
305
306
};

307
308
309
template <typename INDEX_T, typename VAL_T>
MultiValSparseBin<INDEX_T, VAL_T>* MultiValSparseBin<INDEX_T, VAL_T>::Clone() {
  return new MultiValSparseBin<INDEX_T, VAL_T>(*this);
310
311
312
}

}  // namespace LightGBM
313
#endif  // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_