/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

template <typename INDEX_T, typename VAL_T>
class MultiValSparseBin : public MultiValBin {
 public:
  /*!
   * \brief Sparse multi-value bin stored in a CSR-like layout: data_ holds the
   *        bin values of all rows back to back, and row_ptr_[i]..row_ptr_[i+1]
   *        delimits row i inside data_ (valid once MergeData() has run).
   * \param num_data number of data rows
   * \param num_bin total number of bins
   * \param estimate_element_per_row estimated elements per row, used to
   *        pre-size the buffers (with 10% headroom — the 1.1 factor below)
   */
  explicit MultiValSparseBin(data_size_t num_data, int num_bin,
                             double estimate_element_per_row)
      : num_data_(num_data),
        num_bin_(num_bin),
        estimate_element_per_row_(estimate_element_per_row) {
    // row_ptr_[i + 1] first records row i's element count; MergeData() later
    // turns these counts into a prefix sum (row_ptr_[0] stays 0).
    row_ptr_.resize(num_data_ + 1, 0);
    INDEX_T estimate_num_data = static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
    int num_threads = OMP_NUM_THREADS();
    // Thread 0 pushes straight into data_; every other thread t gets its own
    // scratch buffer t_data_[t - 1], so loading needs no locking.
    if (num_threads > 1) {
      t_data_.resize(num_threads - 1);
      for (size_t i = 0; i < t_data_.size(); ++i) {
        t_data_[i].resize(estimate_num_data / num_threads);
      }
    }
    // t_size_[t] counts the elements thread t has written so far.
    t_size_.resize(num_threads, 0);
    data_.resize(estimate_num_data / num_threads);
  }

  ~MultiValSparseBin() {}

  data_size_t num_data() const override { return num_data_; }

  int num_bin() const override { return num_bin_; }

  /*!
   * \brief Append one row's bin values from thread `tid`.
   * Records the row length in row_ptr_[idx + 1] and appends the values to the
   * calling thread's buffer (data_ for thread 0, t_data_[tid - 1] otherwise).
   */
  void PushOneRow(int tid, data_size_t idx,
                  const std::vector<uint32_t>& values) override {
    // On overflow, grow by pre_alloc_size row-lengths to amortize resizes.
    const int pre_alloc_size = 50;
    row_ptr_[idx + 1] = static_cast<INDEX_T>(values.size());
    if (tid == 0) {
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(data_.size())) {
        data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
      }
      for (auto val : values) {
        data_[t_size_[tid]++] = static_cast<VAL_T>(val);
      }
    } else {
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(t_data_[tid - 1].size())) {
        t_data_[tid - 1].resize(t_size_[tid] +
                                row_ptr_[idx + 1] * pre_alloc_size);
      }
      for (auto val : values) {
        t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
      }
    }
  }

  /*!
   * \brief Turn the per-row counts in row_ptr_ into a prefix sum and
   *        concatenate the per-thread buffers into data_.
   * \param sizes sizes[t] is the number of valid elements written by thread t
   *        (thread 0's chunk is already in place at the front of data_).
   */
  void MergeData(const INDEX_T* sizes) {
    Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
    for (data_size_t i = 0; i < num_data_; ++i) {
      row_ptr_[i + 1] += row_ptr_[i];
    }
    if (t_data_.size() > 0) {
      // offsets[t] = position in data_ where t_data_[t]'s chunk starts.
      std::vector<INDEX_T> offsets(1 + t_data_.size());
      offsets[0] = sizes[0];
      for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
        offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
      }
      data_.resize(row_ptr_[num_data_]);
#pragma omp parallel for schedule(static, 1)
      for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
        std::copy_n(t_data_[tid].data(), sizes[tid + 1],
                    data_.data() + offsets[tid]);
      }
    } else {
      data_.resize(row_ptr_[num_data_]);
    }
  }

  // Finalize after all PushOneRow() calls: merge the thread buffers, release
  // the loading scratch space, and refresh the per-row element estimate.
  void FinishLoad() override {
    MergeData(t_size_.data());
    t_size_.clear();
    row_ptr_.shrink_to_fit();
    data_.shrink_to_fit();
    t_data_.clear();
    t_data_.shrink_to_fit();
    // update estimate_element_per_row_ by all data
    estimate_element_per_row_ =
        static_cast<double>(row_ptr_[num_data_]) / num_data_;
  }

  bool IsSparse() override { return true; }

  // Only updates the row count; buffers are re-sized by later operations.
  void ReSize(data_size_t num_data) override {
    if (num_data_ != num_data) {
      num_data_ = num_data;
    }
  }

// Accumulate gradient g and hessian h for bin i into the interleaved
// histogram: slot 2*i holds the gradient sum, slot 2*i + 1 the hessian sum.
#define ACC_GH(hist, i, g, h)               \
  const auto ti = static_cast<int>(i) << 1; \
  hist[ti] += g;                            \
  hist[ti + 1] += h;

  /*!
   * \brief Shared histogram-construction kernel.
   * \tparam use_indices iterate rows through data_indices instead of directly
   * \tparam use_prefetch software-prefetch row data a fixed distance ahead
   * \tparam use_hessians accumulate real hessians; otherwise a constant 1.0f
   */
  template <bool use_indices, bool use_prefetch, bool use_hessians>
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* gradients,
                               const score_t* hessians, hist_t* out) const {
    data_size_t i = start;
    if (use_prefetch) {
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
        const auto idx = use_indices ? data_indices[i] : i;
        // Prefetch the gradients/hessians/row data of the row pf_offset
        // iterations ahead to hide memory latency.
        const auto pf_idx =
            use_indices ? data_indices[i + pf_offset] : i + pf_offset;
        PREFETCH_T0(gradients + pf_idx);
        if (use_hessians) {
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(row_ptr_.data() + pf_idx);
        PREFETCH_T0(data_.data() + row_ptr_[pf_idx]);
        const auto j_start = RowPtr(idx);
        const auto j_end = RowPtr(idx + 1);
        for (auto j = j_start; j < j_end; ++j) {
          const VAL_T bin = data_[j];
          if (use_hessians) {
            ACC_GH(out, bin, gradients[idx], hessians[idx]);
          } else {
            ACC_GH(out, bin, gradients[idx], 1.0f);
          }
        }
      }
    }
    // Tail rows (or the whole range when use_prefetch is false).
    for (; i < end; ++i) {
      const auto idx = use_indices ? data_indices[i] : i;
      const auto j_start = RowPtr(idx);
      const auto j_end = RowPtr(idx + 1);
      for (auto j = j_start; j < j_end; ++j) {
        const VAL_T bin = data_[j];
        if (use_hessians) {
          ACC_GH(out, bin, gradients[idx], hessians[idx]);
        } else {
          ACC_GH(out, bin, gradients[idx], 1.0f);
        }
      }
    }
  }
#undef ACC_GH

  // Indexed rows, with hessians.
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
                                              gradients, hessians, out);
  }

  // Contiguous row range, with hessians.
  void ConstructHistogram(data_size_t start, data_size_t end,
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, true>(nullptr, start, end, gradients,
                                                hessians, out);
  }

  // Indexed rows, unit hessians.
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
                                               gradients, nullptr, out);
  }

  // Contiguous row range, unit hessians.
  void ConstructHistogram(data_size_t start, data_size_t end,
                          const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(nullptr, start, end, gradients,
                                                 nullptr, out);
  }

  // Rebuild this bin from the rows of full_bin selected by used_indices:
  // row i here is a copy of row used_indices[i] of full_bin.
  void CopySubset(const Bin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
    auto other_bin = dynamic_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
    row_ptr_.resize(num_data_ + 1, 0);
    INDEX_T estimate_num_data =
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
    data_.clear();
    data_.reserve(estimate_num_data);
    for (data_size_t i = 0; i < num_used_indices; ++i) {
      for (auto j = other_bin->row_ptr_[used_indices[i]];
           j < other_bin->row_ptr_[used_indices[i] + 1]; ++j) {
        data_.push_back(other_bin->data_[j]);
      }
      // Prefix sum is maintained directly here (no MergeData() pass needed).
      row_ptr_[i + 1] = row_ptr_[i] + other_bin->row_ptr_[used_indices[i] + 1] -
                        other_bin->row_ptr_[used_indices[i]];
    }
  }

  // Create an empty bin of the same concrete type for the same row count.
  MultiValBin* CreateLike(int num_bin, int,
                          double estimate_element_per_row) const override {
    return new MultiValSparseBin<INDEX_T, VAL_T>(num_data_, num_bin,
                                        estimate_element_per_row);
  }

  // Prepare the buffers for CopySubFeature(): make sure data_ and every
  // t_data_ buffer can hold an even share of the new estimated element count.
  void ReSizeForSubFeature(int num_bin, int,
                           double estimate_element_per_row) override {
    num_bin_ = num_bin;
    estimate_element_per_row_ = estimate_element_per_row;
    INDEX_T estimate_num_data =
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
    size_t npart = 1 + t_data_.size();
    INDEX_T avg_num_data = static_cast<INDEX_T>(estimate_num_data / npart);
    if (static_cast<INDEX_T>(data_.size()) < avg_num_data) {
      data_.resize(avg_num_data, 0);
    }
    for (size_t i = 0; i < t_data_.size(); ++i) {
      if (static_cast<INDEX_T>(t_data_[i].size()) < avg_num_data) {
        t_data_[i].resize(avg_num_data, 0);
      }
    }
  }

  /*!
   * \brief Copy from full_bin keeping only values that fall inside one of the
   *        [lower[k], upper[k]) ranges; each kept value is shifted down by
   *        delta[k]. The range cursor k only moves forward within a row, so
   *        this assumes row values and the range arrays are sorted ascending
   *        (NOTE(review): not checked here — relies on the caller).
   */
  void CopySubFeature(const MultiValBin* full_bin, const std::vector<int>&,
                      const std::vector<uint32_t>& lower,
                      const std::vector<uint32_t>& upper,
                      const std::vector<uint32_t>& delta) override {
    const auto other =
        reinterpret_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
    int n_block = 1;
    data_size_t block_size = num_data_;
    // Split the rows into up to t_data_.size() + 1 blocks (one per buffer);
    // 1024 is the minimum granularity passed to Threading::BlockInfo.
    Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
                                      num_data_, 1024, &n_block, &block_size);
    std::vector<INDEX_T> sizes(t_data_.size() + 1, 0);
    const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      // Block 0 filters straight into data_; other blocks fill scratch
      // buffers that MergeData() stitches together afterwards.
      auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
      INDEX_T size = 0;
      for (data_size_t i = start; i < end; ++i) {
        const auto j_start = other->RowPtr(i);
        const auto j_end = other->RowPtr(i + 1);
        if (size + (j_end - j_start) > static_cast<INDEX_T>(buf.size())) {
          buf.resize(size + (j_end - j_start) * pre_alloc_size);
        }
        int k = 0;
        const auto pre_size = size;
        for (auto j = j_start; j < j_end; ++j) {
          auto val = other->data_[j];
          while (val >= upper[k]) {
            ++k;
          }
          if (val >= lower[k]) {
            buf[size++] = static_cast<VAL_T>(val - delta[k]);
          }
        }
        // Store the kept count; MergeData() converts counts into offsets.
        row_ptr_[i + 1] = size - pre_size;
      }
      sizes[tid] = size;
    }
    MergeData(sizes.data());
  }

  // Offset into data_ where row idx starts (valid after MergeData()).
  inline INDEX_T RowPtr(data_size_t idx) const { return row_ptr_[idx]; }

  MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;

 private:
  data_size_t num_data_;
  int num_bin_;
  // Estimated average element count per row; refreshed in FinishLoad().
  double estimate_element_per_row_;
  // All rows' bin values, back to back (CSR-style), 32-byte aligned.
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
  // Row offsets into data_; row i spans [row_ptr_[i], row_ptr_[i + 1]).
  std::vector<INDEX_T, Common::AlignmentAllocator<INDEX_T, 32>>
      row_ptr_;
  // Scratch buffers for threads 1..n-1 during loading; freed by FinishLoad().
  std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
      t_data_;
  // Per-thread element counts during loading; cleared by FinishLoad().
  std::vector<INDEX_T> t_size_;

  // Private copy constructor used by Clone(); copies only the finalized
  // members — t_data_/t_size_ (loading scratch) are left empty.
  MultiValSparseBin<INDEX_T, VAL_T>(
      const MultiValSparseBin<INDEX_T, VAL_T>& other)
      : num_data_(other.num_data_),
        num_bin_(other.num_bin_),
        estimate_element_per_row_(other.estimate_element_per_row_),
        data_(other.data_),
        row_ptr_(other.row_ptr_) {}
};

// Produce a deep copy of this bin via the private copy constructor.
template <typename INDEX_T, typename VAL_T>
MultiValSparseBin<INDEX_T, VAL_T>* MultiValSparseBin<INDEX_T, VAL_T>::Clone() {
  using Self = MultiValSparseBin<INDEX_T, VAL_T>;
  Self* copy = new Self(*this);
  return copy;
}

}  // namespace LightGBM

#endif  // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_