multi_val_sparse_bin.hpp 11.1 KB
Newer Older
1
2
/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
Nikita Titov's avatar
Nikita Titov committed
3
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
4
5
6
7
8
9
10
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>

Nikita Titov's avatar
Nikita Titov committed
11
#include <algorithm>
12
13
14
15
16
17
18
19
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

template <typename VAL_T>
class MultiValSparseBin : public MultiValBin {
20
 public:
21
22
23
24
25
  explicit MultiValSparseBin(data_size_t num_data, int num_bin,
                             double estimate_element_per_row)
      : num_data_(num_data),
        num_bin_(num_bin),
        estimate_element_per_row_(estimate_element_per_row) {
26
    row_ptr_.resize(num_data_ + 1, 0);
27
28
    data_size_t estimate_num_data =
        static_cast<data_size_t>(num_data_ * estimate_element_per_row_ * 1.1);
29
    int num_threads = 1;
30
31
32
#pragma omp parallel
#pragma omp master
    { num_threads = omp_get_num_threads(); }
33
34
    if (num_threads > 1) {
      t_data_.resize(num_threads - 1);
35
36
37
      for (size_t i = 0; i < t_data_.size(); ++i) {
        t_data_[i].resize(estimate_num_data / num_threads);
      }
38
    }
39
40
    t_size_.resize(num_threads, 0);
    data_.resize(estimate_num_data / num_threads);
41
42
  }

43
  ~MultiValSparseBin() {}
44

45
  data_size_t num_data() const override { return num_data_; }
46

47
  int num_bin() const override { return num_bin_; }
48

49
50
51
  void PushOneRow(int tid, data_size_t idx,
                  const std::vector<uint32_t>& values) override {
    const int pre_alloc_size = 50;
52
53
    row_ptr_[idx + 1] = static_cast<data_size_t>(values.size());
    if (tid == 0) {
54
55
56
      if (t_size_[tid] + row_ptr_[idx + 1] > static_cast<data_size_t>(data_.size())) {
        data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
      }
57
      for (auto val : values) {
58
        data_[t_size_[tid]++] = static_cast<VAL_T>(val);
59
60
      }
    } else {
61
62
63
64
      if (t_size_[tid] + row_ptr_[idx + 1] > static_cast<data_size_t>(t_data_[tid - 1].size())) {
        t_data_[tid - 1].resize(t_size_[tid] +
                                row_ptr_[idx + 1] * pre_alloc_size);
      }
65
      for (auto val : values) {
66
        t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
67
68
69
70
      }
    }
  }

71
72
  void MergeData(const data_size_t* sizes) {
    Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
73
74
75
76
    for (data_size_t i = 0; i < num_data_; ++i) {
      row_ptr_[i + 1] += row_ptr_[i];
    }
    if (t_data_.size() > 0) {
77
78
      std::vector<data_size_t> offsets(1 + t_data_.size());
      offsets[0] = sizes[0];
79
      for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
80
        offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
81
      }
82
      data_.resize(row_ptr_[num_data_]);
83
#pragma omp parallel for schedule(static)
84
      for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
85
        std::copy_n(t_data_[tid].data(), sizes[tid + 1],
86
                    data_.data() + offsets[tid]);
87
      }
88
89
    } else {
      data_.resize(row_ptr_[num_data_]);
90
    }
91
92
93
94
95
  }

  void FinishLoad() override {
    MergeData(t_size_.data());
    t_size_.clear();
96
97
98
99
    row_ptr_.shrink_to_fit();
    data_.shrink_to_fit();
    t_data_.clear();
    t_data_.shrink_to_fit();
100
101
102
    // update estimate_element_per_row_ by all data
    estimate_element_per_row_ =
        static_cast<double>(row_ptr_[num_data_]) / num_data_;
103
104
  }

105
  bool IsSparse() override { return true; }
106
107
108
109
110
111
112

  void ReSize(data_size_t num_data) override {
    if (num_data_ != num_data) {
      num_data_ = num_data;
    }
  }

113
#define ACC_GH(hist, i, g, h)               \
114
  const auto ti = static_cast<int>(i) << 1; \
115
116
  hist[ti] += g;                            \
  hist[ti + 1] += h;
117

118
119
120
121
122
  template <bool use_indices, bool use_prefetch, bool use_hessians>
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* gradients,
                               const score_t* hessians, hist_t* out) const {
123
124
125
126
127
128
129
    data_size_t i = start;
    if (use_prefetch) {
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
        const auto idx = use_indices ? data_indices[i] : i;
130
131
        const auto pf_idx =
            use_indices ? data_indices[i + pf_offset] : i + pf_offset;
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
        PREFETCH_T0(gradients + pf_idx);
        if (use_hessians) {
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(row_ptr_.data() + pf_idx);
        PREFETCH_T0(data_.data() + row_ptr_[pf_idx]);
        const auto j_start = RowPtr(idx);
        const auto j_end = RowPtr(idx + 1);
        for (auto j = j_start; j < j_end; ++j) {
          const VAL_T bin = data_[j];
          if (use_hessians) {
            ACC_GH(out, bin, gradients[idx], hessians[idx]);
          } else {
            ACC_GH(out, bin, gradients[idx], 1.0f);
          }
        }
      }
    }
    for (; i < end; ++i) {
      const auto idx = use_indices ? data_indices[i] : i;
      const auto j_start = RowPtr(idx);
      const auto j_end = RowPtr(idx + 1);
      for (auto j = j_start; j < j_end; ++j) {
        const VAL_T bin = data_[j];
        if (use_hessians) {
          ACC_GH(out, bin, gradients[idx], hessians[idx]);
        } else {
          ACC_GH(out, bin, gradients[idx], 1.0f);
        }
      }
    }
  }
164
#undef ACC_GH
165

166
167
168
169
170
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
                                              gradients, hessians, out);
171
172
173
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
174
175
176
177
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, true>(nullptr, start, end, gradients,
                                                hessians, out);
178
179
  }

180
181
182
183
184
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
                                               gradients, nullptr, out);
185
186
187
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
188
189
190
191
                          const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(nullptr, start, end, gradients,
                                                 nullptr, out);
192
193
  }

194
195
  void CopySubset(const Bin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
196
197
    auto other_bin = dynamic_cast<const MultiValSparseBin<VAL_T>*>(full_bin);
    row_ptr_.resize(num_data_ + 1, 0);
198
199
    data_size_t estimate_num_data =
        static_cast<data_size_t>(num_data_ * estimate_element_per_row_ * 1.5);
200
    data_.clear();
201
    data_.reserve(estimate_num_data);
202
    for (data_size_t i = 0; i < num_used_indices; ++i) {
203
204
      for (data_size_t j = other_bin->row_ptr_[used_indices[i]];
           j < other_bin->row_ptr_[used_indices[i] + 1]; ++j) {
205
206
        data_.push_back(other_bin->data_[j]);
      }
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
      row_ptr_[i + 1] = row_ptr_[i] + other_bin->row_ptr_[used_indices[i] + 1] -
                        other_bin->row_ptr_[used_indices[i]];
    }
  }

  MultiValBin* CreateLike(int num_bin, int,
                          double estimate_element_per_row) const override {
    return new MultiValSparseBin<VAL_T>(num_data_, num_bin,
                                        estimate_element_per_row);
  }

  void ReSizeForSubFeature(int num_bin, int,
                           double estimate_element_per_row) override {
    num_bin_ = num_bin;
    estimate_element_per_row_ = estimate_element_per_row;
    data_size_t estimate_num_data =
        static_cast<data_size_t>(num_data_ * estimate_element_per_row_ * 1.1);
    size_t npart = 1 + t_data_.size();
    data_size_t avg_num_data =
        static_cast<data_size_t>(estimate_num_data / npart);
    if (static_cast<data_size_t>(data_.size()) < avg_num_data) {
      data_.resize(avg_num_data, 0);
    }
    for (size_t i = 0; i < t_data_.size(); ++i) {
      if (static_cast<data_size_t>(t_data_[i].size()) < avg_num_data) {
        t_data_[i].resize(avg_num_data, 0);
      }
234
235
236
    }
  }

237
238
239
240
241
242
  void CopySubFeature(const MultiValBin* full_bin, const std::vector<int>&,
                      const std::vector<uint32_t>& lower,
                      const std::vector<uint32_t>& upper,
                      const std::vector<uint32_t>& delta) override {
    const auto other =
        reinterpret_cast<const MultiValSparseBin<VAL_T>*>(full_bin);
Guolin Ke's avatar
Guolin Ke committed
243
244
245
246
    int n_block = 1;
    data_size_t block_size = num_data_;
    Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
                                      num_data_, 1024, &n_block, &block_size);
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
    std::vector<data_size_t> sizes(t_data_.size() + 1, 0);
    const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
      data_size_t size = 0;
      for (data_size_t i = start; i < end; ++i) {
        const auto j_start = other->RowPtr(i);
        const auto j_end = other->RowPtr(i + 1);
        if (size + (j_end - j_start) > static_cast<data_size_t>(buf.size())) {
          buf.resize(size + (j_end - j_start) * pre_alloc_size);
        }
        int k = 0;
        const data_size_t pre_size = size;
        for (auto j = j_start; j < j_end; ++j) {
          auto val = other->data_[j];
          while (val >= upper[k]) {
            ++k;
          }
          if (val >= lower[k]) {
            buf[size++] = static_cast<VAL_T>(val - delta[k]);
          }
        }
        row_ptr_[i + 1] = size - pre_size;
      }
      sizes[tid] = size;
    }
    MergeData(sizes.data());
277
278
  }

279
280
  inline data_size_t RowPtr(data_size_t idx) const { return row_ptr_[idx]; }

281
282
  MultiValSparseBin<VAL_T>* Clone() override;

283
 private:
284
285
  data_size_t num_data_;
  int num_bin_;
286
  double estimate_element_per_row_;
287
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
288
289
290
291
292
  std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, 32>>
      row_ptr_;
  std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
      t_data_;
  std::vector<data_size_t> t_size_;
293

294
295
296
297
298
299
  MultiValSparseBin<VAL_T>(const MultiValSparseBin<VAL_T>& other)
      : num_data_(other.num_data_),
        num_bin_(other.num_bin_),
        estimate_element_per_row_(other.estimate_element_per_row_),
        data_(other.data_),
        row_ptr_(other.row_ptr_) {}
300
301
};

302
template <typename VAL_T>
303
304
305
306
307
MultiValSparseBin<VAL_T>* MultiValSparseBin<VAL_T>::Clone() {
  return new MultiValSparseBin<VAL_T>(*this);
}

}  // namespace LightGBM
308
#endif  // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_