multi_val_sparse_bin.hpp 11.1 KB
Newer Older
1
2
/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
Nikita Titov's avatar
Nikita Titov committed
3
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
4
5
6
7
8
9
10
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>

Nikita Titov's avatar
Nikita Titov committed
11
#include <algorithm>
12
13
14
15
16
17
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

18
template <typename INDEX_T, typename VAL_T>
19
class MultiValSparseBin : public MultiValBin {
20
 public:
21
22
23
24
25
  explicit MultiValSparseBin(data_size_t num_data, int num_bin,
                             double estimate_element_per_row)
      : num_data_(num_data),
        num_bin_(num_bin),
        estimate_element_per_row_(estimate_element_per_row) {
26
    row_ptr_.resize(num_data_ + 1, 0);
27
    INDEX_T estimate_num_data = static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
28
    int num_threads = 1;
29
30
31
#pragma omp parallel
#pragma omp master
    { num_threads = omp_get_num_threads(); }
32
33
    if (num_threads > 1) {
      t_data_.resize(num_threads - 1);
34
35
36
      for (size_t i = 0; i < t_data_.size(); ++i) {
        t_data_[i].resize(estimate_num_data / num_threads);
      }
37
    }
38
39
    t_size_.resize(num_threads, 0);
    data_.resize(estimate_num_data / num_threads);
40
41
  }

42
  ~MultiValSparseBin() {}
43

44
  data_size_t num_data() const override { return num_data_; }
45

46
  int num_bin() const override { return num_bin_; }
47

48
49
50
  void PushOneRow(int tid, data_size_t idx,
                  const std::vector<uint32_t>& values) override {
    const int pre_alloc_size = 50;
51
    row_ptr_[idx + 1] = static_cast<INDEX_T>(values.size());
52
    if (tid == 0) {
53
54
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(data_.size())) {
55
56
        data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
      }
57
      for (auto val : values) {
58
        data_[t_size_[tid]++] = static_cast<VAL_T>(val);
59
60
      }
    } else {
61
62
      if (t_size_[tid] + row_ptr_[idx + 1] >
          static_cast<INDEX_T>(t_data_[tid - 1].size())) {
63
64
65
        t_data_[tid - 1].resize(t_size_[tid] +
                                row_ptr_[idx + 1] * pre_alloc_size);
      }
66
      for (auto val : values) {
67
        t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
68
69
70
71
      }
    }
  }

72
  void MergeData(const INDEX_T* sizes) {
73
    Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
74
    for (data_size_t i = 0; i < num_data_; ++i) {
75
76
77
      row_ptr_[i + 1] += row_ptr_[i];
    }
    if (t_data_.size() > 0) {
78
      std::vector<INDEX_T> offsets(1 + t_data_.size());
79
      offsets[0] = sizes[0];
80
      for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
81
        offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
82
      }
83
      data_.resize(row_ptr_[num_data_]);
84
#pragma omp parallel for schedule(static, 1)
85
      for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
86
        std::copy_n(t_data_[tid].data(), sizes[tid + 1],
87
                    data_.data() + offsets[tid]);
88
      }
89
90
    } else {
      data_.resize(row_ptr_[num_data_]);
91
    }
92
93
94
95
96
  }

  void FinishLoad() override {
    MergeData(t_size_.data());
    t_size_.clear();
97
98
99
100
    row_ptr_.shrink_to_fit();
    data_.shrink_to_fit();
    t_data_.clear();
    t_data_.shrink_to_fit();
101
102
103
    // update estimate_element_per_row_ by all data
    estimate_element_per_row_ =
        static_cast<double>(row_ptr_[num_data_]) / num_data_;
104
105
  }

106
  bool IsSparse() override { return true; }
107
108
109
110
111
112
113

  void ReSize(data_size_t num_data) override {
    if (num_data_ != num_data) {
      num_data_ = num_data;
    }
  }

114
#define ACC_GH(hist, i, g, h)               \
115
  const auto ti = static_cast<int>(i) << 1; \
116
117
  hist[ti] += g;                            \
  hist[ti + 1] += h;
118

119
120
121
122
123
  template <bool use_indices, bool use_prefetch, bool use_hessians>
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* gradients,
                               const score_t* hessians, hist_t* out) const {
124
125
126
127
128
129
130
    data_size_t i = start;
    if (use_prefetch) {
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
        const auto idx = use_indices ? data_indices[i] : i;
131
132
        const auto pf_idx =
            use_indices ? data_indices[i + pf_offset] : i + pf_offset;
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
        PREFETCH_T0(gradients + pf_idx);
        if (use_hessians) {
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(row_ptr_.data() + pf_idx);
        PREFETCH_T0(data_.data() + row_ptr_[pf_idx]);
        const auto j_start = RowPtr(idx);
        const auto j_end = RowPtr(idx + 1);
        for (auto j = j_start; j < j_end; ++j) {
          const VAL_T bin = data_[j];
          if (use_hessians) {
            ACC_GH(out, bin, gradients[idx], hessians[idx]);
          } else {
            ACC_GH(out, bin, gradients[idx], 1.0f);
          }
        }
      }
    }
    for (; i < end; ++i) {
      const auto idx = use_indices ? data_indices[i] : i;
      const auto j_start = RowPtr(idx);
      const auto j_end = RowPtr(idx + 1);
      for (auto j = j_start; j < j_end; ++j) {
        const VAL_T bin = data_[j];
        if (use_hessians) {
          ACC_GH(out, bin, gradients[idx], hessians[idx]);
        } else {
          ACC_GH(out, bin, gradients[idx], 1.0f);
        }
      }
    }
  }
165
#undef ACC_GH
166

167
168
169
170
171
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
                                              gradients, hessians, out);
172
173
174
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
175
176
177
178
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, true>(nullptr, start, end, gradients,
                                                hessians, out);
179
180
  }

181
182
183
184
185
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
                                               gradients, nullptr, out);
186
187
188
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
189
190
191
192
                          const score_t* gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(nullptr, start, end, gradients,
                                                 nullptr, out);
193
194
  }

195
196
  void CopySubset(const Bin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
197
    auto other_bin = dynamic_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
198
    row_ptr_.resize(num_data_ + 1, 0);
199
    INDEX_T estimate_num_data =
200
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
201
    data_.clear();
202
    data_.reserve(estimate_num_data);
203
    for (data_size_t i = 0; i < num_used_indices; ++i) {
204
      for (auto j = other_bin->row_ptr_[used_indices[i]];
205
           j < other_bin->row_ptr_[used_indices[i] + 1]; ++j) {
206
207
        data_.push_back(other_bin->data_[j]);
      }
208
209
210
211
212
213
214
      row_ptr_[i + 1] = row_ptr_[i] + other_bin->row_ptr_[used_indices[i] + 1] -
                        other_bin->row_ptr_[used_indices[i]];
    }
  }

  MultiValBin* CreateLike(int num_bin, int,
                          double estimate_element_per_row) const override {
215
    return new MultiValSparseBin<INDEX_T, VAL_T>(num_data_, num_bin,
216
217
218
219
220
221
222
                                        estimate_element_per_row);
  }

  void ReSizeForSubFeature(int num_bin, int,
                           double estimate_element_per_row) override {
    num_bin_ = num_bin;
    estimate_element_per_row_ = estimate_element_per_row;
223
    INDEX_T estimate_num_data =
224
        static_cast<INDEX_T>(estimate_element_per_row_ * 1.1 * num_data_);
225
    size_t npart = 1 + t_data_.size();
226
227
    INDEX_T avg_num_data = static_cast<INDEX_T>(estimate_num_data / npart);
    if (static_cast<INDEX_T>(data_.size()) < avg_num_data) {
228
229
230
      data_.resize(avg_num_data, 0);
    }
    for (size_t i = 0; i < t_data_.size(); ++i) {
231
      if (static_cast<INDEX_T>(t_data_[i].size()) < avg_num_data) {
232
233
        t_data_[i].resize(avg_num_data, 0);
      }
234
235
236
    }
  }

237
238
239
240
241
  void CopySubFeature(const MultiValBin* full_bin, const std::vector<int>&,
                      const std::vector<uint32_t>& lower,
                      const std::vector<uint32_t>& upper,
                      const std::vector<uint32_t>& delta) override {
    const auto other =
242
        reinterpret_cast<const MultiValSparseBin<INDEX_T, VAL_T>*>(full_bin);
Guolin Ke's avatar
Guolin Ke committed
243
244
245
246
    int n_block = 1;
    data_size_t block_size = num_data_;
    Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
                                      num_data_, 1024, &n_block, &block_size);
247
    std::vector<INDEX_T> sizes(t_data_.size() + 1, 0);
248
249
250
251
252
253
    const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
254
      INDEX_T size = 0;
255
256
257
      for (data_size_t i = start; i < end; ++i) {
        const auto j_start = other->RowPtr(i);
        const auto j_end = other->RowPtr(i + 1);
258
        if (size + (j_end - j_start) > static_cast<INDEX_T>(buf.size())) {
259
260
261
          buf.resize(size + (j_end - j_start) * pre_alloc_size);
        }
        int k = 0;
262
        const auto pre_size = size;
263
264
265
266
267
268
269
270
271
272
273
274
275
276
        for (auto j = j_start; j < j_end; ++j) {
          auto val = other->data_[j];
          while (val >= upper[k]) {
            ++k;
          }
          if (val >= lower[k]) {
            buf[size++] = static_cast<VAL_T>(val - delta[k]);
          }
        }
        row_ptr_[i + 1] = size - pre_size;
      }
      sizes[tid] = size;
    }
    MergeData(sizes.data());
277
278
  }

279
  inline INDEX_T RowPtr(data_size_t idx) const { return row_ptr_[idx]; }
280

281
  MultiValSparseBin<INDEX_T, VAL_T>* Clone() override;
282

283
 private:
284
285
  data_size_t num_data_;
  int num_bin_;
286
  double estimate_element_per_row_;
287
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
288
  std::vector<INDEX_T, Common::AlignmentAllocator<INDEX_T, 32>>
289
290
291
      row_ptr_;
  std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
      t_data_;
292
  std::vector<INDEX_T> t_size_;
293

294
295
  MultiValSparseBin<INDEX_T, VAL_T>(
      const MultiValSparseBin<INDEX_T, VAL_T>& other)
296
297
298
299
300
      : num_data_(other.num_data_),
        num_bin_(other.num_bin_),
        estimate_element_per_row_(other.estimate_element_per_row_),
        data_(other.data_),
        row_ptr_(other.row_ptr_) {}
301
302
};

303
304
305
template <typename INDEX_T, typename VAL_T>
MultiValSparseBin<INDEX_T, VAL_T>* MultiValSparseBin<INDEX_T, VAL_T>::Clone() {
  return new MultiValSparseBin<INDEX_T, VAL_T>(*this);
306
307
308
}

}  // namespace LightGBM
309
#endif  // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_