dense_bin.hpp 17.7 KB
Newer Older
1
2
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for
 * license information.
 */
Guolin Ke's avatar
Guolin Ke committed
6
7
8
#ifndef LIGHTGBM_IO_DENSE_BIN_HPP_
#define LIGHTGBM_IO_DENSE_BIN_HPP_

9
10
#include <LightGBM/bin.h>

Guolin Ke's avatar
Guolin Ke committed
11
#include <cstdint>
12
13
#include <cstring>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
14
15
16

namespace LightGBM {

17
// Forward declaration: DenseBinIterator stores a pointer to DenseBin, which is
// defined after the iterator.
template <typename VAL_T, bool IS_4BIT>
class DenseBin;

20
21
template <typename VAL_T, bool IS_4BIT>
class DenseBinIterator : public BinIterator {
Nikita Titov's avatar
Nikita Titov committed
22
 public:
23
24
25
26
27
28
29
  explicit DenseBinIterator(const DenseBin<VAL_T, IS_4BIT>* bin_data,
                            uint32_t min_bin, uint32_t max_bin,
                            uint32_t most_freq_bin)
      : bin_data_(bin_data),
        min_bin_(static_cast<VAL_T>(min_bin)),
        max_bin_(static_cast<VAL_T>(max_bin)),
        most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) {
Guolin Ke's avatar
Guolin Ke committed
30
    if (most_freq_bin_ == 0) {
31
      offset_ = 1;
Guolin Ke's avatar
Guolin Ke committed
32
    } else {
33
      offset_ = 0;
Guolin Ke's avatar
Guolin Ke committed
34
35
    }
  }
36
  inline uint32_t RawGet(data_size_t idx) override;
Guolin Ke's avatar
Guolin Ke committed
37
  inline uint32_t Get(data_size_t idx) override;
38
  inline void Reset(data_size_t) override {}
Nikita Titov's avatar
Nikita Titov committed
39

40
 private:
41
  const DenseBin<VAL_T, IS_4BIT>* bin_data_;
Guolin Ke's avatar
Guolin Ke committed
42
43
  VAL_T min_bin_;
  VAL_T max_bin_;
Guolin Ke's avatar
Guolin Ke committed
44
  VAL_T most_freq_bin_;
45
  uint8_t offset_;
Guolin Ke's avatar
Guolin Ke committed
46
};
Guolin Ke's avatar
Guolin Ke committed
47
/*!
 * \brief Used to store bins for dense features.
 * Templated on the stored value type to reduce memory cost.
 */
template <typename VAL_T, bool IS_4BIT>
class DenseBin : public Bin {
53
 public:
54
  friend DenseBinIterator<VAL_T, IS_4BIT>;
Guolin Ke's avatar
Guolin Ke committed
55
  /*!
   * \brief Allocate zero-initialised storage for num_data rows.
   *
   * In 4-bit mode two rows share one byte, so both data_ and the staging
   * buffer buf_ get (num_data + 1) / 2 entries; VAL_T must then be one byte
   * wide (checked with CHECK_EQ). Otherwise data_ holds one VAL_T per row.
   */
  explicit DenseBin(data_size_t num_data)
      : num_data_(num_data) {
    if (IS_4BIT) {
      CHECK_EQ(sizeof(VAL_T), 1);
      data_.resize((num_data_ + 1) / 2, static_cast<uint8_t>(0));
      buf_.resize((num_data_ + 1) / 2, static_cast<uint8_t>(0));
    } else {
      data_.resize(num_data_, static_cast<VAL_T>(0));
    }
  }

66
  /*! \brief Nothing to release by hand; the vector members clean up themselves. */
  ~DenseBin() = default;
Guolin Ke's avatar
Guolin Ke committed
67
68

  void Push(int, data_size_t idx, uint32_t value) override {
69
70
71
72
73
74
75
76
77
78
79
80
    if (IS_4BIT) {
      const int i1 = idx >> 1;
      const int i2 = (idx & 1) << 2;
      const uint8_t val = static_cast<uint8_t>(value) << i2;
      if (i2 == 0) {
        data_[i1] = val;
      } else {
        buf_[i1] = val;
      }
    } else {
      data_[idx] = static_cast<VAL_T>(value);
    }
Guolin Ke's avatar
Guolin Ke committed
81
82
  }

Guolin Ke's avatar
Guolin Ke committed
83
84
85
  /*!
   * \brief Change the number of rows this bin is sized for.
   *
   * Does nothing when the size is unchanged. In 4-bit mode the packed length
   * is (num_data + 1) / 2 and newly added bytes are zero. The staging buffer
   * buf_ is resized together with data_ while it is still populated, because
   * Push() and FinishLoad() index both with the same packed index.
   * NOTE(review): if buf_ is already empty (e.g. after FinishLoad) it is left
   * empty here; confirm no caller pushes odd rows in that state.
   */
  void ReSize(data_size_t num_data) override {
    if (num_data_ != num_data) {
      num_data_ = num_data;
      if (IS_4BIT) {
        const size_t packed_size = static_cast<size_t>((num_data_ + 1) / 2);
        data_.resize(packed_size, static_cast<VAL_T>(0));
        if (!buf_.empty()) {
          buf_.resize(packed_size, static_cast<uint8_t>(0));
        }
      } else {
        data_.resize(num_data_);
      }
    }
  }

94
95
  /*!
   * \brief Create an iterator over this bin for the given stored range.
   * The definition below allocates the iterator with `new` and returns the
   * raw pointer.
   */
  BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin,
                           uint32_t most_freq_bin) const override;
Guolin Ke's avatar
Guolin Ke committed
96

97
98
99
100
101
102
  template <bool USE_INDICES, bool USE_PREFETCH, bool USE_HESSIAN>
  void ConstructHistogramInner(const data_size_t* data_indices,
                               data_size_t start, data_size_t end,
                               const score_t* ordered_gradients,
                               const score_t* ordered_hessians,
                               hist_t* out) const {
103
    data_size_t i = start;
Guolin Ke's avatar
Guolin Ke committed
104
105
106
107
    hist_t* grad = out;
    hist_t* hess = out + 1;
    hist_cnt_t* cnt = reinterpret_cast<hist_cnt_t*>(hess);
    if (USE_PREFETCH) {
108
109
110
      const data_size_t pf_offset = 64 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;
      for (; i < pf_end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
111
        const auto idx = USE_INDICES ? data_indices[i] : i;
112
113
114
115
116
117
118
119
        const auto pf_idx =
            USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
        if (IS_4BIT) {
          PREFETCH_T0(data_.data() + (pf_idx >> 1));
        } else {
          PREFETCH_T0(data_.data() + pf_idx);
        }
        const auto ti = static_cast<uint32_t>(data(idx)) << 1;
Guolin Ke's avatar
Guolin Ke committed
120
121
122
        if (USE_HESSIAN) {
          grad[ti] += ordered_gradients[i];
          hess[ti] += ordered_hessians[i];
123
        } else {
Guolin Ke's avatar
Guolin Ke committed
124
125
          grad[ti] += ordered_gradients[i];
          ++cnt[ti];
126
127
        }
      }
128
    }
129
    for (; i < end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
130
      const auto idx = USE_INDICES ? data_indices[i] : i;
131
      const auto ti = static_cast<uint32_t>(data(idx)) << 1;
Guolin Ke's avatar
Guolin Ke committed
132
133
134
      if (USE_HESSIAN) {
        grad[ti] += ordered_gradients[i];
        hess[ti] += ordered_hessians[i];
135
      } else {
Guolin Ke's avatar
Guolin Ke committed
136
137
        grad[ti] += ordered_gradients[i];
        ++cnt[ti];
138
      }
139
140
    }
  }
141

142
143
144
145
146
147
  /*!
   * \brief Histogram over the rows data_indices[start..end) with gradients
   * and hessians; uses the indexed, prefetching variant of the inner routine.
   */
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* ordered_gradients,
                          const score_t* ordered_hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(
        data_indices, start, end, ordered_gradients, ordered_hessians, out);
  }
149

150
  /*!
   * \brief Histogram over the contiguous rows [start, end) with gradients and
   * hessians; no index array and no prefetching.
   */
  void ConstructHistogram(data_size_t start, data_size_t end,
                          const score_t* ordered_gradients,
                          const score_t* ordered_hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, true>(
        nullptr, start, end, ordered_gradients, ordered_hessians, out);
  }

158
159
160
161
162
  /*!
   * \brief Histogram over the rows data_indices[start..end) with gradients
   * and per-bin counts (no hessians); indexed, prefetching variant.
   */
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* ordered_gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
                                               ordered_gradients, nullptr, out);
  }

165
  void ConstructHistogram(data_size_t start, data_size_t end,
166
167
168
169
                          const score_t* ordered_gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(
        nullptr, start, end, ordered_gradients, nullptr, out);
170
171
  }

172
173
174
175
176
177
178
179
180
181
182

  /*!
   * \brief Partition data_indices into lte_indices / gt_indices by threshold.
   *
   * Each row's stored bin is compared against th = threshold + min_bin
   * (decremented by one when most_freq_bin == 0). Rows are routed as follows:
   *  - "missing" rows (the zero bin when MISS_IS_ZERO, max_bin when
   *    MISS_IS_NA) go left if default_left is set, otherwise right;
   *  - rows outside the stored range (outside [min_bin, max_bin] when
   *    USE_MIN_BIN, or equal to 0 otherwise) stand for the most frequent bin
   *    and go left iff most_freq_bin <= threshold, unless the most frequent
   *    bin is itself the missing bin (MFB_IS_ZERO / MFB_IS_NA), in which case
   *    they follow the missing direction;
   *  - all other rows go right if bin > th, left otherwise.
   * When min_bin == max_bin only a single stored value exists and a
   * simplified loop is used.
   *
   * \return Number of indices written to lte_indices.
   */
  template <bool MISS_IS_ZERO, bool MISS_IS_NA, bool MFB_IS_ZERO,
            bool MFB_IS_NA, bool USE_MIN_BIN>
  data_size_t SplitInner(uint32_t min_bin, uint32_t max_bin,
                         uint32_t default_bin, uint32_t most_freq_bin,
                         bool default_left, uint32_t threshold,
                         const data_size_t* data_indices, data_size_t cnt,
                         data_size_t* lte_indices,
                         data_size_t* gt_indices) const {
    auto th = static_cast<VAL_T>(threshold + min_bin);
    auto t_zero_bin = static_cast<VAL_T>(min_bin + default_bin);
    if (most_freq_bin == 0) {
      --th;
      --t_zero_bin;
    }
    const auto minb = static_cast<VAL_T>(min_bin);
    const auto maxb = static_cast<VAL_T>(max_bin);
    data_size_t lte_count = 0;
    data_size_t gt_count = 0;
    // Destination for rows that stand for the most frequent bin.
    data_size_t* default_indices = gt_indices;
    data_size_t* default_count = &gt_count;
    // Destination for rows treated as missing.
    data_size_t* missing_default_indices = gt_indices;
    data_size_t* missing_default_count = &gt_count;
    if (most_freq_bin <= threshold) {
      default_indices = lte_indices;
      default_count = &lte_count;
    }
    if (MISS_IS_ZERO || MISS_IS_NA) {
      if (default_left) {
        missing_default_indices = lte_indices;
        missing_default_count = &lte_count;
      }
    }
    if (min_bin < max_bin) {
      for (data_size_t i = 0; i < cnt; ++i) {
        const data_size_t idx = data_indices[i];
        const auto bin = data(idx);
        if ((MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) ||
            (MISS_IS_NA && !MFB_IS_NA && bin == maxb)) {
          missing_default_indices[(*missing_default_count)++] = idx;
        } else if ((USE_MIN_BIN && (bin < minb || bin > maxb)) ||
                   (!USE_MIN_BIN && bin == 0)) {
          if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else {
            default_indices[(*default_count)++] = idx;
          }
        } else if (bin > th) {
          gt_indices[gt_count++] = idx;
        } else {
          lte_indices[lte_count++] = idx;
        }
      }
    } else {
      // Single stored value: every row is either maxb or "not maxb".
      data_size_t* max_bin_indices = gt_indices;
      data_size_t* max_bin_count = &gt_count;
      if (maxb <= th) {
        max_bin_indices = lte_indices;
        max_bin_count = &lte_count;
      }
      for (data_size_t i = 0; i < cnt; ++i) {
        const data_size_t idx = data_indices[i];
        const auto bin = data(idx);
        if (MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) {
          missing_default_indices[(*missing_default_count)++] = idx;
        } else if (bin != maxb) {
          if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else {
            default_indices[(*default_count)++] = idx;
          }
        } else {
          if (MISS_IS_NA && !MFB_IS_NA) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else {
            max_bin_indices[(*max_bin_count)++] = idx;
          }
        }
      }
    }
    return lte_count;
  }
Guolin Ke's avatar
Guolin Ke committed
254

255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
  /*!
   * \brief Numerical split for a bin stored at an explicit [min_bin, max_bin]
   * range. Chooses the SplitInner instantiation from the missing type and
   * from whether the most frequent bin coincides with the missing bin.
   * \return Number of indices written to lte_indices.
   */
  data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
                    uint32_t most_freq_bin, MissingType missing_type,
                    bool default_left, uint32_t threshold,
                    const data_size_t* data_indices, data_size_t cnt,
                    data_size_t* lte_indices,
                    data_size_t* gt_indices) const override {
#define ARGUMENTS                                                        \
  min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold, \
      data_indices, cnt, lte_indices, gt_indices
    // No missing values at all.
    if (missing_type == MissingType::None) {
      return SplitInner<false, false, false, false, true>(ARGUMENTS);
    }
    // Zero is the missing value.
    if (missing_type == MissingType::Zero) {
      const bool mfb_is_zero = (default_bin == most_freq_bin);
      if (mfb_is_zero) {
        return SplitInner<true, false, true, false, true>(ARGUMENTS);
      }
      return SplitInner<true, false, false, false, true>(ARGUMENTS);
    }
    // Remaining missing type: the last bin holds the missing values.
    const bool mfb_is_na =
        (max_bin == most_freq_bin + min_bin) && (most_freq_bin > 0);
    if (mfb_is_na) {
      return SplitInner<false, true, false, true, true>(ARGUMENTS);
    }
    return SplitInner<false, true, false, false, true>(ARGUMENTS);
#undef ARGUMENTS
  }

  /*!
   * \brief Numerical split for a bin without an explicit min_bin: the inner
   * routine is called with min_bin fixed to 1 and USE_MIN_BIN = false, so a
   * stored 0 denotes the most frequent bin.
   * \return Number of indices written to lte_indices.
   */
  data_size_t Split(uint32_t max_bin, uint32_t default_bin,
                    uint32_t most_freq_bin, MissingType missing_type,
                    bool default_left, uint32_t threshold,
                    const data_size_t* data_indices, data_size_t cnt,
                    data_size_t* lte_indices,
                    data_size_t* gt_indices) const override {
#define ARGUMENTS                                                  \
  1, max_bin, default_bin, most_freq_bin, default_left, threshold, \
      data_indices, cnt, lte_indices, gt_indices
    // No missing values at all.
    if (missing_type == MissingType::None) {
      return SplitInner<false, false, false, false, false>(ARGUMENTS);
    }
    // Zero is the missing value.
    if (missing_type == MissingType::Zero) {
      const bool mfb_is_zero = (default_bin == most_freq_bin);
      if (mfb_is_zero) {
        return SplitInner<true, false, true, false, false>(ARGUMENTS);
      }
      return SplitInner<true, false, false, false, false>(ARGUMENTS);
    }
    // Remaining missing type: the last bin holds the missing values.
    const bool mfb_is_na =
        (max_bin == most_freq_bin + 1) && (most_freq_bin > 0);
    if (mfb_is_na) {
      return SplitInner<false, true, false, true, false>(ARGUMENTS);
    }
    return SplitInner<false, true, false, false, false>(ARGUMENTS);
#undef ARGUMENTS
  }

  /*!
   * \brief Partition data_indices by membership of each row's bin in the
   * threshold bitset.
   *
   * Rows outside the stored range (outside [min_bin, max_bin] when
   * USE_MIN_BIN, or equal to 0 otherwise) stand for the most frequent bin and
   * go left iff most_freq_bin is in the bitset. All other rows go left iff
   * their bin, translated back from the stored value, is in the bitset.
   *
   * The translation adds 1 when most_freq_bin == 0, matching
   * DenseBinIterator::Get (value - min_bin + offset) and the `--th`
   * adjustment in SplitInner; without it the bitset lookup is off by one for
   * features whose most frequent bin is 0.
   *
   * \return Number of indices written to lte_indices.
   */
  template <bool USE_MIN_BIN>
  data_size_t SplitCategoricalInner(uint32_t min_bin, uint32_t max_bin,
                                    uint32_t most_freq_bin,
                                    const uint32_t* threshold,
                                    int num_threahold,
                                    const data_size_t* data_indices,
                                    data_size_t cnt, data_size_t* lte_indices,
                                    data_size_t* gt_indices) const {
    data_size_t lte_count = 0;
    data_size_t gt_count = 0;
    data_size_t* default_indices = gt_indices;
    data_size_t* default_count = &gt_count;
    const uint32_t offset = (most_freq_bin == 0) ? 1 : 0;
    if (Common::FindInBitset(threshold, num_threahold, most_freq_bin)) {
      default_indices = lte_indices;
      default_count = &lte_count;
    }
    for (data_size_t i = 0; i < cnt; ++i) {
      const data_size_t idx = data_indices[i];
      const uint32_t bin = data(idx);
      if (USE_MIN_BIN && (bin < min_bin || bin > max_bin)) {
        default_indices[(*default_count)++] = idx;
      } else if (!USE_MIN_BIN && bin == 0) {
        default_indices[(*default_count)++] = idx;
      } else if (Common::FindInBitset(threshold, num_threahold,
                                      bin - min_bin + offset)) {
        lte_indices[lte_count++] = idx;
      } else {
        gt_indices[gt_count++] = idx;
      }
    }
    return lte_count;
  }

342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
  /*!
   * \brief Categorical split for a bin stored at an explicit
   * [min_bin, max_bin] range; forwards to the range-checking inner routine.
   * \return Number of indices written to lte_indices.
   */
  data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
                               uint32_t most_freq_bin,
                               const uint32_t* threshold, int num_threahold,
                               const data_size_t* data_indices, data_size_t cnt,
                               data_size_t* lte_indices,
                               data_size_t* gt_indices) const override {
    const data_size_t num_left = SplitCategoricalInner<true>(
        min_bin, max_bin, most_freq_bin, threshold, num_threahold,
        data_indices, cnt, lte_indices, gt_indices);
    return num_left;
  }

  /*!
   * \brief Categorical split for a bin without an explicit min_bin; forwards
   * to the inner routine with min_bin fixed to 1 and no range check.
   * \return Number of indices written to lte_indices.
   */
  data_size_t SplitCategorical(uint32_t max_bin, uint32_t most_freq_bin,
                               const uint32_t* threshold, int num_threahold,
                               const data_size_t* data_indices, data_size_t cnt,
                               data_size_t* lte_indices,
                               data_size_t* gt_indices) const override {
    const data_size_t num_left = SplitCategoricalInner<false>(
        1, max_bin, most_freq_bin, threshold, num_threahold, data_indices,
        cnt, lte_indices, gt_indices);
    return num_left;
  }

Guolin Ke's avatar
Guolin Ke committed
363
364
  /*! \brief Row count this bin is currently sized for. */
  data_size_t num_data() const override {
    return num_data_;
  }

365
366
367
368
369
370
371
372
373
374
375
376
  /*!
   * \brief Finalise loading after all Push() calls.
   *
   * Only does work in 4-bit mode: the high nibbles staged in buf_ are OR-ed
   * into data_ and buf_ is then released. The merge is bounded by the actual
   * sizes of both vectors so that a buf_ shorter than data_ cannot be read
   * out of range. Swapping with an empty vector (rather than clear()) also
   * returns the staging buffer's memory.
   */
  void FinishLoad() override {
    if (IS_4BIT) {
      if (buf_.empty()) {
        return;
      }
      const size_t len =
          buf_.size() < data_.size() ? buf_.size() : data_.size();
      for (size_t i = 0; i < len; ++i) {
        data_[i] |= buf_[i];
      }
      std::vector<uint8_t>().swap(buf_);
    }
  }
Guolin Ke's avatar
Guolin Ke committed
377

378
379
380
  void LoadFromMemory(
      const void* memory,
      const std::vector<data_size_t>& local_used_indices) override {
Guolin Ke's avatar
Guolin Ke committed
381
    const VAL_T* mem_data = reinterpret_cast<const VAL_T*>(memory);
Guolin Ke's avatar
Guolin Ke committed
382
    if (!local_used_indices.empty()) {
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
      if (IS_4BIT) {
        const data_size_t rest = num_data_ & 1;
        for (int i = 0; i < num_data_ - rest; i += 2) {
          // get old bins
          data_size_t idx = local_used_indices[i];
          const auto bin1 = static_cast<uint8_t>(
              (mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
          idx = local_used_indices[i + 1];
          const auto bin2 = static_cast<uint8_t>(
              (mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
          // add
          const int i1 = i >> 1;
          data_[i1] = (bin1 | (bin2 << 4));
        }
        if (rest) {
          data_size_t idx = local_used_indices[num_data_ - 1];
          data_[num_data_ >> 1] =
              (mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
        }
      } else {
        for (int i = 0; i < num_data_; ++i) {
          data_[i] = mem_data[local_used_indices[i]];
        }
Guolin Ke's avatar
Guolin Ke committed
406
407
      }
    } else {
408
      for (size_t i = 0; i < data_.size(); ++i) {
Guolin Ke's avatar
Guolin Ke committed
409
410
411
412
413
        data_[i] = mem_data[i];
      }
    }
  }

414
415
416
417
418
  /*!
   * \brief Stored bin value of row idx.
   * In 4-bit mode even rows live in the low nibble and odd rows in the high
   * nibble of data_[idx / 2]; otherwise the value is data_[idx].
   */
  inline VAL_T data(data_size_t idx) const {
    if (IS_4BIT) {
      return (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
    } else {
      return data_[idx];
    }
  }

422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
  /*!
   * \brief Copy the rows used_indices[0..num_used_indices) of full_bin into
   * rows 0..num_used_indices of this bin.
   *
   * full_bin is down-cast to the same DenseBin instantiation.
   * NOTE(review): the dynamic_cast result is dereferenced without a null
   * check, so full_bin must really be a DenseBin<VAL_T, IS_4BIT>; data_ must
   * already be large enough for num_used_indices rows.
   */
  void CopySubrow(const Bin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
    auto other_bin = dynamic_cast<const DenseBin<VAL_T, IS_4BIT>*>(full_bin);
    if (IS_4BIT) {
      // Rows are re-packed two at a time; an odd trailing row is handled
      // separately below.
      const data_size_t rest = num_used_indices & 1;
      for (int i = 0; i < num_used_indices - rest; i += 2) {
        data_size_t idx = used_indices[i];
        const auto bin1 = static_cast<uint8_t>(
            (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
        idx = used_indices[i + 1];
        const auto bin2 = static_cast<uint8_t>(
            (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
        const int i1 = i >> 1;
        data_[i1] = (bin1 | (bin2 << 4));
      }
      if (rest) {
        data_size_t idx = used_indices[num_used_indices - 1];
        data_[num_used_indices >> 1] =
            (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      }
    } else {
      for (int i = 0; i < num_used_indices; ++i) {
        data_[i] = other_bin->data_[used_indices[i]];
      }
    }
  }

449
450
  /*!
   * \brief Write the raw contents of data_ (sizeof(VAL_T) * data_.size()
   * bytes) through writer. The staging buffer buf_ is not written.
   */
  void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
    writer->Write(data_.data(), sizeof(VAL_T) * data_.size());
  }

453
454
455
  /*! \brief Byte size of data_, i.e. what SaveBinaryToFile() writes. */
  size_t SizesInByte() const override {
    const size_t num_entries = data_.size();
    return sizeof(VAL_T) * num_entries;
  }

  /*!
   * \brief Create a copy of this bin. The definition below allocates it with
   * `new` via the private copy constructor and returns the raw pointer.
   */
  DenseBin<VAL_T, IS_4BIT>* Clone() override;
456

457
 private:
Guolin Ke's avatar
Guolin Ke committed
458
  data_size_t num_data_;
459
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;
460
  std::vector<uint8_t> buf_;
461

462
463
  DenseBin<VAL_T, IS_4BIT>(const DenseBin<VAL_T, IS_4BIT>& other)
      : num_data_(other.num_data_), data_(other.data_) {}
Guolin Ke's avatar
Guolin Ke committed
464
465
};

466
467
468
/*! \brief Heap-allocate a copy of this bin; the raw pointer is returned as is. */
template <typename VAL_T, bool IS_4BIT>
DenseBin<VAL_T, IS_4BIT>* DenseBin<VAL_T, IS_4BIT>::Clone() {
  return new DenseBin<VAL_T, IS_4BIT>(*this);
}

471
472
473
/*!
 * \brief Range-mapped value of row idx.
 * Stored values inside [min_bin_, max_bin_] are returned as
 * value - min_bin_ + offset_; anything else yields most_freq_bin_.
 */
template <typename VAL_T, bool IS_4BIT>
inline uint32_t DenseBinIterator<VAL_T, IS_4BIT>::Get(data_size_t idx) {
  auto ret = bin_data_->data(idx);
  if (ret >= min_bin_ && ret <= max_bin_) {
    return ret - min_bin_ + offset_;
  } else {
    return most_freq_bin_;
  }
}
480

481
482
483
/*! \brief Stored value of row idx, exactly as DenseBin::data() returns it. */
template <typename VAL_T, bool IS_4BIT>
inline uint32_t DenseBinIterator<VAL_T, IS_4BIT>::RawGet(data_size_t idx) {
  return bin_data_->data(idx);
}

486
487
488
489
490
/*!
 * \brief Heap-allocate a DenseBinIterator over this bin for the given range.
 * The raw pointer is returned as is.
 */
template <typename VAL_T, bool IS_4BIT>
BinIterator* DenseBin<VAL_T, IS_4BIT>::GetIterator(
    uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
  return new DenseBinIterator<VAL_T, IS_4BIT>(this, min_bin, max_bin,
                                              most_freq_bin);
}
Guolin Ke's avatar
Guolin Ke committed
492

Guolin Ke's avatar
Guolin Ke committed
493
}  // namespace LightGBM
494
#endif  // LIGHTGBM_IO_DENSE_BIN_HPP_