dense_nbits_bin.hpp 12.7 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2017 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
8
9
10
#ifndef LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#define LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_

#include <LightGBM/bin.h>

#include <cstdint>
11
12
#include <cstring>
#include <vector>
Guolin Ke's avatar
Guolin Ke committed
13
14
15
16
17

namespace LightGBM {

class Dense4bitsBin;

Guolin Ke's avatar
Guolin Ke committed
18
class Dense4bitsBinIterator : public BinIterator {
Nikita Titov's avatar
Nikita Titov committed
19
 public:
Guolin Ke's avatar
Guolin Ke committed
20
  explicit Dense4bitsBinIterator(const Dense4bitsBin* bin_data, uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin)
Guolin Ke's avatar
Guolin Ke committed
21
22
    : bin_data_(bin_data), min_bin_(static_cast<uint8_t>(min_bin)),
    max_bin_(static_cast<uint8_t>(max_bin)),
Guolin Ke's avatar
Guolin Ke committed
23
24
    most_freq_bin_(static_cast<uint8_t>(most_freq_bin)) {
    if (most_freq_bin_ == 0) {
25
      offset_ = 1;
Guolin Ke's avatar
Guolin Ke committed
26
    } else {
27
      offset_ = 0;
Guolin Ke's avatar
Guolin Ke committed
28
29
    }
  }
30
  inline uint32_t RawGet(data_size_t idx) override;
Guolin Ke's avatar
Guolin Ke committed
31
  inline uint32_t Get(data_size_t idx) override;
Guolin Ke's avatar
Guolin Ke committed
32
  inline void Reset(data_size_t) override {}
Nikita Titov's avatar
Nikita Titov committed
33
34

 private:
Guolin Ke's avatar
Guolin Ke committed
35
36
37
  const Dense4bitsBin* bin_data_;
  uint8_t min_bin_;
  uint8_t max_bin_;
Guolin Ke's avatar
Guolin Ke committed
38
  uint8_t most_freq_bin_;
39
  uint8_t offset_;
Guolin Ke's avatar
Guolin Ke committed
40
41
};

Guolin Ke's avatar
Guolin Ke committed
42
class Dense4bitsBin : public Bin {
Nikita Titov's avatar
Nikita Titov committed
43
 public:
Guolin Ke's avatar
Guolin Ke committed
44
  friend Dense4bitsBinIterator;
Guolin Ke's avatar
Guolin Ke committed
45
  explicit Dense4bitsBin(data_size_t num_data)
Guolin Ke's avatar
Guolin Ke committed
46
47
    : num_data_(num_data) {
    int len = (num_data_ + 1) / 2;
48
    data_ = std::vector<uint8_t>(len, static_cast<uint8_t>(0));
Guolin Ke's avatar
Guolin Ke committed
49
    buf_ = std::vector<uint8_t>(len, static_cast<uint8_t>(0));
Guolin Ke's avatar
Guolin Ke committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
  }

  ~Dense4bitsBin() {
  }

  void Push(int, data_size_t idx, uint32_t value) override {
    const int i1 = idx >> 1;
    const int i2 = (idx & 1) << 2;
    const uint8_t val = static_cast<uint8_t>(value) << i2;
    if (i2 == 0) {
      data_[i1] = val;
    } else {
      buf_[i1] = val;
    }
  }

  void ReSize(data_size_t num_data) override {
    if (num_data_ != num_data) {
      num_data_ = num_data;
Guolin Ke's avatar
Guolin Ke committed
69
      const int len = (num_data_ + 1) / 2;
Guolin Ke's avatar
Guolin Ke committed
70
71
72
73
      data_.resize(len);
    }
  }

Guolin Ke's avatar
Guolin Ke committed
74
  inline BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const override;
Guolin Ke's avatar
Guolin Ke committed
75

76
77
78
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
    const score_t* ordered_gradients, const score_t* ordered_hessians,
    HistogramBinEntry* out) const override {
79
80
81
82
83
84
85
86
87
88
89
90
    const data_size_t pf_offset = 64;
    const data_size_t pf_end = end - pf_offset - kCacheLineSize;
    data_size_t i = start;
    for (; i < pf_end; i++) {
      PREFETCH_T0(data_.data() + (data_indices[i + pf_offset] >> 1));
      const data_size_t idx = data_indices[i];
      const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      out[bin].sum_gradients += ordered_gradients[i];
      out[bin].sum_hessians += ordered_hessians[i];
      ++out[bin].cnt;
    }
    for (; i < end; i++) {
91
92
93
94
95
96
97
      const data_size_t idx = data_indices[i];
      const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      out[bin].sum_gradients += ordered_gradients[i];
      out[bin].sum_hessians += ordered_hessians[i];
      ++out[bin].cnt;
    }
  }
Guolin Ke's avatar
Guolin Ke committed
98

99
100
101
  void ConstructHistogram(data_size_t start, data_size_t end,
    const score_t* ordered_gradients, const score_t* ordered_hessians,
    HistogramBinEntry* out) const override {
102
103
104
105
106
107
108
109
110
111
112
    const data_size_t pf_offset = 64;
    const data_size_t pf_end = end - pf_offset - kCacheLineSize;
    data_size_t i = start;
    for (; i < pf_end; i++) {
      PREFETCH_T0(data_.data() + ((i + pf_offset) >> 1));
      const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
      out[bin].sum_gradients += ordered_gradients[i];
      out[bin].sum_hessians += ordered_hessians[i];
      ++out[bin].cnt;
    }
    for (; i < end; i++) {
113
114
115
116
      const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
      out[bin].sum_gradients += ordered_gradients[i];
      out[bin].sum_hessians += ordered_hessians[i];
      ++out[bin].cnt;
Guolin Ke's avatar
Guolin Ke committed
117
118
119
    }
  }

120
121
122
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
    const score_t* ordered_gradients,
    HistogramBinEntry* out) const override {
123
124
125
126
127
128
129
130
131
132
133
    const data_size_t pf_offset = 64;
    const data_size_t pf_end = end - pf_offset - kCacheLineSize;
    data_size_t i = start;
    for (; i < pf_end; i++) {
      PREFETCH_T0(data_.data() + (data_indices[i + pf_offset] >> 1));
      const data_size_t idx = data_indices[i];
      const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      out[bin].sum_gradients += ordered_gradients[i];
      ++out[bin].cnt;
    }
    for (; i < end; i++) {
134
135
136
137
138
139
      const data_size_t idx = data_indices[i];
      const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      out[bin].sum_gradients += ordered_gradients[i];
      ++out[bin].cnt;
    }
  }
Guolin Ke's avatar
Guolin Ke committed
140

141
142
143
  void ConstructHistogram(data_size_t start, data_size_t end,
    const score_t* ordered_gradients,
    HistogramBinEntry* out) const override {
144
145
146
147
148
149
150
151
152
153
    const data_size_t pf_offset = 64;
    const data_size_t pf_end = end - pf_offset - kCacheLineSize;
    data_size_t i = start;
    for (; i < pf_end; i++) {
      PREFETCH_T0(data_.data() + ((i + pf_offset) >> 1));
      const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
      out[bin].sum_gradients += ordered_gradients[i];
      ++out[bin].cnt;
    }
    for (; i < end; i++) {
154
155
156
      const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
      out[bin].sum_gradients += ordered_gradients[i];
      ++out[bin].cnt;
157
158
159
    }
  }

Guolin Ke's avatar
Guolin Ke committed
160
  data_size_t Split(
Guolin Ke's avatar
Guolin Ke committed
161
    uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin, MissingType missing_type, bool default_left,
Guolin Ke's avatar
Guolin Ke committed
162
    uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
163
    data_size_t* lte_indices, data_size_t* gt_indices) const override {
Guolin Ke's avatar
Guolin Ke committed
164
165
    if (num_data <= 0) { return 0; }
    uint8_t th = static_cast<uint8_t>(threshold + min_bin);
166
167
    const uint8_t minb = static_cast<uint8_t>(min_bin);
    const uint8_t maxb = static_cast<uint8_t>(max_bin);
Guolin Ke's avatar
Guolin Ke committed
168
    uint8_t t_default_bin = static_cast<uint8_t>(min_bin + default_bin);
Guolin Ke's avatar
Guolin Ke committed
169
170
    uint8_t t_most_freq_bin = static_cast<uint8_t>(min_bin + most_freq_bin);
    if (most_freq_bin == 0) {
Guolin Ke's avatar
Guolin Ke committed
171
      th -= 1;
Guolin Ke's avatar
Guolin Ke committed
172
      t_default_bin -= 1;
Guolin Ke's avatar
Guolin Ke committed
173
      t_most_freq_bin -= 1;
Guolin Ke's avatar
Guolin Ke committed
174
175
176
177
178
    }
    data_size_t lte_count = 0;
    data_size_t gt_count = 0;
    data_size_t* default_indices = gt_indices;
    data_size_t* default_count = &gt_count;
Guolin Ke's avatar
Guolin Ke committed
179
180
181
182
183
184
    data_size_t* missing_default_indices = gt_indices;
    data_size_t* missing_default_count = &gt_count;
    if (most_freq_bin <= threshold) {
      default_indices = lte_indices;
      default_count = &lte_count;
    }
185
186
187
188
    if (missing_type == MissingType::NaN) {
      if (default_left) {
        missing_default_indices = lte_indices;
        missing_default_count = &lte_count;
Guolin Ke's avatar
Guolin Ke committed
189
      }
190
191
192
      for (data_size_t i = 0; i < num_data; ++i) {
        const data_size_t idx = data_indices[i];
        const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
Guolin Ke's avatar
Guolin Ke committed
193
        if (bin == maxb) {
194
          missing_default_indices[(*missing_default_count)++] = idx;
Guolin Ke's avatar
Guolin Ke committed
195
196
        } else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
          default_indices[(*default_count)++] = idx;
197
198
199
200
        } else if (bin > th) {
          gt_indices[gt_count++] = idx;
        } else {
          lte_indices[lte_count++] = idx;
201
202
203
        }
      }
    } else {
Guolin Ke's avatar
Guolin Ke committed
204
205
206
207
      if ((default_left && missing_type == MissingType::Zero)
          || (default_bin <= threshold && missing_type != MissingType::Zero)) {
        missing_default_indices = lte_indices;
        missing_default_count = &lte_count;
208
      }
Guolin Ke's avatar
Guolin Ke committed
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
      if (default_bin == most_freq_bin) {
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      } else {
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (bin == t_default_bin) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
            default_indices[(*default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
234
        }
Guolin Ke's avatar
Guolin Ke committed
235
236
237
238
      }
    }
    return lte_count;
  }
239

Guolin Ke's avatar
Guolin Ke committed
240
  data_size_t SplitCategorical(
Guolin Ke's avatar
Guolin Ke committed
241
    uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin,
242
    const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data,
243
244
245
246
247
248
    data_size_t* lte_indices, data_size_t* gt_indices) const override {
    if (num_data <= 0) { return 0; }
    data_size_t lte_count = 0;
    data_size_t gt_count = 0;
    data_size_t* default_indices = gt_indices;
    data_size_t* default_count = &gt_count;
Guolin Ke's avatar
Guolin Ke committed
249
    if (Common::FindInBitset(threshold, num_threahold, most_freq_bin)) {
250
251
252
253
254
255
256
257
      default_indices = lte_indices;
      default_count = &lte_count;
    }
    for (data_size_t i = 0; i < num_data; ++i) {
      const data_size_t idx = data_indices[i];
      const uint32_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      if (bin < min_bin || bin > max_bin) {
        default_indices[(*default_count)++] = idx;
258
      } else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) {
259
260
261
262
263
264
265
266
        lte_indices[lte_count++] = idx;
      } else {
        gt_indices[gt_count++] = idx;
      }
    }
    return lte_count;
  }

Guolin Ke's avatar
Guolin Ke committed
267
268
269
270
271
272
  data_size_t num_data() const override { return num_data_; }

  /*! \brief not ordered bin for dense feature */
  OrderedBin* CreateOrderedBin() const override { return nullptr; }

  void FinishLoad() override {
Guolin Ke's avatar
Guolin Ke committed
273
    if (buf_.empty()) { return; }
Guolin Ke's avatar
Guolin Ke committed
274
275
276
277
278
279
280
281
282
283
    int len = (num_data_ + 1) / 2;
    for (int i = 0; i < len; ++i) {
      data_[i] |= buf_[i];
    }
    buf_.clear();
  }

  void LoadFromMemory(const void* memory, const std::vector<data_size_t>& local_used_indices) override {
    const uint8_t* mem_data = reinterpret_cast<const uint8_t*>(memory);
    if (!local_used_indices.empty()) {
Guolin Ke's avatar
Guolin Ke committed
284
285
      const data_size_t rest = num_data_ & 1;
      for (int i = 0; i < num_data_ - rest; i += 2) {
Guolin Ke's avatar
Guolin Ke committed
286
        // get old bins
Guolin Ke's avatar
Guolin Ke committed
287
288
289
290
        data_size_t idx = local_used_indices[i];
        const auto bin1 = static_cast<uint8_t>((mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
        idx = local_used_indices[i + 1];
        const auto bin2 = static_cast<uint8_t>((mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
Guolin Ke's avatar
Guolin Ke committed
291
        // add
Guolin Ke's avatar
Guolin Ke committed
292
        const int i1 = i >> 1;
Guolin Ke's avatar
Guolin Ke committed
293
294
        data_[i1] = (bin1 | (bin2 << 4));
      }
Guolin Ke's avatar
Guolin Ke committed
295
      if (rest) {
Guolin Ke's avatar
Guolin Ke committed
296
        data_size_t idx = local_used_indices[num_data_ - 1];
Guolin Ke's avatar
Guolin Ke committed
297
        data_[num_data_ >> 1] = (mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
Guolin Ke's avatar
Guolin Ke committed
298
299
300
301
302
303
304
305
306
      }
    } else {
      for (size_t i = 0; i < data_.size(); ++i) {
        data_[i] = mem_data[i];
      }
    }
  }

  void CopySubset(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override {
Guolin Ke's avatar
Guolin Ke committed
307
    auto other_bin = dynamic_cast<const Dense4bitsBin*>(full_bin);
Guolin Ke's avatar
Guolin Ke committed
308
309
    const data_size_t rest = num_used_indices & 1;
    for (int i = 0; i < num_used_indices - rest; i += 2) {
Guolin Ke's avatar
Guolin Ke committed
310
311
312
313
      data_size_t idx = used_indices[i];
      const auto bin1 = static_cast<uint8_t>((other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
      idx = used_indices[i + 1];
      const auto bin2 = static_cast<uint8_t>((other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
Guolin Ke's avatar
Guolin Ke committed
314
      const int i1 = i >> 1;
Guolin Ke's avatar
Guolin Ke committed
315
316
      data_[i1] = (bin1 | (bin2 << 4));
    }
Guolin Ke's avatar
Guolin Ke committed
317
    if (rest) {
Guolin Ke's avatar
Guolin Ke committed
318
      data_size_t idx = used_indices[num_used_indices - 1];
Guolin Ke's avatar
Guolin Ke committed
319
      data_[num_used_indices >> 1] = (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
Guolin Ke's avatar
Guolin Ke committed
320
321
322
    }
  }

323
324
  void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
    writer->Write(data_.data(), sizeof(uint8_t) * data_.size());
Guolin Ke's avatar
Guolin Ke committed
325
326
327
328
329
330
  }

  size_t SizesInByte() const override {
    return sizeof(uint8_t) * data_.size();
  }

331
332
333
334
  Dense4bitsBin* Clone() override {
    return new Dense4bitsBin(*this);
  }

Nikita Titov's avatar
Nikita Titov committed
335
 protected:
336
  Dense4bitsBin(const Dense4bitsBin& other)
337
338
    : num_data_(other.num_data_), data_(other.data_), buf_(other.buf_) {}

Guolin Ke's avatar
Guolin Ke committed
339
340
341
342
343
344
345
346
  data_size_t num_data_;
  std::vector<uint8_t> data_;
  std::vector<uint8_t> buf_;
};

uint32_t Dense4bitsBinIterator::Get(data_size_t idx) {
  const auto bin = (bin_data_->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
  if (bin >= min_bin_ && bin <= max_bin_) {
347
    return bin - min_bin_ + offset_;
Guolin Ke's avatar
Guolin Ke committed
348
  } else {
Guolin Ke's avatar
Guolin Ke committed
349
    return most_freq_bin_;
Guolin Ke's avatar
Guolin Ke committed
350
351
352
  }
}

353
354
355
356
uint32_t Dense4bitsBinIterator::RawGet(data_size_t idx) {
  return (bin_data_->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
}

Guolin Ke's avatar
Guolin Ke committed
357
358
inline BinIterator* Dense4bitsBin::GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
  return new Dense4bitsBinIterator(this, min_bin, max_bin, most_freq_bin);
Guolin Ke's avatar
Guolin Ke committed
359
360
361
362
}

}  // namespace LightGBM
#endif   // LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_