bin.cpp 16 KB
Newer Older
1
#include <LightGBM/utils/common.h>
Guolin Ke's avatar
Guolin Ke committed
2
3
4
#include <LightGBM/bin.h>

#include "dense_bin.hpp"
Guolin Ke's avatar
Guolin Ke committed
5
#include "dense_nbits_bin.hpp"
Guolin Ke's avatar
Guolin Ke committed
6
#include "sparse_bin.hpp"
7
#include "ordered_sparse_bin.hpp"
Guolin Ke's avatar
Guolin Ke committed
8
9
10
11
12
13
14
15
16
17
18

#include <cmath>
#include <cstring>
#include <cstdint>

#include <limits>
#include <vector>
#include <algorithm>

namespace LightGBM {

Guolin Ke's avatar
Guolin Ke committed
19
BinMapper::BinMapper() {
Guolin Ke's avatar
Guolin Ke committed
20
21
22
}

// deep copy function for BinMapper
Guolin Ke's avatar
Guolin Ke committed
23
BinMapper::BinMapper(const BinMapper& other) {
Guolin Ke's avatar
Guolin Ke committed
24
  num_bin_ = other.num_bin_;
Guolin Ke's avatar
Guolin Ke committed
25
  missing_type_ = other.missing_type_;
Guolin Ke's avatar
Guolin Ke committed
26
27
  is_trival_ = other.is_trival_;
  sparse_rate_ = other.sparse_rate_;
28
29
30
31
32
33
34
  bin_type_ = other.bin_type_;
  if (bin_type_ == BinType::NumericalBin) {
    bin_upper_bound_ = other.bin_upper_bound_;
  } else {
    bin_2_categorical_ = other.bin_2_categorical_;
    categorical_2_bin_ = other.categorical_2_bin_;
  }
35
36
  min_val_ = other.min_val_;
  max_val_ = other.max_val_;
Guolin Ke's avatar
Guolin Ke committed
37
  default_bin_ = other.default_bin_;
Guolin Ke's avatar
Guolin Ke committed
38
39
}

Guolin Ke's avatar
Guolin Ke committed
40
BinMapper::BinMapper(const void* memory) {
Guolin Ke's avatar
Guolin Ke committed
41
42
43
44
  CopyFrom(reinterpret_cast<const char*>(memory));
}

BinMapper::~BinMapper() {
Guolin Ke's avatar
Guolin Ke committed
45

Guolin Ke's avatar
Guolin Ke committed
46
47
}

48
49
50
51
52
bool NeedFilter(std::vector<int>& cnt_in_bin, int total_cnt, int filter_cnt, BinType bin_type) {
  if (bin_type == BinType::NumericalBin) {
    int sum_left = 0;
    for (size_t i = 0; i < cnt_in_bin.size() - 1; ++i) {
      sum_left += cnt_in_bin[i];
Guolin Ke's avatar
Guolin Ke committed
53
      if (sum_left >= filter_cnt && total_cnt - sum_left >= filter_cnt) {
54
55
56
57
58
59
        return false;
      }
    }
  } else {
    for (size_t i = 0; i < cnt_in_bin.size() - 1; ++i) {
      int sum_left = cnt_in_bin[i];
Guolin Ke's avatar
Guolin Ke committed
60
      if (sum_left >= filter_cnt && total_cnt - sum_left >= filter_cnt) {
61
62
        return false;
      }
Guolin Ke's avatar
Guolin Ke committed
63
64
65
66
    }
  }
  return true;
}
Guolin Ke's avatar
Guolin Ke committed
67

Guolin Ke's avatar
Guolin Ke committed
68
std::vector<double> GreedyFindBin(const double* distinct_values, const int* counts, 
Guolin Ke's avatar
Guolin Ke committed
69
                                  int num_distinct_values, int max_bin, size_t total_cnt, int min_data_in_bin) {
Guolin Ke's avatar
Guolin Ke committed
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
  std::vector<double> bin_upper_bound;
  if (num_distinct_values <= max_bin) {
    bin_upper_bound.clear();
    int cur_cnt_inbin = 0;
    for (int i = 0; i < num_distinct_values - 1; ++i) {
      cur_cnt_inbin += counts[i];
      if (cur_cnt_inbin >= min_data_in_bin) {
        bin_upper_bound.push_back((distinct_values[i] + distinct_values[i + 1]) / 2);
        cur_cnt_inbin = 0;
      }
    }
    cur_cnt_inbin += counts[num_distinct_values - 1];
    bin_upper_bound.push_back(std::numeric_limits<double>::infinity());
  } else {
    if (min_data_in_bin > 0) {
      max_bin = std::min(max_bin, static_cast<int>(total_cnt / min_data_in_bin));
      max_bin = std::max(max_bin, 1);
    }
    double mean_bin_size = static_cast<double>(total_cnt) / max_bin;

    // mean size for one bin
    int rest_bin_cnt = max_bin;
    int rest_sample_cnt = static_cast<int>(total_cnt);
    std::vector<bool> is_big_count_value(num_distinct_values, false);
    for (int i = 0; i < num_distinct_values; ++i) {
      if (counts[i] >= mean_bin_size) {
        is_big_count_value[i] = true;
        --rest_bin_cnt;
        rest_sample_cnt -= counts[i];
      }
    }
    mean_bin_size = static_cast<double>(rest_sample_cnt) / rest_bin_cnt;
    std::vector<double> upper_bounds(max_bin, std::numeric_limits<double>::infinity());
    std::vector<double> lower_bounds(max_bin, std::numeric_limits<double>::infinity());

    int bin_cnt = 0;
    lower_bounds[bin_cnt] = distinct_values[0];
    int cur_cnt_inbin = 0;
    for (int i = 0; i < num_distinct_values - 1; ++i) {
      if (!is_big_count_value[i]) {
        rest_sample_cnt -= counts[i];
      }
      cur_cnt_inbin += counts[i];
      // need a new bin
      if (is_big_count_value[i] || cur_cnt_inbin >= mean_bin_size ||
        (is_big_count_value[i + 1] && cur_cnt_inbin >= std::max(1.0, mean_bin_size * 0.5f))) {
        upper_bounds[bin_cnt] = distinct_values[i];
        ++bin_cnt;
        lower_bounds[bin_cnt] = distinct_values[i + 1];
        if (bin_cnt >= max_bin - 1) { break; }
        cur_cnt_inbin = 0;
        if (!is_big_count_value[i]) {
          --rest_bin_cnt;
          mean_bin_size = rest_sample_cnt / static_cast<double>(rest_bin_cnt);
        }
      }
    }
    ++bin_cnt;
    // update bin upper bound
    bin_upper_bound.resize(bin_cnt);
    for (int i = 0; i < bin_cnt - 1; ++i) {
      bin_upper_bound[i] = (upper_bounds[i] + lower_bounds[i + 1]) / 2.0f;
    }
    // last bin upper bound
    bin_upper_bound[bin_cnt - 1] = std::numeric_limits<double>::infinity();
  }
  return bin_upper_bound;
}
Guolin Ke's avatar
Guolin Ke committed
138

Guolin Ke's avatar
Guolin Ke committed
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
std::vector<double> FindBinWithZeroAsMissing(const double* distinct_values, const int* counts,
                                             int num_distinct_values, int max_bin, size_t total_sample_cnt, int min_data_in_bin) {
  std::vector<double> bin_upper_bound;
  int left_cnt_data = 0;
  int cnt_missing = 0;
  int right_cnt_data = 0;
  for (int i = 0; i < num_distinct_values; ++i) {
    if (distinct_values[i] <= -kZeroAsMissingValueRange) {
      left_cnt_data += counts[i];
    } else if (distinct_values[i] > kZeroAsMissingValueRange) {
      right_cnt_data += counts[i];
    } else {
      cnt_missing += counts[i];
    }
  }

  int left_cnt = 0;
  for (int i = 0; i < num_distinct_values; ++i) {
    if (distinct_values[i] > -kZeroAsMissingValueRange) {
      left_cnt = i;
      break;
    }
  }

  if (left_cnt > 0) {
    int left_max_bin = static_cast<int>(static_cast<double>(left_cnt_data) / (total_sample_cnt - cnt_missing) * (max_bin - 1));
    bin_upper_bound = GreedyFindBin(distinct_values, counts, left_cnt, left_max_bin, left_cnt_data, min_data_in_bin);
    bin_upper_bound.back() = -kZeroAsMissingValueRange;
  }

  int right_start = -1;
  for (int i = left_cnt; i < num_distinct_values; ++i) {
    if (distinct_values[i] > kZeroAsMissingValueRange) {
      right_start = i;
      break;
    }
  }

  if (right_start >= 0) {
    int right_max_bin = max_bin - 1 - static_cast<int>(bin_upper_bound.size());
    auto right_bounds = GreedyFindBin(distinct_values + right_start, counts + right_start,
                                      num_distinct_values - right_start, right_max_bin, right_cnt_data, min_data_in_bin);
    bin_upper_bound.push_back(kZeroAsMissingValueRange);
    bin_upper_bound.insert(bin_upper_bound.end(), right_bounds.begin(), right_bounds.end());
  } else {
    bin_upper_bound.push_back(std::numeric_limits<double>::infinity());
  }
  return bin_upper_bound;
}

189
void BinMapper::FindBin(double* values, int num_sample_values, size_t total_sample_cnt,
Guolin Ke's avatar
Guolin Ke committed
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
  int max_bin, int min_data_in_bin, int min_split_data, BinType bin_type, bool use_missing, bool zero_as_missing) {
  int na_cnt = 0;
  int tmp_num_sample_values = 0;
  for (int i = 0; i < num_sample_values; ++i) {
    if (!std::isnan(values[i])) {
      values[tmp_num_sample_values++] = values[i];
    }
  }
  if (!use_missing) {
    missing_type_ = MissingType::None;
  } else if (zero_as_missing) {
    missing_type_ = MissingType::Zero;
  } else {
    if (tmp_num_sample_values == num_sample_values) {
      missing_type_ = MissingType::None;
    } else {
      missing_type_ = MissingType::NaN;
    }
    na_cnt = num_sample_values - tmp_num_sample_values;
  }
  num_sample_values = tmp_num_sample_values;

212
213
  bin_type_ = bin_type;
  default_bin_ = 0;
Guolin Ke's avatar
Guolin Ke committed
214
  int zero_cnt = static_cast<int>(total_sample_cnt - num_sample_values - na_cnt);
Guolin Ke's avatar
Guolin Ke committed
215
  // find distinct_values first
216
  std::vector<double> distinct_values;
217
  std::vector<int> counts;
Guolin Ke's avatar
Guolin Ke committed
218

219
  std::sort(values, values + num_sample_values);
Guolin Ke's avatar
Guolin Ke committed
220
221

  // push zero in the front
Guolin Ke's avatar
Guolin Ke committed
222
  if (num_sample_values == 0 || (values[0] > 0.0f && zero_cnt > 0)) {
Guolin Ke's avatar
Guolin Ke committed
223
    distinct_values.push_back(0.0f);
Guolin Ke's avatar
Guolin Ke committed
224
    counts.push_back(zero_cnt);
Guolin Ke's avatar
Guolin Ke committed
225
  }
Guolin Ke's avatar
Guolin Ke committed
226

227
228
  if (num_sample_values > 0) {
    distinct_values.push_back(values[0]);
Guolin Ke's avatar
Guolin Ke committed
229
230
    counts.push_back(1);
  }
Guolin Ke's avatar
Guolin Ke committed
231

232
233
234
  for (int i = 1; i < num_sample_values; ++i) {
    if (values[i] != values[i - 1]) {
      if (values[i - 1] < 0.0f && values[i] > 0.0f) {
Guolin Ke's avatar
Guolin Ke committed
235
        distinct_values.push_back(0.0f);
Guolin Ke's avatar
Guolin Ke committed
236
237
        counts.push_back(zero_cnt);
      }
238
      distinct_values.push_back(values[i]);
239
      counts.push_back(1);
Guolin Ke's avatar
Guolin Ke committed
240
    } else {
241
      ++counts.back();
Guolin Ke's avatar
Guolin Ke committed
242
243
    }
  }
Guolin Ke's avatar
Guolin Ke committed
244
245

  // push zero in the back
246
  if (num_sample_values > 0 && values[num_sample_values - 1] < 0.0f && zero_cnt > 0) {
Guolin Ke's avatar
Guolin Ke committed
247
    distinct_values.push_back(0.0f);
Guolin Ke's avatar
Guolin Ke committed
248
249
    counts.push_back(zero_cnt);
  }
250
251
  min_val_ = distinct_values.front();
  max_val_ = distinct_values.back();
252
  std::vector<int> cnt_in_bin;
253
  int num_distinct_values = static_cast<int>(distinct_values.size());
254
  if (bin_type_ == BinType::NumericalBin) {
Guolin Ke's avatar
Guolin Ke committed
255
256
257
258
    if (missing_type_ == MissingType::Zero) {
      bin_upper_bound_ = FindBinWithZeroAsMissing(distinct_values.data(), counts.data(), num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin);
      if (bin_upper_bound_.size() == 2) {
        missing_type_ = MissingType::None;
Guolin Ke's avatar
Guolin Ke committed
259
      }
Guolin Ke's avatar
Guolin Ke committed
260
261
    } else if (missing_type_ == MissingType::None) {
      bin_upper_bound_ = GreedyFindBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin);
Guolin Ke's avatar
Guolin Ke committed
262
    } else {
Guolin Ke's avatar
Guolin Ke committed
263
264
      bin_upper_bound_ = GreedyFindBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin - 1, total_sample_cnt - na_cnt, min_data_in_bin);
      bin_upper_bound_.push_back(NaN);
Guolin Ke's avatar
Guolin Ke committed
265
266
267
268
269
    }
    num_bin_ = static_cast<int>(bin_upper_bound_.size());
    {
      cnt_in_bin.resize(num_bin_, 0);
      int i_bin = 0;
270
      for (int i = 0; i < num_distinct_values; ++i) {
Guolin Ke's avatar
Guolin Ke committed
271
272
273
274
        if (distinct_values[i] > bin_upper_bound_[i_bin]) {
          ++i_bin;
        } 
        cnt_in_bin[i_bin] += counts[i];
275
      }
Guolin Ke's avatar
Guolin Ke committed
276
277
278
      if (missing_type_ == MissingType::NaN) {
        cnt_in_bin[num_bin_ - 1] = na_cnt;
      }
279
280
281
    }
    CHECK(num_bin_ <= max_bin);
  } else {
Guolin Ke's avatar
Guolin Ke committed
282
283
    // No missing handle for categorical features 
    missing_type_ = MissingType::None;
284
285
286
287
288
289
290
291
292
293
294
295
    // convert to int type first
    std::vector<int> distinct_values_int;
    std::vector<int> counts_int;
    distinct_values_int.push_back(static_cast<int>(distinct_values[0]));
    counts_int.push_back(counts[0]);
    for (size_t i = 1; i < distinct_values.size(); ++i) {
      if (static_cast<int>(distinct_values[i]) != distinct_values_int.back()) {
        distinct_values_int.push_back(static_cast<int>(distinct_values[i]));
        counts_int.push_back(counts[i]);
      } else {
        counts_int.back() += counts[i];
      }
Guolin Ke's avatar
Guolin Ke committed
296
    }
297
298
    // sort by counts
    Common::SortForPair<int, int>(counts_int, distinct_values_int, 0, true);
299
    // will ignore the categorical of small counts
300
301
302
303
304
305
306
307
308
309
310
    const int cut_cnt = static_cast<int>(total_sample_cnt * 0.98f);
    categorical_2_bin_.clear();
    bin_2_categorical_.clear();
    num_bin_ = 0;
    int used_cnt = 0;
    max_bin = std::min(static_cast<int>(distinct_values_int.size()), max_bin);
    while (used_cnt < cut_cnt || num_bin_ < max_bin) {
      bin_2_categorical_.push_back(distinct_values_int[num_bin_]);
      categorical_2_bin_[distinct_values_int[num_bin_]] = static_cast<unsigned int>(num_bin_);
      used_cnt += counts_int[num_bin_];
      ++num_bin_;
Guolin Ke's avatar
Guolin Ke committed
311
    }
312
313
314
    cnt_in_bin = counts_int;
    counts_int.resize(num_bin_);
    counts_int.back() += static_cast<int>(total_sample_cnt - used_cnt);
Guolin Ke's avatar
Guolin Ke committed
315
  }
Guolin Ke's avatar
Guolin Ke committed
316

Guolin Ke's avatar
Guolin Ke committed
317
318
319
320
321
  // check trival(num_bin_ == 1) feature
  if (num_bin_ <= 1) {
    is_trival_ = true;
  } else {
    is_trival_ = false;
Guolin Ke's avatar
Guolin Ke committed
322
  }
323
324
  // check useless bin
  if (!is_trival_ && NeedFilter(cnt_in_bin, static_cast<int>(total_sample_cnt), min_split_data, bin_type_)) {
Guolin Ke's avatar
Guolin Ke committed
325
    is_trival_ = true;
Guolin Ke's avatar
Guolin Ke committed
326
  }
327
328
329
330

  if (!is_trival_) {
    default_bin_ = ValueToBin(0);
  }
Guolin Ke's avatar
Guolin Ke committed
331
  // calculate sparse rate
332
  sparse_rate_ = static_cast<double>(cnt_in_bin[default_bin_]) / static_cast<double>(total_sample_cnt);
Guolin Ke's avatar
Guolin Ke committed
333
334
335
336
337
338
}


int BinMapper::SizeForSpecificBin(int bin) {
  int size = 0;
  size += sizeof(int);
Guolin Ke's avatar
Guolin Ke committed
339
  size += sizeof(MissingType);
Guolin Ke's avatar
Guolin Ke committed
340
  size += sizeof(bool);
341
  size += sizeof(double);
342
  size += sizeof(BinType);
Guolin Ke's avatar
Guolin Ke committed
343
  size += 2 * sizeof(double);
344
  size += bin * sizeof(double);
Guolin Ke's avatar
Guolin Ke committed
345
  size += sizeof(uint32_t);
Guolin Ke's avatar
Guolin Ke committed
346
347
348
  return size;
}

Guolin Ke's avatar
Guolin Ke committed
349
void BinMapper::CopyTo(char * buffer) const {
Guolin Ke's avatar
Guolin Ke committed
350
351
  std::memcpy(buffer, &num_bin_, sizeof(num_bin_));
  buffer += sizeof(num_bin_);
Guolin Ke's avatar
Guolin Ke committed
352
353
  std::memcpy(buffer, &missing_type_, sizeof(missing_type_));
  buffer += sizeof(missing_type_);
Guolin Ke's avatar
Guolin Ke committed
354
355
356
357
  std::memcpy(buffer, &is_trival_, sizeof(is_trival_));
  buffer += sizeof(is_trival_);
  std::memcpy(buffer, &sparse_rate_, sizeof(sparse_rate_));
  buffer += sizeof(sparse_rate_);
358
359
  std::memcpy(buffer, &bin_type_, sizeof(bin_type_));
  buffer += sizeof(bin_type_);
Guolin Ke's avatar
Guolin Ke committed
360
  std::memcpy(buffer, &min_val_, sizeof(min_val_));
361
  buffer += sizeof(min_val_);
Guolin Ke's avatar
Guolin Ke committed
362
  std::memcpy(buffer, &max_val_, sizeof(max_val_));
363
  buffer += sizeof(max_val_);
Guolin Ke's avatar
Guolin Ke committed
364
  std::memcpy(buffer, &default_bin_, sizeof(default_bin_));
Guolin Ke's avatar
Guolin Ke committed
365
  buffer += sizeof(default_bin_);
366
367
368
369
370
  if (bin_type_ == BinType::NumericalBin) {
    std::memcpy(buffer, bin_upper_bound_.data(), num_bin_ * sizeof(double));
  } else {
    std::memcpy(buffer, bin_2_categorical_.data(), num_bin_ * sizeof(int));
  }
Guolin Ke's avatar
Guolin Ke committed
371
372
373
374
375
}

void BinMapper::CopyFrom(const char * buffer) {
  std::memcpy(&num_bin_, buffer, sizeof(num_bin_));
  buffer += sizeof(num_bin_);
Guolin Ke's avatar
Guolin Ke committed
376
377
  std::memcpy(&missing_type_, buffer, sizeof(missing_type_));
  buffer += sizeof(missing_type_);
Guolin Ke's avatar
Guolin Ke committed
378
379
380
381
  std::memcpy(&is_trival_, buffer, sizeof(is_trival_));
  buffer += sizeof(is_trival_);
  std::memcpy(&sparse_rate_, buffer, sizeof(sparse_rate_));
  buffer += sizeof(sparse_rate_);
382
383
  std::memcpy(&bin_type_, buffer, sizeof(bin_type_));
  buffer += sizeof(bin_type_);
384
385
386
387
  std::memcpy(&min_val_, buffer, sizeof(min_val_));
  buffer += sizeof(min_val_);
  std::memcpy(&max_val_, buffer, sizeof(max_val_));
  buffer += sizeof(max_val_);
Guolin Ke's avatar
Guolin Ke committed
388
389
  std::memcpy(&default_bin_, buffer, sizeof(default_bin_));
  buffer += sizeof(default_bin_);
390
391
392
393
394
395
396
397
398
399
400
  if (bin_type_ == BinType::NumericalBin) {
    bin_upper_bound_ = std::vector<double>(num_bin_);
    std::memcpy(bin_upper_bound_.data(), buffer, num_bin_ * sizeof(double));
  } else {
    bin_2_categorical_ = std::vector<int>(num_bin_);
    std::memcpy(bin_2_categorical_.data(), buffer, num_bin_ * sizeof(int));
    categorical_2_bin_.clear();
    for (int i = 0; i < num_bin_; ++i) {
      categorical_2_bin_[bin_2_categorical_[i]] = static_cast<unsigned int>(i);
    }
  }
Guolin Ke's avatar
Guolin Ke committed
401
402
403
404
}

void BinMapper::SaveBinaryToFile(FILE* file) const {
  fwrite(&num_bin_, sizeof(num_bin_), 1, file);
Guolin Ke's avatar
Guolin Ke committed
405
  fwrite(&missing_type_, sizeof(missing_type_), 1, file);
Guolin Ke's avatar
Guolin Ke committed
406
407
  fwrite(&is_trival_, sizeof(is_trival_), 1, file);
  fwrite(&sparse_rate_, sizeof(sparse_rate_), 1, file);
408
  fwrite(&bin_type_, sizeof(bin_type_), 1, file);
409
410
  fwrite(&min_val_, sizeof(min_val_), 1, file);
  fwrite(&max_val_, sizeof(max_val_), 1, file);
Guolin Ke's avatar
Guolin Ke committed
411
  fwrite(&default_bin_, sizeof(default_bin_), 1, file);
412
413
414
415
416
  if (bin_type_ == BinType::NumericalBin) {
    fwrite(bin_upper_bound_.data(), sizeof(double), num_bin_, file);
  } else {
    fwrite(bin_2_categorical_.data(), sizeof(int), num_bin_, file);
  }
Guolin Ke's avatar
Guolin Ke committed
417
418
419
}

size_t BinMapper::SizesInByte() const {
Guolin Ke's avatar
Guolin Ke committed
420
  size_t ret = sizeof(num_bin_) + sizeof(missing_type_) + sizeof(is_trival_) + sizeof(sparse_rate_)
421
422
423
424
425
426
    + sizeof(bin_type_) + sizeof(min_val_) + sizeof(max_val_) + sizeof(default_bin_);
  if (bin_type_ == BinType::NumericalBin) {
    ret += sizeof(double) *  num_bin_;
  } else {
    ret += sizeof(int) * num_bin_;
  }
Guolin Ke's avatar
Guolin Ke committed
427
  return ret;
Guolin Ke's avatar
Guolin Ke committed
428
429
430
431
432
433
434
435
436
437
438
439
440
441
}

template class DenseBin<uint8_t>;
template class DenseBin<uint16_t>;
template class DenseBin<uint32_t>;

template class SparseBin<uint8_t>;
template class SparseBin<uint16_t>;
template class SparseBin<uint32_t>;

template class OrderedSparseBin<uint8_t>;
template class OrderedSparseBin<uint16_t>;
template class OrderedSparseBin<uint32_t>;

Guolin Ke's avatar
Guolin Ke committed
442
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, 
443
  bool is_enable_sparse, double sparse_threshold, bool* is_sparse) {
Guolin Ke's avatar
Guolin Ke committed
444
  // sparse threshold
445
  if (sparse_rate >= sparse_threshold && is_enable_sparse) {
Guolin Ke's avatar
Guolin Ke committed
446
    *is_sparse = true;
Guolin Ke's avatar
Guolin Ke committed
447
    return CreateSparseBin(num_data, num_bin);
Guolin Ke's avatar
Guolin Ke committed
448
449
  } else {
    *is_sparse = false;
Guolin Ke's avatar
Guolin Ke committed
450
    return CreateDenseBin(num_data, num_bin);
Guolin Ke's avatar
Guolin Ke committed
451
452
453
  }
}

Guolin Ke's avatar
Guolin Ke committed
454
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) {
Guolin Ke's avatar
Guolin Ke committed
455
456
457
  if (num_bin <= 16) {
    return new Dense4bitsBin(num_data);
  } else if (num_bin <= 256) {
Guolin Ke's avatar
Guolin Ke committed
458
459
460
    return new DenseBin<uint8_t>(num_data);
  } else if (num_bin <= 65536) {
    return new DenseBin<uint16_t>(num_data);
Guolin Ke's avatar
Guolin Ke committed
461
  } else {
Guolin Ke's avatar
Guolin Ke committed
462
    return new DenseBin<uint32_t>(num_data);
Guolin Ke's avatar
Guolin Ke committed
463
464
465
  }
}

Guolin Ke's avatar
Guolin Ke committed
466
467
468
469
470
Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin) {
  if (num_bin <= 256) {
    return new SparseBin<uint8_t>(num_data);
  } else if (num_bin <= 65536) {
    return new SparseBin<uint16_t>(num_data);
Guolin Ke's avatar
Guolin Ke committed
471
  } else {
Guolin Ke's avatar
Guolin Ke committed
472
    return new SparseBin<uint32_t>(num_data);
Guolin Ke's avatar
Guolin Ke committed
473
474
475
476
  }
}

}  // namespace LightGBM