Unverified Commit bcad692e authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

Speed-up "Split" and some code refactorings (#2883)

* commit

* fix msvc

* fix format
parent 1a48fd26
......@@ -218,61 +218,6 @@ class BinMapper {
uint32_t most_freq_bin_;
};
/*!
 * \brief Interface for ordered bin data. Efficient for histogram construction,
 *        especially for sparse bins.
 * Using an ordered bin has two advantages:
 * 1. Data are grouped by leaf, which improves cache locality.
 * 2. Only non-zero bins are stored, which speeds up histogram construction
 *    for sparse features.
 * However, it has an additional cost: the bins must be re-ordered after every
 * split, which is expensive for dense features. Therefore ordered bins are
 * used only in sparse situations.
 */
class OrderedBin {
 public:
  /*! \brief Virtual destructor so implementations can be deleted through this interface. */
  virtual ~OrderedBin() {}
  /*!
   * \brief Initialization logic, called once per iteration.
   * \param used_indices Per-datum usage flags; used_indices[i] != 0 means the
   *        i-th datum is used. An empty/absent flag set means all data are used
   *        (this was built for the bagging logic).
   * \param num_leaves Number of leaves in this iteration
   */
  virtual void Init(const char* used_indices, data_size_t num_leaves) = 0;
  /*!
   * \brief Construct a histogram from the data of one leaf.
   * Note: unlike Bin, OrderedBin does not take ordered gradients/hessians,
   * since zero bins are skipped and the relative index of a datum within one
   * leaf is therefore hard to know for a sparse bin.
   * \param leaf Index of the leaf whose data are used
   * \param gradients Gradients for all data (NOT ordered by leaf)
   * \param hessians Hessians for all data (NOT ordered by leaf)
   * \param out Output histogram
   */
  virtual void ConstructHistogram(int leaf, const score_t* gradients,
    const score_t* hessians, hist_t* out) const = 0;
  /*!
   * \brief Construct a histogram from the data of one leaf, without hessians
   *        (constant-hessian case). See the note on the overload above.
   * \param leaf Index of the leaf whose data are used
   * \param gradients Gradients for all data (NOT ordered by leaf)
   * \param out Output histogram
   */
  virtual void ConstructHistogram(int leaf, const score_t* gradients, hist_t* out) const = 0;
  /*!
   * \brief Split one leaf and re-order the stored bins by leaf.
   * \param leaf Index of the leaf being split
   * \param right_leaf Index of the new (right) leaf created by the split
   * \param is_in_leaf Per-datum markers; is_in_leaf[i] == mark means the i-th
   *        datum goes to the left leaf after the split
   * \param mark Marker value identifying left-leaf data in is_in_leaf
   */
  virtual void Split(int leaf, int right_leaf, const char* is_in_leaf, char mark) = 0;
  /*! \brief Number of stored (non-zero) entries for the given leaf. */
  virtual data_size_t NonZeroCount(int leaf) const = 0;
};
/*! \brief Iterator for one bin column */
class BinIterator {
public:
......@@ -382,43 +327,33 @@ class Bin {
virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const = 0;
/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \param min_bin min_bin of current used feature
* \param max_bin max_bin of current used feature
* \param default_bin default bin for feature value 0
* \param most_freq_bin
* \param missing_type missing type
* \param default_left missing bin will go to left child
* \param threshold The split threshold.
* \param data_indices Used data indices. After called this function. The less than or equal data indices will store on this object.
* \param num_data Number of used data
* \param lte_indices After called this function. The less or equal data indices will store on this object.
* \param gt_indices After called this function. The greater data indices will store on this object.
* \return The number of less than or equal data.
*/
virtual data_size_t Split(uint32_t min_bin, uint32_t max_bin,
uint32_t default_bin, uint32_t most_freq_bin, MissingType missing_type, bool default_left, uint32_t threshold,
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;
uint32_t default_bin, uint32_t most_freq_bin,
MissingType missing_type, bool default_left,
uint32_t threshold, const data_size_t* data_indices,
data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const = 0;
virtual data_size_t SplitCategorical(
uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin,
const uint32_t* threshold, int num_threshold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;
virtual data_size_t Split(uint32_t max_bin, uint32_t default_bin,
uint32_t most_freq_bin, MissingType missing_type,
bool default_left, uint32_t threshold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const = 0;
virtual data_size_t SplitCategorical(
uint32_t max_bin, uint32_t most_freq_bin, const uint32_t* threshold,
int num_threshold, const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;
/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \param min_bin min_bin of current used feature
* \param max_bin max_bin of current used feature
* \param most_freq_bin
* \param threshold The split threshold.
* \param num_threshold Number of threshold
* \param data_indices Used data indices. After called this function. The less than or equal data indices will store on this object.
* \param num_data Number of used data
* \param lte_indices After called this function. The less or equal data indices will store on this object.
* \param gt_indices After called this function. The greater data indices will store on this object.
* \return The number of less than or equal data.
*/
virtual data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin, const uint32_t* threshold, int num_threshold,
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;
/*!
* \brief After pushed all feature data, call this could have better refactor for bin data
......
......@@ -535,13 +535,16 @@ class Dataset {
void FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, hist_t* data) const;
inline data_size_t Split(int feature,
const uint32_t* threshold, int num_threshold, bool default_left,
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const {
/*!
 * \brief Partition the given data indices of one feature by a split.
 * Resolves the feature group and the in-group sub-feature index, then
 * delegates to FeatureGroup::Split.
 * \param feature Inner feature index
 * \param threshold Split threshold(s); a bitset for categorical features
 * \param num_threshold Number of threshold entries
 * \param default_left True if default/missing values go to the left child
 * \param data_indices Indices of the data rows to partition
 * \param cnt Number of entries in data_indices
 * \param lte_indices Output: indices routed to the left (<= threshold) child
 * \param gt_indices Output: indices routed to the right (>) child
 * \return Number of indices written to lte_indices
 */
inline data_size_t Split(int feature, const uint32_t* threshold,
                         int num_threshold, bool default_left,
                         const data_size_t* data_indices,
                         data_size_t cnt, data_size_t* lte_indices,
                         data_size_t* gt_indices) const {
  const int group = feature2group_[feature];
  const int sub_feature = feature2subfeature_[feature];
  return feature_groups_[group]->Split(
      sub_feature, threshold, num_threshold, default_left, data_indices,
      cnt, lte_indices, gt_indices);
}
inline int SubFeatureBinOffset(int i) const {
......
......@@ -228,13 +228,11 @@ class FeatureGroup {
return bin_data_->GetIterator(min_bin, max_bin, most_freq_bin);
}
inline data_size_t Split(
int sub_feature,
const uint32_t* threshold,
int num_threshold,
bool default_left,
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const {
inline data_size_t Split(int sub_feature, const uint32_t* threshold,
int num_threshold, bool default_left,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const {
uint32_t default_bin = bin_mappers_[sub_feature]->GetDefaultBin();
uint32_t most_freq_bin = bin_mappers_[sub_feature]->GetMostFreqBin();
if (!is_multi_val_) {
......@@ -242,21 +240,38 @@ class FeatureGroup {
uint32_t max_bin = bin_offsets_[sub_feature + 1] - 1;
if (bin_mappers_[sub_feature]->bin_type() == BinType::NumericalBin) {
auto missing_type = bin_mappers_[sub_feature]->missing_type();
return bin_data_->Split(min_bin, max_bin, default_bin, most_freq_bin, missing_type, default_left,
*threshold, data_indices, num_data, lte_indices, gt_indices);
if (num_feature_ == 1) {
return bin_data_->Split(max_bin, default_bin, most_freq_bin,
missing_type, default_left, *threshold,
data_indices, cnt, lte_indices, gt_indices);
} else {
return bin_data_->Split(min_bin, max_bin, default_bin, most_freq_bin,
missing_type, default_left, *threshold,
data_indices, cnt, lte_indices, gt_indices);
}
} else {
return bin_data_->SplitCategorical(min_bin, max_bin, most_freq_bin, threshold, num_threshold, data_indices, num_data, lte_indices, gt_indices);
if (num_feature_ == 1) {
return bin_data_->SplitCategorical(max_bin, most_freq_bin, threshold,
num_threshold, data_indices, cnt,
lte_indices, gt_indices);
} else {
return bin_data_->SplitCategorical(
min_bin, max_bin, most_freq_bin, threshold, num_threshold,
data_indices, cnt, lte_indices, gt_indices);
}
}
} else {
int addi = bin_mappers_[sub_feature]->GetMostFreqBin() == 0 ? 0 : 1;
uint32_t min_bin = 1;
uint32_t max_bin = bin_mappers_[sub_feature]->num_bin() - 1 + addi;
if (bin_mappers_[sub_feature]->bin_type() == BinType::NumericalBin) {
auto missing_type = bin_mappers_[sub_feature]->missing_type();
return multi_bin_data_[sub_feature]->Split(min_bin, max_bin, default_bin, most_freq_bin, missing_type, default_left,
*threshold, data_indices, num_data, lte_indices, gt_indices);
return multi_bin_data_[sub_feature]->Split(
max_bin, default_bin, most_freq_bin, missing_type, default_left,
*threshold, data_indices, cnt, lte_indices, gt_indices);
} else {
return multi_bin_data_[sub_feature]->SplitCategorical(min_bin, max_bin, most_freq_bin, threshold, num_threshold, data_indices, num_data, lte_indices, gt_indices);
return multi_bin_data_[sub_feature]->SplitCategorical(
max_bin, most_freq_bin, threshold, num_threshold, data_indices, cnt,
lte_indices, gt_indices);
}
}
}
......
......@@ -75,6 +75,9 @@ const int kAlignedSize = 32;
#define SIZE_ALIGNED(t) ((t) + kAlignedSize - 1) / kAlignedSize * kAlignedSize
// Refer to https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4127?view=vs-2019
#pragma warning(disable : 4127)
} // namespace LightGBM
#endif // LightGBM_META_H_
......@@ -14,7 +14,6 @@
#include <cstring>
#include "dense_bin.hpp"
#include "dense_nbits_bin.hpp"
#include "multi_val_dense_bin.hpp"
#include "multi_val_sparse_bin.hpp"
#include "sparse_bin.hpp"
......@@ -633,9 +632,10 @@ namespace LightGBM {
return ret;
}
template class DenseBin<uint8_t>;
template class DenseBin<uint16_t>;
template class DenseBin<uint32_t>;
template class DenseBin<uint8_t, true>;
template class DenseBin<uint8_t, false>;
template class DenseBin<uint16_t, false>;
template class DenseBin<uint32_t, false>;
template class SparseBin<uint8_t>;
template class SparseBin<uint16_t>;
......@@ -647,13 +647,13 @@ namespace LightGBM {
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) {
if (num_bin <= 16) {
return new Dense4bitsBin(num_data);
return new DenseBin<uint8_t, true>(num_data);
} else if (num_bin <= 256) {
return new DenseBin<uint8_t>(num_data);
return new DenseBin<uint8_t, false>(num_data);
} else if (num_bin <= 65536) {
return new DenseBin<uint16_t>(num_data);
return new DenseBin<uint16_t, false>(num_data);
} else {
return new DenseBin<uint32_t>(num_data);
return new DenseBin<uint32_t, false>(num_data);
}
}
......
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
#ifndef LIGHTGBM_IO_DENSE_BIN_HPP_
#define LIGHTGBM_IO_DENSE_BIN_HPP_
......@@ -13,16 +14,19 @@
namespace LightGBM {
template <typename VAL_T>
template <typename VAL_T, bool IS_4BIT>
class DenseBin;
template <typename VAL_T>
class DenseBinIterator: public BinIterator {
template <typename VAL_T, bool IS_4BIT>
class DenseBinIterator : public BinIterator {
public:
explicit DenseBinIterator(const DenseBin<VAL_T>* bin_data, uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin)
: bin_data_(bin_data), min_bin_(static_cast<VAL_T>(min_bin)),
max_bin_(static_cast<VAL_T>(max_bin)),
most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) {
explicit DenseBinIterator(const DenseBin<VAL_T, IS_4BIT>* bin_data,
uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin)
: bin_data_(bin_data),
min_bin_(static_cast<VAL_T>(min_bin)),
max_bin_(static_cast<VAL_T>(max_bin)),
most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) {
if (most_freq_bin_ == 0) {
offset_ = 1;
} else {
......@@ -34,43 +38,68 @@ class DenseBinIterator: public BinIterator {
inline void Reset(data_size_t) override {}
private:
const DenseBin<VAL_T>* bin_data_;
const DenseBin<VAL_T, IS_4BIT>* bin_data_;
VAL_T min_bin_;
VAL_T max_bin_;
VAL_T most_freq_bin_;
uint8_t offset_;
};
/*!
* \brief Used to store bins for dense feature
* Use template to reduce memory cost
*/
template <typename VAL_T>
class DenseBin: public Bin {
* \brief Used to store bins for dense feature
* Use template to reduce memory cost
*/
template <typename VAL_T, bool IS_4BIT>
class DenseBin : public Bin {
public:
friend DenseBinIterator<VAL_T>;
friend DenseBinIterator<VAL_T, IS_4BIT>;
explicit DenseBin(data_size_t num_data)
: num_data_(num_data), data_(num_data_, static_cast<VAL_T>(0)) {
: num_data_(num_data) {
if (IS_4BIT) {
CHECK_EQ(sizeof(VAL_T), 1);
data_.resize((num_data_ + 1) / 2, static_cast<uint8_t>(0));
buf_.resize((num_data_ + 1) / 2, static_cast<uint8_t>(0));
} else {
data_.resize(num_data_, static_cast<VAL_T>(0));
}
}
~DenseBin() {
}
~DenseBin() {}
void Push(int, data_size_t idx, uint32_t value) override {
data_[idx] = static_cast<VAL_T>(value);
if (IS_4BIT) {
const int i1 = idx >> 1;
const int i2 = (idx & 1) << 2;
const uint8_t val = static_cast<uint8_t>(value) << i2;
if (i2 == 0) {
data_[i1] = val;
} else {
buf_[i1] = val;
}
} else {
data_[idx] = static_cast<VAL_T>(value);
}
}
void ReSize(data_size_t num_data) override {
if (num_data_ != num_data) {
num_data_ = num_data;
data_.resize(num_data_);
if (IS_4BIT) {
data_.resize((num_data_ + 1) / 2, static_cast<VAL_T>(0));
} else {
data_.resize(num_data_);
}
}
}
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const override;
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin) const override;
template<bool USE_INDICES, bool USE_PREFETCH, bool USE_HESSIAN>
void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const {
template <bool USE_INDICES, bool USE_PREFETCH, bool USE_HESSIAN>
void ConstructHistogramInner(const data_size_t* data_indices,
data_size_t start, data_size_t end,
const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const {
data_size_t i = start;
hist_t* grad = out;
hist_t* hess = out + 1;
......@@ -80,9 +109,14 @@ class DenseBin: public Bin {
const data_size_t pf_end = end - pf_offset;
for (; i < pf_end; ++i) {
const auto idx = USE_INDICES ? data_indices[i] : i;
const auto pf_idx = USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
PREFETCH_T0(data_.data() + pf_idx);
const auto ti = static_cast<uint32_t>(data_[idx]) << 1;
const auto pf_idx =
USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
if (IS_4BIT) {
PREFETCH_T0(data_.data() + (pf_idx >> 1));
} else {
PREFETCH_T0(data_.data() + pf_idx);
}
const auto ti = static_cast<uint32_t>(data(idx)) << 1;
if (USE_HESSIAN) {
grad[ti] += ordered_gradients[i];
hess[ti] += ordered_hessians[i];
......@@ -94,7 +128,7 @@ class DenseBin: public Bin {
}
for (; i < end; ++i) {
const auto idx = USE_INDICES ? data_indices[i] : i;
const auto ti = static_cast<uint32_t>(data_[idx]) << 1;
const auto ti = static_cast<uint32_t>(data(idx)) << 1;
if (USE_HESSIAN) {
grad[ti] += ordered_gradients[i];
hess[ti] += ordered_hessians[i];
......@@ -105,45 +139,53 @@ class DenseBin: public Bin {
}
}
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians,
hist_t* out) const override {
ConstructHistogramInner<true, true, true>(data_indices, start, end, ordered_gradients, ordered_hessians, out);
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
data_size_t end, const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const override {
ConstructHistogramInner<true, true, true>(
data_indices, start, end, ordered_gradients, ordered_hessians, out);
}
void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians,
hist_t* out) const override {
ConstructHistogramInner<false, false, true>(nullptr, start, end, ordered_gradients, ordered_hessians, out);
const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const override {
ConstructHistogramInner<false, false, true>(
nullptr, start, end, ordered_gradients, ordered_hessians, out);
}
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients,
hist_t* out) const override {
ConstructHistogramInner<true, true, false>(data_indices, start, end, ordered_gradients, nullptr, out);
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
data_size_t end, const score_t* ordered_gradients,
hist_t* out) const override {
ConstructHistogramInner<true, true, false>(data_indices, start, end,
ordered_gradients, nullptr, out);
}
void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients,
hist_t* out) const override {
ConstructHistogramInner<false, false, false>(nullptr, start, end, ordered_gradients, nullptr, out);
const score_t* ordered_gradients,
hist_t* out) const override {
ConstructHistogramInner<false, false, false>(
nullptr, start, end, ordered_gradients, nullptr, out);
}
data_size_t Split(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin, MissingType missing_type, bool default_left,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override {
if (num_data <= 0) { return 0; }
VAL_T th = static_cast<VAL_T>(threshold + min_bin);
const VAL_T minb = static_cast<VAL_T>(min_bin);
const VAL_T maxb = static_cast<VAL_T>(max_bin);
VAL_T t_zero_bin = static_cast<VAL_T>(min_bin + default_bin);
VAL_T t_most_freq_bin = static_cast<VAL_T>(min_bin + most_freq_bin);
template <bool MISS_IS_ZERO, bool MISS_IS_NA, bool MFB_IS_ZERO,
bool MFB_IS_NA, bool USE_MIN_BIN>
data_size_t SplitInner(uint32_t min_bin, uint32_t max_bin,
uint32_t default_bin, uint32_t most_freq_bin,
bool default_left, uint32_t threshold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const {
auto th = static_cast<VAL_T>(threshold + min_bin);
auto t_zero_bin = static_cast<VAL_T>(min_bin + default_bin);
if (most_freq_bin == 0) {
th -= 1;
t_zero_bin -= 1;
t_most_freq_bin -= 1;
--th;
--t_zero_bin;
}
const auto minb = static_cast<VAL_T>(min_bin);
const auto maxb = static_cast<VAL_T>(max_bin);
data_size_t lte_count = 0;
data_size_t gt_count = 0;
data_size_t* default_indices = gt_indices;
......@@ -154,68 +196,55 @@ class DenseBin: public Bin {
default_indices = lte_indices;
default_count = &lte_count;
}
if (missing_type == MissingType::NaN) {
if (MISS_IS_ZERO || MISS_IS_NA) {
if (default_left) {
missing_default_indices = lte_indices;
missing_default_count = &lte_count;
}
if (t_most_freq_bin == maxb) {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = data_[idx];
if (t_most_freq_bin == bin || bin < minb || bin > maxb) {
}
if (min_bin < max_bin) {
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = data(idx);
if ((MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) ||
(MISS_IS_NA && !MFB_IS_NA && bin == maxb)) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if ((USE_MIN_BIN && (bin < minb || bin > maxb)) ||
(!USE_MIN_BIN && bin == 0)) {
if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
}
}
} else {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = data_[idx];
if (bin == maxb) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
default_indices[(*default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
}
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
}
}
} else {
if ((default_left && missing_type == MissingType::Zero)
|| (default_bin <= threshold && missing_type != MissingType::Zero)) {
missing_default_indices = lte_indices;
missing_default_count = &lte_count;
data_size_t* max_bin_indices = gt_indices;
data_size_t* max_bin_count = &gt_count;
if (maxb <= th) {
max_bin_indices = lte_indices;
max_bin_count = &lte_count;
}
if (default_bin == most_freq_bin) {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = data_[idx];
if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = data(idx);
if (MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin != maxb) {
if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
default_indices[(*default_count)++] = idx;
}
}
} else {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = data_[idx];
if (bin == t_zero_bin) {
} else {
if (MISS_IS_NA && !MFB_IS_NA) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
default_indices[(*default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
max_bin_indices[(*max_bin_count)++] = idx;
}
}
}
......@@ -223,11 +252,68 @@ class DenseBin: public Bin {
return lte_count;
}
data_size_t SplitCategorical(
uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin,
const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override {
if (num_data <= 0) { return 0; }
/*!
 * \brief Numerical split dispatcher (variant that uses min_bin, for features
 *        sharing a group). Translates the runtime MissingType / default-bin
 *        configuration into the compile-time flags of
 *        SplitInner<MISS_IS_ZERO, MISS_IS_NA, MFB_IS_ZERO, MFB_IS_NA,
 *        USE_MIN_BIN>, keeping those branches out of the hot partition loop.
 * \return Number of indices written to lte_indices
 */
data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
                  uint32_t most_freq_bin, MissingType missing_type,
                  bool default_left, uint32_t threshold,
                  const data_size_t* data_indices, data_size_t cnt,
                  data_size_t* lte_indices,
                  data_size_t* gt_indices) const override {
// Shared argument list for the SplitInner instantiations below.
#define ARGUMENTS \
  min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold, \
  data_indices, cnt, lte_indices, gt_indices
  if (missing_type == MissingType::None) {
    // No missing values at all.
    return SplitInner<false, false, false, false, true>(ARGUMENTS);
  } else if (missing_type == MissingType::Zero) {
    // Missing is encoded as zero; also record whether the most frequent bin
    // is the zero (default) bin.
    if (default_bin == most_freq_bin) {
      return SplitInner<true, false, true, false, true>(ARGUMENTS);
    } else {
      return SplitInner<true, false, false, false, true>(ARGUMENTS);
    }
  } else {
    // Missing is NaN, stored in the last bin (max_bin); also record whether
    // the most frequent bin is that NaN bin.
    if (max_bin == most_freq_bin + min_bin && most_freq_bin > 0) {
      return SplitInner<false, true, false, true, true>(ARGUMENTS);
    } else {
      return SplitInner<false, true, false, false, true>(ARGUMENTS);
    }
  }
#undef ARGUMENTS
}
/*!
 * \brief Numerical split dispatcher (variant without min_bin, for a feature
 *        that owns the whole bin range; bins effectively start at 1).
 *        Same compile-time dispatch as the min_bin variant, but with
 *        USE_MIN_BIN == false so SplitInner treats bin 0 as the default bin
 *        instead of doing a [min_bin, max_bin] range check.
 * \return Number of indices written to lte_indices
 */
data_size_t Split(uint32_t max_bin, uint32_t default_bin,
                  uint32_t most_freq_bin, MissingType missing_type,
                  bool default_left, uint32_t threshold,
                  const data_size_t* data_indices, data_size_t cnt,
                  data_size_t* lte_indices,
                  data_size_t* gt_indices) const override {
// Shared argument list for the SplitInner instantiations below
// (min_bin is fixed to 1 in this variant).
#define ARGUMENTS \
  1, max_bin, default_bin, most_freq_bin, default_left, threshold, \
  data_indices, cnt, lte_indices, gt_indices
  if (missing_type == MissingType::None) {
    // No missing values at all.
    return SplitInner<false, false, false, false, false>(ARGUMENTS);
  } else if (missing_type == MissingType::Zero) {
    // Missing is encoded as zero; also record whether the most frequent bin
    // is the zero (default) bin.
    if (default_bin == most_freq_bin) {
      return SplitInner<true, false, true, false, false>(ARGUMENTS);
    } else {
      return SplitInner<true, false, false, false, false>(ARGUMENTS);
    }
  } else {
    // Missing is NaN, stored in the last bin (max_bin); also record whether
    // the most frequent bin is that NaN bin.
    if (max_bin == most_freq_bin + 1 && most_freq_bin > 0) {
      return SplitInner<false, true, false, true, false>(ARGUMENTS);
    } else {
      return SplitInner<false, true, false, false, false>(ARGUMENTS);
    }
  }
#undef ARGUMENTS
}
template <bool USE_MIN_BIN>
data_size_t SplitCategoricalInner(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin,
const uint32_t* threshold,
int num_threahold,
const data_size_t* data_indices,
data_size_t cnt, data_size_t* lte_indices,
data_size_t* gt_indices) const {
data_size_t lte_count = 0;
data_size_t gt_count = 0;
data_size_t* default_indices = gt_indices;
......@@ -236,12 +322,15 @@ class DenseBin: public Bin {
default_indices = lte_indices;
default_count = &lte_count;
}
for (data_size_t i = 0; i < num_data; ++i) {
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t idx = data_indices[i];
const uint32_t bin = data_[idx];
if (bin < min_bin || bin > max_bin) {
const uint32_t bin = data(idx);
if (USE_MIN_BIN && (bin < min_bin || bin > max_bin)) {
default_indices[(*default_count)++] = idx;
} else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) {
} else if (!USE_MIN_BIN && bin == 0) {
default_indices[(*default_count)++] = idx;
} else if (Common::FindInBitset(threshold, num_threahold,
bin - min_bin)) {
lte_indices[lte_count++] = idx;
} else {
gt_indices[gt_count++] = idx;
......@@ -250,57 +339,138 @@ class DenseBin: public Bin {
return lte_count;
}
/*!
 * \brief Categorical split (variant that uses min_bin, for features sharing
 *        a group). Forwards to SplitCategoricalInner<USE_MIN_BIN = true>.
 * \param min_bin Minimum bin of this feature within the group
 * \param max_bin Maximum bin of this feature within the group
 * \param most_freq_bin Most frequent bin of this feature
 * \param threshold Bitset of bins routed to the left (<=) child
 * \param num_threshold Size of the threshold bitset, as expected by
 *        Common::FindInBitset
 * \param data_indices Indices of the data rows to partition
 * \param cnt Number of entries in data_indices
 * \param lte_indices Output: indices routed to the left child
 * \param gt_indices Output: indices routed to the right child
 * \return Number of indices written to lte_indices
 */
// Fix: parameter renamed from the misspelled "num_threahold" to
// "num_threshold", matching the Bin base-class declaration.
data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
                             uint32_t most_freq_bin,
                             const uint32_t* threshold, int num_threshold,
                             const data_size_t* data_indices, data_size_t cnt,
                             data_size_t* lte_indices,
                             data_size_t* gt_indices) const override {
  return SplitCategoricalInner<true>(min_bin, max_bin, most_freq_bin,
                                     threshold, num_threshold, data_indices,
                                     cnt, lte_indices, gt_indices);
}
/*!
 * \brief Categorical split (variant without min_bin, for a feature that owns
 *        the whole bin range). Forwards to
 *        SplitCategoricalInner<USE_MIN_BIN = false> with min_bin fixed to 1,
 *        so bin 0 is treated as the default bin.
 * \param max_bin Maximum bin of this feature
 * \param most_freq_bin Most frequent bin of this feature
 * \param threshold Bitset of bins routed to the left (<=) child
 * \param num_threshold Size of the threshold bitset, as expected by
 *        Common::FindInBitset
 * \param data_indices Indices of the data rows to partition
 * \param cnt Number of entries in data_indices
 * \param lte_indices Output: indices routed to the left child
 * \param gt_indices Output: indices routed to the right child
 * \return Number of indices written to lte_indices
 */
// Fix: parameter renamed from the misspelled "num_threahold" to
// "num_threshold", matching the Bin base-class declaration.
data_size_t SplitCategorical(uint32_t max_bin, uint32_t most_freq_bin,
                             const uint32_t* threshold, int num_threshold,
                             const data_size_t* data_indices, data_size_t cnt,
                             data_size_t* lte_indices,
                             data_size_t* gt_indices) const override {
  return SplitCategoricalInner<false>(1, max_bin, most_freq_bin, threshold,
                                      num_threshold, data_indices, cnt,
                                      lte_indices, gt_indices);
}
data_size_t num_data() const override { return num_data_; }
void FinishLoad() override {}
/*!
 * \brief Finalize loading. In the 4-bit layout, Push() writes even-index
 *        (low-nibble) values into data_ and stages odd-index (high-nibble)
 *        values in buf_; here the two halves are OR-merged into the packed
 *        data_ array and the staging buffer is released. No-op for the
 *        non-4-bit layout.
 */
void FinishLoad() override {
  if (IS_4BIT) {
    if (buf_.empty()) {
      // Nothing staged (already merged or never pushed).
      return;
    }
    int len = (num_data_ + 1) / 2;
    for (int i = 0; i < len; ++i) {
      data_[i] |= buf_[i];
    }
    buf_.clear();
  }
}
void LoadFromMemory(const void* memory, const std::vector<data_size_t>& local_used_indices) override {
void LoadFromMemory(
const void* memory,
const std::vector<data_size_t>& local_used_indices) override {
const VAL_T* mem_data = reinterpret_cast<const VAL_T*>(memory);
if (!local_used_indices.empty()) {
for (int i = 0; i < num_data_; ++i) {
data_[i] = mem_data[local_used_indices[i]];
if (IS_4BIT) {
const data_size_t rest = num_data_ & 1;
for (int i = 0; i < num_data_ - rest; i += 2) {
// get old bins
data_size_t idx = local_used_indices[i];
const auto bin1 = static_cast<uint8_t>(
(mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
idx = local_used_indices[i + 1];
const auto bin2 = static_cast<uint8_t>(
(mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
// add
const int i1 = i >> 1;
data_[i1] = (bin1 | (bin2 << 4));
}
if (rest) {
data_size_t idx = local_used_indices[num_data_ - 1];
data_[num_data_ >> 1] =
(mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
}
} else {
for (int i = 0; i < num_data_; ++i) {
data_[i] = mem_data[local_used_indices[i]];
}
}
} else {
for (int i = 0; i < num_data_; ++i) {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] = mem_data[i];
}
}
}
void CopySubrow(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override {
auto other_bin = dynamic_cast<const DenseBin<VAL_T>*>(full_bin);
for (int i = 0; i < num_used_indices; ++i) {
data_[i] = other_bin->data_[used_indices[i]];
/*!
 * \brief Read the bin value at index idx.
 * In the 4-bit layout two bins share one byte: even indices occupy the low
 * nibble, odd indices the high nibble. IS_4BIT is a compile-time constant,
 * so the unused branch is dead code for each instantiation.
 */
inline VAL_T data(data_size_t idx) const {
  if (!IS_4BIT) {
    return data_[idx];
  }
  const data_size_t byte_pos = idx >> 1;
  const int nibble_shift = (idx & 1) << 2;
  return (data_[byte_pos] >> nibble_shift) & 0xf;
}
void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
writer->Write(data_.data(), sizeof(VAL_T) * num_data_);
/*!
 * \brief Copy the rows listed in used_indices from full_bin into this bin.
 * \param full_bin Source bin; must be a DenseBin with the same template
 *        arguments (dynamic_cast'ed below)
 * \param used_indices Row indices into full_bin to copy, in output order
 * \param num_used_indices Number of indices to copy
 */
void CopySubrow(const Bin* full_bin, const data_size_t* used_indices,
                data_size_t num_used_indices) override {
  auto other_bin = dynamic_cast<const DenseBin<VAL_T, IS_4BIT>*>(full_bin);
  if (IS_4BIT) {
    // Repack: extract two 4-bit values from arbitrary source positions and
    // combine them into a single destination byte (low nibble first).
    const data_size_t rest = num_used_indices & 1;
    for (int i = 0; i < num_used_indices - rest; i += 2) {
      data_size_t idx = used_indices[i];
      const auto bin1 = static_cast<uint8_t>(
          (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
      idx = used_indices[i + 1];
      const auto bin2 = static_cast<uint8_t>(
          (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
      const int i1 = i >> 1;
      data_[i1] = (bin1 | (bin2 << 4));
    }
    if (rest) {
      // Odd count: the final value occupies the low nibble of the last byte
      // (the whole byte is overwritten, zeroing its high nibble).
      data_size_t idx = used_indices[num_used_indices - 1];
      data_[num_used_indices >> 1] =
          (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
    }
  } else {
    // Plain layout: direct gather copy.
    for (int i = 0; i < num_used_indices; ++i) {
      data_[i] = other_bin->data_[used_indices[i]];
    }
  }
}
size_t SizesInByte() const override {
return sizeof(VAL_T) * num_data_;
/*! \brief Write the raw (packed) bin array through the file writer. */
void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
  writer->Write(data_.data(), sizeof(VAL_T) * data_.size());
}
DenseBin<VAL_T>* Clone() override;
size_t SizesInByte() const override { return sizeof(VAL_T) * data_.size(); }
DenseBin<VAL_T, IS_4BIT>* Clone() override;
private:
data_size_t num_data_;
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;
std::vector<uint8_t> buf_;
DenseBin<VAL_T>(const DenseBin<VAL_T>& other)
: num_data_(other.num_data_), data_(other.data_) {
}
DenseBin<VAL_T, IS_4BIT>(const DenseBin<VAL_T, IS_4BIT>& other)
: num_data_(other.num_data_), data_(other.data_) {}
};
template<typename VAL_T>
DenseBin<VAL_T>* DenseBin<VAL_T>::Clone() {
return new DenseBin<VAL_T>(*this);
/*!
 * \brief Deep-copy this bin column via the private copy constructor.
 * NOTE(review): the visible copy constructor copies num_data_ and data_ but
 * not buf_, so cloning is presumably only valid after FinishLoad() has merged
 * the staging buffer — confirm against callers.
 */
template <typename VAL_T, bool IS_4BIT>
DenseBin<VAL_T, IS_4BIT>* DenseBin<VAL_T, IS_4BIT>::Clone() {
  return new DenseBin<VAL_T, IS_4BIT>(*this);
}
template <typename VAL_T>
uint32_t DenseBinIterator<VAL_T>::Get(data_size_t idx) {
auto ret = bin_data_->data_[idx];
template <typename VAL_T, bool IS_4BIT>
uint32_t DenseBinIterator<VAL_T, IS_4BIT>::Get(data_size_t idx) {
auto ret = bin_data_->data(idx);
if (ret >= min_bin_ && ret <= max_bin_) {
return ret - min_bin_ + offset_;
} else {
......@@ -308,15 +478,17 @@ uint32_t DenseBinIterator<VAL_T>::Get(data_size_t idx) {
}
}
template <typename VAL_T>
inline uint32_t DenseBinIterator<VAL_T>::RawGet(data_size_t idx) {
return bin_data_->data_[idx];
/*! \brief Raw stored bin value at idx, without min-bin/offset translation. */
template <typename VAL_T, bool IS_4BIT>
inline uint32_t DenseBinIterator<VAL_T, IS_4BIT>::RawGet(data_size_t idx) {
  return bin_data_->data(idx);
}
template <typename VAL_T>
BinIterator* DenseBin<VAL_T>::GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
return new DenseBinIterator<VAL_T>(this, min_bin, max_bin, most_freq_bin);
/*!
 * \brief Allocate a new iterator over this bin restricted to
 *        [min_bin, max_bin] with the given most-frequent bin.
 */
template <typename VAL_T, bool IS_4BIT>
BinIterator* DenseBin<VAL_T, IS_4BIT>::GetIterator(
    uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
  return new DenseBinIterator<VAL_T, IS_4BIT>(this, min_bin, max_bin,
                                              most_freq_bin);
}
} // namespace LightGBM
#endif // LightGBM_IO_DENSE_BIN_HPP_
#endif // LightGBM_IO_DENSE_BIN_HPP_
/*!
* Copyright (c) 2017 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifndef LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#define LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#include <LightGBM/bin.h>
#include <cstdint>
#include <cstring>
#include <vector>
namespace LightGBM {
class Dense4bitsBin;
/*! \brief Iterator over a Dense4bitsBin column.
 *  Translates stored 4-bit values back into feature-local bin indices
 *  relative to [min_bin, max_bin]. */
class Dense4bitsBinIterator : public BinIterator {
 public:
  explicit Dense4bitsBinIterator(const Dense4bitsBin* bin_data, uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin)
    : bin_data_(bin_data), min_bin_(static_cast<uint8_t>(min_bin)),
      max_bin_(static_cast<uint8_t>(max_bin)),
      most_freq_bin_(static_cast<uint8_t>(most_freq_bin)) {
    // When bin 0 is the most frequent bin the stored values are shifted down
    // by one, so reads must be shifted back up by one.
    offset_ = (most_freq_bin_ == 0) ? 1 : 0;
  }
  inline uint32_t RawGet(data_size_t idx) override;
  inline uint32_t Get(data_size_t idx) override;
  // Dense storage is random-access; nothing to rewind.
  inline void Reset(data_size_t) override {}

 private:
  const Dense4bitsBin* bin_data_;
  uint8_t min_bin_;
  uint8_t max_bin_;
  uint8_t most_freq_bin_;
  uint8_t offset_;
};
/*! \brief Dense bin that packs two 4-bit bin values into each byte.
 *  Usable for features with at most 16 distinct bins; halves the memory of a
 *  byte-per-row dense bin. Row idx lives in byte idx >> 1: even rows occupy
 *  the low nibble, odd rows the high nibble. */
class Dense4bitsBin : public Bin {
 public:
  friend Dense4bitsBinIterator;

  explicit Dense4bitsBin(data_size_t num_data)
    : num_data_(num_data) {
    int len = (num_data_ + 1) / 2;
    data_.resize(len, static_cast<uint8_t>(0));
    // buf_ stages the high nibbles (odd rows) during loading so Push() never
    // needs a read-modify-write on a byte shared by two rows; the halves are
    // merged by FinishLoad() and buf_ is then released.
    buf_ = std::vector<uint8_t>(len, static_cast<uint8_t>(0));
  }

  ~Dense4bitsBin() {
  }

  /*! \brief Record the bin value of row idx (value must fit in 4 bits). */
  void Push(int, data_size_t idx, uint32_t value) override {
    const int i1 = idx >> 1;
    const int i2 = (idx & 1) << 2;  // nibble shift: 0 for even rows, 4 for odd
    const uint8_t val = static_cast<uint8_t>(value) << i2;
    if (i2 == 0) {
      data_[i1] = val;
    } else {
      buf_[i1] = val;
    }
  }

  void ReSize(data_size_t num_data) override {
    if (num_data_ != num_data) {
      num_data_ = num_data;
      const int len = (num_data_ + 1) / 2;
      data_.resize(len);
      // Fix: keep the loading-time staging buffer in sync with data_.
      // Previously only data_ was resized, so Push() on an odd row and
      // FinishLoad() (which walks (num_data_ + 1) / 2 entries of buf_)
      // could access buf_ out of range after the bin grew while loading.
      if (!buf_.empty()) {
        buf_.resize(len, static_cast<uint8_t>(0));
      }
    }
  }

  inline BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const override;

  /*!
   * \brief Histogram-construction kernel shared by the public overloads.
   * Histogram layout interleaves the accumulators: out[2 * bin] gathers
   * gradients, out[2 * bin + 1] gathers hessians (reinterpreted as an
   * integer counter when USE_HESSIAN is false).
   * \tparam USE_INDICES read rows through data_indices instead of [start, end)
   * \tparam USE_PREFETCH software-prefetch upcoming bytes of data_
   * \tparam USE_HESSIAN accumulate hessians; otherwise count rows
   */
  template<bool USE_INDICES, bool USE_PREFETCH, bool USE_HESSIAN>
  void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end,
                               const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const {
    data_size_t i = start;
    hist_t* grad = out;
    hist_t* hess = out + 1;
    hist_cnt_t* cnt = reinterpret_cast<hist_cnt_t*>(hess);
    if (USE_PREFETCH) {
      const data_size_t pf_offset = 64;
      const data_size_t pf_end = end - pf_offset;
      for (; i < pf_end; ++i) {
        const auto idx = USE_INDICES ? data_indices[i] : i;
        const auto pf_idx = USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
        PREFETCH_T0(data_.data() + (pf_idx >> 1));
        const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
        const uint8_t ti = static_cast<uint8_t>(bin) << 1;
        if (USE_HESSIAN) {
          grad[ti] += ordered_gradients[i];
          hess[ti] += ordered_hessians[i];
        } else {
          grad[ti] += ordered_gradients[i];
          ++cnt[ti];
        }
      }
    }
    // Tail loop; also the whole range when prefetching is disabled.
    for (; i < end; ++i) {
      const auto idx = USE_INDICES ? data_indices[i] : i;
      const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      const uint8_t ti = static_cast<uint8_t>(bin) << 1;
      if (USE_HESSIAN) {
        grad[ti] += ordered_gradients[i];
        hess[ti] += ordered_hessians[i];
      } else {
        grad[ti] += ordered_gradients[i];
        ++cnt[ti];
      }
    }
  }

  void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
                          const score_t* ordered_gradients, const score_t* ordered_hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end, ordered_gradients, ordered_hessians, out);
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
                          const score_t* ordered_gradients, const score_t* ordered_hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, true>(nullptr, start, end, ordered_gradients, ordered_hessians, out);
  }

  void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
                          const score_t* ordered_gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end, ordered_gradients, nullptr, out);
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
                          const score_t* ordered_gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(nullptr, start, end, ordered_gradients, nullptr, out);
  }

  /*!
   * \brief Partition rows by "bin <= threshold".
   * Stored values outside [min_bin, max_bin] are treated as this feature's
   * most frequent bin; zero / NaN representations are routed according to
   * missing_type and default_left.
   * \return Number of rows written to lte_indices
   */
  data_size_t Split(
    uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin, MissingType missing_type, bool default_left,
    uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
    data_size_t* lte_indices, data_size_t* gt_indices) const override {
    if (num_data <= 0) { return 0; }
    // All comparisons below are in stored-value space: stored values are
    // offset by min_bin and, when bin 0 is the most frequent bin, shifted
    // down by one more.
    uint8_t th = static_cast<uint8_t>(threshold + min_bin);
    const uint8_t minb = static_cast<uint8_t>(min_bin);
    const uint8_t maxb = static_cast<uint8_t>(max_bin);
    uint8_t t_zero_bin = static_cast<uint8_t>(min_bin + default_bin);
    uint8_t t_most_freq_bin = static_cast<uint8_t>(min_bin + most_freq_bin);
    if (most_freq_bin == 0) {
      th -= 1;
      t_zero_bin -= 1;
      t_most_freq_bin -= 1;
    }
    data_size_t lte_count = 0;
    data_size_t gt_count = 0;
    // Side that receives the most frequent bin (including out-of-range rows).
    data_size_t* default_indices = gt_indices;
    data_size_t* default_count = &gt_count;
    // Side that receives missing values.
    data_size_t* missing_default_indices = gt_indices;
    data_size_t* missing_default_count = &gt_count;
    if (most_freq_bin <= threshold) {
      default_indices = lte_indices;
      default_count = &lte_count;
    }
    if (missing_type == MissingType::NaN) {
      if (default_left) {
        missing_default_indices = lte_indices;
        missing_default_count = &lte_count;
      }
      if (t_most_freq_bin == maxb) {
        // NaN (stored as maxb) coincides with the most frequent bin.
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (t_most_freq_bin == bin || bin < minb || bin > maxb) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      } else {
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (bin == maxb) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
            default_indices[(*default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      }
    } else {
      if ((default_left && missing_type == MissingType::Zero)
          || (default_bin <= threshold && missing_type != MissingType::Zero)) {
        missing_default_indices = lte_indices;
        missing_default_count = &lte_count;
      }
      if (default_bin == most_freq_bin) {
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      } else {
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (bin == t_zero_bin) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
            default_indices[(*default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      }
    }
    return lte_count;
  }

  /*!
   * \brief Partition rows by membership of (bin - min_bin) in the bitset
   * threshold; out-of-range rows follow the most frequent bin's side.
   * \return Number of rows written to lte_indices
   */
  data_size_t SplitCategorical(
    uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin,
    const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data,
    data_size_t* lte_indices, data_size_t* gt_indices) const override {
    if (num_data <= 0) { return 0; }
    data_size_t lte_count = 0;
    data_size_t gt_count = 0;
    data_size_t* default_indices = gt_indices;
    data_size_t* default_count = &gt_count;
    if (Common::FindInBitset(threshold, num_threahold, most_freq_bin)) {
      default_indices = lte_indices;
      default_count = &lte_count;
    }
    for (data_size_t i = 0; i < num_data; ++i) {
      const data_size_t idx = data_indices[i];
      const uint32_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      if (bin < min_bin || bin > max_bin) {
        default_indices[(*default_count)++] = idx;
      } else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) {
        lte_indices[lte_count++] = idx;
      } else {
        gt_indices[gt_count++] = idx;
      }
    }
    return lte_count;
  }

  /*! \brief Number of rows stored in this bin column. */
  data_size_t num_data() const override { return num_data_; }

  /*! \brief Merge the staged high nibbles into data_ and drop the buffer. */
  void FinishLoad() override {
    if (buf_.empty()) { return; }
    int len = (num_data_ + 1) / 2;
    for (int i = 0; i < len; ++i) {
      data_[i] |= buf_[i];
    }
    buf_.clear();
  }

  /*!
   * \brief Load packed bins from a memory image.
   * With local_used_indices, rows are gathered and repacked two per byte;
   * otherwise the image is copied verbatim.
   */
  void LoadFromMemory(const void* memory, const std::vector<data_size_t>& local_used_indices) override {
    const uint8_t* mem_data = reinterpret_cast<const uint8_t*>(memory);
    if (!local_used_indices.empty()) {
      const data_size_t rest = num_data_ & 1;
      for (int i = 0; i < num_data_ - rest; i += 2) {
        // get old bins
        data_size_t idx = local_used_indices[i];
        const auto bin1 = static_cast<uint8_t>((mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
        idx = local_used_indices[i + 1];
        const auto bin2 = static_cast<uint8_t>((mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
        // pack the pair into one byte
        const int i1 = i >> 1;
        data_[i1] = (bin1 | (bin2 << 4));
      }
      if (rest) {
        // odd row count: last byte holds a single low nibble
        data_size_t idx = local_used_indices[num_data_ - 1];
        data_[num_data_ >> 1] = (mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      }
    } else {
      for (size_t i = 0; i < data_.size(); ++i) {
        data_[i] = mem_data[i];
      }
    }
  }

  /*! \brief Gather the rows listed in used_indices from full_bin. */
  void CopySubrow(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override {
    auto other_bin = dynamic_cast<const Dense4bitsBin*>(full_bin);
    const data_size_t rest = num_used_indices & 1;
    for (int i = 0; i < num_used_indices - rest; i += 2) {
      data_size_t idx = used_indices[i];
      const auto bin1 = static_cast<uint8_t>((other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
      idx = used_indices[i + 1];
      const auto bin2 = static_cast<uint8_t>((other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
      const int i1 = i >> 1;
      data_[i1] = (bin1 | (bin2 << 4));
    }
    if (rest) {
      data_size_t idx = used_indices[num_used_indices - 1];
      data_[num_used_indices >> 1] = (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
    }
  }

  void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
    writer->Write(data_.data(), sizeof(uint8_t) * data_.size());
  }

  size_t SizesInByte() const override {
    return sizeof(uint8_t) * data_.size();
  }

  Dense4bitsBin* Clone() override {
    return new Dense4bitsBin(*this);
  }

 protected:
  Dense4bitsBin(const Dense4bitsBin& other)
    : num_data_(other.num_data_), data_(other.data_), buf_(other.buf_) {
  }

  data_size_t num_data_;
  /*! \brief Packed bin values, two rows per byte. */
  std::vector<uint8_t, Common::AlignmentAllocator<uint8_t, kAlignedSize>> data_;
  /*! \brief Loading-time staging area for odd rows; empty after FinishLoad(). */
  std::vector<uint8_t> buf_;
};
uint32_t Dense4bitsBinIterator::Get(data_size_t idx) {
  // Pick the low or high nibble of the byte holding row idx.
  const uint8_t packed = bin_data_->data_[idx >> 1];
  const uint32_t bin = (packed >> ((idx & 1) << 2)) & 0xf;
  // Values outside this feature's range belong to other features in the same
  // group; report them as the most frequent bin.
  if (bin < min_bin_ || bin > max_bin_) {
    return most_freq_bin_;
  }
  return bin - min_bin_ + offset_;
}
uint32_t Dense4bitsBinIterator::RawGet(data_size_t idx) {
  // Return the stored 4-bit value without range translation.
  const uint8_t packed = bin_data_->data_[idx >> 1];
  return (packed >> ((idx & 1) << 2)) & 0xf;
}
inline BinIterator* Dense4bitsBin::GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
  // The caller takes ownership of the returned iterator.
  auto* iterator = new Dense4bitsBinIterator(this, min_bin, max_bin, most_freq_bin);
  return iterator;
}
} // namespace LightGBM
#endif // LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
#ifndef LIGHTGBM_IO_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_SPARSE_BIN_HPP_
......@@ -9,27 +10,29 @@
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <limits>
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <limits>
#include <utility>
#include <vector>
namespace LightGBM {
template <typename VAL_T> class SparseBin;
template <typename VAL_T>
class SparseBin;
const size_t kNumFastIndex = 64;
template <typename VAL_T>
class SparseBinIterator: public BinIterator {
class SparseBinIterator : public BinIterator {
public:
SparseBinIterator(const SparseBin<VAL_T>* bin_data,
uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin)
: bin_data_(bin_data), min_bin_(static_cast<VAL_T>(min_bin)),
max_bin_(static_cast<VAL_T>(max_bin)),
most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) {
SparseBinIterator(const SparseBin<VAL_T>* bin_data, uint32_t min_bin,
uint32_t max_bin, uint32_t most_freq_bin)
: bin_data_(bin_data),
min_bin_(static_cast<VAL_T>(min_bin)),
max_bin_(static_cast<VAL_T>(max_bin)),
most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) {
if (most_freq_bin_ == 0) {
offset_ = 1;
} else {
......@@ -38,7 +41,7 @@ class SparseBinIterator: public BinIterator {
Reset(0);
}
SparseBinIterator(const SparseBin<VAL_T>* bin_data, data_size_t start_idx)
: bin_data_(bin_data) {
: bin_data_(bin_data) {
Reset(start_idx);
}
......@@ -67,22 +70,18 @@ class SparseBinIterator: public BinIterator {
};
template <typename VAL_T>
class SparseBin: public Bin {
class SparseBin : public Bin {
public:
friend class SparseBinIterator<VAL_T>;
explicit SparseBin(data_size_t num_data)
: num_data_(num_data) {
explicit SparseBin(data_size_t num_data) : num_data_(num_data) {
int num_threads = OMP_NUM_THREADS();
push_buffers_.resize(num_threads);
}
~SparseBin() {
}
~SparseBin() {}
void ReSize(data_size_t num_data) override {
num_data_ = num_data;
}
void ReSize(data_size_t num_data) override { num_data_ = num_data; }
void Push(int tid, data_size_t idx, uint32_t value) override {
auto cur_bin = static_cast<VAL_T>(value);
......@@ -91,36 +90,49 @@ class SparseBin: public Bin {
}
}
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const override;
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin) const override;
#define ACC_GH(hist, i, g, h) \
#define ACC_GH(hist, i, g, h) \
const auto ti = static_cast<int>(i) << 1; \
hist[ti] += g; \
hist[ti + 1] += h; \
hist[ti] += g; \
hist[ti + 1] += h;
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const override {
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
data_size_t end, const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const override {
data_size_t i_delta, cur_pos;
InitIndex(data_indices[start], &i_delta, &cur_pos);
data_size_t i = start;
for (;;) {
if (cur_pos < data_indices[i]) {
cur_pos += deltas_[++i_delta];
if (i_delta >= num_vals_) { break; }
if (i_delta >= num_vals_) {
break;
}
} else if (cur_pos > data_indices[i]) {
if (++i >= end) { break; }
if (++i >= end) {
break;
}
} else {
const VAL_T bin = vals_[i_delta];
ACC_GH(out, bin, ordered_gradients[i], ordered_hessians[i]);
if (++i >= end) { break; }
if (++i >= end) {
break;
}
cur_pos += deltas_[++i_delta];
if (i_delta >= num_vals_) { break; }
if (i_delta >= num_vals_) {
break;
}
}
}
}
void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const override {
const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const override {
data_size_t i_delta, cur_pos;
InitIndex(start, &i_delta, &cur_pos);
while (cur_pos < start && i_delta < num_vals_) {
......@@ -133,8 +145,9 @@ class SparseBin: public Bin {
}
}
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const override {
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
data_size_t end, const score_t* ordered_gradients,
hist_t* out) const override {
data_size_t i_delta, cur_pos;
InitIndex(data_indices[start], &i_delta, &cur_pos);
data_size_t i = start;
......@@ -143,22 +156,31 @@ class SparseBin: public Bin {
for (;;) {
if (cur_pos < data_indices[i]) {
cur_pos += deltas_[++i_delta];
if (i_delta >= num_vals_) { break; }
if (i_delta >= num_vals_) {
break;
}
} else if (cur_pos > data_indices[i]) {
if (++i >= end) { break; }
if (++i >= end) {
break;
}
} else {
const uint32_t ti = static_cast<uint32_t>(vals_[i_delta]) << 1;
grad[ti] += ordered_gradients[i];
++cnt[ti];
if (++i >= end) { break; }
if (++i >= end) {
break;
}
cur_pos += deltas_[++i_delta];
if (i_delta >= num_vals_) { break; }
if (i_delta >= num_vals_) {
break;
}
}
}
}
void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const override {
const score_t* ordered_gradients,
hist_t* out) const override {
data_size_t i_delta, cur_pos;
InitIndex(start, &i_delta, &cur_pos);
hist_t* grad = out;
......@@ -173,17 +195,17 @@ class SparseBin: public Bin {
cur_pos += deltas_[++i_delta];
}
}
#undef ACC_GH
#undef ACC_GH
inline void NextNonzeroFast(data_size_t* i_delta, data_size_t* cur_pos) const {
inline void NextNonzeroFast(data_size_t* i_delta,
data_size_t* cur_pos) const {
*cur_pos += deltas_[++(*i_delta)];
if (*i_delta >= num_vals_) {
*cur_pos = num_data_;
}
}
inline bool NextNonzero(data_size_t* i_delta,
data_size_t* cur_pos) const {
inline bool NextNonzero(data_size_t* i_delta, data_size_t* cur_pos) const {
*cur_pos += deltas_[++(*i_delta)];
if (*i_delta < num_vals_) {
return true;
......@@ -193,96 +215,82 @@ class SparseBin: public Bin {
}
}
data_size_t Split(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin,
MissingType missing_type, bool default_left,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override {
if (num_data <= 0) { return 0; }
VAL_T th = static_cast<VAL_T>(threshold + min_bin);
const VAL_T minb = static_cast<VAL_T>(min_bin);
const VAL_T maxb = static_cast<VAL_T>(max_bin);
VAL_T t_zero_bin = static_cast<VAL_T>(min_bin + default_bin);
VAL_T t_most_freq_bin = static_cast<VAL_T>(min_bin + most_freq_bin);
template <bool MISS_IS_ZERO, bool MISS_IS_NA, bool MFB_IS_ZERO,
bool MFB_IS_NA, bool USE_MIN_BIN>
data_size_t SplitInner(uint32_t min_bin, uint32_t max_bin,
uint32_t default_bin, uint32_t most_freq_bin,
bool default_left, uint32_t threshold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const {
auto th = static_cast<VAL_T>(threshold + min_bin);
auto t_zero_bin = static_cast<VAL_T>(min_bin + default_bin);
if (most_freq_bin == 0) {
th -= 1;
t_zero_bin -= 1;
t_most_freq_bin -= 1;
--th;
--t_zero_bin;
}
const auto minb = static_cast<VAL_T>(min_bin);
const auto maxb = static_cast<VAL_T>(max_bin);
data_size_t lte_count = 0;
data_size_t gt_count = 0;
data_size_t* default_indices = gt_indices;
data_size_t* default_count = &gt_count;
data_size_t* missing_default_indices = gt_indices;
data_size_t* missing_default_count = &gt_count;
SparseBinIterator<VAL_T> iterator(this, data_indices[0]);
if (most_freq_bin <= threshold) {
default_indices = lte_indices;
default_count = &lte_count;
}
if (missing_type == MissingType::NaN) {
if (MISS_IS_ZERO || MISS_IS_NA) {
if (default_left) {
missing_default_indices = lte_indices;
missing_default_count = &lte_count;
}
if (t_most_freq_bin == maxb) {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = iterator.InnerRawGet(idx);
if (t_most_freq_bin == bin || bin < minb || bin > maxb) {
}
SparseBinIterator<VAL_T> iterator(this, data_indices[0]);
if (min_bin < max_bin) {
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = iterator.InnerRawGet(idx);
if ((MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) ||
(MISS_IS_NA && !MFB_IS_NA && bin == maxb)) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if ((USE_MIN_BIN && (bin < minb || bin > maxb)) ||
(!USE_MIN_BIN && bin == 0)) {
if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
}
}
} else {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = iterator.InnerRawGet(idx);
if (bin == maxb) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
default_indices[(*default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
}
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
}
}
} else {
if ((default_left && missing_type == MissingType::Zero)
|| (default_bin <= threshold && missing_type != MissingType::Zero)) {
missing_default_indices = lte_indices;
missing_default_count = &lte_count;
data_size_t* max_bin_indices = gt_indices;
data_size_t* max_bin_count = &gt_count;
if (maxb <= th) {
max_bin_indices = lte_indices;
max_bin_count = &lte_count;
}
if (default_bin == most_freq_bin) {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = iterator.InnerRawGet(idx);
if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = iterator.InnerRawGet(idx);
if (MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin != maxb) {
if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
default_indices[(*default_count)++] = idx;
}
}
} else {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = iterator.InnerRawGet(idx);
if (bin == t_zero_bin) {
} else {
if (MISS_IS_NA && !MFB_IS_NA) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
default_indices[(*default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
max_bin_indices[(*max_bin_count)++] = idx;
}
}
}
......@@ -290,26 +298,85 @@ class SparseBin: public Bin {
return lte_count;
}
data_size_t SplitCategorical(
uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin,
const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const override {
if (num_data <= 0) { return 0; }
/*!
 * \brief Numerical split for a feature sharing a bin range with its group.
 * Picks the SplitInner instantiation whose compile-time flags
 * <MISS_IS_ZERO, MISS_IS_NA, MFB_IS_ZERO, MFB_IS_NA, USE_MIN_BIN> match the
 * missing-value representation of this feature.
 */
data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
                  uint32_t most_freq_bin, MissingType missing_type,
                  bool default_left, uint32_t threshold,
                  const data_size_t* data_indices, data_size_t cnt,
                  data_size_t* lte_indices,
                  data_size_t* gt_indices) const override {
  if (missing_type == MissingType::None) {
    return SplitInner<false, false, false, false, true>(
        min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold,
        data_indices, cnt, lte_indices, gt_indices);
  }
  if (missing_type == MissingType::Zero) {
    if (default_bin == most_freq_bin) {
      // zero is both the missing marker and the most frequent bin
      return SplitInner<true, false, true, false, true>(
          min_bin, max_bin, default_bin, most_freq_bin, default_left,
          threshold, data_indices, cnt, lte_indices, gt_indices);
    }
    return SplitInner<true, false, false, false, true>(
        min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold,
        data_indices, cnt, lte_indices, gt_indices);
  }
  // MissingType::NaN (and any other value) falls through to the NaN handling.
  if (max_bin == most_freq_bin + min_bin && most_freq_bin > 0) {
    // the NaN bin (largest bin) is also the most frequent bin
    return SplitInner<false, true, false, true, true>(
        min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold,
        data_indices, cnt, lte_indices, gt_indices);
  }
  return SplitInner<false, true, false, false, true>(
      min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold,
      data_indices, cnt, lte_indices, gt_indices);
}
/*!
 * \brief Numerical split variant without a group offset: min_bin is fixed to
 * 1 and USE_MIN_BIN is false (SplitInner then treats raw value 0 as the
 * most-frequent-bin default instead of a range check).
 */
data_size_t Split(uint32_t max_bin, uint32_t default_bin,
                  uint32_t most_freq_bin, MissingType missing_type,
                  bool default_left, uint32_t threshold,
                  const data_size_t* data_indices, data_size_t cnt,
                  data_size_t* lte_indices,
                  data_size_t* gt_indices) const override {
  if (missing_type == MissingType::None) {
    return SplitInner<false, false, false, false, false>(
        1, max_bin, default_bin, most_freq_bin, default_left, threshold,
        data_indices, cnt, lte_indices, gt_indices);
  }
  if (missing_type == MissingType::Zero) {
    if (default_bin == most_freq_bin) {
      // zero is both the missing marker and the most frequent bin
      return SplitInner<true, false, true, false, false>(
          1, max_bin, default_bin, most_freq_bin, default_left, threshold,
          data_indices, cnt, lte_indices, gt_indices);
    }
    return SplitInner<true, false, false, false, false>(
        1, max_bin, default_bin, most_freq_bin, default_left, threshold,
        data_indices, cnt, lte_indices, gt_indices);
  }
  // MissingType::NaN (and any other value) falls through to the NaN handling.
  if (max_bin == most_freq_bin + 1 && most_freq_bin > 0) {
    // the NaN bin (largest bin) is also the most frequent bin
    return SplitInner<false, true, false, true, false>(
        1, max_bin, default_bin, most_freq_bin, default_left, threshold,
        data_indices, cnt, lte_indices, gt_indices);
  }
  return SplitInner<false, true, false, false, false>(
      1, max_bin, default_bin, most_freq_bin, default_left, threshold,
      data_indices, cnt, lte_indices, gt_indices);
}
template <bool USE_MIN_BIN>
data_size_t SplitCategoricalInner(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin,
const uint32_t* threshold,
int num_threahold,
const data_size_t* data_indices,
data_size_t cnt, data_size_t* lte_indices,
data_size_t* gt_indices) const {
data_size_t lte_count = 0;
data_size_t gt_count = 0;
SparseBinIterator<VAL_T> iterator(this, data_indices[0]);
data_size_t* default_indices = gt_indices;
data_size_t* default_count = &gt_count;
SparseBinIterator<VAL_T> iterator(this, data_indices[0]);
if (Common::FindInBitset(threshold, num_threahold, most_freq_bin)) {
default_indices = lte_indices;
default_count = &lte_count;
}
for (data_size_t i = 0; i < num_data; ++i) {
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t idx = data_indices[i];
uint32_t bin = iterator.InnerRawGet(idx);
if (bin < min_bin || bin > max_bin) {
const uint32_t bin = iterator.RawGet(idx);
if (USE_MIN_BIN && (bin < min_bin || bin > max_bin)) {
default_indices[(*default_count)++] = idx;
} else if (!USE_MIN_BIN && bin == 0) {
default_indices[(*default_count)++] = idx;
} else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) {
} else if (Common::FindInBitset(threshold, num_threahold,
bin - min_bin)) {
lte_indices[lte_count++] = idx;
} else {
gt_indices[gt_count++] = idx;
......@@ -318,6 +385,27 @@ class SparseBin: public Bin {
return lte_count;
}
/*!
 * \brief Categorical split for a feature sharing a bin range with its group.
 * Rows whose bin lies outside [min_bin, max_bin] follow the default side.
 * \return Number of rows placed in lte_indices
 */
data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
                             uint32_t most_freq_bin, const uint32_t* threshold,
                             int num_threahold,
                             const data_size_t* data_indices, data_size_t cnt,
                             data_size_t* lte_indices,
                             data_size_t* gt_indices) const override {
  return SplitCategoricalInner<true>(min_bin, max_bin, most_freq_bin,
                                     threshold, num_threahold, data_indices,
                                     cnt, lte_indices, gt_indices);
}
/*!
 * \brief Categorical split variant without a group offset: min_bin is fixed
 * to 1 and USE_MIN_BIN is false (raw value 0 then goes to the default side).
 * \return Number of rows placed in lte_indices
 */
data_size_t SplitCategorical(uint32_t max_bin, uint32_t most_freq_bin,
                             const uint32_t* threshold, int num_threahold,
                             const data_size_t* data_indices, data_size_t cnt,
                             data_size_t* lte_indices,
                             data_size_t* gt_indices) const override {
  return SplitCategoricalInner<false>(1, max_bin, most_freq_bin, threshold,
                                      num_threahold, data_indices, cnt,
                                      lte_indices, gt_indices);
}
/*! \brief Number of rows this bin column covers. */
data_size_t num_data() const override {
  return num_data_;
}
void FinishLoad() override {
......@@ -326,24 +414,28 @@ class SparseBin: public Bin {
for (size_t i = 0; i < push_buffers_.size(); ++i) {
pair_cnt += push_buffers_[i].size();
}
std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs = push_buffers_[0];
std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs =
push_buffers_[0];
idx_val_pairs.reserve(pair_cnt);
for (size_t i = 1; i < push_buffers_.size(); ++i) {
idx_val_pairs.insert(idx_val_pairs.end(), push_buffers_[i].begin(), push_buffers_[i].end());
idx_val_pairs.insert(idx_val_pairs.end(), push_buffers_[i].begin(),
push_buffers_[i].end());
push_buffers_[i].clear();
push_buffers_[i].shrink_to_fit();
}
// sort by data index
std::sort(idx_val_pairs.begin(), idx_val_pairs.end(),
[](const std::pair<data_size_t, VAL_T>& a, const std::pair<data_size_t, VAL_T>& b) {
return a.first < b.first;
});
[](const std::pair<data_size_t, VAL_T>& a,
const std::pair<data_size_t, VAL_T>& b) {
return a.first < b.first;
});
// load delta array
LoadFromPair(idx_val_pairs);
}
void LoadFromPair(const std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs) {
void LoadFromPair(
const std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs) {
deltas_.clear();
vals_.clear();
deltas_.reserve(idx_val_pairs.size());
......@@ -355,7 +447,9 @@ class SparseBin: public Bin {
const VAL_T bin = idx_val_pairs[i].second;
data_size_t cur_delta = cur_idx - last_idx;
// disallow the multi-val in one row
if (i > 0 && cur_delta == 0) { continue; }
if (i > 0 && cur_delta == 0) {
continue;
}
while (cur_delta >= 256) {
deltas_.push_back(255);
vals_.push_back(0);
......@@ -412,11 +506,13 @@ class SparseBin: public Bin {
}
size_t SizesInByte() const override {
return sizeof(num_vals_) + sizeof(uint8_t) * (num_vals_ + 1)
+ sizeof(VAL_T) * num_vals_;
return sizeof(num_vals_) + sizeof(uint8_t) * (num_vals_ + 1) +
sizeof(VAL_T) * num_vals_;
}
void LoadFromMemory(const void* memory, const std::vector<data_size_t>& local_used_indices) override {
void LoadFromMemory(
const void* memory,
const std::vector<data_size_t>& local_used_indices) override {
const char* mem_ptr = reinterpret_cast<const char*>(memory);
data_size_t tmp_num_vals = *(reinterpret_cast<const data_size_t*>(mem_ptr));
mem_ptr += sizeof(tmp_num_vals);
......@@ -443,7 +539,8 @@ class SparseBin: public Bin {
std::vector<std::pair<data_size_t, VAL_T>> tmp_pair;
data_size_t cur_pos = 0;
data_size_t j = -1;
for (data_size_t i = 0; i < static_cast<data_size_t>(local_used_indices.size()); ++i) {
for (data_size_t i = 0;
i < static_cast<data_size_t>(local_used_indices.size()); ++i) {
const data_size_t idx = local_used_indices[i];
while (cur_pos < idx && j < num_vals_) {
NextNonzero(&j, &cur_pos);
......@@ -457,7 +554,8 @@ class SparseBin: public Bin {
}
}
void CopySubrow(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override {
void CopySubrow(const Bin* full_bin, const data_size_t* used_indices,
data_size_t num_used_indices) override {
auto other_bin = dynamic_cast<const SparseBin<VAL_T>*>(full_bin);
deltas_.clear();
vals_.clear();
......@@ -497,12 +595,16 @@ class SparseBin: public Bin {
SparseBin<VAL_T>* Clone() override;
SparseBin<VAL_T>(const SparseBin<VAL_T>& other)
: num_data_(other.num_data_), deltas_(other.deltas_), vals_(other.vals_),
num_vals_(other.num_vals_), push_buffers_(other.push_buffers_),
fast_index_(other.fast_index_), fast_index_shift_(other.fast_index_shift_) {
}
void InitIndex(data_size_t start_idx, data_size_t * i_delta, data_size_t * cur_pos) const {
: num_data_(other.num_data_),
deltas_(other.deltas_),
vals_(other.vals_),
num_vals_(other.num_vals_),
push_buffers_(other.push_buffers_),
fast_index_(other.fast_index_),
fast_index_shift_(other.fast_index_shift_) {}
void InitIndex(data_size_t start_idx, data_size_t* i_delta,
data_size_t* cur_pos) const {
auto idx = start_idx >> fast_index_shift_;
if (static_cast<size_t>(idx) < fast_index_.size()) {
const auto fast_pair = fast_index_[start_idx >> fast_index_shift_];
......@@ -516,7 +618,8 @@ class SparseBin: public Bin {
private:
data_size_t num_data_;
std::vector<uint8_t, Common::AlignmentAllocator<uint8_t, kAlignedSize>> deltas_;
std::vector<uint8_t, Common::AlignmentAllocator<uint8_t, kAlignedSize>>
deltas_;
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> vals_;
data_size_t num_vals_;
std::vector<std::vector<std::pair<data_size_t, VAL_T>>> push_buffers_;
......@@ -524,7 +627,7 @@ class SparseBin: public Bin {
data_size_t fast_index_shift_;
};
template<typename VAL_T>
template <typename VAL_T>
SparseBin<VAL_T>* SparseBin<VAL_T>::Clone() {
return new SparseBin(*this);
}
......@@ -552,9 +655,10 @@ inline void SparseBinIterator<VAL_T>::Reset(data_size_t start_idx) {
}
template <typename VAL_T>
BinIterator* SparseBin<VAL_T>::GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
BinIterator* SparseBin<VAL_T>::GetIterator(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin) const {
return new SparseBinIterator<VAL_T>(this, min_bin, max_bin, most_freq_bin);
}
} // namespace LightGBM
#endif // LightGBM_IO_SPARSE_BIN_HPP_
#endif // LightGBM_IO_SPARSE_BIN_HPP_
......@@ -106,7 +106,7 @@ class DataPartition {
const data_size_t begin = leaf_begin_[leaf];
const data_size_t cnt = leaf_count_[leaf];
auto left_start = indices_.data() + begin;
auto left_cnt = runner_.Run<false>(
const auto left_cnt = runner_.Run<false>(
cnt,
[=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
data_size_t* right) {
......
......@@ -144,72 +144,67 @@ class FeatureHistogram {
template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT>
// Binds find_best_threshold_fun_ to the numerical-feature search that matches
// this feature's missing-value handling. The macros below only cut repetition
// between the four nearly-identical lambdas; all are #undef'd before return.
void FuncForNumricalL2() {
#define TEMPLATE_PREFIX USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT
#define LAMBDA_ARGUMENTS                                         \
  double sum_gradient, double sum_hessian, data_size_t num_data, \
      const ConstraintEntry &constraints, SplitInfo *output
#define BEFORE_ARGUMENTS sum_gradient, sum_hessian, output, &rand_threshold
#define FUNC_ARGUMENTS                                                      \
  sum_gradient, sum_hessian, num_data, constraints, min_gain_shift, output, \
      rand_threshold
  if (meta_->num_bin > 2 && meta_->missing_type != MissingType::None) {
    if (meta_->missing_type == MissingType::Zero) {
      // Zero-as-missing: scan thresholds in both directions with the
      // skip-default-bin variant (template args <..., REVERSE, SKIP, NA>).
      find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
        int rand_threshold = 0;
        double min_gain_shift =
            BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(
                BEFORE_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, true, true, false>(
            FUNC_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, false, true, false>(
            FUNC_ARGUMENTS);
      };
    } else {
      // NaN-as-missing with a dedicated NaN bin: scan both directions,
      // letting the NaN bin act as the default side.
      find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
        int rand_threshold = 0;
        double min_gain_shift =
            BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(
                BEFORE_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, true>(
            FUNC_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, false, false, true>(
            FUNC_ARGUMENTS);
      };
    }
  } else {
    if (meta_->missing_type != MissingType::NaN) {
      // No missing values (or too few bins): a single forward scan suffices.
      find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
        int rand_threshold = 0;
        double min_gain_shift =
            BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(
                BEFORE_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, false>(
            FUNC_ARGUMENTS);
      };
    } else {
      // NaN missing but no separate NaN bin: single scan, and force NaN
      // (missing) values to the right child.
      find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
        int rand_threshold = 0;
        double min_gain_shift =
            BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(
                BEFORE_ARGUMENTS);
        FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, false>(
            FUNC_ARGUMENTS);
        output->default_left = false;
      };
    }
  }
#undef TEMPLATE_PREFIX
#undef LAMBDA_ARGUMENTS
#undef BEFORE_ARGUMENTS
#undef FUNC_ARGUMENTS
}
void FuncForCategorical() {
......@@ -227,41 +222,38 @@ class FeatureHistogram {
}
}
}
template <bool USE_RAND, bool USE_MC>
// Binds find_best_threshold_fun_ to the categorical split search,
// dispatching at compile time on whether L1 regularization and
// max_delta_step are active. ARGUMENTS forwards the five runtime
// parameters of the bound member function; it is #undef'd before return.
void FuncForCategoricalL1() {
#define ARGUMENTS                                                      \
  std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, \
      std::placeholders::_4, std::placeholders::_5
  if (meta_->config->lambda_l1 > 0) {
    if (meta_->config->max_delta_step > 0) {
      find_best_threshold_fun_ =
          std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner<
                        USE_RAND, USE_MC, true, true>,
                    this, ARGUMENTS);
    } else {
      find_best_threshold_fun_ =
          std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner<
                        USE_RAND, USE_MC, true, false>,
                    this, ARGUMENTS);
    }
  } else {
    if (meta_->config->max_delta_step > 0) {
      find_best_threshold_fun_ =
          std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner<
                        USE_RAND, USE_MC, false, true>,
                    this, ARGUMENTS);
    } else {
      find_best_threshold_fun_ =
          std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner<
                        USE_RAND, USE_MC, false, false>,
                    this, ARGUMENTS);
    }
  }
#undef ARGUMENTS
}
template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT>
......
......@@ -13,7 +13,6 @@
#include <algorithm>
#include "../io/dense_bin.hpp"
#include "../io/dense_nbits_bin.hpp"
#define GPU_DEBUG 0
......@@ -378,20 +377,20 @@ void GPUTreeLearner::AllocateGPUMemory() {
BinIterator* bin_iters[8];
for (int s_idx = 0; s_idx < 8; ++s_idx) {
bin_iters[s_idx] = train_data_->FeatureGroupIterator(dense_ind[s_idx]);
if (dynamic_cast<Dense4bitsBinIterator*>(bin_iters[s_idx]) == 0) {
if (dynamic_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[s_idx]) == 0) {
Log::Fatal("GPU tree learner assumes that all bins are Dense4bitsBin when num_bin <= 16, but feature %d is not", dense_ind[s_idx]);
}
}
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
Dense4bitsBinIterator iters[8] = {
*static_cast<Dense4bitsBinIterator*>(bin_iters[0]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[1]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[2]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[3]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[4]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[5]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[6]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[7])};
DenseBinIterator<uint8_t, true> iters[8] = {
*static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[0]),
*static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[1]),
*static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[2]),
*static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[3]),
*static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[4]),
*static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[5]),
*static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[6]),
*static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[7])};
for (int j = 0; j < num_data_; ++j) {
host4[j].s[0] = (uint8_t)((iters[0].RawGet(j) * dev_bin_mult[0] + ((j+0) & (dev_bin_mult[0] - 1)))
|((iters[1].RawGet(j) * dev_bin_mult[1] + ((j+1) & (dev_bin_mult[1] - 1))) << 4));
......@@ -407,15 +406,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
for (int s_idx = 0; s_idx < 4; ++s_idx) {
BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_ind[s_idx]);
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
if (dynamic_cast<DenseBinIterator<uint8_t>*>(bin_iter) != 0) {
if (dynamic_cast<DenseBinIterator<uint8_t, false>*>(bin_iter) != 0) {
// Dense bin
DenseBinIterator<uint8_t> iter = *static_cast<DenseBinIterator<uint8_t>*>(bin_iter);
DenseBinIterator<uint8_t, false> iter = *static_cast<DenseBinIterator<uint8_t, false>*>(bin_iter);
for (int j = 0; j < num_data_; ++j) {
host4[j].s[s_idx] = (uint8_t)(iter.RawGet(j) * dev_bin_mult[s_idx] + ((j+s_idx) & (dev_bin_mult[s_idx] - 1)));
}
} else if (dynamic_cast<Dense4bitsBinIterator*>(bin_iter) != 0) {
} else if (dynamic_cast<DenseBinIterator<uint8_t, true>*>(bin_iter) != 0) {
// Dense 4-bit bin
Dense4bitsBinIterator iter = *static_cast<Dense4bitsBinIterator*>(bin_iter);
DenseBinIterator<uint8_t, true> iter = *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iter);
for (int j = 0; j < num_data_; ++j) {
host4[j].s[s_idx] = (uint8_t)(iter.RawGet(j) * dev_bin_mult[s_idx] + ((j+s_idx) & (dev_bin_mult[s_idx] - 1)));
}
......@@ -450,8 +449,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
for (int i = 0; i < k; ++i) {
if (dword_features_ == 8) {
BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_dword_ind[i]);
if (dynamic_cast<Dense4bitsBinIterator*>(bin_iter) != 0) {
Dense4bitsBinIterator iter = *static_cast<Dense4bitsBinIterator*>(bin_iter);
if (dynamic_cast<DenseBinIterator<uint8_t, true>*>(bin_iter) != 0) {
DenseBinIterator<uint8_t, true> iter = *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iter);
#pragma omp parallel for schedule(static)
for (int j = 0; j < num_data_; ++j) {
host4[j].s[i >> 1] |= (uint8_t)((iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i]
......@@ -463,15 +462,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
}
} else if (dword_features_ == 4) {
BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_dword_ind[i]);
if (dynamic_cast<DenseBinIterator<uint8_t>*>(bin_iter) != 0) {
DenseBinIterator<uint8_t> iter = *static_cast<DenseBinIterator<uint8_t>*>(bin_iter);
if (dynamic_cast<DenseBinIterator<uint8_t, false>*>(bin_iter) != 0) {
DenseBinIterator<uint8_t, false> iter = *static_cast<DenseBinIterator<uint8_t, false>*>(bin_iter);
#pragma omp parallel for schedule(static)
for (int j = 0; j < num_data_; ++j) {
host4[j].s[i] = (uint8_t)(iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i]
+ ((j+i) & (device_bin_mults_[copied_feature4 * dword_features_ + i] - 1)));
}
} else if (dynamic_cast<Dense4bitsBinIterator*>(bin_iter) != 0) {
Dense4bitsBinIterator iter = *static_cast<Dense4bitsBinIterator*>(bin_iter);
} else if (dynamic_cast<DenseBinIterator<uint8_t, true>*>(bin_iter) != 0) {
DenseBinIterator<uint8_t, true> iter = *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iter);
#pragma omp parallel for schedule(static)
for (int j = 0; j < num_data_; ++j) {
host4[j].s[i] = (uint8_t)(iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i]
......
......@@ -263,7 +263,6 @@
<ClInclude Include="..\src\boosting\rf.hpp" />
<ClInclude Include="..\src\boosting\score_updater.hpp" />
<ClInclude Include="..\src\io\dense_bin.hpp" />
<ClInclude Include="..\src\io\dense_nbits_bin.hpp" />
<ClInclude Include="..\src\io\multi_val_dense_bin.hpp" />
<ClInclude Include="..\src\io\multi_val_sparse_bin.hpp" />
<ClInclude Include="..\src\io\parser.hpp" />
......
......@@ -174,9 +174,6 @@
<ClInclude Include="..\src\boosting\goss.hpp">
<Filter>src\boosting</Filter>
</ClInclude>
<ClInclude Include="..\src\io\dense_nbits_bin.hpp">
<Filter>src\io</Filter>
</ClInclude>
<ClInclude Include="..\include\LightGBM\utils\openmp_wrapper.h">
<Filter>include\LightGBM\utils</Filter>
</ClInclude>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment