Unverified Commit bcad692e authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

Speed-up "Split" and some code refactorings (#2883)

* commit

* fix msvc

* fix format
parent 1a48fd26
...@@ -218,61 +218,6 @@ class BinMapper { ...@@ -218,61 +218,6 @@ class BinMapper {
uint32_t most_freq_bin_; uint32_t most_freq_bin_;
}; };
/*!
* \brief Interface for ordered bin data. efficient for construct histogram, especially for sparse bin
* There are 2 advantages by using ordered bin.
* 1. group the data by leafs to improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram construction for sparse features.
* However it brings additional cost: it need re-order the bins after every split, which will cost much for dense feature.
* So we only using ordered bin for sparse situations.
*/
class OrderedBin {
public:
/*! \brief virtual destructor */
virtual ~OrderedBin() {}
/*!
* \brief Initialization logic.
* \param used_indices If used_indices.size() == 0 means using all data, otherwise, used_indices[i] == true means i-th data is used
(this logic was build for bagging logic)
* \param num_leaves Number of leaves on this iteration
*/
virtual void Init(const char* used_indices, data_size_t num_leaves) = 0;
/*!
* \brief Construct histogram by using this bin
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians.
* Because it is hard to know the relative index in one leaf for sparse bin, since we skipped zero bins.
* \param leaf Using which leaf's data to construct
* \param gradients Gradients, Note:non-ordered by leaf
* \param hessians Hessians, Note:non-ordered by leaf
* \param out Output Result
*/
virtual void ConstructHistogram(int leaf, const score_t* gradients,
const score_t* hessians, hist_t* out) const = 0;
/*!
* \brief Construct histogram by using this bin
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians.
* Because it is hard to know the relative index in one leaf for sparse bin, since we skipped zero bins.
* \param leaf Using which leaf's data to construct
* \param gradients Gradients, Note:non-ordered by leaf
* \param out Output Result
*/
virtual void ConstructHistogram(int leaf, const score_t* gradients, hist_t* out) const = 0;
/*!
* \brief Split current bin, and perform re-order by leaf
* \param leaf Using which leaf's to split
* \param right_leaf The new leaf index after perform this split
* \param is_in_leaf is_in_leaf[i] == mark means the i-th data will be on left leaf after split
* \param mark is_in_leaf[i] == mark means the i-th data will be on left leaf after split
*/
virtual void Split(int leaf, int right_leaf, const char* is_in_leaf, char mark) = 0;
virtual data_size_t NonZeroCount(int leaf) const = 0;
};
/*! \brief Iterator for one bin column */ /*! \brief Iterator for one bin column */
class BinIterator { class BinIterator {
public: public:
...@@ -382,43 +327,33 @@ class Bin { ...@@ -382,43 +327,33 @@ class Bin {
virtual void ConstructHistogram(data_size_t start, data_size_t end, virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const = 0; const score_t* ordered_gradients, hist_t* out) const = 0;
/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \param min_bin min_bin of current used feature
* \param max_bin max_bin of current used feature
* \param default_bin default bin for feature value 0
* \param most_freq_bin
* \param missing_type missing type
* \param default_left missing bin will go to left child
* \param threshold The split threshold.
* \param data_indices Used data indices. After called this function. The less than or equal data indices will store on this object.
* \param num_data Number of used data
* \param lte_indices After called this function. The less or equal data indices will store on this object.
* \param gt_indices After called this function. The greater data indices will store on this object.
* \return The number of less than or equal data.
*/
virtual data_size_t Split(uint32_t min_bin, uint32_t max_bin, virtual data_size_t Split(uint32_t min_bin, uint32_t max_bin,
uint32_t default_bin, uint32_t most_freq_bin, MissingType missing_type, bool default_left, uint32_t threshold, uint32_t default_bin, uint32_t most_freq_bin,
data_size_t* data_indices, data_size_t num_data, MissingType missing_type, bool default_left,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0; uint32_t threshold, const data_size_t* data_indices,
data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const = 0;
virtual data_size_t SplitCategorical(
uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin,
const uint32_t* threshold, int num_threshold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;
virtual data_size_t Split(uint32_t max_bin, uint32_t default_bin,
uint32_t most_freq_bin, MissingType missing_type,
bool default_left, uint32_t threshold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const = 0;
virtual data_size_t SplitCategorical(
uint32_t max_bin, uint32_t most_freq_bin, const uint32_t* threshold,
int num_threshold, const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;
/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \param min_bin min_bin of current used feature
* \param max_bin max_bin of current used feature
* \param most_freq_bin
* \param threshold The split threshold.
* \param num_threshold Number of threshold
* \param data_indices Used data indices. After called this function. The less than or equal data indices will store on this object.
* \param num_data Number of used data
* \param lte_indices After called this function. The less or equal data indices will store on this object.
* \param gt_indices After called this function. The greater data indices will store on this object.
* \return The number of less than or equal data.
*/
virtual data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin, const uint32_t* threshold, int num_threshold,
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;
/*! /*!
* \brief After pushed all feature data, call this could have better refactor for bin data * \brief After pushed all feature data, call this could have better refactor for bin data
......
...@@ -535,13 +535,16 @@ class Dataset { ...@@ -535,13 +535,16 @@ class Dataset {
void FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, hist_t* data) const; void FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, hist_t* data) const;
inline data_size_t Split(int feature, inline data_size_t Split(int feature, const uint32_t* threshold,
const uint32_t* threshold, int num_threshold, bool default_left, int num_threshold, bool default_left,
data_size_t* data_indices, data_size_t num_data, const data_size_t* data_indices,
data_size_t* lte_indices, data_size_t* gt_indices) const { data_size_t cnt, data_size_t* lte_indices,
data_size_t* gt_indices) const {
const int group = feature2group_[feature]; const int group = feature2group_[feature];
const int sub_feature = feature2subfeature_[feature]; const int sub_feature = feature2subfeature_[feature];
return feature_groups_[group]->Split(sub_feature, threshold, num_threshold, default_left, data_indices, num_data, lte_indices, gt_indices); return feature_groups_[group]->Split(
sub_feature, threshold, num_threshold, default_left, data_indices,
cnt, lte_indices, gt_indices);
} }
inline int SubFeatureBinOffset(int i) const { inline int SubFeatureBinOffset(int i) const {
......
...@@ -228,13 +228,11 @@ class FeatureGroup { ...@@ -228,13 +228,11 @@ class FeatureGroup {
return bin_data_->GetIterator(min_bin, max_bin, most_freq_bin); return bin_data_->GetIterator(min_bin, max_bin, most_freq_bin);
} }
inline data_size_t Split( inline data_size_t Split(int sub_feature, const uint32_t* threshold,
int sub_feature, int num_threshold, bool default_left,
const uint32_t* threshold, const data_size_t* data_indices, data_size_t cnt,
int num_threshold, data_size_t* lte_indices,
bool default_left, data_size_t* gt_indices) const {
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const {
uint32_t default_bin = bin_mappers_[sub_feature]->GetDefaultBin(); uint32_t default_bin = bin_mappers_[sub_feature]->GetDefaultBin();
uint32_t most_freq_bin = bin_mappers_[sub_feature]->GetMostFreqBin(); uint32_t most_freq_bin = bin_mappers_[sub_feature]->GetMostFreqBin();
if (!is_multi_val_) { if (!is_multi_val_) {
...@@ -242,21 +240,38 @@ class FeatureGroup { ...@@ -242,21 +240,38 @@ class FeatureGroup {
uint32_t max_bin = bin_offsets_[sub_feature + 1] - 1; uint32_t max_bin = bin_offsets_[sub_feature + 1] - 1;
if (bin_mappers_[sub_feature]->bin_type() == BinType::NumericalBin) { if (bin_mappers_[sub_feature]->bin_type() == BinType::NumericalBin) {
auto missing_type = bin_mappers_[sub_feature]->missing_type(); auto missing_type = bin_mappers_[sub_feature]->missing_type();
return bin_data_->Split(min_bin, max_bin, default_bin, most_freq_bin, missing_type, default_left, if (num_feature_ == 1) {
*threshold, data_indices, num_data, lte_indices, gt_indices); return bin_data_->Split(max_bin, default_bin, most_freq_bin,
missing_type, default_left, *threshold,
data_indices, cnt, lte_indices, gt_indices);
} else {
return bin_data_->Split(min_bin, max_bin, default_bin, most_freq_bin,
missing_type, default_left, *threshold,
data_indices, cnt, lte_indices, gt_indices);
}
} else { } else {
return bin_data_->SplitCategorical(min_bin, max_bin, most_freq_bin, threshold, num_threshold, data_indices, num_data, lte_indices, gt_indices); if (num_feature_ == 1) {
return bin_data_->SplitCategorical(max_bin, most_freq_bin, threshold,
num_threshold, data_indices, cnt,
lte_indices, gt_indices);
} else {
return bin_data_->SplitCategorical(
min_bin, max_bin, most_freq_bin, threshold, num_threshold,
data_indices, cnt, lte_indices, gt_indices);
}
} }
} else { } else {
int addi = bin_mappers_[sub_feature]->GetMostFreqBin() == 0 ? 0 : 1; int addi = bin_mappers_[sub_feature]->GetMostFreqBin() == 0 ? 0 : 1;
uint32_t min_bin = 1;
uint32_t max_bin = bin_mappers_[sub_feature]->num_bin() - 1 + addi; uint32_t max_bin = bin_mappers_[sub_feature]->num_bin() - 1 + addi;
if (bin_mappers_[sub_feature]->bin_type() == BinType::NumericalBin) { if (bin_mappers_[sub_feature]->bin_type() == BinType::NumericalBin) {
auto missing_type = bin_mappers_[sub_feature]->missing_type(); auto missing_type = bin_mappers_[sub_feature]->missing_type();
return multi_bin_data_[sub_feature]->Split(min_bin, max_bin, default_bin, most_freq_bin, missing_type, default_left, return multi_bin_data_[sub_feature]->Split(
*threshold, data_indices, num_data, lte_indices, gt_indices); max_bin, default_bin, most_freq_bin, missing_type, default_left,
*threshold, data_indices, cnt, lte_indices, gt_indices);
} else { } else {
return multi_bin_data_[sub_feature]->SplitCategorical(min_bin, max_bin, most_freq_bin, threshold, num_threshold, data_indices, num_data, lte_indices, gt_indices); return multi_bin_data_[sub_feature]->SplitCategorical(
max_bin, most_freq_bin, threshold, num_threshold, data_indices, cnt,
lte_indices, gt_indices);
} }
} }
} }
......
...@@ -75,6 +75,9 @@ const int kAlignedSize = 32; ...@@ -75,6 +75,9 @@ const int kAlignedSize = 32;
#define SIZE_ALIGNED(t) ((t) + kAlignedSize - 1) / kAlignedSize * kAlignedSize #define SIZE_ALIGNED(t) ((t) + kAlignedSize - 1) / kAlignedSize * kAlignedSize
// Refer to https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4127?view=vs-2019
#pragma warning(disable : 4127)
} // namespace LightGBM } // namespace LightGBM
#endif // LightGBM_META_H_ #endif // LightGBM_META_H_
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <cstring> #include <cstring>
#include "dense_bin.hpp" #include "dense_bin.hpp"
#include "dense_nbits_bin.hpp"
#include "multi_val_dense_bin.hpp" #include "multi_val_dense_bin.hpp"
#include "multi_val_sparse_bin.hpp" #include "multi_val_sparse_bin.hpp"
#include "sparse_bin.hpp" #include "sparse_bin.hpp"
...@@ -633,9 +632,10 @@ namespace LightGBM { ...@@ -633,9 +632,10 @@ namespace LightGBM {
return ret; return ret;
} }
template class DenseBin<uint8_t>; template class DenseBin<uint8_t, true>;
template class DenseBin<uint16_t>; template class DenseBin<uint8_t, false>;
template class DenseBin<uint32_t>; template class DenseBin<uint16_t, false>;
template class DenseBin<uint32_t, false>;
template class SparseBin<uint8_t>; template class SparseBin<uint8_t>;
template class SparseBin<uint16_t>; template class SparseBin<uint16_t>;
...@@ -647,13 +647,13 @@ namespace LightGBM { ...@@ -647,13 +647,13 @@ namespace LightGBM {
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) { Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) {
if (num_bin <= 16) { if (num_bin <= 16) {
return new Dense4bitsBin(num_data); return new DenseBin<uint8_t, true>(num_data);
} else if (num_bin <= 256) { } else if (num_bin <= 256) {
return new DenseBin<uint8_t>(num_data); return new DenseBin<uint8_t, false>(num_data);
} else if (num_bin <= 65536) { } else if (num_bin <= 65536) {
return new DenseBin<uint16_t>(num_data); return new DenseBin<uint16_t, false>(num_data);
} else { } else {
return new DenseBin<uint32_t>(num_data); return new DenseBin<uint32_t, false>(num_data);
} }
} }
......
/*! /*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved. * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/ */
#ifndef LIGHTGBM_IO_DENSE_BIN_HPP_ #ifndef LIGHTGBM_IO_DENSE_BIN_HPP_
#define LIGHTGBM_IO_DENSE_BIN_HPP_ #define LIGHTGBM_IO_DENSE_BIN_HPP_
...@@ -13,16 +14,19 @@ ...@@ -13,16 +14,19 @@
namespace LightGBM { namespace LightGBM {
template <typename VAL_T> template <typename VAL_T, bool IS_4BIT>
class DenseBin; class DenseBin;
template <typename VAL_T> template <typename VAL_T, bool IS_4BIT>
class DenseBinIterator: public BinIterator { class DenseBinIterator : public BinIterator {
public: public:
explicit DenseBinIterator(const DenseBin<VAL_T>* bin_data, uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) explicit DenseBinIterator(const DenseBin<VAL_T, IS_4BIT>* bin_data,
: bin_data_(bin_data), min_bin_(static_cast<VAL_T>(min_bin)), uint32_t min_bin, uint32_t max_bin,
max_bin_(static_cast<VAL_T>(max_bin)), uint32_t most_freq_bin)
most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) { : bin_data_(bin_data),
min_bin_(static_cast<VAL_T>(min_bin)),
max_bin_(static_cast<VAL_T>(max_bin)),
most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) {
if (most_freq_bin_ == 0) { if (most_freq_bin_ == 0) {
offset_ = 1; offset_ = 1;
} else { } else {
...@@ -34,43 +38,68 @@ class DenseBinIterator: public BinIterator { ...@@ -34,43 +38,68 @@ class DenseBinIterator: public BinIterator {
inline void Reset(data_size_t) override {} inline void Reset(data_size_t) override {}
private: private:
const DenseBin<VAL_T>* bin_data_; const DenseBin<VAL_T, IS_4BIT>* bin_data_;
VAL_T min_bin_; VAL_T min_bin_;
VAL_T max_bin_; VAL_T max_bin_;
VAL_T most_freq_bin_; VAL_T most_freq_bin_;
uint8_t offset_; uint8_t offset_;
}; };
/*! /*!
* \brief Used to store bins for dense feature * \brief Used to store bins for dense feature
* Use template to reduce memory cost * Use template to reduce memory cost
*/ */
template <typename VAL_T> template <typename VAL_T, bool IS_4BIT>
class DenseBin: public Bin { class DenseBin : public Bin {
public: public:
friend DenseBinIterator<VAL_T>; friend DenseBinIterator<VAL_T, IS_4BIT>;
explicit DenseBin(data_size_t num_data) explicit DenseBin(data_size_t num_data)
: num_data_(num_data), data_(num_data_, static_cast<VAL_T>(0)) { : num_data_(num_data) {
if (IS_4BIT) {
CHECK_EQ(sizeof(VAL_T), 1);
data_.resize((num_data_ + 1) / 2, static_cast<uint8_t>(0));
buf_.resize((num_data_ + 1) / 2, static_cast<uint8_t>(0));
} else {
data_.resize(num_data_, static_cast<VAL_T>(0));
}
} }
~DenseBin() { ~DenseBin() {}
}
void Push(int, data_size_t idx, uint32_t value) override { void Push(int, data_size_t idx, uint32_t value) override {
data_[idx] = static_cast<VAL_T>(value); if (IS_4BIT) {
const int i1 = idx >> 1;
const int i2 = (idx & 1) << 2;
const uint8_t val = static_cast<uint8_t>(value) << i2;
if (i2 == 0) {
data_[i1] = val;
} else {
buf_[i1] = val;
}
} else {
data_[idx] = static_cast<VAL_T>(value);
}
} }
void ReSize(data_size_t num_data) override { void ReSize(data_size_t num_data) override {
if (num_data_ != num_data) { if (num_data_ != num_data) {
num_data_ = num_data; num_data_ = num_data;
data_.resize(num_data_); if (IS_4BIT) {
data_.resize((num_data_ + 1) / 2, static_cast<VAL_T>(0));
} else {
data_.resize(num_data_);
}
} }
} }
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const override; BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin) const override;
template<bool USE_INDICES, bool USE_PREFETCH, bool USE_HESSIAN> template <bool USE_INDICES, bool USE_PREFETCH, bool USE_HESSIAN>
void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end, void ConstructHistogramInner(const data_size_t* data_indices,
const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const { data_size_t start, data_size_t end,
const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const {
data_size_t i = start; data_size_t i = start;
hist_t* grad = out; hist_t* grad = out;
hist_t* hess = out + 1; hist_t* hess = out + 1;
...@@ -80,9 +109,14 @@ class DenseBin: public Bin { ...@@ -80,9 +109,14 @@ class DenseBin: public Bin {
const data_size_t pf_end = end - pf_offset; const data_size_t pf_end = end - pf_offset;
for (; i < pf_end; ++i) { for (; i < pf_end; ++i) {
const auto idx = USE_INDICES ? data_indices[i] : i; const auto idx = USE_INDICES ? data_indices[i] : i;
const auto pf_idx = USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset; const auto pf_idx =
PREFETCH_T0(data_.data() + pf_idx); USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
const auto ti = static_cast<uint32_t>(data_[idx]) << 1; if (IS_4BIT) {
PREFETCH_T0(data_.data() + (pf_idx >> 1));
} else {
PREFETCH_T0(data_.data() + pf_idx);
}
const auto ti = static_cast<uint32_t>(data(idx)) << 1;
if (USE_HESSIAN) { if (USE_HESSIAN) {
grad[ti] += ordered_gradients[i]; grad[ti] += ordered_gradients[i];
hess[ti] += ordered_hessians[i]; hess[ti] += ordered_hessians[i];
...@@ -94,7 +128,7 @@ class DenseBin: public Bin { ...@@ -94,7 +128,7 @@ class DenseBin: public Bin {
} }
for (; i < end; ++i) { for (; i < end; ++i) {
const auto idx = USE_INDICES ? data_indices[i] : i; const auto idx = USE_INDICES ? data_indices[i] : i;
const auto ti = static_cast<uint32_t>(data_[idx]) << 1; const auto ti = static_cast<uint32_t>(data(idx)) << 1;
if (USE_HESSIAN) { if (USE_HESSIAN) {
grad[ti] += ordered_gradients[i]; grad[ti] += ordered_gradients[i];
hess[ti] += ordered_hessians[i]; hess[ti] += ordered_hessians[i];
...@@ -105,45 +139,53 @@ class DenseBin: public Bin { ...@@ -105,45 +139,53 @@ class DenseBin: public Bin {
} }
} }
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end, void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
const score_t* ordered_gradients, const score_t* ordered_hessians, data_size_t end, const score_t* ordered_gradients,
hist_t* out) const override { const score_t* ordered_hessians,
ConstructHistogramInner<true, true, true>(data_indices, start, end, ordered_gradients, ordered_hessians, out); hist_t* out) const override {
ConstructHistogramInner<true, true, true>(
data_indices, start, end, ordered_gradients, ordered_hessians, out);
} }
void ConstructHistogram(data_size_t start, data_size_t end, void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients,
hist_t* out) const override { const score_t* ordered_hessians,
ConstructHistogramInner<false, false, true>(nullptr, start, end, ordered_gradients, ordered_hessians, out); hist_t* out) const override {
ConstructHistogramInner<false, false, true>(
nullptr, start, end, ordered_gradients, ordered_hessians, out);
} }
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end, void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
const score_t* ordered_gradients, data_size_t end, const score_t* ordered_gradients,
hist_t* out) const override { hist_t* out) const override {
ConstructHistogramInner<true, true, false>(data_indices, start, end, ordered_gradients, nullptr, out); ConstructHistogramInner<true, true, false>(data_indices, start, end,
ordered_gradients, nullptr, out);
} }
void ConstructHistogram(data_size_t start, data_size_t end, void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_gradients,
hist_t* out) const override { hist_t* out) const override {
ConstructHistogramInner<false, false, false>(nullptr, start, end, ordered_gradients, nullptr, out); ConstructHistogramInner<false, false, false>(
nullptr, start, end, ordered_gradients, nullptr, out);
} }
data_size_t Split(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin, MissingType missing_type, bool default_left, template <bool MISS_IS_ZERO, bool MISS_IS_NA, bool MFB_IS_ZERO,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data, bool MFB_IS_NA, bool USE_MIN_BIN>
data_size_t* lte_indices, data_size_t* gt_indices) const override { data_size_t SplitInner(uint32_t min_bin, uint32_t max_bin,
if (num_data <= 0) { return 0; } uint32_t default_bin, uint32_t most_freq_bin,
VAL_T th = static_cast<VAL_T>(threshold + min_bin); bool default_left, uint32_t threshold,
const VAL_T minb = static_cast<VAL_T>(min_bin); const data_size_t* data_indices, data_size_t cnt,
const VAL_T maxb = static_cast<VAL_T>(max_bin); data_size_t* lte_indices,
VAL_T t_zero_bin = static_cast<VAL_T>(min_bin + default_bin); data_size_t* gt_indices) const {
VAL_T t_most_freq_bin = static_cast<VAL_T>(min_bin + most_freq_bin); auto th = static_cast<VAL_T>(threshold + min_bin);
auto t_zero_bin = static_cast<VAL_T>(min_bin + default_bin);
if (most_freq_bin == 0) { if (most_freq_bin == 0) {
th -= 1; --th;
t_zero_bin -= 1; --t_zero_bin;
t_most_freq_bin -= 1;
} }
const auto minb = static_cast<VAL_T>(min_bin);
const auto maxb = static_cast<VAL_T>(max_bin);
data_size_t lte_count = 0; data_size_t lte_count = 0;
data_size_t gt_count = 0; data_size_t gt_count = 0;
data_size_t* default_indices = gt_indices; data_size_t* default_indices = gt_indices;
...@@ -154,68 +196,55 @@ class DenseBin: public Bin { ...@@ -154,68 +196,55 @@ class DenseBin: public Bin {
default_indices = lte_indices; default_indices = lte_indices;
default_count = &lte_count; default_count = &lte_count;
} }
if (missing_type == MissingType::NaN) { if (MISS_IS_ZERO || MISS_IS_NA) {
if (default_left) { if (default_left) {
missing_default_indices = lte_indices; missing_default_indices = lte_indices;
missing_default_count = &lte_count; missing_default_count = &lte_count;
} }
if (t_most_freq_bin == maxb) { }
for (data_size_t i = 0; i < num_data; ++i) { if (min_bin < max_bin) {
const data_size_t idx = data_indices[i]; for (data_size_t i = 0; i < cnt; ++i) {
const VAL_T bin = data_[idx]; const data_size_t idx = data_indices[i];
if (t_most_freq_bin == bin || bin < minb || bin > maxb) { const auto bin = data(idx);
if ((MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) ||
(MISS_IS_NA && !MFB_IS_NA && bin == maxb)) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if ((USE_MIN_BIN && (bin < minb || bin > maxb)) ||
(!USE_MIN_BIN && bin == 0)) {
if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
missing_default_indices[(*missing_default_count)++] = idx; missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else { } else {
lte_indices[lte_count++] = idx;
}
}
} else {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = data_[idx];
if (bin == maxb) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
default_indices[(*default_count)++] = idx; default_indices[(*default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
} }
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
} }
} }
} else { } else {
if ((default_left && missing_type == MissingType::Zero) data_size_t* max_bin_indices = gt_indices;
|| (default_bin <= threshold && missing_type != MissingType::Zero)) { data_size_t* max_bin_count = &gt_count;
missing_default_indices = lte_indices; if (maxb <= th) {
missing_default_count = &lte_count; max_bin_indices = lte_indices;
max_bin_count = &lte_count;
} }
if (default_bin == most_freq_bin) { for (data_size_t i = 0; i < cnt; ++i) {
for (data_size_t i = 0; i < num_data; ++i) { const data_size_t idx = data_indices[i];
const data_size_t idx = data_indices[i]; const auto bin = data(idx);
const VAL_T bin = data_[idx]; if (MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) {
if (bin < minb || bin > maxb || t_most_freq_bin == bin) { missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin != maxb) {
if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
missing_default_indices[(*missing_default_count)++] = idx; missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else { } else {
lte_indices[lte_count++] = idx; default_indices[(*default_count)++] = idx;
} }
} } else {
} else { if (MISS_IS_NA && !MFB_IS_NA) {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = data_[idx];
if (bin == t_zero_bin) {
missing_default_indices[(*missing_default_count)++] = idx; missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
default_indices[(*default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else { } else {
lte_indices[lte_count++] = idx; max_bin_indices[(*max_bin_count)++] = idx;
} }
} }
} }
...@@ -223,11 +252,68 @@ class DenseBin: public Bin { ...@@ -223,11 +252,68 @@ class DenseBin: public Bin {
return lte_count; return lte_count;
} }
data_size_t SplitCategorical( data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin, uint32_t most_freq_bin, MissingType missing_type,
const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data, bool default_left, uint32_t threshold,
data_size_t* lte_indices, data_size_t* gt_indices) const override { const data_size_t* data_indices, data_size_t cnt,
if (num_data <= 0) { return 0; } data_size_t* lte_indices,
data_size_t* gt_indices) const override {
#define ARGUMENTS \
min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold, \
data_indices, cnt, lte_indices, gt_indices
if (missing_type == MissingType::None) {
return SplitInner<false, false, false, false, true>(ARGUMENTS);
} else if (missing_type == MissingType::Zero) {
if (default_bin == most_freq_bin) {
return SplitInner<true, false, true, false, true>(ARGUMENTS);
} else {
return SplitInner<true, false, false, false, true>(ARGUMENTS);
}
} else {
if (max_bin == most_freq_bin + min_bin && most_freq_bin > 0) {
return SplitInner<false, true, false, true, true>(ARGUMENTS);
} else {
return SplitInner<false, true, false, false, true>(ARGUMENTS);
}
}
#undef ARGUMENTS
}
data_size_t Split(uint32_t max_bin, uint32_t default_bin,
uint32_t most_freq_bin, MissingType missing_type,
bool default_left, uint32_t threshold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const override {
#define ARGUMENTS \
1, max_bin, default_bin, most_freq_bin, default_left, threshold, \
data_indices, cnt, lte_indices, gt_indices
if (missing_type == MissingType::None) {
return SplitInner<false, false, false, false, false>(ARGUMENTS);
} else if (missing_type == MissingType::Zero) {
if (default_bin == most_freq_bin) {
return SplitInner<true, false, true, false, false>(ARGUMENTS);
} else {
return SplitInner<true, false, false, false, false>(ARGUMENTS);
}
} else {
if (max_bin == most_freq_bin + 1 && most_freq_bin > 0) {
return SplitInner<false, true, false, true, false>(ARGUMENTS);
} else {
return SplitInner<false, true, false, false, false>(ARGUMENTS);
}
}
#undef ARGUMENTS
}
template <bool USE_MIN_BIN>
data_size_t SplitCategoricalInner(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin,
const uint32_t* threshold,
int num_threahold,
const data_size_t* data_indices,
data_size_t cnt, data_size_t* lte_indices,
data_size_t* gt_indices) const {
data_size_t lte_count = 0; data_size_t lte_count = 0;
data_size_t gt_count = 0; data_size_t gt_count = 0;
data_size_t* default_indices = gt_indices; data_size_t* default_indices = gt_indices;
...@@ -236,12 +322,15 @@ class DenseBin: public Bin { ...@@ -236,12 +322,15 @@ class DenseBin: public Bin {
default_indices = lte_indices; default_indices = lte_indices;
default_count = &lte_count; default_count = &lte_count;
} }
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t idx = data_indices[i]; const data_size_t idx = data_indices[i];
const uint32_t bin = data_[idx]; const uint32_t bin = data(idx);
if (bin < min_bin || bin > max_bin) { if (USE_MIN_BIN && (bin < min_bin || bin > max_bin)) {
default_indices[(*default_count)++] = idx; default_indices[(*default_count)++] = idx;
} else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) { } else if (!USE_MIN_BIN && bin == 0) {
default_indices[(*default_count)++] = idx;
} else if (Common::FindInBitset(threshold, num_threahold,
bin - min_bin)) {
lte_indices[lte_count++] = idx; lte_indices[lte_count++] = idx;
} else { } else {
gt_indices[gt_count++] = idx; gt_indices[gt_count++] = idx;
...@@ -250,57 +339,138 @@ class DenseBin: public Bin { ...@@ -250,57 +339,138 @@ class DenseBin: public Bin {
return lte_count; return lte_count;
} }
data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin,
const uint32_t* threshold, int num_threahold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const override {
return SplitCategoricalInner<true>(min_bin, max_bin, most_freq_bin,
threshold, num_threahold, data_indices,
cnt, lte_indices, gt_indices);
}
data_size_t SplitCategorical(uint32_t max_bin, uint32_t most_freq_bin,
const uint32_t* threshold, int num_threahold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const override {
return SplitCategoricalInner<false>(1, max_bin, most_freq_bin, threshold,
num_threahold, data_indices, cnt,
lte_indices, gt_indices);
}
data_size_t num_data() const override { return num_data_; } data_size_t num_data() const override { return num_data_; }
void FinishLoad() override {} void FinishLoad() override {
if (IS_4BIT) {
if (buf_.empty()) {
return;
}
int len = (num_data_ + 1) / 2;
for (int i = 0; i < len; ++i) {
data_[i] |= buf_[i];
}
buf_.clear();
}
}
void LoadFromMemory(const void* memory, const std::vector<data_size_t>& local_used_indices) override { void LoadFromMemory(
const void* memory,
const std::vector<data_size_t>& local_used_indices) override {
const VAL_T* mem_data = reinterpret_cast<const VAL_T*>(memory); const VAL_T* mem_data = reinterpret_cast<const VAL_T*>(memory);
if (!local_used_indices.empty()) { if (!local_used_indices.empty()) {
for (int i = 0; i < num_data_; ++i) { if (IS_4BIT) {
data_[i] = mem_data[local_used_indices[i]]; const data_size_t rest = num_data_ & 1;
for (int i = 0; i < num_data_ - rest; i += 2) {
// get old bins
data_size_t idx = local_used_indices[i];
const auto bin1 = static_cast<uint8_t>(
(mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
idx = local_used_indices[i + 1];
const auto bin2 = static_cast<uint8_t>(
(mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
// add
const int i1 = i >> 1;
data_[i1] = (bin1 | (bin2 << 4));
}
if (rest) {
data_size_t idx = local_used_indices[num_data_ - 1];
data_[num_data_ >> 1] =
(mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
}
} else {
for (int i = 0; i < num_data_; ++i) {
data_[i] = mem_data[local_used_indices[i]];
}
} }
} else { } else {
for (int i = 0; i < num_data_; ++i) { for (size_t i = 0; i < data_.size(); ++i) {
data_[i] = mem_data[i]; data_[i] = mem_data[i];
} }
} }
} }
void CopySubrow(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override { inline VAL_T data(data_size_t idx) const {
auto other_bin = dynamic_cast<const DenseBin<VAL_T>*>(full_bin); if (IS_4BIT) {
for (int i = 0; i < num_used_indices; ++i) { return (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
data_[i] = other_bin->data_[used_indices[i]]; } else {
return data_[idx];
} }
} }
void SaveBinaryToFile(const VirtualFileWriter* writer) const override { void CopySubrow(const Bin* full_bin, const data_size_t* used_indices,
writer->Write(data_.data(), sizeof(VAL_T) * num_data_); data_size_t num_used_indices) override {
auto other_bin = dynamic_cast<const DenseBin<VAL_T, IS_4BIT>*>(full_bin);
if (IS_4BIT) {
const data_size_t rest = num_used_indices & 1;
for (int i = 0; i < num_used_indices - rest; i += 2) {
data_size_t idx = used_indices[i];
const auto bin1 = static_cast<uint8_t>(
(other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
idx = used_indices[i + 1];
const auto bin2 = static_cast<uint8_t>(
(other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
const int i1 = i >> 1;
data_[i1] = (bin1 | (bin2 << 4));
}
if (rest) {
data_size_t idx = used_indices[num_used_indices - 1];
data_[num_used_indices >> 1] =
(other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
}
} else {
for (int i = 0; i < num_used_indices; ++i) {
data_[i] = other_bin->data_[used_indices[i]];
}
}
} }
size_t SizesInByte() const override { void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
return sizeof(VAL_T) * num_data_; writer->Write(data_.data(), sizeof(VAL_T) * data_.size());
} }
DenseBin<VAL_T>* Clone() override; size_t SizesInByte() const override { return sizeof(VAL_T) * data_.size(); }
DenseBin<VAL_T, IS_4BIT>* Clone() override;
private: private:
data_size_t num_data_; data_size_t num_data_;
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_; std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;
std::vector<uint8_t> buf_;
DenseBin<VAL_T>(const DenseBin<VAL_T>& other) DenseBin<VAL_T, IS_4BIT>(const DenseBin<VAL_T, IS_4BIT>& other)
: num_data_(other.num_data_), data_(other.data_) { : num_data_(other.num_data_), data_(other.data_) {}
}
}; };
template<typename VAL_T> template <typename VAL_T, bool IS_4BIT>
DenseBin<VAL_T>* DenseBin<VAL_T>::Clone() { DenseBin<VAL_T, IS_4BIT>* DenseBin<VAL_T, IS_4BIT>::Clone() {
return new DenseBin<VAL_T>(*this); return new DenseBin<VAL_T, IS_4BIT>(*this);
} }
template <typename VAL_T> template <typename VAL_T, bool IS_4BIT>
uint32_t DenseBinIterator<VAL_T>::Get(data_size_t idx) { uint32_t DenseBinIterator<VAL_T, IS_4BIT>::Get(data_size_t idx) {
auto ret = bin_data_->data_[idx]; auto ret = bin_data_->data(idx);
if (ret >= min_bin_ && ret <= max_bin_) { if (ret >= min_bin_ && ret <= max_bin_) {
return ret - min_bin_ + offset_; return ret - min_bin_ + offset_;
} else { } else {
...@@ -308,15 +478,17 @@ uint32_t DenseBinIterator<VAL_T>::Get(data_size_t idx) { ...@@ -308,15 +478,17 @@ uint32_t DenseBinIterator<VAL_T>::Get(data_size_t idx) {
} }
} }
template <typename VAL_T> template <typename VAL_T, bool IS_4BIT>
inline uint32_t DenseBinIterator<VAL_T>::RawGet(data_size_t idx) { inline uint32_t DenseBinIterator<VAL_T, IS_4BIT>::RawGet(data_size_t idx) {
return bin_data_->data_[idx]; return bin_data_->data(idx);
} }
template <typename VAL_T> template <typename VAL_T, bool IS_4BIT>
BinIterator* DenseBin<VAL_T>::GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const { BinIterator* DenseBin<VAL_T, IS_4BIT>::GetIterator(
return new DenseBinIterator<VAL_T>(this, min_bin, max_bin, most_freq_bin); uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
return new DenseBinIterator<VAL_T, IS_4BIT>(this, min_bin, max_bin,
most_freq_bin);
} }
} // namespace LightGBM } // namespace LightGBM
#endif // LightGBM_IO_DENSE_BIN_HPP_ #endif // LightGBM_IO_DENSE_BIN_HPP_
/*!
* Copyright (c) 2017 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifndef LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#define LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
#include <LightGBM/bin.h>
#include <cstdint>
#include <cstring>
#include <vector>
namespace LightGBM {
class Dense4bitsBin;
class Dense4bitsBinIterator : public BinIterator {
public:
explicit Dense4bitsBinIterator(const Dense4bitsBin* bin_data, uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin)
: bin_data_(bin_data), min_bin_(static_cast<uint8_t>(min_bin)),
max_bin_(static_cast<uint8_t>(max_bin)),
most_freq_bin_(static_cast<uint8_t>(most_freq_bin)) {
if (most_freq_bin_ == 0) {
offset_ = 1;
} else {
offset_ = 0;
}
}
inline uint32_t RawGet(data_size_t idx) override;
inline uint32_t Get(data_size_t idx) override;
inline void Reset(data_size_t) override {}
private:
const Dense4bitsBin* bin_data_;
uint8_t min_bin_;
uint8_t max_bin_;
uint8_t most_freq_bin_;
uint8_t offset_;
};
/*!
 * \brief Dense bin container that packs two 4-bit bin codes into each byte.
 * Usable for features with at most 16 distinct bins; halves memory traffic
 * compared with one byte per value.
 * Layout: even data index -> low nibble, odd data index -> high nibble, i.e.
 * bin(idx) = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf.
 */
class Dense4bitsBin : public Bin {
 public:
  friend Dense4bitsBinIterator;
  explicit Dense4bitsBin(data_size_t num_data)
    : num_data_(num_data) {
    // Two values per byte; round up so an odd last element has storage.
    int len = (num_data_ + 1) / 2;
    data_.resize(len, static_cast<uint8_t>(0));
    // buf_ stages the high nibbles during loading; FinishLoad() merges them.
    buf_ = std::vector<uint8_t>(len, static_cast<uint8_t>(0));
  }
  ~Dense4bitsBin() {
  }
  /*!
   * \brief Store the bin code for one data point.
   * Low nibbles (even idx) go to data_, high nibbles (odd idx) go to buf_,
   * so the two halves of a byte never write the same array slot before
   * FinishLoad() ORs them together.
   */
  void Push(int, data_size_t idx, uint32_t value) override {
    const int i1 = idx >> 1;        // byte index
    const int i2 = (idx & 1) << 2;  // nibble shift: 0 (even idx) or 4 (odd idx)
    const uint8_t val = static_cast<uint8_t>(value) << i2;
    if (i2 == 0) {
      data_[i1] = val;
    } else {
      buf_[i1] = val;
    }
  }
  /*!
   * \brief Resize storage for a new number of data points.
   * NOTE(review): buf_ is not resized here — presumably ReSize is only
   * called after loading finished; confirm with callers.
   */
  void ReSize(data_size_t num_data) override {
    if (num_data_ != num_data) {
      num_data_ = num_data;
      const int len = (num_data_ + 1) / 2;
      data_.resize(len);
    }
  }
  /*! \brief Create an iterator over [min_bin, max_bin]; caller owns the returned pointer. */
  inline BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const override;
  /*!
   * \brief Histogram-construction core shared by the public overloads below.
   * \tparam USE_INDICES  read data points through data_indices (leaf/bagging subset)
   * \tparam USE_PREFETCH issue software prefetches 64 iterations ahead
   * \tparam USE_HESSIAN  accumulate hessians; otherwise count entries instead
   * Output layout is interleaved: out[2*bin] accumulates gradients and
   * out[2*bin + 1] holds either hessians or (reinterpreted) integer counts.
   */
  template<bool USE_INDICES, bool USE_PREFETCH, bool USE_HESSIAN>
  void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end,
                               const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const {
    data_size_t i = start;
    hist_t* grad = out;
    hist_t* hess = out + 1;
    // When USE_HESSIAN is false, the hessian slot stores an integer count.
    hist_cnt_t* cnt = reinterpret_cast<hist_cnt_t*>(hess);
    if (USE_PREFETCH) {
      const data_size_t pf_offset = 64;
      // May be smaller than start; the prefetch loop is then skipped entirely.
      const data_size_t pf_end = end - pf_offset;
      for (; i < pf_end; ++i) {
        const auto idx = USE_INDICES ? data_indices[i] : i;
        const auto pf_idx = USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
        PREFETCH_T0(data_.data() + (pf_idx >> 1));
        // Extract the 4-bit code for this data point.
        const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
        const uint8_t ti = static_cast<uint8_t>(bin) << 1;  // interleaved grad/hess offset
        if (USE_HESSIAN) {
          grad[ti] += ordered_gradients[i];
          hess[ti] += ordered_hessians[i];
        } else {
          grad[ti] += ordered_gradients[i];
          ++cnt[ti];
        }
      }
    }
    // Tail loop (and the whole range when prefetching is disabled).
    for (; i < end; ++i) {
      const auto idx = USE_INDICES ? data_indices[i] : i;
      const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      const uint8_t ti = static_cast<uint8_t>(bin) << 1;
      if (USE_HESSIAN) {
        grad[ti] += ordered_gradients[i];
        hess[ti] += ordered_hessians[i];
      } else {
        grad[ti] += ordered_gradients[i];
        ++cnt[ti];
      }
    }
  }
  /*! \brief Histogram with indices, prefetch, and hessians. */
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
                          const score_t* ordered_gradients, const score_t* ordered_hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end, ordered_gradients, ordered_hessians, out);
  }
  /*! \brief Histogram over a contiguous range with hessians. */
  void ConstructHistogram(data_size_t start, data_size_t end,
                          const score_t* ordered_gradients, const score_t* ordered_hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, true>(nullptr, start, end, ordered_gradients, ordered_hessians, out);
  }
  /*! \brief Histogram with indices and prefetch; counts instead of hessians. */
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
                          const score_t* ordered_gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end, ordered_gradients, nullptr, out);
  }
  /*! \brief Histogram over a contiguous range; counts instead of hessians. */
  void ConstructHistogram(data_size_t start, data_size_t end,
                          const score_t* ordered_gradients,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(nullptr, start, end, ordered_gradients, nullptr, out);
  }
  /*!
   * \brief Partition data_indices for a numerical split at `threshold`.
   * Bin codes are compared in the feature's global code range [min_bin, max_bin];
   * codes outside that range stand for the feature's most frequent bin.
   * When most_freq_bin == 0 the stored codes are offset down by one (cf. the
   * iterator's offset_), hence the decrements of th / t_zero_bin /
   * t_most_freq_bin below. Missing values go to the side chosen by
   * default_left (NaN case) or to the zero bin's side (Zero case).
   * \return number of indices written to lte_indices
   */
  data_size_t Split(
      uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin, MissingType missing_type, bool default_left,
      uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
      data_size_t* lte_indices, data_size_t* gt_indices) const override {
    if (num_data <= 0) { return 0; }
    // Translate the feature-local threshold / special bins to stored codes.
    uint8_t th = static_cast<uint8_t>(threshold + min_bin);
    const uint8_t minb = static_cast<uint8_t>(min_bin);
    const uint8_t maxb = static_cast<uint8_t>(max_bin);
    uint8_t t_zero_bin = static_cast<uint8_t>(min_bin + default_bin);
    uint8_t t_most_freq_bin = static_cast<uint8_t>(min_bin + most_freq_bin);
    if (most_freq_bin == 0) {
      th -= 1;
      t_zero_bin -= 1;
      t_most_freq_bin -= 1;
    }
    data_size_t lte_count = 0;
    data_size_t gt_count = 0;
    // Destination for codes representing the most frequent bin.
    data_size_t* default_indices = gt_indices;
    data_size_t* default_count = &gt_count;
    // Destination for missing values.
    data_size_t* missing_default_indices = gt_indices;
    data_size_t* missing_default_count = &gt_count;
    if (most_freq_bin <= threshold) {
      default_indices = lte_indices;
      default_count = &lte_count;
    }
    if (missing_type == MissingType::NaN) {
      if (default_left) {
        missing_default_indices = lte_indices;
        missing_default_count = &lte_count;
      }
      if (t_most_freq_bin == maxb) {
        // NaN is stored as maxb, which here coincides with the most frequent
        // bin, so both route to the missing side.
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (t_most_freq_bin == bin || bin < minb || bin > maxb) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      } else {
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (bin == maxb) {
            // maxb encodes NaN for this missing type.
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
            default_indices[(*default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      }
    } else {
      // Missing is Zero (or None): route the zero bin per default_left /
      // threshold position.
      if ((default_left && missing_type == MissingType::Zero)
          || (default_bin <= threshold && missing_type != MissingType::Zero)) {
        missing_default_indices = lte_indices;
        missing_default_count = &lte_count;
      }
      if (default_bin == most_freq_bin) {
        // Zero bin coincides with the most frequent bin; out-of-range codes
        // stand for it and take the missing side.
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      } else {
        for (data_size_t i = 0; i < num_data; ++i) {
          const data_size_t idx = data_indices[i];
          const uint8_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
          if (bin == t_zero_bin) {
            missing_default_indices[(*missing_default_count)++] = idx;
          } else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
            default_indices[(*default_count)++] = idx;
          } else if (bin > th) {
            gt_indices[gt_count++] = idx;
          } else {
            lte_indices[lte_count++] = idx;
          }
        }
      }
    }
    return lte_count;
  }
  /*!
   * \brief Partition data_indices for a categorical split.
   * A point goes to lte iff its feature-local bin (bin - min_bin) is in the
   * threshold bitset; out-of-range codes (the most frequent bin) go to the
   * side containing most_freq_bin.
   * \return number of indices written to lte_indices
   */
  data_size_t SplitCategorical(
      uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin,
      const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data,
      data_size_t* lte_indices, data_size_t* gt_indices) const override {
    if (num_data <= 0) { return 0; }
    data_size_t lte_count = 0;
    data_size_t gt_count = 0;
    data_size_t* default_indices = gt_indices;
    data_size_t* default_count = &gt_count;
    if (Common::FindInBitset(threshold, num_threahold, most_freq_bin)) {
      default_indices = lte_indices;
      default_count = &lte_count;
    }
    for (data_size_t i = 0; i < num_data; ++i) {
      const data_size_t idx = data_indices[i];
      const uint32_t bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      if (bin < min_bin || bin > max_bin) {
        default_indices[(*default_count)++] = idx;
      } else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) {
        lte_indices[lte_count++] = idx;
      } else {
        gt_indices[gt_count++] = idx;
      }
    }
    return lte_count;
  }
  data_size_t num_data() const override { return num_data_; }
  /*!
   * \brief Merge the staged high nibbles from buf_ into data_ after all
   * Push() calls. Idempotent: buf_ is cleared so a second call is a no-op.
   */
  void FinishLoad() override {
    if (buf_.empty()) { return; }
    int len = (num_data_ + 1) / 2;
    for (int i = 0; i < len; ++i) {
      data_[i] |= buf_[i];
    }
    buf_.clear();
  }
  /*!
   * \brief Load packed bins from a raw memory image.
   * With local_used_indices, gathers two source nibbles per destination byte
   * (the odd trailing element, if any, fills only the low nibble); otherwise
   * copies the packed bytes verbatim.
   */
  void LoadFromMemory(const void* memory, const std::vector<data_size_t>& local_used_indices) override {
    const uint8_t* mem_data = reinterpret_cast<const uint8_t*>(memory);
    if (!local_used_indices.empty()) {
      const data_size_t rest = num_data_ & 1;
      for (int i = 0; i < num_data_ - rest; i += 2) {
        // get old bins
        data_size_t idx = local_used_indices[i];
        const auto bin1 = static_cast<uint8_t>((mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
        idx = local_used_indices[i + 1];
        const auto bin2 = static_cast<uint8_t>((mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
        // add
        const int i1 = i >> 1;
        data_[i1] = (bin1 | (bin2 << 4));
      }
      if (rest) {
        data_size_t idx = local_used_indices[num_data_ - 1];
        data_[num_data_ >> 1] = (mem_data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
      }
    } else {
      for (size_t i = 0; i < data_.size(); ++i) {
        data_[i] = mem_data[i];
      }
    }
  }
  /*!
   * \brief Copy a subset of rows from another Dense4bitsBin, re-packing two
   * gathered nibbles per destination byte.
   */
  void CopySubrow(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override {
    auto other_bin = dynamic_cast<const Dense4bitsBin*>(full_bin);
    const data_size_t rest = num_used_indices & 1;
    for (int i = 0; i < num_used_indices - rest; i += 2) {
      data_size_t idx = used_indices[i];
      const auto bin1 = static_cast<uint8_t>((other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
      idx = used_indices[i + 1];
      const auto bin2 = static_cast<uint8_t>((other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf);
      const int i1 = i >> 1;
      data_[i1] = (bin1 | (bin2 << 4));
    }
    if (rest) {
      // Odd count: the last gathered nibble fills only the low half-byte.
      data_size_t idx = used_indices[num_used_indices - 1];
      data_[num_used_indices >> 1] = (other_bin->data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
    }
  }
  /*! \brief Write the packed byte array to a file. */
  void SaveBinaryToFile(const VirtualFileWriter* writer) const override {
    writer->Write(data_.data(), sizeof(uint8_t) * data_.size());
  }
  /*! \brief Serialized size in bytes (the packed array only). */
  size_t SizesInByte() const override {
    return sizeof(uint8_t) * data_.size();
  }
  /*! \brief Deep copy; caller owns the returned pointer. */
  Dense4bitsBin* Clone() override {
    return new Dense4bitsBin(*this);
  }

 protected:
  // Copy constructor used by Clone(); copies the staging buffer too.
  Dense4bitsBin(const Dense4bitsBin& other)
    : num_data_(other.num_data_), data_(other.data_), buf_(other.buf_) {
  }
  data_size_t num_data_;
  // Packed bins: two 4-bit codes per byte, aligned for vectorized access.
  std::vector<uint8_t, Common::AlignmentAllocator<uint8_t, kAlignedSize>> data_;
  // Staging area for high nibbles during Push(); emptied by FinishLoad().
  std::vector<uint8_t> buf_;
};
/*!
 * \brief Decode the 4-bit code at idx into a feature-local bin index.
 * Codes outside [min_bin_, max_bin_] represent the most frequent bin.
 */
uint32_t Dense4bitsBinIterator::Get(data_size_t idx) {
  const int nibble_shift = (idx & 1) << 2;  // 0 for even idx, 4 for odd
  const auto raw = (bin_data_->data_[idx >> 1] >> nibble_shift) & 0xf;
  if (raw < min_bin_ || raw > max_bin_) {
    return most_freq_bin_;
  }
  return raw - min_bin_ + offset_;
}
/*! \brief Return the raw (un-offset) 4-bit code stored for idx. */
uint32_t Dense4bitsBinIterator::RawGet(data_size_t idx) {
  const uint8_t packed = bin_data_->data_[idx >> 1];
  const int nibble_shift = (idx & 1) << 2;
  return (packed >> nibble_shift) & 0xf;
}
/*! \brief Factory for a Dense4bitsBinIterator over [min_bin, max_bin];
 *  the caller takes ownership of the returned pointer. */
inline BinIterator* Dense4bitsBin::GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const {
  return new Dense4bitsBinIterator(this, min_bin, max_bin, most_freq_bin);
}
} // namespace LightGBM
#endif // LIGHTGBM_IO_DENSE_NBITS_BIN_HPP_
/*! /*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved. * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information. * Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/ */
#ifndef LIGHTGBM_IO_SPARSE_BIN_HPP_ #ifndef LIGHTGBM_IO_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_SPARSE_BIN_HPP_ #define LIGHTGBM_IO_SPARSE_BIN_HPP_
...@@ -9,27 +10,29 @@ ...@@ -9,27 +10,29 @@
#include <LightGBM/utils/log.h> #include <LightGBM/utils/log.h>
#include <LightGBM/utils/openmp_wrapper.h> #include <LightGBM/utils/openmp_wrapper.h>
#include <limits>
#include <algorithm> #include <algorithm>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <limits>
#include <utility> #include <utility>
#include <vector> #include <vector>
namespace LightGBM { namespace LightGBM {
template <typename VAL_T> class SparseBin; template <typename VAL_T>
class SparseBin;
const size_t kNumFastIndex = 64; const size_t kNumFastIndex = 64;
template <typename VAL_T> template <typename VAL_T>
class SparseBinIterator: public BinIterator { class SparseBinIterator : public BinIterator {
public: public:
SparseBinIterator(const SparseBin<VAL_T>* bin_data, SparseBinIterator(const SparseBin<VAL_T>* bin_data, uint32_t min_bin,
uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) uint32_t max_bin, uint32_t most_freq_bin)
: bin_data_(bin_data), min_bin_(static_cast<VAL_T>(min_bin)), : bin_data_(bin_data),
max_bin_(static_cast<VAL_T>(max_bin)), min_bin_(static_cast<VAL_T>(min_bin)),
most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) { max_bin_(static_cast<VAL_T>(max_bin)),
most_freq_bin_(static_cast<VAL_T>(most_freq_bin)) {
if (most_freq_bin_ == 0) { if (most_freq_bin_ == 0) {
offset_ = 1; offset_ = 1;
} else { } else {
...@@ -38,7 +41,7 @@ class SparseBinIterator: public BinIterator { ...@@ -38,7 +41,7 @@ class SparseBinIterator: public BinIterator {
Reset(0); Reset(0);
} }
SparseBinIterator(const SparseBin<VAL_T>* bin_data, data_size_t start_idx) SparseBinIterator(const SparseBin<VAL_T>* bin_data, data_size_t start_idx)
: bin_data_(bin_data) { : bin_data_(bin_data) {
Reset(start_idx); Reset(start_idx);
} }
...@@ -67,22 +70,18 @@ class SparseBinIterator: public BinIterator { ...@@ -67,22 +70,18 @@ class SparseBinIterator: public BinIterator {
}; };
template <typename VAL_T> template <typename VAL_T>
class SparseBin: public Bin { class SparseBin : public Bin {
public: public:
friend class SparseBinIterator<VAL_T>; friend class SparseBinIterator<VAL_T>;
explicit SparseBin(data_size_t num_data) explicit SparseBin(data_size_t num_data) : num_data_(num_data) {
: num_data_(num_data) {
int num_threads = OMP_NUM_THREADS(); int num_threads = OMP_NUM_THREADS();
push_buffers_.resize(num_threads); push_buffers_.resize(num_threads);
} }
~SparseBin() { ~SparseBin() {}
}
void ReSize(data_size_t num_data) override { void ReSize(data_size_t num_data) override { num_data_ = num_data; }
num_data_ = num_data;
}
void Push(int tid, data_size_t idx, uint32_t value) override { void Push(int tid, data_size_t idx, uint32_t value) override {
auto cur_bin = static_cast<VAL_T>(value); auto cur_bin = static_cast<VAL_T>(value);
...@@ -91,36 +90,49 @@ class SparseBin: public Bin { ...@@ -91,36 +90,49 @@ class SparseBin: public Bin {
} }
} }
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const override; BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin) const override;
#define ACC_GH(hist, i, g, h) \ #define ACC_GH(hist, i, g, h) \
const auto ti = static_cast<int>(i) << 1; \ const auto ti = static_cast<int>(i) << 1; \
hist[ti] += g; \ hist[ti] += g; \
hist[ti + 1] += h; \ hist[ti + 1] += h;
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end, void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const override { data_size_t end, const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const override {
data_size_t i_delta, cur_pos; data_size_t i_delta, cur_pos;
InitIndex(data_indices[start], &i_delta, &cur_pos); InitIndex(data_indices[start], &i_delta, &cur_pos);
data_size_t i = start; data_size_t i = start;
for (;;) { for (;;) {
if (cur_pos < data_indices[i]) { if (cur_pos < data_indices[i]) {
cur_pos += deltas_[++i_delta]; cur_pos += deltas_[++i_delta];
if (i_delta >= num_vals_) { break; } if (i_delta >= num_vals_) {
break;
}
} else if (cur_pos > data_indices[i]) { } else if (cur_pos > data_indices[i]) {
if (++i >= end) { break; } if (++i >= end) {
break;
}
} else { } else {
const VAL_T bin = vals_[i_delta]; const VAL_T bin = vals_[i_delta];
ACC_GH(out, bin, ordered_gradients[i], ordered_hessians[i]); ACC_GH(out, bin, ordered_gradients[i], ordered_hessians[i]);
if (++i >= end) { break; } if (++i >= end) {
break;
}
cur_pos += deltas_[++i_delta]; cur_pos += deltas_[++i_delta];
if (i_delta >= num_vals_) { break; } if (i_delta >= num_vals_) {
break;
}
} }
} }
} }
void ConstructHistogram(data_size_t start, data_size_t end, void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const override { const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const override {
data_size_t i_delta, cur_pos; data_size_t i_delta, cur_pos;
InitIndex(start, &i_delta, &cur_pos); InitIndex(start, &i_delta, &cur_pos);
while (cur_pos < start && i_delta < num_vals_) { while (cur_pos < start && i_delta < num_vals_) {
...@@ -133,8 +145,9 @@ class SparseBin: public Bin { ...@@ -133,8 +145,9 @@ class SparseBin: public Bin {
} }
} }
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end, void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
const score_t* ordered_gradients, hist_t* out) const override { data_size_t end, const score_t* ordered_gradients,
hist_t* out) const override {
data_size_t i_delta, cur_pos; data_size_t i_delta, cur_pos;
InitIndex(data_indices[start], &i_delta, &cur_pos); InitIndex(data_indices[start], &i_delta, &cur_pos);
data_size_t i = start; data_size_t i = start;
...@@ -143,22 +156,31 @@ class SparseBin: public Bin { ...@@ -143,22 +156,31 @@ class SparseBin: public Bin {
for (;;) { for (;;) {
if (cur_pos < data_indices[i]) { if (cur_pos < data_indices[i]) {
cur_pos += deltas_[++i_delta]; cur_pos += deltas_[++i_delta];
if (i_delta >= num_vals_) { break; } if (i_delta >= num_vals_) {
break;
}
} else if (cur_pos > data_indices[i]) { } else if (cur_pos > data_indices[i]) {
if (++i >= end) { break; } if (++i >= end) {
break;
}
} else { } else {
const uint32_t ti = static_cast<uint32_t>(vals_[i_delta]) << 1; const uint32_t ti = static_cast<uint32_t>(vals_[i_delta]) << 1;
grad[ti] += ordered_gradients[i]; grad[ti] += ordered_gradients[i];
++cnt[ti]; ++cnt[ti];
if (++i >= end) { break; } if (++i >= end) {
break;
}
cur_pos += deltas_[++i_delta]; cur_pos += deltas_[++i_delta];
if (i_delta >= num_vals_) { break; } if (i_delta >= num_vals_) {
break;
}
} }
} }
} }
void ConstructHistogram(data_size_t start, data_size_t end, void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const override { const score_t* ordered_gradients,
hist_t* out) const override {
data_size_t i_delta, cur_pos; data_size_t i_delta, cur_pos;
InitIndex(start, &i_delta, &cur_pos); InitIndex(start, &i_delta, &cur_pos);
hist_t* grad = out; hist_t* grad = out;
...@@ -173,17 +195,17 @@ class SparseBin: public Bin { ...@@ -173,17 +195,17 @@ class SparseBin: public Bin {
cur_pos += deltas_[++i_delta]; cur_pos += deltas_[++i_delta];
} }
} }
#undef ACC_GH #undef ACC_GH
inline void NextNonzeroFast(data_size_t* i_delta, data_size_t* cur_pos) const { inline void NextNonzeroFast(data_size_t* i_delta,
data_size_t* cur_pos) const {
*cur_pos += deltas_[++(*i_delta)]; *cur_pos += deltas_[++(*i_delta)];
if (*i_delta >= num_vals_) { if (*i_delta >= num_vals_) {
*cur_pos = num_data_; *cur_pos = num_data_;
} }
} }
inline bool NextNonzero(data_size_t* i_delta, inline bool NextNonzero(data_size_t* i_delta, data_size_t* cur_pos) const {
data_size_t* cur_pos) const {
*cur_pos += deltas_[++(*i_delta)]; *cur_pos += deltas_[++(*i_delta)];
if (*i_delta < num_vals_) { if (*i_delta < num_vals_) {
return true; return true;
...@@ -193,96 +215,82 @@ class SparseBin: public Bin { ...@@ -193,96 +215,82 @@ class SparseBin: public Bin {
} }
} }
template <bool MISS_IS_ZERO, bool MISS_IS_NA, bool MFB_IS_ZERO,
data_size_t Split( bool MFB_IS_NA, bool USE_MIN_BIN>
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin, data_size_t SplitInner(uint32_t min_bin, uint32_t max_bin,
MissingType missing_type, bool default_left, uint32_t default_bin, uint32_t most_freq_bin,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data, bool default_left, uint32_t threshold,
data_size_t* lte_indices, data_size_t* gt_indices) const override { const data_size_t* data_indices, data_size_t cnt,
if (num_data <= 0) { return 0; } data_size_t* lte_indices,
VAL_T th = static_cast<VAL_T>(threshold + min_bin); data_size_t* gt_indices) const {
const VAL_T minb = static_cast<VAL_T>(min_bin); auto th = static_cast<VAL_T>(threshold + min_bin);
const VAL_T maxb = static_cast<VAL_T>(max_bin); auto t_zero_bin = static_cast<VAL_T>(min_bin + default_bin);
VAL_T t_zero_bin = static_cast<VAL_T>(min_bin + default_bin);
VAL_T t_most_freq_bin = static_cast<VAL_T>(min_bin + most_freq_bin);
if (most_freq_bin == 0) { if (most_freq_bin == 0) {
th -= 1; --th;
t_zero_bin -= 1; --t_zero_bin;
t_most_freq_bin -= 1;
} }
const auto minb = static_cast<VAL_T>(min_bin);
const auto maxb = static_cast<VAL_T>(max_bin);
data_size_t lte_count = 0; data_size_t lte_count = 0;
data_size_t gt_count = 0; data_size_t gt_count = 0;
data_size_t* default_indices = gt_indices; data_size_t* default_indices = gt_indices;
data_size_t* default_count = &gt_count; data_size_t* default_count = &gt_count;
data_size_t* missing_default_indices = gt_indices; data_size_t* missing_default_indices = gt_indices;
data_size_t* missing_default_count = &gt_count; data_size_t* missing_default_count = &gt_count;
SparseBinIterator<VAL_T> iterator(this, data_indices[0]);
if (most_freq_bin <= threshold) { if (most_freq_bin <= threshold) {
default_indices = lte_indices; default_indices = lte_indices;
default_count = &lte_count; default_count = &lte_count;
} }
if (missing_type == MissingType::NaN) { if (MISS_IS_ZERO || MISS_IS_NA) {
if (default_left) { if (default_left) {
missing_default_indices = lte_indices; missing_default_indices = lte_indices;
missing_default_count = &lte_count; missing_default_count = &lte_count;
} }
if (t_most_freq_bin == maxb) { }
for (data_size_t i = 0; i < num_data; ++i) { SparseBinIterator<VAL_T> iterator(this, data_indices[0]);
const data_size_t idx = data_indices[i]; if (min_bin < max_bin) {
const VAL_T bin = iterator.InnerRawGet(idx); for (data_size_t i = 0; i < cnt; ++i) {
if (t_most_freq_bin == bin || bin < minb || bin > maxb) { const data_size_t idx = data_indices[i];
const auto bin = iterator.InnerRawGet(idx);
if ((MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) ||
(MISS_IS_NA && !MFB_IS_NA && bin == maxb)) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if ((USE_MIN_BIN && (bin < minb || bin > maxb)) ||
(!USE_MIN_BIN && bin == 0)) {
if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
missing_default_indices[(*missing_default_count)++] = idx; missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else { } else {
lte_indices[lte_count++] = idx;
}
}
} else {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = iterator.InnerRawGet(idx);
if (bin == maxb) {
missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
default_indices[(*default_count)++] = idx; default_indices[(*default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
} }
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else {
lte_indices[lte_count++] = idx;
} }
} }
} else { } else {
if ((default_left && missing_type == MissingType::Zero) data_size_t* max_bin_indices = gt_indices;
|| (default_bin <= threshold && missing_type != MissingType::Zero)) { data_size_t* max_bin_count = &gt_count;
missing_default_indices = lte_indices; if (maxb <= th) {
missing_default_count = &lte_count; max_bin_indices = lte_indices;
max_bin_count = &lte_count;
} }
if (default_bin == most_freq_bin) { for (data_size_t i = 0; i < cnt; ++i) {
for (data_size_t i = 0; i < num_data; ++i) { const data_size_t idx = data_indices[i];
const data_size_t idx = data_indices[i]; const auto bin = iterator.InnerRawGet(idx);
const VAL_T bin = iterator.InnerRawGet(idx); if (MISS_IS_ZERO && !MFB_IS_ZERO && bin == t_zero_bin) {
if (bin < minb || bin > maxb || t_most_freq_bin == bin) { missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin != maxb) {
if ((MISS_IS_NA && MFB_IS_NA) || (MISS_IS_ZERO && MFB_IS_ZERO)) {
missing_default_indices[(*missing_default_count)++] = idx; missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else { } else {
lte_indices[lte_count++] = idx; default_indices[(*default_count)++] = idx;
} }
} } else {
} else { if (MISS_IS_NA && !MFB_IS_NA) {
for (data_size_t i = 0; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const VAL_T bin = iterator.InnerRawGet(idx);
if (bin == t_zero_bin) {
missing_default_indices[(*missing_default_count)++] = idx; missing_default_indices[(*missing_default_count)++] = idx;
} else if (bin < minb || bin > maxb || t_most_freq_bin == bin) {
default_indices[(*default_count)++] = idx;
} else if (bin > th) {
gt_indices[gt_count++] = idx;
} else { } else {
lte_indices[lte_count++] = idx; max_bin_indices[(*max_bin_count)++] = idx;
} }
} }
} }
...@@ -290,26 +298,85 @@ class SparseBin: public Bin { ...@@ -290,26 +298,85 @@ class SparseBin: public Bin {
return lte_count; return lte_count;
} }
data_size_t SplitCategorical( data_size_t Split(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin, uint32_t most_freq_bin, MissingType missing_type,
const uint32_t* threshold, int num_threahold, data_size_t* data_indices, data_size_t num_data, bool default_left, uint32_t threshold,
data_size_t* lte_indices, data_size_t* gt_indices) const override { const data_size_t* data_indices, data_size_t cnt,
if (num_data <= 0) { return 0; } data_size_t* lte_indices,
data_size_t* gt_indices) const override {
#define ARGUMENTS \
min_bin, max_bin, default_bin, most_freq_bin, default_left, threshold, \
data_indices, cnt, lte_indices, gt_indices
if (missing_type == MissingType::None) {
return SplitInner<false, false, false, false, true>(ARGUMENTS);
} else if (missing_type == MissingType::Zero) {
if (default_bin == most_freq_bin) {
return SplitInner<true, false, true, false, true>(ARGUMENTS);
} else {
return SplitInner<true, false, false, false, true>(ARGUMENTS);
}
} else {
if (max_bin == most_freq_bin + min_bin && most_freq_bin > 0) {
return SplitInner<false, true, false, true, true>(ARGUMENTS);
} else {
return SplitInner<false, true, false, false, true>(ARGUMENTS);
}
}
#undef ARGUMENTS
}
data_size_t Split(uint32_t max_bin, uint32_t default_bin,
uint32_t most_freq_bin, MissingType missing_type,
bool default_left, uint32_t threshold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const override {
#define ARGUMENTS \
1, max_bin, default_bin, most_freq_bin, default_left, threshold, \
data_indices, cnt, lte_indices, gt_indices
if (missing_type == MissingType::None) {
return SplitInner<false, false, false, false, false>(ARGUMENTS);
} else if (missing_type == MissingType::Zero) {
if (default_bin == most_freq_bin) {
return SplitInner<true, false, true, false, false>(ARGUMENTS);
} else {
return SplitInner<true, false, false, false, false>(ARGUMENTS);
}
} else {
if (max_bin == most_freq_bin + 1 && most_freq_bin > 0) {
return SplitInner<false, true, false, true, false>(ARGUMENTS);
} else {
return SplitInner<false, true, false, false, false>(ARGUMENTS);
}
}
#undef ARGUMENTS
}
template <bool USE_MIN_BIN>
data_size_t SplitCategoricalInner(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin,
const uint32_t* threshold,
int num_threahold,
const data_size_t* data_indices,
data_size_t cnt, data_size_t* lte_indices,
data_size_t* gt_indices) const {
data_size_t lte_count = 0; data_size_t lte_count = 0;
data_size_t gt_count = 0; data_size_t gt_count = 0;
SparseBinIterator<VAL_T> iterator(this, data_indices[0]);
data_size_t* default_indices = gt_indices; data_size_t* default_indices = gt_indices;
data_size_t* default_count = &gt_count; data_size_t* default_count = &gt_count;
SparseBinIterator<VAL_T> iterator(this, data_indices[0]);
if (Common::FindInBitset(threshold, num_threahold, most_freq_bin)) { if (Common::FindInBitset(threshold, num_threahold, most_freq_bin)) {
default_indices = lte_indices; default_indices = lte_indices;
default_count = &lte_count; default_count = &lte_count;
} }
for (data_size_t i = 0; i < num_data; ++i) { for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t idx = data_indices[i]; const data_size_t idx = data_indices[i];
uint32_t bin = iterator.InnerRawGet(idx); const uint32_t bin = iterator.RawGet(idx);
if (bin < min_bin || bin > max_bin) { if (USE_MIN_BIN && (bin < min_bin || bin > max_bin)) {
default_indices[(*default_count)++] = idx;
} else if (!USE_MIN_BIN && bin == 0) {
default_indices[(*default_count)++] = idx; default_indices[(*default_count)++] = idx;
} else if (Common::FindInBitset(threshold, num_threahold, bin - min_bin)) { } else if (Common::FindInBitset(threshold, num_threahold,
bin - min_bin)) {
lte_indices[lte_count++] = idx; lte_indices[lte_count++] = idx;
} else { } else {
gt_indices[gt_count++] = idx; gt_indices[gt_count++] = idx;
...@@ -318,6 +385,27 @@ class SparseBin: public Bin { ...@@ -318,6 +385,27 @@ class SparseBin: public Bin {
return lte_count; return lte_count;
} }
data_size_t SplitCategorical(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin,
const uint32_t* threshold, int num_threahold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const override {
return SplitCategoricalInner<true>(min_bin, max_bin, most_freq_bin,
threshold, num_threahold, data_indices,
cnt, lte_indices, gt_indices);
}
data_size_t SplitCategorical(uint32_t max_bin, uint32_t most_freq_bin,
const uint32_t* threshold, int num_threahold,
const data_size_t* data_indices, data_size_t cnt,
data_size_t* lte_indices,
data_size_t* gt_indices) const override {
return SplitCategoricalInner<false>(1, max_bin, most_freq_bin, threshold,
num_threahold, data_indices, cnt,
lte_indices, gt_indices);
}
data_size_t num_data() const override { return num_data_; } data_size_t num_data() const override { return num_data_; }
void FinishLoad() override { void FinishLoad() override {
...@@ -326,24 +414,28 @@ class SparseBin: public Bin { ...@@ -326,24 +414,28 @@ class SparseBin: public Bin {
for (size_t i = 0; i < push_buffers_.size(); ++i) { for (size_t i = 0; i < push_buffers_.size(); ++i) {
pair_cnt += push_buffers_[i].size(); pair_cnt += push_buffers_[i].size();
} }
std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs = push_buffers_[0]; std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs =
push_buffers_[0];
idx_val_pairs.reserve(pair_cnt); idx_val_pairs.reserve(pair_cnt);
for (size_t i = 1; i < push_buffers_.size(); ++i) { for (size_t i = 1; i < push_buffers_.size(); ++i) {
idx_val_pairs.insert(idx_val_pairs.end(), push_buffers_[i].begin(), push_buffers_[i].end()); idx_val_pairs.insert(idx_val_pairs.end(), push_buffers_[i].begin(),
push_buffers_[i].end());
push_buffers_[i].clear(); push_buffers_[i].clear();
push_buffers_[i].shrink_to_fit(); push_buffers_[i].shrink_to_fit();
} }
// sort by data index // sort by data index
std::sort(idx_val_pairs.begin(), idx_val_pairs.end(), std::sort(idx_val_pairs.begin(), idx_val_pairs.end(),
[](const std::pair<data_size_t, VAL_T>& a, const std::pair<data_size_t, VAL_T>& b) { [](const std::pair<data_size_t, VAL_T>& a,
return a.first < b.first; const std::pair<data_size_t, VAL_T>& b) {
}); return a.first < b.first;
});
// load delta array // load delta array
LoadFromPair(idx_val_pairs); LoadFromPair(idx_val_pairs);
} }
void LoadFromPair(const std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs) { void LoadFromPair(
const std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs) {
deltas_.clear(); deltas_.clear();
vals_.clear(); vals_.clear();
deltas_.reserve(idx_val_pairs.size()); deltas_.reserve(idx_val_pairs.size());
...@@ -355,7 +447,9 @@ class SparseBin: public Bin { ...@@ -355,7 +447,9 @@ class SparseBin: public Bin {
const VAL_T bin = idx_val_pairs[i].second; const VAL_T bin = idx_val_pairs[i].second;
data_size_t cur_delta = cur_idx - last_idx; data_size_t cur_delta = cur_idx - last_idx;
// disallow the multi-val in one row // disallow the multi-val in one row
if (i > 0 && cur_delta == 0) { continue; } if (i > 0 && cur_delta == 0) {
continue;
}
while (cur_delta >= 256) { while (cur_delta >= 256) {
deltas_.push_back(255); deltas_.push_back(255);
vals_.push_back(0); vals_.push_back(0);
...@@ -412,11 +506,13 @@ class SparseBin: public Bin { ...@@ -412,11 +506,13 @@ class SparseBin: public Bin {
} }
size_t SizesInByte() const override { size_t SizesInByte() const override {
return sizeof(num_vals_) + sizeof(uint8_t) * (num_vals_ + 1) return sizeof(num_vals_) + sizeof(uint8_t) * (num_vals_ + 1) +
+ sizeof(VAL_T) * num_vals_; sizeof(VAL_T) * num_vals_;
} }
void LoadFromMemory(const void* memory, const std::vector<data_size_t>& local_used_indices) override { void LoadFromMemory(
const void* memory,
const std::vector<data_size_t>& local_used_indices) override {
const char* mem_ptr = reinterpret_cast<const char*>(memory); const char* mem_ptr = reinterpret_cast<const char*>(memory);
data_size_t tmp_num_vals = *(reinterpret_cast<const data_size_t*>(mem_ptr)); data_size_t tmp_num_vals = *(reinterpret_cast<const data_size_t*>(mem_ptr));
mem_ptr += sizeof(tmp_num_vals); mem_ptr += sizeof(tmp_num_vals);
...@@ -443,7 +539,8 @@ class SparseBin: public Bin { ...@@ -443,7 +539,8 @@ class SparseBin: public Bin {
std::vector<std::pair<data_size_t, VAL_T>> tmp_pair; std::vector<std::pair<data_size_t, VAL_T>> tmp_pair;
data_size_t cur_pos = 0; data_size_t cur_pos = 0;
data_size_t j = -1; data_size_t j = -1;
for (data_size_t i = 0; i < static_cast<data_size_t>(local_used_indices.size()); ++i) { for (data_size_t i = 0;
i < static_cast<data_size_t>(local_used_indices.size()); ++i) {
const data_size_t idx = local_used_indices[i]; const data_size_t idx = local_used_indices[i];
while (cur_pos < idx && j < num_vals_) { while (cur_pos < idx && j < num_vals_) {
NextNonzero(&j, &cur_pos); NextNonzero(&j, &cur_pos);
...@@ -457,7 +554,8 @@ class SparseBin: public Bin { ...@@ -457,7 +554,8 @@ class SparseBin: public Bin {
} }
} }
void CopySubrow(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override { void CopySubrow(const Bin* full_bin, const data_size_t* used_indices,
data_size_t num_used_indices) override {
auto other_bin = dynamic_cast<const SparseBin<VAL_T>*>(full_bin); auto other_bin = dynamic_cast<const SparseBin<VAL_T>*>(full_bin);
deltas_.clear(); deltas_.clear();
vals_.clear(); vals_.clear();
...@@ -497,12 +595,16 @@ class SparseBin: public Bin { ...@@ -497,12 +595,16 @@ class SparseBin: public Bin {
SparseBin<VAL_T>* Clone() override; SparseBin<VAL_T>* Clone() override;
SparseBin<VAL_T>(const SparseBin<VAL_T>& other) SparseBin<VAL_T>(const SparseBin<VAL_T>& other)
: num_data_(other.num_data_), deltas_(other.deltas_), vals_(other.vals_), : num_data_(other.num_data_),
num_vals_(other.num_vals_), push_buffers_(other.push_buffers_), deltas_(other.deltas_),
fast_index_(other.fast_index_), fast_index_shift_(other.fast_index_shift_) { vals_(other.vals_),
} num_vals_(other.num_vals_),
push_buffers_(other.push_buffers_),
void InitIndex(data_size_t start_idx, data_size_t * i_delta, data_size_t * cur_pos) const { fast_index_(other.fast_index_),
fast_index_shift_(other.fast_index_shift_) {}
void InitIndex(data_size_t start_idx, data_size_t* i_delta,
data_size_t* cur_pos) const {
auto idx = start_idx >> fast_index_shift_; auto idx = start_idx >> fast_index_shift_;
if (static_cast<size_t>(idx) < fast_index_.size()) { if (static_cast<size_t>(idx) < fast_index_.size()) {
const auto fast_pair = fast_index_[start_idx >> fast_index_shift_]; const auto fast_pair = fast_index_[start_idx >> fast_index_shift_];
...@@ -516,7 +618,8 @@ class SparseBin: public Bin { ...@@ -516,7 +618,8 @@ class SparseBin: public Bin {
private: private:
data_size_t num_data_; data_size_t num_data_;
std::vector<uint8_t, Common::AlignmentAllocator<uint8_t, kAlignedSize>> deltas_; std::vector<uint8_t, Common::AlignmentAllocator<uint8_t, kAlignedSize>>
deltas_;
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> vals_; std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> vals_;
data_size_t num_vals_; data_size_t num_vals_;
std::vector<std::vector<std::pair<data_size_t, VAL_T>>> push_buffers_; std::vector<std::vector<std::pair<data_size_t, VAL_T>>> push_buffers_;
...@@ -524,7 +627,7 @@ class SparseBin: public Bin { ...@@ -524,7 +627,7 @@ class SparseBin: public Bin {
data_size_t fast_index_shift_; data_size_t fast_index_shift_;
}; };
template<typename VAL_T> template <typename VAL_T>
SparseBin<VAL_T>* SparseBin<VAL_T>::Clone() { SparseBin<VAL_T>* SparseBin<VAL_T>::Clone() {
return new SparseBin(*this); return new SparseBin(*this);
} }
...@@ -552,9 +655,10 @@ inline void SparseBinIterator<VAL_T>::Reset(data_size_t start_idx) { ...@@ -552,9 +655,10 @@ inline void SparseBinIterator<VAL_T>::Reset(data_size_t start_idx) {
} }
template <typename VAL_T> template <typename VAL_T>
BinIterator* SparseBin<VAL_T>::GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin) const { BinIterator* SparseBin<VAL_T>::GetIterator(uint32_t min_bin, uint32_t max_bin,
uint32_t most_freq_bin) const {
return new SparseBinIterator<VAL_T>(this, min_bin, max_bin, most_freq_bin); return new SparseBinIterator<VAL_T>(this, min_bin, max_bin, most_freq_bin);
} }
} // namespace LightGBM } // namespace LightGBM
#endif // LightGBM_IO_SPARSE_BIN_HPP_ #endif // LightGBM_IO_SPARSE_BIN_HPP_
...@@ -106,7 +106,7 @@ class DataPartition { ...@@ -106,7 +106,7 @@ class DataPartition {
const data_size_t begin = leaf_begin_[leaf]; const data_size_t begin = leaf_begin_[leaf];
const data_size_t cnt = leaf_count_[leaf]; const data_size_t cnt = leaf_count_[leaf];
auto left_start = indices_.data() + begin; auto left_start = indices_.data() + begin;
auto left_cnt = runner_.Run<false>( const auto left_cnt = runner_.Run<false>(
cnt, cnt,
[=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left, [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
data_size_t* right) { data_size_t* right) {
......
...@@ -144,72 +144,67 @@ class FeatureHistogram { ...@@ -144,72 +144,67 @@ class FeatureHistogram {
template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT> template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT>
void FuncForNumricalL2() { void FuncForNumricalL2() {
#define TEMPLATE_PREFIX USE_RAND, USE_MC, USE_L1, USE_MAX_OUTPUT
#define LAMBDA_ARGUMENTS \
double sum_gradient, double sum_hessian, data_size_t num_data, \
const ConstraintEntry &constraints, SplitInfo *output
#define BEFORE_ARGUMENTS sum_gradient, sum_hessian, output, &rand_threshold
#define FUNC_ARGUMENTS \
sum_gradient, sum_hessian, num_data, constraints, min_gain_shift, output, \
rand_threshold
if (meta_->num_bin > 2 && meta_->missing_type != MissingType::None) { if (meta_->num_bin > 2 && meta_->missing_type != MissingType::None) {
if (meta_->missing_type == MissingType::Zero) { if (meta_->missing_type == MissingType::Zero) {
find_best_threshold_fun_ = find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
[=](double sum_gradient, double sum_hessian, data_size_t num_data, int rand_threshold = 0;
const ConstraintEntry& constraints, SplitInfo* output) { double min_gain_shift =
int rand_threshold = 0; BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(
double min_gain_shift = BEFORE_ARGUMENTS);
BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>( FindBestThresholdSequentially<TEMPLATE_PREFIX, true, true, false>(
sum_gradient, sum_hessian, output, &rand_threshold); FUNC_ARGUMENTS);
FindBestThresholdSequentially<USE_RAND, USE_MC, USE_L1, FindBestThresholdSequentially<TEMPLATE_PREFIX, false, true, false>(
USE_MAX_OUTPUT, true, true, false>( FUNC_ARGUMENTS);
sum_gradient, sum_hessian, num_data, constraints, };
min_gain_shift, output, rand_threshold);
FindBestThresholdSequentially<USE_RAND, USE_MC, USE_L1,
USE_MAX_OUTPUT, false, true, false>(
sum_gradient, sum_hessian, num_data, constraints,
min_gain_shift, output, rand_threshold);
};
} else { } else {
find_best_threshold_fun_ = find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
[=](double sum_gradient, double sum_hessian, data_size_t num_data, int rand_threshold = 0;
const ConstraintEntry& constraints, SplitInfo* output) { double min_gain_shift =
int rand_threshold = 0; BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(
double min_gain_shift = BEFORE_ARGUMENTS);
BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>( FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, true>(
sum_gradient, sum_hessian, output, &rand_threshold); FUNC_ARGUMENTS);
FindBestThresholdSequentially<USE_RAND, USE_MC, USE_L1, FindBestThresholdSequentially<TEMPLATE_PREFIX, false, false, true>(
USE_MAX_OUTPUT, true, false, true>( FUNC_ARGUMENTS);
sum_gradient, sum_hessian, num_data, constraints, };
min_gain_shift, output, rand_threshold);
FindBestThresholdSequentially<USE_RAND, USE_MC, USE_L1,
USE_MAX_OUTPUT, false, false, true>(
sum_gradient, sum_hessian, num_data, constraints,
min_gain_shift, output, rand_threshold);
};
} }
} else { } else {
if (meta_->missing_type != MissingType::NaN) { if (meta_->missing_type != MissingType::NaN) {
find_best_threshold_fun_ = find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
[=](double sum_gradient, double sum_hessian, data_size_t num_data, int rand_threshold = 0;
const ConstraintEntry& constraints, SplitInfo* output) { double min_gain_shift =
int rand_threshold = 0; BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(
double min_gain_shift = BEFORE_ARGUMENTS);
BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>( FindBestThresholdSequentially<TEMPLATE_PREFIX, true, false, false>(
sum_gradient, sum_hessian, output, &rand_threshold); FUNC_ARGUMENTS);
FindBestThresholdSequentially<USE_RAND, USE_MC, USE_L1, };
USE_MAX_OUTPUT, true, false, false>(
sum_gradient, sum_hessian, num_data, constraints,
min_gain_shift, output, rand_threshold);
};
} else { } else {
find_best_threshold_fun_ = find_best_threshold_fun_ = [=](LAMBDA_ARGUMENTS) {
[=](double sum_gradient, double sum_hessian, data_size_t num_data, int rand_threshold = 0;
const ConstraintEntry& constraints, SplitInfo* output) { double min_gain_shift =
int rand_threshold = 0; BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>(
double min_gain_shift = BEFORE_ARGUMENTS);
BeforeNumercal<USE_RAND, USE_L1, USE_MAX_OUTPUT>( FindBestThresholdSequentially<USE_RAND, USE_MC, USE_L1,
sum_gradient, sum_hessian, output, &rand_threshold); USE_MAX_OUTPUT, true, false, false>(
FindBestThresholdSequentially<USE_RAND, USE_MC, USE_L1, FUNC_ARGUMENTS);
USE_MAX_OUTPUT, true, false, false>( output->default_left = false;
sum_gradient, sum_hessian, num_data, constraints, };
min_gain_shift, output, rand_threshold);
output->default_left = false;
};
} }
} }
#undef TEMPLATE_PREFIX
#undef LAMBDA_ARGUMENTS
#undef BEFORE_ARGUMENTS
#undef FUNC_ARGURMENTS
} }
void FuncForCategorical() { void FuncForCategorical() {
...@@ -227,41 +222,38 @@ class FeatureHistogram { ...@@ -227,41 +222,38 @@ class FeatureHistogram {
} }
} }
} }
template <bool USE_RAND, bool USE_MC> template <bool USE_RAND, bool USE_MC>
void FuncForCategoricalL1() { void FuncForCategoricalL1() {
#define ARGUMENTS \
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, \
std::placeholders::_4, std::placeholders::_5
if (meta_->config->lambda_l1 > 0) { if (meta_->config->lambda_l1 > 0) {
if (meta_->config->max_delta_step > 0) { if (meta_->config->max_delta_step > 0) {
find_best_threshold_fun_ = find_best_threshold_fun_ =
std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner< std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner<
USE_RAND, USE_MC, true, true>, USE_RAND, USE_MC, true, true>,
this, std::placeholders::_1, std::placeholders::_2, this, ARGUMENTS);
std::placeholders::_3, std::placeholders::_4,
std::placeholders::_5);
} else { } else {
find_best_threshold_fun_ = find_best_threshold_fun_ =
std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner< std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner<
USE_RAND, USE_MC, true, false>, USE_RAND, USE_MC, true, false>,
this, std::placeholders::_1, std::placeholders::_2, this, ARGUMENTS);
std::placeholders::_3, std::placeholders::_4,
std::placeholders::_5);
} }
} else { } else {
if (meta_->config->max_delta_step > 0) { if (meta_->config->max_delta_step > 0) {
find_best_threshold_fun_ = find_best_threshold_fun_ =
std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner< std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner<
USE_RAND, USE_MC, false, true>, USE_RAND, USE_MC, false, true>,
this, std::placeholders::_1, std::placeholders::_2, this, ARGUMENTS);
std::placeholders::_3, std::placeholders::_4,
std::placeholders::_5);
} else { } else {
find_best_threshold_fun_ = find_best_threshold_fun_ =
std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner< std::bind(&FeatureHistogram::FindBestThresholdCategoricalInner<
USE_RAND, USE_MC, false, false>, USE_RAND, USE_MC, false, false>,
this, std::placeholders::_1, std::placeholders::_2, this, ARGUMENTS);
std::placeholders::_3, std::placeholders::_4,
std::placeholders::_5);
} }
} }
#undef ARGUMENTS
} }
template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT> template <bool USE_RAND, bool USE_MC, bool USE_L1, bool USE_MAX_OUTPUT>
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#include <algorithm> #include <algorithm>
#include "../io/dense_bin.hpp" #include "../io/dense_bin.hpp"
#include "../io/dense_nbits_bin.hpp"
#define GPU_DEBUG 0 #define GPU_DEBUG 0
...@@ -378,20 +377,20 @@ void GPUTreeLearner::AllocateGPUMemory() { ...@@ -378,20 +377,20 @@ void GPUTreeLearner::AllocateGPUMemory() {
BinIterator* bin_iters[8]; BinIterator* bin_iters[8];
for (int s_idx = 0; s_idx < 8; ++s_idx) { for (int s_idx = 0; s_idx < 8; ++s_idx) {
bin_iters[s_idx] = train_data_->FeatureGroupIterator(dense_ind[s_idx]); bin_iters[s_idx] = train_data_->FeatureGroupIterator(dense_ind[s_idx]);
if (dynamic_cast<Dense4bitsBinIterator*>(bin_iters[s_idx]) == 0) { if (dynamic_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[s_idx]) == 0) {
Log::Fatal("GPU tree learner assumes that all bins are Dense4bitsBin when num_bin <= 16, but feature %d is not", dense_ind[s_idx]); Log::Fatal("GPU tree learner assumes that all bins are Dense4bitsBin when num_bin <= 16, but feature %d is not", dense_ind[s_idx]);
} }
} }
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching // this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
Dense4bitsBinIterator iters[8] = { DenseBinIterator<uint8_t, true> iters[8] = {
*static_cast<Dense4bitsBinIterator*>(bin_iters[0]), *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[0]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[1]), *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[1]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[2]), *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[2]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[3]), *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[3]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[4]), *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[4]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[5]), *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[5]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[6]), *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[6]),
*static_cast<Dense4bitsBinIterator*>(bin_iters[7])}; *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iters[7])};
for (int j = 0; j < num_data_; ++j) { for (int j = 0; j < num_data_; ++j) {
host4[j].s[0] = (uint8_t)((iters[0].RawGet(j) * dev_bin_mult[0] + ((j+0) & (dev_bin_mult[0] - 1))) host4[j].s[0] = (uint8_t)((iters[0].RawGet(j) * dev_bin_mult[0] + ((j+0) & (dev_bin_mult[0] - 1)))
|((iters[1].RawGet(j) * dev_bin_mult[1] + ((j+1) & (dev_bin_mult[1] - 1))) << 4)); |((iters[1].RawGet(j) * dev_bin_mult[1] + ((j+1) & (dev_bin_mult[1] - 1))) << 4));
...@@ -407,15 +406,15 @@ void GPUTreeLearner::AllocateGPUMemory() { ...@@ -407,15 +406,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
for (int s_idx = 0; s_idx < 4; ++s_idx) { for (int s_idx = 0; s_idx < 4; ++s_idx) {
BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_ind[s_idx]); BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_ind[s_idx]);
// this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching // this guarantees that the RawGet() function is inlined, rather than using virtual function dispatching
if (dynamic_cast<DenseBinIterator<uint8_t>*>(bin_iter) != 0) { if (dynamic_cast<DenseBinIterator<uint8_t, false>*>(bin_iter) != 0) {
// Dense bin // Dense bin
DenseBinIterator<uint8_t> iter = *static_cast<DenseBinIterator<uint8_t>*>(bin_iter); DenseBinIterator<uint8_t, false> iter = *static_cast<DenseBinIterator<uint8_t, false>*>(bin_iter);
for (int j = 0; j < num_data_; ++j) { for (int j = 0; j < num_data_; ++j) {
host4[j].s[s_idx] = (uint8_t)(iter.RawGet(j) * dev_bin_mult[s_idx] + ((j+s_idx) & (dev_bin_mult[s_idx] - 1))); host4[j].s[s_idx] = (uint8_t)(iter.RawGet(j) * dev_bin_mult[s_idx] + ((j+s_idx) & (dev_bin_mult[s_idx] - 1)));
} }
} else if (dynamic_cast<Dense4bitsBinIterator*>(bin_iter) != 0) { } else if (dynamic_cast<DenseBinIterator<uint8_t, true>*>(bin_iter) != 0) {
// Dense 4-bit bin // Dense 4-bit bin
Dense4bitsBinIterator iter = *static_cast<Dense4bitsBinIterator*>(bin_iter); DenseBinIterator<uint8_t, true> iter = *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iter);
for (int j = 0; j < num_data_; ++j) { for (int j = 0; j < num_data_; ++j) {
host4[j].s[s_idx] = (uint8_t)(iter.RawGet(j) * dev_bin_mult[s_idx] + ((j+s_idx) & (dev_bin_mult[s_idx] - 1))); host4[j].s[s_idx] = (uint8_t)(iter.RawGet(j) * dev_bin_mult[s_idx] + ((j+s_idx) & (dev_bin_mult[s_idx] - 1)));
} }
...@@ -450,8 +449,8 @@ void GPUTreeLearner::AllocateGPUMemory() { ...@@ -450,8 +449,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
for (int i = 0; i < k; ++i) { for (int i = 0; i < k; ++i) {
if (dword_features_ == 8) { if (dword_features_ == 8) {
BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_dword_ind[i]); BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_dword_ind[i]);
if (dynamic_cast<Dense4bitsBinIterator*>(bin_iter) != 0) { if (dynamic_cast<DenseBinIterator<uint8_t, true>*>(bin_iter) != 0) {
Dense4bitsBinIterator iter = *static_cast<Dense4bitsBinIterator*>(bin_iter); DenseBinIterator<uint8_t, true> iter = *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iter);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int j = 0; j < num_data_; ++j) { for (int j = 0; j < num_data_; ++j) {
host4[j].s[i >> 1] |= (uint8_t)((iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i] host4[j].s[i >> 1] |= (uint8_t)((iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i]
...@@ -463,15 +462,15 @@ void GPUTreeLearner::AllocateGPUMemory() { ...@@ -463,15 +462,15 @@ void GPUTreeLearner::AllocateGPUMemory() {
} }
} else if (dword_features_ == 4) { } else if (dword_features_ == 4) {
BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_dword_ind[i]); BinIterator* bin_iter = train_data_->FeatureGroupIterator(dense_dword_ind[i]);
if (dynamic_cast<DenseBinIterator<uint8_t>*>(bin_iter) != 0) { if (dynamic_cast<DenseBinIterator<uint8_t, false>*>(bin_iter) != 0) {
DenseBinIterator<uint8_t> iter = *static_cast<DenseBinIterator<uint8_t>*>(bin_iter); DenseBinIterator<uint8_t, false> iter = *static_cast<DenseBinIterator<uint8_t, false>*>(bin_iter);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int j = 0; j < num_data_; ++j) { for (int j = 0; j < num_data_; ++j) {
host4[j].s[i] = (uint8_t)(iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i] host4[j].s[i] = (uint8_t)(iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i]
+ ((j+i) & (device_bin_mults_[copied_feature4 * dword_features_ + i] - 1))); + ((j+i) & (device_bin_mults_[copied_feature4 * dword_features_ + i] - 1)));
} }
} else if (dynamic_cast<Dense4bitsBinIterator*>(bin_iter) != 0) { } else if (dynamic_cast<DenseBinIterator<uint8_t, true>*>(bin_iter) != 0) {
Dense4bitsBinIterator iter = *static_cast<Dense4bitsBinIterator*>(bin_iter); DenseBinIterator<uint8_t, true> iter = *static_cast<DenseBinIterator<uint8_t, true>*>(bin_iter);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int j = 0; j < num_data_; ++j) { for (int j = 0; j < num_data_; ++j) {
host4[j].s[i] = (uint8_t)(iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i] host4[j].s[i] = (uint8_t)(iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i]
......
...@@ -263,7 +263,6 @@ ...@@ -263,7 +263,6 @@
<ClInclude Include="..\src\boosting\rf.hpp" /> <ClInclude Include="..\src\boosting\rf.hpp" />
<ClInclude Include="..\src\boosting\score_updater.hpp" /> <ClInclude Include="..\src\boosting\score_updater.hpp" />
<ClInclude Include="..\src\io\dense_bin.hpp" /> <ClInclude Include="..\src\io\dense_bin.hpp" />
<ClInclude Include="..\src\io\dense_nbits_bin.hpp" />
<ClInclude Include="..\src\io\multi_val_dense_bin.hpp" /> <ClInclude Include="..\src\io\multi_val_dense_bin.hpp" />
<ClInclude Include="..\src\io\multi_val_sparse_bin.hpp" /> <ClInclude Include="..\src\io\multi_val_sparse_bin.hpp" />
<ClInclude Include="..\src\io\parser.hpp" /> <ClInclude Include="..\src\io\parser.hpp" />
......
...@@ -174,9 +174,6 @@ ...@@ -174,9 +174,6 @@
<ClInclude Include="..\src\boosting\goss.hpp"> <ClInclude Include="..\src\boosting\goss.hpp">
<Filter>src\boosting</Filter> <Filter>src\boosting</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\io\dense_nbits_bin.hpp">
<Filter>src\io</Filter>
</ClInclude>
<ClInclude Include="..\include\LightGBM\utils\openmp_wrapper.h"> <ClInclude Include="..\include\LightGBM\utils\openmp_wrapper.h">
<Filter>include\LightGBM\utils</Filter> <Filter>include\LightGBM\utils</Filter>
</ClInclude> </ClInclude>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment