Commit c62dcf73 authored by Guolin Ke
Browse files

fix merge bugs.

parent 7a82ba4f
...@@ -12,20 +12,20 @@ ...@@ -12,20 +12,20 @@
namespace LightGBM { namespace LightGBM {
enum BinType { enum BinType {
NumericalBin, NumericalBin,
CategoricalBin CategoricalBin
}; };
enum MissingType { enum MissingType {
None, None,
Zero, Zero,
NaN NaN
}; };
/*! \brief Store data for one histogram bin */ /*! \brief Store data for one histogram bin */
struct HistogramBinEntry { struct HistogramBinEntry {
public: public:
/*! \brief Sum of gradients on this bin */ /*! \brief Sum of gradients on this bin */
double sum_gradients = 0.0f; double sum_gradients = 0.0f;
/*! \brief Sum of hessians on this bin */ /*! \brief Sum of hessians on this bin */
...@@ -53,12 +53,12 @@ public: ...@@ -53,12 +53,12 @@ public:
used_size += type_size; used_size += type_size;
} }
} }
}; };
/*! \brief This class used to convert feature values into bin, /*! \brief This class used to convert feature values into bin,
* and store some meta information for bin*/ * and store some meta information for bin*/
class BinMapper { class BinMapper {
public: public:
BinMapper(); BinMapper();
BinMapper(const BinMapper& other); BinMapper(const BinMapper& other);
explicit BinMapper(const void* memory); explicit BinMapper(const void* memory);
...@@ -183,7 +183,7 @@ public: ...@@ -183,7 +183,7 @@ public:
} }
} }
private: private:
/*! \brief Number of bins */ /*! \brief Number of bins */
int num_bin_; int num_bin_;
MissingType missing_type_; MissingType missing_type_;
...@@ -205,18 +205,18 @@ private: ...@@ -205,18 +205,18 @@ private:
double max_val_; double max_val_;
/*! \brief bin value of feature value 0 */ /*! \brief bin value of feature value 0 */
uint32_t default_bin_; uint32_t default_bin_;
}; };
/*! /*!
* \brief Interface for ordered bin data. efficient for construct histogram, especially for sparse bin * \brief Interface for ordered bin data. efficient for construct histogram, especially for sparse bin
* There are 2 advantages by using ordered bin. * There are 2 advantages by using ordered bin.
* 1. group the data by leafs to improve the cache hit. * 1. group the data by leafs to improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram consturction for sparse features. * 2. only store the non-zero bin, which can speed up the histogram consturction for sparse features.
* However it brings additional cost: it need re-order the bins after every split, which will cost much for dense feature. * However it brings additional cost: it need re-order the bins after every split, which will cost much for dense feature.
* So we only using ordered bin for sparse situations. * So we only using ordered bin for sparse situations.
*/ */
class OrderedBin { class OrderedBin {
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~OrderedBin() {} virtual ~OrderedBin() {}
...@@ -260,11 +260,11 @@ public: ...@@ -260,11 +260,11 @@ public:
virtual void Split(int leaf, int right_leaf, const char* is_in_leaf, char mark) = 0; virtual void Split(int leaf, int right_leaf, const char* is_in_leaf, char mark) = 0;
virtual data_size_t NonZeroCount(int leaf) const = 0; virtual data_size_t NonZeroCount(int leaf) const = 0;
}; };
/*! \brief Iterator for one bin column */ /*! \brief Iterator for one bin column */
class BinIterator { class BinIterator {
public: public:
/*! /*!
* \brief Get bin data on specific row index * \brief Get bin data on specific row index
* \param idx Index of this data * \param idx Index of this data
...@@ -274,16 +274,16 @@ public: ...@@ -274,16 +274,16 @@ public:
virtual uint32_t RawGet(data_size_t idx) = 0; virtual uint32_t RawGet(data_size_t idx) = 0;
virtual void Reset(data_size_t idx) = 0; virtual void Reset(data_size_t idx) = 0;
virtual ~BinIterator() = default; virtual ~BinIterator() = default;
}; };
/*! /*!
* \brief Interface for bin data. This class will store bin data for one feature. * \brief Interface for bin data. This class will store bin data for one feature.
* unlike OrderedBin, this class will store data by original order. * unlike OrderedBin, this class will store data by original order.
* Note that it may cause cache misses when construct histogram, * Note that it may cause cache misses when construct histogram,
* but it doesn't need to re-order operation, So it will be faster than OrderedBin for dense feature * but it doesn't need to re-order operation, So it will be faster than OrderedBin for dense feature
*/ */
class Bin { class Bin {
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~Bin() {} virtual ~Bin() {}
/*! /*!
...@@ -429,9 +429,9 @@ public: ...@@ -429,9 +429,9 @@ public:
* \return The bin data object * \return The bin data object
*/ */
static Bin* CreateSparseBin(data_size_t num_data, int num_bin); static Bin* CreateSparseBin(data_size_t num_data, int num_bin);
}; };
inline uint32_t BinMapper::ValueToBin(double value) const { inline uint32_t BinMapper::ValueToBin(double value) const {
if (std::isnan(value)) { if (std::isnan(value)) {
if (missing_type_ == MissingType::NaN) { if (missing_type_ == MissingType::NaN) {
return num_bin_ - 1; return num_bin_ - 1;
...@@ -457,13 +457,17 @@ inline uint32_t BinMapper::ValueToBin(double value) const { ...@@ -457,13 +457,17 @@ inline uint32_t BinMapper::ValueToBin(double value) const {
return l; return l;
} else { } else {
int int_value = static_cast<int>(value); int int_value = static_cast<int>(value);
// convert negative value to NaN bin
if (int_value < 0) {
return num_bin_ - 1;
}
if (categorical_2_bin_.count(int_value)) { if (categorical_2_bin_.count(int_value)) {
return categorical_2_bin_.at(int_value); return categorical_2_bin_.at(int_value);
} else { } else {
return num_bin_ - 1; return num_bin_ - 1;
} }
} }
} }
} // namespace LightGBM } // namespace LightGBM
......
...@@ -409,7 +409,8 @@ inline void Tree::TreeSHAP(const double *feature_values, double *phi, ...@@ -409,7 +409,8 @@ inline void Tree::TreeSHAP(const double *feature_values, double *phi,
// internal node // internal node
} else { } else {
const int hot_index = Decision(feature_values[split_index], node); const int hot_index =
decision_funs[GetDecisionType(decision_type_[node], kCategoricalMask)](feature_values[split_index], threshold_[node]);
const int cold_index = (hot_index == left_child_[node] ? right_child_[node] : left_child_[node]); const int cold_index = (hot_index == left_child_[node] ? right_child_[node] : left_child_[node]);
const double w = data_count(node); const double w = data_count(node);
const double hot_zero_fraction = data_count(hot_index)/w; const double hot_zero_fraction = data_count(hot_index)/w;
......
...@@ -16,11 +16,11 @@ ...@@ -16,11 +16,11 @@
namespace LightGBM { namespace LightGBM {
BinMapper::BinMapper() { BinMapper::BinMapper() {
} }
// deep copy function for BinMapper // deep copy function for BinMapper
BinMapper::BinMapper(const BinMapper& other) { BinMapper::BinMapper(const BinMapper& other) {
num_bin_ = other.num_bin_; num_bin_ = other.num_bin_;
missing_type_ = other.missing_type_; missing_type_ = other.missing_type_;
is_trival_ = other.is_trival_; is_trival_ = other.is_trival_;
...@@ -35,17 +35,17 @@ BinMapper::BinMapper(const BinMapper& other) { ...@@ -35,17 +35,17 @@ BinMapper::BinMapper(const BinMapper& other) {
min_val_ = other.min_val_; min_val_ = other.min_val_;
max_val_ = other.max_val_; max_val_ = other.max_val_;
default_bin_ = other.default_bin_; default_bin_ = other.default_bin_;
} }
BinMapper::BinMapper(const void* memory) { BinMapper::BinMapper(const void* memory) {
CopyFrom(reinterpret_cast<const char*>(memory)); CopyFrom(reinterpret_cast<const char*>(memory));
} }
BinMapper::~BinMapper() { BinMapper::~BinMapper() {
} }
bool NeedFilter(std::vector<int>& cnt_in_bin, int total_cnt, int filter_cnt, BinType bin_type) { bool NeedFilter(const std::vector<int>& cnt_in_bin, int total_cnt, int filter_cnt, BinType bin_type) {
if (bin_type == BinType::NumericalBin) { if (bin_type == BinType::NumericalBin) {
int sum_left = 0; int sum_left = 0;
for (size_t i = 0; i < cnt_in_bin.size() - 1; ++i) { for (size_t i = 0; i < cnt_in_bin.size() - 1; ++i) {
...@@ -55,17 +55,21 @@ bool NeedFilter(std::vector<int>& cnt_in_bin, int total_cnt, int filter_cnt, Bin ...@@ -55,17 +55,21 @@ bool NeedFilter(std::vector<int>& cnt_in_bin, int total_cnt, int filter_cnt, Bin
} }
} }
} else { } else {
if (cnt_in_bin.size() <= 2) {
for (size_t i = 0; i < cnt_in_bin.size() - 1; ++i) { for (size_t i = 0; i < cnt_in_bin.size() - 1; ++i) {
int sum_left = cnt_in_bin[i]; int sum_left = cnt_in_bin[i];
if (sum_left >= filter_cnt && total_cnt - sum_left >= filter_cnt) { if (sum_left >= filter_cnt && total_cnt - sum_left >= filter_cnt) {
return false; return false;
} }
} }
} else {
return false;
}
} }
return true; return true;
} }
std::vector<double> GreedyFindBin(const double* distinct_values, const int* counts, std::vector<double> GreedyFindBin(const double* distinct_values, const int* counts,
int num_distinct_values, int max_bin, size_t total_cnt, int min_data_in_bin) { int num_distinct_values, int max_bin, size_t total_cnt, int min_data_in_bin) {
std::vector<double> bin_upper_bound; std::vector<double> bin_upper_bound;
if (num_distinct_values <= max_bin) { if (num_distinct_values <= max_bin) {
...@@ -134,13 +138,13 @@ std::vector<double> GreedyFindBin(const double* distinct_values, const int* coun ...@@ -134,13 +138,13 @@ std::vector<double> GreedyFindBin(const double* distinct_values, const int* coun
bin_upper_bound[bin_cnt - 1] = std::numeric_limits<double>::infinity(); bin_upper_bound[bin_cnt - 1] = std::numeric_limits<double>::infinity();
} }
return bin_upper_bound; return bin_upper_bound;
} }
std::vector<double> FindBinWithZeroAsMissing(const double* distinct_values, const int* counts, std::vector<double> FindBinWithZeroAsOneBin(const double* distinct_values, const int* counts,
int num_distinct_values, int max_bin, size_t total_sample_cnt, int min_data_in_bin) { int num_distinct_values, int max_bin, size_t total_sample_cnt, int min_data_in_bin) {
std::vector<double> bin_upper_bound; std::vector<double> bin_upper_bound;
int left_cnt_data = 0; int left_cnt_data = 0;
int cnt_missing = 0; int cnt_zero = 0;
int right_cnt_data = 0; int right_cnt_data = 0;
for (int i = 0; i < num_distinct_values; ++i) { for (int i = 0; i < num_distinct_values; ++i) {
if (distinct_values[i] <= -kZeroAsMissingValueRange) { if (distinct_values[i] <= -kZeroAsMissingValueRange) {
...@@ -148,11 +152,11 @@ std::vector<double> FindBinWithZeroAsMissing(const double* distinct_values, cons ...@@ -148,11 +152,11 @@ std::vector<double> FindBinWithZeroAsMissing(const double* distinct_values, cons
} else if (distinct_values[i] > kZeroAsMissingValueRange) { } else if (distinct_values[i] > kZeroAsMissingValueRange) {
right_cnt_data += counts[i]; right_cnt_data += counts[i];
} else { } else {
cnt_missing += counts[i]; cnt_zero += counts[i];
} }
} }
int left_cnt = 0; int left_cnt = -1;
for (int i = 0; i < num_distinct_values; ++i) { for (int i = 0; i < num_distinct_values; ++i) {
if (distinct_values[i] > -kZeroAsMissingValueRange) { if (distinct_values[i] > -kZeroAsMissingValueRange) {
left_cnt = i; left_cnt = i;
...@@ -160,8 +164,12 @@ std::vector<double> FindBinWithZeroAsMissing(const double* distinct_values, cons ...@@ -160,8 +164,12 @@ std::vector<double> FindBinWithZeroAsMissing(const double* distinct_values, cons
} }
} }
if (left_cnt < 0) {
left_cnt = num_distinct_values;
}
if (left_cnt > 0) { if (left_cnt > 0) {
int left_max_bin = static_cast<int>(static_cast<double>(left_cnt_data) / (total_sample_cnt - cnt_missing) * (max_bin - 1)); int left_max_bin = static_cast<int>(static_cast<double>(left_cnt_data) / (total_sample_cnt - cnt_zero) * (max_bin - 1));
bin_upper_bound = GreedyFindBin(distinct_values, counts, left_cnt, left_max_bin, left_cnt_data, min_data_in_bin); bin_upper_bound = GreedyFindBin(distinct_values, counts, left_cnt, left_max_bin, left_cnt_data, min_data_in_bin);
bin_upper_bound.back() = -kZeroAsMissingValueRange; bin_upper_bound.back() = -kZeroAsMissingValueRange;
} }
...@@ -184,9 +192,9 @@ std::vector<double> FindBinWithZeroAsMissing(const double* distinct_values, cons ...@@ -184,9 +192,9 @@ std::vector<double> FindBinWithZeroAsMissing(const double* distinct_values, cons
bin_upper_bound.push_back(std::numeric_limits<double>::infinity()); bin_upper_bound.push_back(std::numeric_limits<double>::infinity());
} }
return bin_upper_bound; return bin_upper_bound;
} }
void BinMapper::FindBin(double* values, int num_sample_values, size_t total_sample_cnt, void BinMapper::FindBin(double* values, int num_sample_values, size_t total_sample_cnt,
int max_bin, int min_data_in_bin, int min_split_data, BinType bin_type, bool use_missing, bool zero_as_missing) { int max_bin, int min_data_in_bin, int min_split_data, BinType bin_type, bool use_missing, bool zero_as_missing) {
int na_cnt = 0; int na_cnt = 0;
int tmp_num_sample_values = 0; int tmp_num_sample_values = 0;
...@@ -204,9 +212,9 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp ...@@ -204,9 +212,9 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp
missing_type_ = MissingType::None; missing_type_ = MissingType::None;
} else { } else {
missing_type_ = MissingType::NaN; missing_type_ = MissingType::NaN;
}
na_cnt = num_sample_values - tmp_num_sample_values; na_cnt = num_sample_values - tmp_num_sample_values;
} }
}
num_sample_values = tmp_num_sample_values; num_sample_values = tmp_num_sample_values;
bin_type_ = bin_type; bin_type_ = bin_type;
...@@ -253,14 +261,14 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp ...@@ -253,14 +261,14 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp
int num_distinct_values = static_cast<int>(distinct_values.size()); int num_distinct_values = static_cast<int>(distinct_values.size());
if (bin_type_ == BinType::NumericalBin) { if (bin_type_ == BinType::NumericalBin) {
if (missing_type_ == MissingType::Zero) { if (missing_type_ == MissingType::Zero) {
bin_upper_bound_ = FindBinWithZeroAsMissing(distinct_values.data(), counts.data(), num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin); bin_upper_bound_ = FindBinWithZeroAsOneBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin);
if (bin_upper_bound_.size() == 2) { if (bin_upper_bound_.size() == 2) {
missing_type_ = MissingType::None; missing_type_ = MissingType::None;
} }
} else if (missing_type_ == MissingType::None) { } else if (missing_type_ == MissingType::None) {
bin_upper_bound_ = GreedyFindBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin); bin_upper_bound_ = FindBinWithZeroAsOneBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin, total_sample_cnt, min_data_in_bin);
} else { } else {
bin_upper_bound_ = GreedyFindBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin - 1, total_sample_cnt - na_cnt, min_data_in_bin); bin_upper_bound_ = FindBinWithZeroAsOneBin(distinct_values.data(), counts.data(), num_distinct_values, max_bin - 1, total_sample_cnt - na_cnt, min_data_in_bin);
bin_upper_bound_.push_back(NaN); bin_upper_bound_.push_back(NaN);
} }
num_bin_ = static_cast<int>(bin_upper_bound_.size()); num_bin_ = static_cast<int>(bin_upper_bound_.size());
...@@ -279,8 +287,6 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp ...@@ -279,8 +287,6 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp
} }
CHECK(num_bin_ <= max_bin); CHECK(num_bin_ <= max_bin);
} else { } else {
// No missing handle for categorical features
missing_type_ = MissingType::None;
// convert to int type first // convert to int type first
std::vector<int> distinct_values_int; std::vector<int> distinct_values_int;
std::vector<int> counts_int; std::vector<int> counts_int;
...@@ -296,22 +302,52 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp ...@@ -296,22 +302,52 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp
} }
// sort by counts // sort by counts
Common::SortForPair<int, int>(counts_int, distinct_values_int, 0, true); Common::SortForPair<int, int>(counts_int, distinct_values_int, 0, true);
// avoid first bin is zero
if (distinct_values_int[0] == 0 && counts_int.size() > 1) {
std::swap(counts_int[0], counts_int[1]);
std::swap(distinct_values_int[0], distinct_values_int[1]);
}
// will ignore the categorical of small counts // will ignore the categorical of small counts
const int cut_cnt = static_cast<int>(total_sample_cnt * 0.98f); int cut_cnt = static_cast<int>((total_sample_cnt - na_cnt) * 0.99f);
size_t cur_cat = 0;
categorical_2_bin_.clear(); categorical_2_bin_.clear();
bin_2_categorical_.clear(); bin_2_categorical_.clear();
num_bin_ = 0; num_bin_ = 0;
int used_cnt = 0; int used_cnt = 0;
max_bin = std::min(static_cast<int>(distinct_values_int.size()), max_bin); max_bin = std::min(static_cast<int>(distinct_values_int.size()), max_bin);
while (used_cnt < cut_cnt || num_bin_ < max_bin) { cnt_in_bin.clear();
bin_2_categorical_.push_back(distinct_values_int[num_bin_]); while (cur_cat < distinct_values_int.size()
categorical_2_bin_[distinct_values_int[num_bin_]] = static_cast<unsigned int>(num_bin_); && (used_cnt < cut_cnt || num_bin_ < max_bin)) {
used_cnt += counts_int[num_bin_]; if (distinct_values_int[cur_cat] < 0) {
na_cnt += counts_int[cur_cat];
cut_cnt -= counts_int[cur_cat];
Log::Warning("Met negative value in categorical features, will convert it to NaN");
} else {
bin_2_categorical_.push_back(distinct_values_int[cur_cat]);
categorical_2_bin_[distinct_values_int[cur_cat]] = static_cast<unsigned int>(num_bin_);
used_cnt += counts_int[cur_cat];
cnt_in_bin.push_back(counts_int[cur_cat]);
++num_bin_; ++num_bin_;
} }
cnt_in_bin = counts_int; ++cur_cat;
counts_int.resize(num_bin_); }
counts_int.back() += static_cast<int>(total_sample_cnt - used_cnt); // need an additional bin for NaN
if (cur_cat == distinct_values_int.size() && na_cnt > 0) {
// use -1 to represent NaN
bin_2_categorical_.push_back(-1);
categorical_2_bin_[-1] = num_bin_;
cnt_in_bin.push_back(0);
++num_bin_;
}
// Use MissingType::None to represent this bin contains all categoricals
if (cur_cat == distinct_values_int.size() && na_cnt == 0) {
missing_type_ = MissingType::None;
} else if (na_cnt == 0) {
missing_type_ = MissingType::Zero;
} else {
missing_type_ = MissingType::NaN;
}
cnt_in_bin.back() += static_cast<int>(total_sample_cnt - used_cnt);
} }
// check trival(num_bin_ == 1) feature // check trival(num_bin_ == 1) feature
...@@ -327,13 +363,16 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp ...@@ -327,13 +363,16 @@ void BinMapper::FindBin(double* values, int num_sample_values, size_t total_samp
if (!is_trival_) { if (!is_trival_) {
default_bin_ = ValueToBin(0); default_bin_ = ValueToBin(0);
if (bin_type_ == BinType::CategoricalBin) {
CHECK(default_bin_ > 0);
}
} }
// calculate sparse rate // calculate sparse rate
sparse_rate_ = static_cast<double>(cnt_in_bin[default_bin_]) / static_cast<double>(total_sample_cnt); sparse_rate_ = static_cast<double>(cnt_in_bin[default_bin_]) / static_cast<double>(total_sample_cnt);
} }
int BinMapper::SizeForSpecificBin(int bin) { int BinMapper::SizeForSpecificBin(int bin) {
int size = 0; int size = 0;
size += sizeof(int); size += sizeof(int);
size += sizeof(MissingType); size += sizeof(MissingType);
...@@ -344,9 +383,9 @@ int BinMapper::SizeForSpecificBin(int bin) { ...@@ -344,9 +383,9 @@ int BinMapper::SizeForSpecificBin(int bin) {
size += bin * sizeof(double); size += bin * sizeof(double);
size += sizeof(uint32_t); size += sizeof(uint32_t);
return size; return size;
} }
void BinMapper::CopyTo(char * buffer) const { void BinMapper::CopyTo(char * buffer) const {
std::memcpy(buffer, &num_bin_, sizeof(num_bin_)); std::memcpy(buffer, &num_bin_, sizeof(num_bin_));
buffer += sizeof(num_bin_); buffer += sizeof(num_bin_);
std::memcpy(buffer, &missing_type_, sizeof(missing_type_)); std::memcpy(buffer, &missing_type_, sizeof(missing_type_));
...@@ -368,9 +407,9 @@ void BinMapper::CopyTo(char * buffer) const { ...@@ -368,9 +407,9 @@ void BinMapper::CopyTo(char * buffer) const {
} else { } else {
std::memcpy(buffer, bin_2_categorical_.data(), num_bin_ * sizeof(int)); std::memcpy(buffer, bin_2_categorical_.data(), num_bin_ * sizeof(int));
} }
} }
void BinMapper::CopyFrom(const char * buffer) { void BinMapper::CopyFrom(const char * buffer) {
std::memcpy(&num_bin_, buffer, sizeof(num_bin_)); std::memcpy(&num_bin_, buffer, sizeof(num_bin_));
buffer += sizeof(num_bin_); buffer += sizeof(num_bin_);
std::memcpy(&missing_type_, buffer, sizeof(missing_type_)); std::memcpy(&missing_type_, buffer, sizeof(missing_type_));
...@@ -398,9 +437,9 @@ void BinMapper::CopyFrom(const char * buffer) { ...@@ -398,9 +437,9 @@ void BinMapper::CopyFrom(const char * buffer) {
categorical_2_bin_[bin_2_categorical_[i]] = static_cast<unsigned int>(i); categorical_2_bin_[bin_2_categorical_[i]] = static_cast<unsigned int>(i);
} }
} }
} }
void BinMapper::SaveBinaryToFile(FILE* file) const { void BinMapper::SaveBinaryToFile(FILE* file) const {
fwrite(&num_bin_, sizeof(num_bin_), 1, file); fwrite(&num_bin_, sizeof(num_bin_), 1, file);
fwrite(&missing_type_, sizeof(missing_type_), 1, file); fwrite(&missing_type_, sizeof(missing_type_), 1, file);
fwrite(&is_trival_, sizeof(is_trival_), 1, file); fwrite(&is_trival_, sizeof(is_trival_), 1, file);
...@@ -414,9 +453,9 @@ void BinMapper::SaveBinaryToFile(FILE* file) const { ...@@ -414,9 +453,9 @@ void BinMapper::SaveBinaryToFile(FILE* file) const {
} else { } else {
fwrite(bin_2_categorical_.data(), sizeof(int), num_bin_, file); fwrite(bin_2_categorical_.data(), sizeof(int), num_bin_, file);
} }
} }
size_t BinMapper::SizesInByte() const { size_t BinMapper::SizesInByte() const {
size_t ret = sizeof(num_bin_) + sizeof(missing_type_) + sizeof(is_trival_) + sizeof(sparse_rate_) size_t ret = sizeof(num_bin_) + sizeof(missing_type_) + sizeof(is_trival_) + sizeof(sparse_rate_)
+ sizeof(bin_type_) + sizeof(min_val_) + sizeof(max_val_) + sizeof(default_bin_); + sizeof(bin_type_) + sizeof(min_val_) + sizeof(max_val_) + sizeof(default_bin_);
if (bin_type_ == BinType::NumericalBin) { if (bin_type_ == BinType::NumericalBin) {
...@@ -425,21 +464,21 @@ size_t BinMapper::SizesInByte() const { ...@@ -425,21 +464,21 @@ size_t BinMapper::SizesInByte() const {
ret += sizeof(int) * num_bin_; ret += sizeof(int) * num_bin_;
} }
return ret; return ret;
} }
template class DenseBin<uint8_t>; template class DenseBin<uint8_t>;
template class DenseBin<uint16_t>; template class DenseBin<uint16_t>;
template class DenseBin<uint32_t>; template class DenseBin<uint32_t>;
template class SparseBin<uint8_t>; template class SparseBin<uint8_t>;
template class SparseBin<uint16_t>; template class SparseBin<uint16_t>;
template class SparseBin<uint32_t>; template class SparseBin<uint32_t>;
template class OrderedSparseBin<uint8_t>; template class OrderedSparseBin<uint8_t>;
template class OrderedSparseBin<uint16_t>; template class OrderedSparseBin<uint16_t>;
template class OrderedSparseBin<uint32_t>; template class OrderedSparseBin<uint32_t>;
Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate,
bool is_enable_sparse, double sparse_threshold, bool* is_sparse) { bool is_enable_sparse, double sparse_threshold, bool* is_sparse) {
// sparse threshold // sparse threshold
if (sparse_rate >= sparse_threshold && is_enable_sparse) { if (sparse_rate >= sparse_threshold && is_enable_sparse) {
...@@ -449,9 +488,9 @@ Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate, ...@@ -449,9 +488,9 @@ Bin* Bin::CreateBin(data_size_t num_data, int num_bin, double sparse_rate,
*is_sparse = false; *is_sparse = false;
return CreateDenseBin(num_data, num_bin); return CreateDenseBin(num_data, num_bin);
} }
} }
Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) { Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) {
if (num_bin <= 16) { if (num_bin <= 16) {
return new Dense4bitsBin(num_data); return new Dense4bitsBin(num_data);
} else if (num_bin <= 256) { } else if (num_bin <= 256) {
...@@ -461,9 +500,9 @@ Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) { ...@@ -461,9 +500,9 @@ Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) {
} else { } else {
return new DenseBin<uint32_t>(num_data); return new DenseBin<uint32_t>(num_data);
} }
} }
Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin) { Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin) {
if (num_bin <= 256) { if (num_bin <= 256) {
return new SparseBin<uint8_t>(num_data); return new SparseBin<uint8_t>(num_data);
} else if (num_bin <= 65536) { } else if (num_bin <= 65536) {
...@@ -471,6 +510,6 @@ Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin) { ...@@ -471,6 +510,6 @@ Bin* Bin::CreateSparseBin(data_size_t num_data, int num_bin) {
} else { } else {
return new SparseBin<uint32_t>(num_data); return new SparseBin<uint32_t>(num_data);
} }
} }
} // namespace LightGBM } // namespace LightGBM
...@@ -239,7 +239,7 @@ void OverallConfig::CheckParamConflict() { ...@@ -239,7 +239,7 @@ void OverallConfig::CheckParamConflict() {
} }
// Check max_depth and num_leaves // Check max_depth and num_leaves
if (boosting_config.tree_config.max_depth > 0) { if (boosting_config.tree_config.max_depth > 0) {
int full_num_leaves = std::pow(2, boosting_config.tree_config.max_depth); int full_num_leaves = static_cast<int>(std::pow(2, boosting_config.tree_config.max_depth));
if (full_num_leaves > boosting_config.tree_config.num_leaves if (full_num_leaves > boosting_config.tree_config.num_leaves
&& boosting_config.tree_config.num_leaves == kDefaultNumLeaves) { && boosting_config.tree_config.num_leaves == kDefaultNumLeaves) {
Log::Warning("Accuarcy may be bad since you didn't set num_leaves."); Log::Warning("Accuarcy may be bad since you didn't set num_leaves.");
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment