Unverified commit c30ace21, authored by Guolin Ke, committed by GitHub
Browse files

fix all negative values in cat features (#1547)

* fix all negative values in cat features

* fix a bug
parent 00a125d5
...@@ -316,6 +316,9 @@ namespace LightGBM { ...@@ -316,6 +316,9 @@ namespace LightGBM {
} }
} }
} }
num_bin_ = 0;
int rest_cnt = total_sample_cnt - na_cnt;
if (rest_cnt > 0) {
// sort by counts // sort by counts
Common::SortForPair<int, int>(counts_int, distinct_values_int, 0, true); Common::SortForPair<int, int>(counts_int, distinct_values_int, 0, true);
// avoid the first bin being zero // avoid the first bin being zero
...@@ -332,7 +335,6 @@ namespace LightGBM { ...@@ -332,7 +335,6 @@ namespace LightGBM {
size_t cur_cat = 0; size_t cur_cat = 0;
categorical_2_bin_.clear(); categorical_2_bin_.clear();
bin_2_categorical_.clear(); bin_2_categorical_.clear();
num_bin_ = 0;
int used_cnt = 0; int used_cnt = 0;
max_bin = std::min(static_cast<int>(distinct_values_int.size()), max_bin); max_bin = std::min(static_cast<int>(distinct_values_int.size()), max_bin);
cnt_in_bin.clear(); cnt_in_bin.clear();
...@@ -366,6 +368,7 @@ namespace LightGBM { ...@@ -366,6 +368,7 @@ namespace LightGBM {
} }
cnt_in_bin.back() += static_cast<int>(total_sample_cnt - used_cnt); cnt_in_bin.back() += static_cast<int>(total_sample_cnt - used_cnt);
} }
}
// check trivial (num_bin_ == 1) feature // check trivial (num_bin_ == 1) feature
if (num_bin_ <= 1) { if (num_bin_ <= 1) {
...@@ -384,8 +387,12 @@ namespace LightGBM { ...@@ -384,8 +387,12 @@ namespace LightGBM {
CHECK(default_bin_ > 0); CHECK(default_bin_ > 0);
} }
} }
if (!is_trival_) {
// calculate sparse rate // calculate sparse rate
sparse_rate_ = static_cast<double>(cnt_in_bin[default_bin_]) / static_cast<double>(total_sample_cnt); sparse_rate_ = static_cast<double>(cnt_in_bin[default_bin_]) / static_cast<double>(total_sample_cnt);
} else {
sparse_rate_ = 1.0f;
}
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment