Unverified commit c30ace21, authored by Guolin Ke, committed by GitHub
Browse files

fix all negative values in cat features (#1547)

* fix all negative values in cat features

* fix a bug
parent 00a125d5
......@@ -316,6 +316,9 @@ namespace LightGBM {
}
}
}
num_bin_ = 0;
int rest_cnt = total_sample_cnt - na_cnt;
if (rest_cnt > 0) {
// sort by counts
Common::SortForPair<int, int>(counts_int, distinct_values_int, 0, true);
// avoid the first bin being zero
......@@ -332,7 +335,6 @@ namespace LightGBM {
size_t cur_cat = 0;
categorical_2_bin_.clear();
bin_2_categorical_.clear();
num_bin_ = 0;
int used_cnt = 0;
max_bin = std::min(static_cast<int>(distinct_values_int.size()), max_bin);
cnt_in_bin.clear();
......@@ -366,6 +368,7 @@ namespace LightGBM {
}
cnt_in_bin.back() += static_cast<int>(total_sample_cnt - used_cnt);
}
}
// check trivial (num_bin_ == 1) feature
if (num_bin_ <= 1) {
......@@ -384,8 +387,12 @@ namespace LightGBM {
CHECK(default_bin_ > 0);
}
}
if (!is_trival_) {
// calculate sparse rate
sparse_rate_ = static_cast<double>(cnt_in_bin[default_bin_]) / static_cast<double>(total_sample_cnt);
} else {
sparse_rate_ = 1.0f;
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment