Unverified commit c35ecfbf, authored by Zhuyi Xue, committed by GitHub
Browse files

renamed cur_cat => cur_cat_idx and added some comments (#5522)

parent 8b720844
...@@ -352,7 +352,7 @@ namespace LightGBM { ...@@ -352,7 +352,7 @@ namespace LightGBM {
int zero_cnt = static_cast<int>(total_sample_cnt - num_sample_values - na_cnt); int zero_cnt = static_cast<int>(total_sample_cnt - num_sample_values - na_cnt);
// find distinct_values first // find distinct_values first
std::vector<double> distinct_values; std::vector<double> distinct_values;
std::vector<int> counts; std::vector<int> counts; // count of data points for each distinct feature value.
std::stable_sort(values, values + num_sample_values); std::stable_sort(values, values + num_sample_values);
...@@ -389,7 +389,7 @@ namespace LightGBM { ...@@ -389,7 +389,7 @@ namespace LightGBM {
} }
min_val_ = distinct_values.front(); min_val_ = distinct_values.front();
max_val_ = distinct_values.back(); max_val_ = distinct_values.back();
std::vector<int> cnt_in_bin; std::vector<int> cnt_in_bin; // count of data points in each bin.
int num_distinct_values = static_cast<int>(distinct_values.size()); int num_distinct_values = static_cast<int>(distinct_values.size());
if (bin_type_ == BinType::NumericalBin) { if (bin_type_ == BinType::NumericalBin) {
if (missing_type_ == MissingType::Zero) { if (missing_type_ == MissingType::Zero) {
...@@ -446,12 +446,12 @@ namespace LightGBM { ...@@ -446,12 +446,12 @@ namespace LightGBM {
Log::Warning("Met categorical feature which contains sparse values. " Log::Warning("Met categorical feature which contains sparse values. "
"Consider renumbering to consecutive integers started from zero"); "Consider renumbering to consecutive integers started from zero");
} }
// sort by counts // sort by counts in descending order
Common::SortForPair<int, int>(&counts_int, &distinct_values_int, 0, true); Common::SortForPair<int, int>(&counts_int, &distinct_values_int, 0, true);
// will ignore the categorical of small counts // will ignore the categorical of small counts
int cut_cnt = static_cast<int>( int cut_cnt = static_cast<int>(
Common::RoundInt((total_sample_cnt - na_cnt) * 0.99f)); Common::RoundInt((total_sample_cnt - na_cnt) * 0.99f));
size_t cur_cat = 0; size_t cur_cat_idx = 0; // index of current category.
categorical_2_bin_.clear(); categorical_2_bin_.clear();
bin_2_categorical_.clear(); bin_2_categorical_.clear();
int used_cnt = 0; int used_cnt = 0;
...@@ -467,20 +467,20 @@ namespace LightGBM { ...@@ -467,20 +467,20 @@ namespace LightGBM {
categorical_2_bin_[-1] = 0; categorical_2_bin_[-1] = 0;
cnt_in_bin.push_back(0); cnt_in_bin.push_back(0);
num_bin_ = 1; num_bin_ = 1;
while (cur_cat < distinct_values_int.size() while (cur_cat_idx < distinct_values_int.size()
&& (used_cnt < cut_cnt || num_bin_ < max_bin)) { && (used_cnt < cut_cnt || num_bin_ < max_bin)) {
if (counts_int[cur_cat] < min_data_in_bin && cur_cat > 1) { if (counts_int[cur_cat_idx] < min_data_in_bin && cur_cat_idx > 1) {
break; break;
} }
bin_2_categorical_.push_back(distinct_values_int[cur_cat]); bin_2_categorical_.push_back(distinct_values_int[cur_cat_idx]);
categorical_2_bin_[distinct_values_int[cur_cat]] = static_cast<unsigned int>(num_bin_); categorical_2_bin_[distinct_values_int[cur_cat_idx]] = static_cast<unsigned int>(num_bin_);
used_cnt += counts_int[cur_cat]; used_cnt += counts_int[cur_cat_idx];
cnt_in_bin.push_back(counts_int[cur_cat]); cnt_in_bin.push_back(counts_int[cur_cat_idx]);
++num_bin_; ++num_bin_;
++cur_cat; ++cur_cat_idx;
} }
// Use MissingType::None to represent this bin contains all categoricals // Use MissingType::None to represent this bin contains all categoricals
if (cur_cat == distinct_values_int.size() && na_cnt == 0) { if (cur_cat_idx == distinct_values_int.size() && na_cnt == 0) {
missing_type_ = MissingType::None; missing_type_ = MissingType::None;
} else { } else {
missing_type_ = MissingType::NaN; missing_type_ = MissingType::NaN;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment