Unverified commit 496a07d1, authored by Guolin Ke and committed by GitHub
Browse files

fix ranking tasks consistency (#1739)

* fix ndcg consistency.

* more stable sorts

* Update gbdt_model_text.cpp

* Update dataset.cpp

* Update gbdt_model_text.cpp
parent ac6951d3
......@@ -625,11 +625,11 @@ inline static void SortForPair(std::vector<T1>& keys, std::vector<T2>& values, s
arr.emplace_back(keys[i], values[i]);
}
if (!is_reverse) {
- std::sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
+ std::stable_sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
return a.first < b.first;
});
} else {
- std::sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
+ std::stable_sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
return a.first > b.first;
});
}
......
......@@ -304,7 +304,7 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration) cons
}
}
// sort the importance
- std::sort(pairs.begin(), pairs.end(),
+ std::stable_sort(pairs.begin(), pairs.end(),
[](const std::pair<size_t, std::string>& lhs,
const std::pair<size_t, std::string>& rhs) {
return lhs.first > rhs.first;
......
......@@ -235,7 +235,7 @@ namespace LightGBM {
std::vector<double> distinct_values;
std::vector<int> counts;
- std::sort(values, values + num_sample_values);
+ std::stable_sort(values, values + num_sample_values);
// push zero in the front
if (num_sample_values == 0 || (values[0] > 0.0f && zero_cnt > 0)) {
......
......@@ -162,7 +162,7 @@ std::vector<std::vector<int>> FastFeatureBundling(std::vector<std::unique_ptr<Bi
sorted_idx.emplace_back(i);
}
// sort by non zero cnt, bigger first
- std::sort(sorted_idx.begin(), sorted_idx.end(),
+ std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[&feature_non_zero_cnt](int a, int b) {
return feature_non_zero_cnt[a] > feature_non_zero_cnt[b];
});
......
......@@ -44,7 +44,7 @@ void DCGCalculator::Init(const std::vector<double>& input_label_gain) {
}
discount_.resize(kMaxPosition);
for (data_size_t i = 0; i < kMaxPosition; ++i) {
- discount_[i] = 1.0f / std::log2(2.0f + i);
+ discount_[i] = 1.0 / std::log2(2.0 + i);
}
}
......@@ -111,7 +111,7 @@ double DCGCalculator::CalDCGAtK(data_size_t k, const label_t* label,
for (data_size_t i = 0; i < num_data; ++i) {
sorted_idx[i] = i;
}
- std::sort(sorted_idx.begin(), sorted_idx.end(),
+ std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
if (k > num_data) { k = num_data; }
......@@ -131,7 +131,7 @@ void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const label_t* la
for (data_size_t i = 0; i < num_data; ++i) {
sorted_idx[i] = i;
}
- std::sort(sorted_idx.begin(), sorted_idx.end(),
+ std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
double cur_result = 0.0f;
......
......@@ -79,7 +79,7 @@ public:
for (data_size_t i = 0; i < num_data; ++i) {
sorted_idx.emplace_back(i);
}
- std::sort(sorted_idx.begin(), sorted_idx.end(),
+ std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
int num_hit = 0;
......
......@@ -100,7 +100,7 @@ public:
for (data_size_t i = 0; i < cnt; ++i) {
sorted_idx.emplace_back(i);
}
- std::sort(sorted_idx.begin(), sorted_idx.end(),
+ std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
// get best and worst score
const double best_score = score[sorted_idx[0]];
......
......@@ -40,7 +40,7 @@ namespace LightGBM {
for (data_size_t i = 0; i < cnt_data; ++i) {\
sorted_idx[i] = i;\
}\
- std::sort(sorted_idx.begin(), sorted_idx.end(), [=](data_size_t a, data_size_t b) {return data_reader(a) < data_reader(b); });\
+ std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [=](data_size_t a, data_size_t b) {return data_reader(a) < data_reader(b); });\
std::vector<double> weighted_cdf(cnt_data);\
weighted_cdf[0] = weight_reader(sorted_idx[0]);\
for (data_size_t i = 1; i < cnt_data; ++i) {\
......
......@@ -65,6 +65,9 @@ class TestSklearn(unittest.TestCase):
gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
+ self.assertLessEqual(gbm.best_iteration_, 12)
+ self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.65)
+ self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.65)
def test_regression_with_custom_objective(self):
def objective_ls(y_true, y_pred):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment