Commit 12a96334 authored by Guolin Ke

change inner prediction score to double type.

parent 5d12a8db
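The likely motivation is numerical precision: a boosted model's prediction score is a running sum of thousands of small per-tree leaf outputs, and accumulating that sum in `score_t` (single-precision `float` by default) loses accuracy as it grows, while the per-tree gradients and hessians can safely stay single precision. A minimal standalone sketch of the effect (illustrative constants, not part of this commit):

```cpp
// Accumulate one million small "leaf outputs" in float vs. double.
// The exact sum is 100; the float accumulator drifts because every
// addition rounds to roughly 7 significant decimal digits.
#include <cstdio>

int main() {
  float sum_f = 0.0f;
  double sum_d = 0.0;
  for (int i = 0; i < 1000000; ++i) {
    sum_f += 1e-4f;  // e.g. one tree's contribution to one data point
    sum_d += 1e-4;
  }
  std::printf("float: %.6f  double: %.6f\n", sum_f, sum_d);
  return 0;
}
```

Accordingly, the diff below changes `score` pointers and score-derived intermediates to `double`, keeps the `gradients`/`hessians` buffers as `score_t`, and narrows with an explicit `static_cast<score_t>` at each store.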
@@ -19,12 +19,12 @@ namespace LightGBM {
 class LambdarankNDCG: public ObjectiveFunction {
 public:
   explicit LambdarankNDCG(const ObjectiveConfig& config) {
-    sigmoid_ = static_cast<score_t>(config.sigmoid);
+    sigmoid_ = static_cast<double>(config.sigmoid);
     // initialize DCG calculator
     DCGCalculator::Init(config.label_gain);
     // copy lable gain to local
     for (auto gain : config.label_gain) {
-      label_gain_.push_back(static_cast<score_t>(gain));
+      label_gain_.push_back(static_cast<double>(gain));
     }
     label_gain_.shrink_to_fit();
     // will optimize NDCG@optimize_pos_at_
@@ -65,7 +65,7 @@ public:
     ConstructSigmoidTable();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
 #pragma omp parallel for schedule(guided)
     for (data_size_t i = 0; i < num_queries_; ++i) {
@@ -73,14 +73,14 @@ public:
     }
   }
-  inline void GetGradientsForOneQuery(const score_t* score,
+  inline void GetGradientsForOneQuery(const double* score,
     score_t* lambdas, score_t* hessians, data_size_t query_id) const {
     // get doc boundary for current query
     const data_size_t start = query_boundaries_[query_id];
     const data_size_t cnt =
       query_boundaries_[query_id + 1] - query_boundaries_[query_id];
     // get max DCG on current query
-    const score_t inverse_max_dcg = inverse_max_dcgs_[query_id];
+    const double inverse_max_dcg = inverse_max_dcgs_[query_id];
     // add pointers with offset
     const float* label = label_ + start;
     score += start;
@@ -99,60 +99,60 @@ public:
     std::sort(sorted_idx.begin(), sorted_idx.end(),
       [score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
     // get best and worst score
-    const score_t best_score = score[sorted_idx[0]];
+    const double best_score = score[sorted_idx[0]];
     data_size_t worst_idx = cnt - 1;
     if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
       worst_idx -= 1;
     }
-    const score_t wrost_score = score[sorted_idx[worst_idx]];
+    const double wrost_score = score[sorted_idx[worst_idx]];
     // start accmulate lambdas by pairs
     for (data_size_t i = 0; i < cnt; ++i) {
       const data_size_t high = sorted_idx[i];
       const int high_label = static_cast<int>(label[high]);
-      const score_t high_score = score[high];
+      const double high_score = score[high];
       if (high_score == kMinScore) { continue; }
-      const score_t high_label_gain = label_gain_[high_label];
-      const score_t high_discount = DCGCalculator::GetDiscount(i);
-      score_t high_sum_lambda = 0.0;
-      score_t high_sum_hessian = 0.0;
+      const double high_label_gain = label_gain_[high_label];
+      const double high_discount = DCGCalculator::GetDiscount(i);
+      double high_sum_lambda = 0.0;
+      double high_sum_hessian = 0.0;
       for (data_size_t j = 0; j < cnt; ++j) {
         // skip same data
         if (i == j) { continue; }
         const data_size_t low = sorted_idx[j];
         const int low_label = static_cast<int>(label[low]);
-        const score_t low_score = score[low];
+        const double low_score = score[low];
         // only consider pair with different label
         if (high_label <= low_label || low_score == kMinScore) { continue; }
-        const score_t delta_score = high_score - low_score;
-        const score_t low_label_gain = label_gain_[low_label];
-        const score_t low_discount = DCGCalculator::GetDiscount(j);
+        const double delta_score = high_score - low_score;
+        const double low_label_gain = label_gain_[low_label];
+        const double low_discount = DCGCalculator::GetDiscount(j);
         // get dcg gap
-        const score_t dcg_gap = high_label_gain - low_label_gain;
+        const double dcg_gap = high_label_gain - low_label_gain;
         // get discount of this pair
-        const score_t paired_discount = fabs(high_discount - low_discount);
+        const double paired_discount = fabs(high_discount - low_discount);
         // get delta NDCG
-        score_t delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
+        double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
         // regular the delta_pair_NDCG by score distance
         if (high_label != low_label && best_score != wrost_score) {
           delta_pair_NDCG /= (0.01f + fabs(delta_score));
         }
         // calculate lambda for this pair
-        score_t p_lambda = GetSigmoid(delta_score);
-        score_t p_hessian = p_lambda * (2.0f - p_lambda);
+        double p_lambda = GetSigmoid(delta_score);
+        double p_hessian = p_lambda * (2.0f - p_lambda);
         // update
         p_lambda *= -delta_pair_NDCG;
         p_hessian *= 2 * delta_pair_NDCG;
         high_sum_lambda += p_lambda;
         high_sum_hessian += p_hessian;
-        lambdas[low] -= p_lambda;
-        hessians[low] += p_hessian;
+        lambdas[low] -= static_cast<score_t>(p_lambda);
+        hessians[low] += static_cast<score_t>(p_hessian);
       }
       // update
-      lambdas[high] += high_sum_lambda;
-      hessians[high] += high_sum_hessian;
+      lambdas[high] += static_cast<score_t>(high_sum_lambda);
+      hessians[high] += static_cast<score_t>(high_sum_hessian);
     }
     // if need weights
     if (weights_ != nullptr) {
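Reading the pair loop just above: with $\sigma$ = `sigmoid_`, $\Delta s = s_{\text{high}} - s_{\text{low}}$, and $\rho =$ `GetSigmoid(delta_score)` $\approx 2/(1 + e^{2\sigma \Delta s})$, each pair contributes

$$\lambda = -\,|\Delta \mathrm{NDCG}|\,\rho, \qquad h = 2\,|\Delta \mathrm{NDCG}|\,\rho\,(2 - \rho), \qquad |\Delta \mathrm{NDCG}| = \frac{(g_{\text{high}} - g_{\text{low}})\,|d_i - d_j|}{\mathrm{maxDCG}},$$

where $g$ are the label gains and $d_i, d_j$ the position discounts; the delta is optionally damped by the score distance. $\lambda$ is added to the higher-labeled document and subtracted from the lower one, so each pair is pushed apart in the right direction. After this commit all of these intermediates are `double`; only the final accumulation into `lambdas[]`/`hessians[]` narrows to `score_t`.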
@@ -164,7 +164,7 @@ public:
   }
-  inline score_t GetSigmoid(score_t score) const {
+  inline double GetSigmoid(double score) const {
     if (score <= min_sigmoid_input_) {
       // too small, use lower bound
       return sigmoid_table_[0];
@@ -186,7 +186,7 @@ public:
       _sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_);
     // cache
     for (size_t i = 0; i < _sigmoid_bins; ++i) {
-      const score_t score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
+      const double score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
       sigmoid_table_[i] = 2.0f / (1.0f + std::exp(2.0f * score * sigmoid_));
     }
   }
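`GetSigmoid` is called once per document pair, so the table above trades memory for speed: one precomputed array over [`min_sigmoid_input_`, `max_sigmoid_input_`] replaces a `std::exp` call with an indexed lookup. A self-contained sketch of the same pattern (class name and defaults are illustrative, not the exact LightGBM code):

```cpp
#include <cmath>
#include <vector>

// Table-lookup approximation of 2 / (1 + exp(2 * sigmoid * x)),
// clamped outside [lo, hi]; resolution is (hi - lo) / bins.
class SigmoidTable {
 public:
  explicit SigmoidTable(double sigmoid, size_t bins = 1 << 20,
                        double lo = -50.0, double hi = 50.0)
      : lo_(lo), hi_(hi), factor_(bins / (hi - lo)), table_(bins) {
    for (size_t i = 0; i < bins; ++i) {
      const double x = i / factor_ + lo_;
      table_[i] = 2.0 / (1.0 + std::exp(2.0 * x * sigmoid));
    }
  }
  double operator()(double x) const {
    if (x <= lo_) return table_.front();  // too small: use lower bound
    if (x >= hi_) return table_.back();   // too large: use upper bound
    return table_[static_cast<size_t>((x - lo_) * factor_)];
  }

 private:
  double lo_, hi_, factor_;
  std::vector<double> table_;
};
```

With $2^{20}$ bins over $[-50, 50]$ the step is about $10^{-4}$, ample resolution for a quantity that is subsequently multiplied by a delta-NDCG factor of order one.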
@@ -197,11 +197,11 @@ public:
 private:
   /*! \brief Gains for labels */
-  std::vector<score_t> label_gain_;
+  std::vector<double> label_gain_;
   /*! \brief Cache inverse max DCG, speed up calculation */
-  std::vector<score_t> inverse_max_dcgs_;
+  std::vector<double> inverse_max_dcgs_;
   /*! \brief Simgoid param */
-  score_t sigmoid_;
+  double sigmoid_;
   /*! \brief Optimized NDCG@ */
   int optimize_pos_at_;
   /*! \brief Number of queries */
@@ -215,15 +215,15 @@ private:
   /*! \brief Query boundries */
   const data_size_t* query_boundaries_;
   /*! \brief Cache result for sigmoid transform to speed up */
-  std::vector<score_t> sigmoid_table_;
+  std::vector<double> sigmoid_table_;
   /*! \brief Number of bins in simoid table */
   size_t _sigmoid_bins = 1024 * 1024;
   /*! \brief Minimal input of sigmoid table */
-  score_t min_sigmoid_input_ = -50;
+  double min_sigmoid_input_ = -50;
   /*! \brief Maximal input of sigmoid table */
-  score_t max_sigmoid_input_ = 50;
+  double max_sigmoid_input_ = 50;
   /*! \brief Factor that covert score to bin in sigmoid table */
-  score_t sigmoid_table_idx_factor_;
+  double sigmoid_table_idx_factor_;
 };
 } // namespace LightGBM
...
@@ -22,18 +22,18 @@ public:
     weights_ = metadata.weights();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     if (weights_ == nullptr) {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        gradients[i] = (score[i] - label_[i]);
-        hessians[i] = 1.0;
+        gradients[i] = static_cast<score_t>(score[i] - label_[i]);
+        hessians[i] = 1.0f;
       }
     } else {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        gradients[i] = (score[i] - label_[i]) * weights_[i];
+        gradients[i] = static_cast<score_t>(score[i] - label_[i]) * weights_[i];
         hessians[i] = weights_[i];
       }
     }
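For reference, the loop above is the textbook squared-error case: with $L = \tfrac{1}{2}(s - y)^2$,

$$\frac{\partial L}{\partial s} = s - y, \qquad \frac{\partial^2 L}{\partial s^2} = 1,$$

optionally scaled by the per-row weight $w_i$. The residual $s_i - y_i$ is now formed in `double` (double score minus float label) and narrowed once on store.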
@@ -58,7 +58,7 @@ private:
 class RegressionL1loss: public ObjectiveFunction {
 public:
   explicit RegressionL1loss(const ObjectiveConfig& config) {
-    eta_ = static_cast<score_t>(config.gaussian_eta);
+    eta_ = static_cast<double>(config.gaussian_eta);
   }
   ~RegressionL1loss() {}
@@ -69,12 +69,12 @@ public:
     weights_ = metadata.weights();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     if (weights_ == nullptr) {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t diff = score[i] - label_[i];
+        const double diff = score[i] - label_[i];
         if (diff >= 0.0f) {
           gradients[i] = 1.0f;
         } else {
@@ -85,7 +85,7 @@ public:
     } else {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t diff = score[i] - label_[i];
+        const double diff = score[i] - label_[i];
         if (diff >= 0.0f) {
           gradients[i] = weights_[i];
         } else {
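For L1 loss the gradient is just the sign of the residual, and the true second derivative is zero almost everywhere:

$$L = |s - y|, \qquad \frac{\partial L}{\partial s} = \operatorname{sign}(s - y), \qquad \frac{\partial^2 L}{\partial s^2} = 0 \ \text{(a.e.)}.$$

A zero hessian would break the Newton-style leaf-output estimation, which is presumably why this class carries the `gaussian_eta` parameter declared below: the flat curvature is replaced by a Gaussian bump whose width is controlled by `eta_` (via the `Common::ApproximateHessianWithGaussian` helper visible in the Huber hunks; the corresponding L1 lines fall outside this excerpt).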
@@ -108,7 +108,7 @@ private:
   /*! \brief Pointer of weights */
   const float* weights_;
   /*! \brief a parameter to control the width of Gaussian function to approximate hessian */
-  score_t eta_;
+  double eta_;
 };
 /*!
@@ -117,8 +117,8 @@ private:
 class RegressionHuberLoss: public ObjectiveFunction {
 public:
   explicit RegressionHuberLoss(const ObjectiveConfig& config) {
-    delta_ = static_cast<score_t>(config.huber_delta);
-    eta_ = static_cast<score_t>(config.gaussian_eta);
+    delta_ = static_cast<double>(config.huber_delta);
+    eta_ = static_cast<double>(config.gaussian_eta);
   }
   ~RegressionHuberLoss() {
@@ -130,21 +130,21 @@ public:
     weights_ = metadata.weights();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     if (weights_ == nullptr) {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t diff = score[i] - label_[i];
+        const double diff = score[i] - label_[i];
         if (std::abs(diff) <= delta_) {
-          gradients[i] = diff;
+          gradients[i] = static_cast<score_t>(diff);
           hessians[i] = 1.0f;
         } else {
           if (diff >= 0.0f) {
-            gradients[i] = delta_;
+            gradients[i] = static_cast<score_t>(delta_);
           } else {
-            gradients[i] = -delta_;
+            gradients[i] = static_cast<score_t>(-delta_);
           }
           hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
         }
@@ -152,16 +152,16 @@ public:
     } else {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t diff = score[i] - label_[i];
+        const double diff = score[i] - label_[i];
         if (std::abs(diff) <= delta_) {
-          gradients[i] = diff * weights_[i];
+          gradients[i] = static_cast<score_t>(diff * weights_[i]);
           hessians[i] = weights_[i];
         } else {
           if (diff >= 0.0f) {
-            gradients[i] = delta_ * weights_[i];
+            gradients[i] = static_cast<score_t>(delta_ * weights_[i]);
          } else {
-            gradients[i] = -delta_ * weights_[i];
+            gradients[i] = static_cast<score_t>(-delta_ * weights_[i]);
           }
           hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_, weights_[i]));
         }
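The two Huber branches implement the piecewise gradient

$$\frac{\partial L_\delta}{\partial s} = \begin{cases} s - y, & |s - y| \le \delta, \\ \delta \cdot \operatorname{sign}(s - y), & \text{otherwise,} \end{cases}$$

with hessian 1 in the quadratic region and, as with L1, a Gaussian approximation on the linear tails where the true curvature is zero. The commit's pattern repeats here: `diff` and `delta_` are `double`, and every store into the `score_t` gradient buffer gets an explicit cast.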
@@ -181,9 +181,9 @@ private:
   /*! \brief Pointer of weights */
   const float* weights_;
   /*! \brief delta for Huber loss */
-  score_t delta_;
+  double delta_;
   /*! \brief a parameter to control the width of Gaussian function to approximate hessian */
-  score_t eta_;
+  double eta_;
 };
@@ -191,7 +191,7 @@ private:
 class RegressionFairLoss: public ObjectiveFunction {
 public:
   explicit RegressionFairLoss(const ObjectiveConfig& config) {
-    c_ = static_cast<score_t>(config.fair_c);
+    c_ = static_cast<double>(config.fair_c);
   }
   ~RegressionFairLoss() {}
@@ -202,23 +202,21 @@ public:
     weights_ = metadata.weights();
   }
-  void GetGradients(const score_t* score, score_t* gradients,
+  void GetGradients(const double* score, score_t* gradients,
                     score_t* hessians) const override {
     if (weights_ == nullptr) {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t x = score[i] - label_[i];
-        gradients[i] = c_ * x / (std::fabs(x) + c_);
-        hessians[i] = c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_));
+        const double x = score[i] - label_[i];
+        gradients[i] = static_cast<score_t>(c_ * x / (std::fabs(x) + c_));
+        hessians[i] = static_cast<score_t>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_)));
       }
     } else {
 #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data_; ++i) {
-        const score_t x = score[i] - label_[i];
-        gradients[i] = c_ * x / (std::fabs(x) + c_);
-        gradients[i] *= weights_[i];
-        hessians[i] = c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_));
-        hessians[i] *= weights_[i];
+        const double x = score[i] - label_[i];
+        gradients[i] = static_cast<score_t>(c_ * x / (std::fabs(x) + c_) * weights_[i]);
+        hessians[i] = static_cast<score_t>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_)) * weights_[i]);
       }
     }
   }
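The Fair-loss expressions match the closed forms, with $x = s - y$:

$$L(x) = c^2\left(\frac{|x|}{c} - \log\!\left(1 + \frac{|x|}{c}\right)\right), \qquad \frac{\partial L}{\partial s} = \frac{c\,x}{|x| + c}, \qquad \frac{\partial^2 L}{\partial s^2} = \frac{c^2}{(|x| + c)^2}.$$

Besides the type change, the weighted branch is tightened: the old code stored an unweighted value and multiplied in place (`gradients[i] *= weights_[i];`), while the new code folds the weight into a single expression and casts once.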
@@ -235,7 +233,7 @@ private:
   /*! \brief Pointer of weights */
   const float* weights_;
   /*! \brief c for Fair loss */
-  score_t c_;
+  double c_;
 };
 } // namespace LightGBM
...
@@ -40,10 +40,10 @@ public:
     data_partition_->SetUsedDataIndices(used_indices, num_data);
   }
-  void AddPredictionToScore(score_t* out_score) const override {
+  void AddPredictionToScore(double* out_score) const override {
 #pragma omp parallel for schedule(guided)
     for (int i = 0; i < data_partition_->num_leaves(); ++i) {
-      score_t output = static_cast<score_t>(last_trained_tree_->LeafOutput(i));
+      double output = static_cast<double>(last_trained_tree_->LeafOutput(i));
       data_size_t cnt_leaf_data = 0;
       auto tmp_idx = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
       for (data_size_t j = 0; j < cnt_leaf_data; ++j) {
...
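Taken together, the commit settles the precision boundary: scores are accumulated and read as `double`, while per-iteration gradients and hessians stay `score_t`. A hedged sketch of the resulting objective contract (standalone; `score_t` assumed to be `float`, and `GetGradientsL2` is a hypothetical free function, not LightGBM API):

```cpp
#include <cstdint>

typedef float score_t;        // assumption: the library's single-precision type
typedef int32_t data_size_t;

// Hypothetical L2 objective following the new convention:
// read scores as double, compute in double, narrow once on store.
void GetGradientsL2(const double* score, const float* label, data_size_t n,
                    score_t* gradients, score_t* hessians) {
  for (data_size_t i = 0; i < n; ++i) {
    gradients[i] = static_cast<score_t>(score[i] - label[i]);
    hessians[i] = 1.0f;
  }
}
```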