"python-package/vscode:/vscode.git/clone" did not exist on "2e9848c67f2aceef05f80db7301c52a8a9fa14c9"
Commit 349cb50d authored by Guolin Ke's avatar Guolin Ke
Browse files

boost_from_average doesn't seem to work well in binary classification, so remove it.

parent 1446db35
...@@ -33,10 +33,6 @@ public: ...@@ -33,10 +33,6 @@ public:
virtual const char* GetName() const = 0; virtual const char* GetName() const = 0;
/*!
* \brief Convert predictions to raw scores.
*        This default implementation is the identity: it hands back a copy of
*        the input unchanged.
* \param preds Predictions to convert
* \return A vector holding the same values as preds
*/
virtual std::vector<double> ConvertToRawScore(const std::vector<double>& preds) const {
  std::vector<double> raw_scores(preds);
  return raw_scores;
}
ObjectiveFunction() = default; ObjectiveFunction() = default;
/*! \brief Disable copy */ /*! \brief Disable copy */
ObjectiveFunction& operator=(const ObjectiveFunction&) = delete; ObjectiveFunction& operator=(const ObjectiveFunction&) = delete;
......
...@@ -293,36 +293,29 @@ void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) { ...@@ -293,36 +293,29 @@ void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
} }
bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) { bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
// boosting from average prediction. // boosting from average prediction. It doesn't work well for binary classification, remove it for now.
if (models_.empty() && gbdt_config_->boost_from_average && !train_score_updater_->has_init_score()) { if (models_.empty()
&& gbdt_config_->boost_from_average
&& !train_score_updater_->has_init_score()
&& sigmoid_ < 0.0f) {
std::vector<double> sum_per_class(num_class_, 0.0f); std::vector<double> sum_per_class(num_class_, 0.0f);
auto label = train_data_->metadata().label(); auto label = train_data_->metadata().label();
if (num_class_ > 1) { if (num_class_ > 1) {
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
sum_per_class[static_cast<int>(label[i])] += 1.0f; sum_per_class[static_cast<int>(label[i])] += 1.0f;
} }
} else if(sigmoid_ < 0.0f){
for (data_size_t i = 0; i < num_data_; ++i) {
sum_per_class[0] += label[i];
}
} else { } else {
for (data_size_t i = 0; i < num_data_; ++i) { for (data_size_t i = 0; i < num_data_; ++i) {
sum_per_class[0] += label[i] > 0; sum_per_class[0] += label[i];
}
}
std::vector<double > init_scores(num_class_);
for (int i = 0; i < num_class_; ++i) {
init_scores[i] = sum_per_class[i] / num_data_;
} }
if (object_function_ != nullptr) {
init_scores = object_function_->ConvertToRawScore(init_scores);
} }
for (int curr_class = 0; curr_class < num_class_; ++curr_class) { for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
double init_score = sum_per_class[curr_class] / num_data_;
std::unique_ptr<Tree> new_tree(new Tree(2)); std::unique_ptr<Tree> new_tree(new Tree(2));
new_tree->Split(0, 0, BinType::NumericalBin, 0, 0, 0, init_scores[curr_class], init_scores[curr_class], 0, num_data_, 1); new_tree->Split(0, 0, BinType::NumericalBin, 0, 0, 0, init_score, init_score, 0, num_data_, 1);
train_score_updater_->AddScore(init_scores[curr_class], curr_class); train_score_updater_->AddScore(init_score, curr_class);
for (auto& score_updater : valid_score_updater_) { for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(init_scores[curr_class], curr_class); score_updater->AddScore(init_score, curr_class);
} }
models_.push_back(std::move(new_tree)); models_.push_back(std::move(new_tree));
} }
......
...@@ -74,7 +74,7 @@ int Tree::Split(int leaf, int feature, BinType bin_type, uint32_t threshold_bin, ...@@ -74,7 +74,7 @@ int Tree::Split(int leaf, int feature, BinType bin_type, uint32_t threshold_bin,
} }
threshold_in_bin_[new_node_idx] = threshold_bin; threshold_in_bin_[new_node_idx] = threshold_bin;
threshold_[new_node_idx] = threshold_double; threshold_[new_node_idx] = threshold_double;
split_gain_[new_node_idx] = gain; split_gain_[new_node_idx] = gain == std::numeric_limits<double>::infinity() ? std::numeric_limits<double>::max() : gain;
// add two new leaves // add two new leaves
left_child_[new_node_idx] = ~leaf; left_child_[new_node_idx] = ~leaf;
right_child_[new_node_idx] = ~num_leaves_; right_child_[new_node_idx] = ~num_leaves_;
......
...@@ -86,18 +86,6 @@ public: ...@@ -86,18 +86,6 @@ public:
} }
} }
/*!
* \brief Convert predictions to raw scores.
*        A prediction p strictly between kEpsilon and 1 becomes
*        -log(1 / p - 1) / sigmoid_; any other value becomes 0.
* \param preds Predictions to convert
* \return One raw score per entry of preds, in the same order
*/
std::vector<double> ConvertToRawScore(const std::vector<double>& preds) const override {
  // Start from all zeros so that only the convertible entries need to be written.
  std::vector<double> raw_scores(preds.size(), 0.0);
  for (size_t idx = 0; idx < preds.size(); ++idx) {
    const double prob = preds[idx];
    const bool convertible = prob > kEpsilon && prob < 1.0f;
    if (convertible) {
      raw_scores[idx] = -std::log(1.0f / prob - 1.0f) / sigmoid_;
    }
  }
  return raw_scores;
}
const char* GetName() const override { const char* GetName() const override {
return "binary"; return "binary";
} }
......
...@@ -93,18 +93,6 @@ public: ...@@ -93,18 +93,6 @@ public:
} }
} }
/*!
* \brief Convert predictions to raw scores.
*        A prediction p greater than kEpsilon becomes log(p); any other value
*        becomes 0.
* \param preds Predictions to convert
* \return One raw score per entry of preds, in the same order
*/
std::vector<double> ConvertToRawScore(const std::vector<double>& preds) const override {
  // Start from all zeros so that only the convertible entries need to be written.
  std::vector<double> raw_scores(preds.size(), 0.0);
  for (size_t idx = 0; idx < preds.size(); ++idx) {
    const double prob = preds[idx];
    if (prob > kEpsilon) {
      raw_scores[idx] = std::log(prob);
    }
  }
  return raw_scores;
}
const char* GetName() const override { const char* GetName() const override {
return "multiclass"; return "multiclass";
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment