"vscode:/vscode.git/clone" did not exist on "b4535b8d843c9f0881d9f30a0869def4a1aec83f"
Commit 63d974bc authored by Allard van Mossel, committed by Guolin Ke

Added complexity regularization parameters (L1, L2, min_gain_to_split) (#69)

parent 9ca29e66
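In formula terms, reading the code off the diff below, the commit replaces the unregularized split gain G^2/H and leaf output -G/H with L1/L2-regularized versions, and requires a minimum gain improvement before a split is accepted:

\[
\text{gain}(G,H)=\begin{cases}\dfrac{(\lvert G\rvert-\lambda_1)^2}{H+\lambda_2} & \lvert G\rvert>\lambda_1\\[4pt] 0 & \text{otherwise}\end{cases}
\qquad
\text{output}(G,H)=-\operatorname{sign}(G)\,\frac{\max(\lvert G\rvert-\lambda_1,\,0)}{H+\lambda_2}
\]

where G and H are the sums of gradients and hessians in a leaf, \(\lambda_1\) is lambda_l1 and \(\lambda_2\) is lambda_l2. A threshold is then accepted only when gain(G_left, H_left) + gain(G_right, H_right) >= gain(G, H) + min_gain_to_split.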
@@ -154,6 +154,9 @@ struct TreeConfig: public ConfigBase {
public:
int min_data_in_leaf = 100;
double min_sum_hessian_in_leaf = 10.0f;
double lambda_l1 = 0.0f;
double lambda_l2 = 0.0f;
double min_gain_to_split = 0.0f;
// should be > 1; a tree with only one leaf has nothing to learn
int num_leaves = 127;
int feature_fraction_seed = 2;
......
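All three new parameters default to 0.0, so by default the regularized gain above reduces to the previous G^2/H and the leaf output to -G/H; existing configurations behave exactly as before.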
@@ -265,6 +265,12 @@ void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params)
GetInt(params, "min_data_in_leaf", &min_data_in_leaf);
GetDouble(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf);
CHECK(min_sum_hessian_in_leaf > 1.0f || min_data_in_leaf > 0);
GetDouble(params, "lambda_l1", &lambda_l1);
CHECK(lambda_l1 >= 0.0f);
GetDouble(params, "lambda_l2", &lambda_l2);
CHECK(lambda_l2 >= 0.0f);
GetDouble(params, "min_gain_to_split", &min_gain_to_split);
CHECK(min_gain_to_split >= 0.0f);
GetInt(params, "num_leaves", &num_leaves);
CHECK(num_leaves > 1);
GetInt(params, "feature_fraction_seed", &feature_fraction_seed);
......
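A minimal usage sketch of the new options: only TreeConfig, Set(), and the parameter keys come from the diff above; the function name, include path, and parameter values are illustrative.

#include <string>
#include <unordered_map>
#include "tree_config.h"  // hypothetical include that provides TreeConfig

// Hypothetical driver: build a parameter map and let TreeConfig::Set parse
// and validate the regularization options added in this commit.
void ConfigureTree() {
  std::unordered_map<std::string, std::string> params = {
    {"num_leaves", "63"},
    {"lambda_l1", "0.1"},          // L1 penalty on leaf outputs
    {"lambda_l2", "1.0"},          // L2 penalty on leaf outputs
    {"min_gain_to_split", "0.5"}   // minimal loss reduction required to split
  };
  TreeConfig tree_config;
  tree_config.Set(params);         // the CHECKs above reject negative values
}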
@@ -26,10 +26,13 @@ public:
* \param min_num_data_one_leaf minimal number of data in one leaf
*/
void Init(const Feature* feature, int feature_idx, data_size_t min_num_data_one_leaf,
double min_sum_hessian_one_leaf) {
double min_sum_hessian_one_leaf, double lambda_l1, double lambda_l2, double min_gain_to_split) {
feature_idx_ = feature_idx;
min_num_data_one_leaf_ = min_num_data_one_leaf;
min_sum_hessian_one_leaf_ = min_sum_hessian_one_leaf;
lambda_l1_ = lambda_l1;
lambda_l2_ = lambda_l2;
min_gain_to_split_ = min_gain_to_split;
bin_data_ = feature->bin_data();
num_bins_ = feature->num_bin();
data_ = new HistogramBinEntry[num_bins_];
@@ -113,6 +116,7 @@ public:
double sum_right_hessian = kEpsilon;
data_size_t right_count = 0;
double gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_);
double min_gain_shift = gain_shift + min_gain_to_split_;
is_splittable_ = false;
// from right to left, and we don't need data in bin0
for (unsigned int t = num_bins_ - 1; t > 0; --t) {
@@ -127,16 +131,14 @@ public:
double sum_left_hessian = sum_hessians_ - sum_right_hessian;
// if sum hessian too small
if (sum_left_hessian < min_sum_hessian_one_leaf_) {
break;
}
if (sum_left_hessian < min_sum_hessian_one_leaf_) break;
double sum_left_gradient = sum_gradients_ - sum_right_gradient;
// current split gain
double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian);
// gain is worst than no perform split
if (current_gain < gain_shift) {
continue;
}
// gain with split is worse than without split
if (current_gain < min_gain_shift) continue;
// mark as splittable
is_splittable_ = true;
// better split point
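A short worked example of the new check above: with gain_shift = 10.0 (the parent leaf's own gain) and min_gain_to_split = 1.0, min_gain_shift is 11.0, so a candidate threshold whose combined left/right gain is 10.5 is now skipped, whereas previously any combined gain above 10.0 would have been kept.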
@@ -211,23 +213,32 @@ public:
private:
/*!
* \brief Calculate the split gain based on sum_gradients and sum_hessians
* \brief Calculate the split gain based on regularized sum_gradients and sum_hessians
* \param sum_gradients
* \param sum_hessians
* \return split gain
*/
double GetLeafSplitGain(double sum_gradients, double sum_hessians) const {
return (sum_gradients * sum_gradients) / (sum_hessians);
double abs_sum_gradients = std::fabs(sum_gradients);
if (abs_sum_gradients > lambda_l1_) {
double reg_abs_sum_gradients = abs_sum_gradients - lambda_l1_;
return (reg_abs_sum_gradients * reg_abs_sum_gradients) / (sum_hessians + lambda_l2_);
}
return 0.0f;
}
/*!
* \brief Calculate the output of a leaf based on sum_gradients and sum_hessians
* \brief Calculate the output of a leaf based on regularized sum_gradients and sum_hessians
* \param sum_gradients
* \param sum_hessians
* \return leaf output
*/
double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians) const {
return -(sum_gradients) / (sum_hessians);
double abs_sum_gradients = std::fabs(sum_gradients);
if (abs_sum_gradients > lambda_l1_) {
return -std::copysign(abs_sum_gradients - lambda_l1_, sum_gradients) / (sum_hessians + lambda_l2_);
}
return 0.0f;
}
int feature_idx_;
@@ -235,6 +246,12 @@ private:
data_size_t min_num_data_one_leaf_;
/*! \brief minimal sum hessian of data in one leaf */
double min_sum_hessian_one_leaf_;
/*! \brief lambda of the L1 weights regularization */
double lambda_l1_;
/*! \brief lambda of the L2 weights regularization */
double lambda_l2_;
/*! \brief minimal gain (loss reduction) to split */
double min_gain_to_split_;
/*! \brief the bin data of current feature */
const Bin* bin_data_;
/*! \brief number of bin of histogram */
......
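As a standalone illustration of the two regularized formulas above: a self-contained sketch, not LightGBM code; the function names, sample inputs, and the main() demo are chosen for this example, while the arithmetic mirrors GetLeafSplitGain and CalculateSplittedLeafOutput from the diff.

#include <cmath>
#include <cstdio>

// Regularized split gain: (|G| - l1)^2 / (H + l2), clipped to 0 when |G| <= l1.
double LeafSplitGain(double g, double h, double l1, double l2) {
  double abs_g = std::fabs(g);
  if (abs_g <= l1) return 0.0;
  double reg = abs_g - l1;
  return reg * reg / (h + l2);
}

// Regularized leaf output: soft-thresholded gradient sum divided by (H + l2).
double LeafOutput(double g, double h, double l1, double l2) {
  double abs_g = std::fabs(g);
  if (abs_g <= l1) return 0.0;
  return -std::copysign(abs_g - l1, g) / (h + l2);
}

int main() {
  // With lambda_l1 = 0 and lambda_l2 = 0 these reduce to the old G*G/H and -G/H.
  std::printf("gain   = %f\n", LeafSplitGain(-6.0, 4.0, 1.0, 2.0));  // (6-1)^2 / (4+2) ~ 4.1667
  std::printf("output = %f\n", LeafOutput(-6.0, 4.0, 1.0, 2.0));     // +5 / 6 ~ 0.8333
  return 0;
}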
@@ -16,6 +16,9 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
num_leaves_ = tree_config.num_leaves;
min_num_data_one_leaf_ = static_cast<data_size_t>(tree_config.min_data_in_leaf);
min_sum_hessian_one_leaf_ = static_cast<double>(tree_config.min_sum_hessian_in_leaf);
lambda_l1_ = tree_config.lambda_l1;
lambda_l2_ = tree_config.lambda_l2;
min_gain_to_split_ = tree_config.min_gain_to_split;
feature_fraction_ = tree_config.feature_fraction;
random_ = Random(tree_config.feature_fraction_seed);
histogram_pool_size_ = tree_config.histogram_pool_size;
@@ -68,7 +71,10 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
for (int j = 0; j < train_data_->num_features(); ++j) {
tmp_histogram_array[j].Init(train_data_->FeatureAt(j),
j, min_num_data_one_leaf_,
min_sum_hessian_one_leaf_);
min_sum_hessian_one_leaf_,
lambda_l1_,
lambda_l2_,
min_gain_to_split_);
}
return tmp_histogram_array;
};
......
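Note the wiring here: the learner copies the three new values out of TreeConfig in its constructor and forwards them to every per-feature FeatureHistogram via Init(), so the histogram code receives plain scalars and never needs a reference to the full config object.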
@@ -111,10 +111,16 @@ protected:
const score_t* hessians_;
/*! \brief number of total leaves */
int num_leaves_;
/*! \brief mininal data on one leaf */
/*! \brief minimal data on one leaf */
data_size_t min_num_data_one_leaf_;
/*! \brief mininal sum hessian on one leaf */
/*! \brief minimal sum hessian on one leaf */
double min_sum_hessian_one_leaf_;
/*! \brief lambda of the L1 weights regularization */
double lambda_l1_;
/*! \brief lambda of the L2 weights regularization */
double lambda_l2_;
/*! \brief minimal gain (loss reduction) to split */
double min_gain_to_split_;
/*! \brief sub-feature fraction rate */
double feature_fraction_;
/*! \brief training data partition on leaves */
......