Commit 63d974bc authored by Allard van Mossel's avatar Allard van Mossel Committed by Guolin Ke
Browse files

Added complexity regularization parameters (L1, L2, min_gain_to_split) (#69)

parent 9ca29e66
...@@ -154,6 +154,9 @@ struct TreeConfig: public ConfigBase { ...@@ -154,6 +154,9 @@ struct TreeConfig: public ConfigBase {
public: public:
int min_data_in_leaf = 100; int min_data_in_leaf = 100;
double min_sum_hessian_in_leaf = 10.0f; double min_sum_hessian_in_leaf = 10.0f;
double lambda_l1 = 0.0f;
double lambda_l2 = 0.0f;
double min_gain_to_split = 0.0f;
// should > 1, only one leaf means not need to learning // should > 1, only one leaf means not need to learning
int num_leaves = 127; int num_leaves = 127;
int feature_fraction_seed = 2; int feature_fraction_seed = 2;
......
...@@ -265,6 +265,12 @@ void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params) ...@@ -265,6 +265,12 @@ void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params)
GetInt(params, "min_data_in_leaf", &min_data_in_leaf); GetInt(params, "min_data_in_leaf", &min_data_in_leaf);
GetDouble(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf); GetDouble(params, "min_sum_hessian_in_leaf", &min_sum_hessian_in_leaf);
CHECK(min_sum_hessian_in_leaf > 1.0f || min_data_in_leaf > 0); CHECK(min_sum_hessian_in_leaf > 1.0f || min_data_in_leaf > 0);
// Parse complexity-regularization parameters; all must be non-negative.
GetDouble(params, "lambda_l1", &lambda_l1);
CHECK(lambda_l1 >= 0.0f);
GetDouble(params, "lambda_l2", &lambda_l2);
CHECK(lambda_l2 >= 0.0f);
// Minimal loss reduction required to perform a split.
GetDouble(params, "min_gain_to_split", &min_gain_to_split);
CHECK(min_gain_to_split >= 0.0f);
GetInt(params, "num_leaves", &num_leaves); GetInt(params, "num_leaves", &num_leaves);
CHECK(num_leaves > 1); CHECK(num_leaves > 1);
GetInt(params, "feature_fraction_seed", &feature_fraction_seed); GetInt(params, "feature_fraction_seed", &feature_fraction_seed);
......
...@@ -26,10 +26,13 @@ public: ...@@ -26,10 +26,13 @@ public:
* \param min_num_data_one_leaf minimal number of data in one leaf * \param min_num_data_one_leaf minimal number of data in one leaf
*/ */
void Init(const Feature* feature, int feature_idx, data_size_t min_num_data_one_leaf, void Init(const Feature* feature, int feature_idx, data_size_t min_num_data_one_leaf,
double min_sum_hessian_one_leaf) { double min_sum_hessian_one_leaf, double lambda_l1, double lambda_l2, double min_gain_to_split) {
feature_idx_ = feature_idx; feature_idx_ = feature_idx;
min_num_data_one_leaf_ = min_num_data_one_leaf; min_num_data_one_leaf_ = min_num_data_one_leaf;
min_sum_hessian_one_leaf_ = min_sum_hessian_one_leaf; min_sum_hessian_one_leaf_ = min_sum_hessian_one_leaf;
lambda_l1_ = lambda_l1;
lambda_l2_ = lambda_l2;
min_gain_to_split_ = min_gain_to_split;
bin_data_ = feature->bin_data(); bin_data_ = feature->bin_data();
num_bins_ = feature->num_bin(); num_bins_ = feature->num_bin();
data_ = new HistogramBinEntry[num_bins_]; data_ = new HistogramBinEntry[num_bins_];
...@@ -113,6 +116,7 @@ public: ...@@ -113,6 +116,7 @@ public:
double sum_right_hessian = kEpsilon; double sum_right_hessian = kEpsilon;
data_size_t right_count = 0; data_size_t right_count = 0;
double gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_); double gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_);
double min_gain_shift = gain_shift + min_gain_to_split_;
is_splittable_ = false; is_splittable_ = false;
// from right to left, and we don't need data in bin0 // from right to left, and we don't need data in bin0
for (unsigned int t = num_bins_ - 1; t > 0; --t) { for (unsigned int t = num_bins_ - 1; t > 0; --t) {
...@@ -127,16 +131,14 @@ public: ...@@ -127,16 +131,14 @@ public:
double sum_left_hessian = sum_hessians_ - sum_right_hessian; double sum_left_hessian = sum_hessians_ - sum_right_hessian;
// if sum hessian too small // if sum hessian too small
if (sum_left_hessian < min_sum_hessian_one_leaf_) { if (sum_left_hessian < min_sum_hessian_one_leaf_) break;
break;
}
double sum_left_gradient = sum_gradients_ - sum_right_gradient; double sum_left_gradient = sum_gradients_ - sum_right_gradient;
// current split gain // current split gain
double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian); double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian);
// gain is worst than no perform split // gain with split is worse than without split
if (current_gain < gain_shift) { if (current_gain < min_gain_shift) continue;
continue;
}
// mark to is splittable // mark to is splittable
is_splittable_ = true; is_splittable_ = true;
// better split point // better split point
...@@ -211,23 +213,32 @@ public: ...@@ -211,23 +213,32 @@ public:
private: private:
/*! /*!
* \brief Calculate the split gain based on sum_gradients and sum_hessians * \brief Calculate the split gain based on regularized sum_gradients and sum_hessians
* \param sum_gradients * \param sum_gradients
* \param sum_hessians * \param sum_hessians
* \return split gain * \return split gain
*/ */
double GetLeafSplitGain(double sum_gradients, double sum_hessians) const { double GetLeafSplitGain(double sum_gradients, double sum_hessians) const {
return (sum_gradients * sum_gradients) / (sum_hessians); double abs_sum_gradients = std::fabs(sum_gradients);
if (abs_sum_gradients > lambda_l1_) {
double reg_abs_sum_gradients = abs_sum_gradients - lambda_l1_;
return (reg_abs_sum_gradients * reg_abs_sum_gradients) / (sum_hessians + lambda_l2_);
}
return 0.0f;
} }
/*! /*!
* \brief Calculate the output of a leaf based on sum_gradients and sum_hessians * \brief Calculate the output of a leaf based on regularized sum_gradients and sum_hessians
* \param sum_gradients * \param sum_gradients
* \param sum_hessians * \param sum_hessians
* \return leaf output * \return leaf output
*/ */
double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians) const { double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians) const {
return -(sum_gradients) / (sum_hessians); double abs_sum_gradients = std::fabs(sum_gradients);
if (abs_sum_gradients > lambda_l1_) {
return -std::copysign(abs_sum_gradients - lambda_l1_, sum_gradients) / (sum_hessians + lambda_l2_);
}
return 0.0f;
} }
int feature_idx_; int feature_idx_;
...@@ -235,6 +246,12 @@ private: ...@@ -235,6 +246,12 @@ private:
data_size_t min_num_data_one_leaf_; data_size_t min_num_data_one_leaf_;
/*! \brief minimal sum hessian of data in one leaf */ /*! \brief minimal sum hessian of data in one leaf */
double min_sum_hessian_one_leaf_; double min_sum_hessian_one_leaf_;
/*! \brief lambda of the L1 weights regularization */
double lambda_l1_;
/*! \brief lambda of the L2 weights regularization */
double lambda_l2_;
/*! \brief minimal gain (loss reduction) to split */
double min_gain_to_split_;
/*! \brief the bin data of current feature */ /*! \brief the bin data of current feature */
const Bin* bin_data_; const Bin* bin_data_;
/*! \brief number of bin of histogram */ /*! \brief number of bin of histogram */
......
...@@ -16,6 +16,9 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config) ...@@ -16,6 +16,9 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
num_leaves_ = tree_config.num_leaves; num_leaves_ = tree_config.num_leaves;
min_num_data_one_leaf_ = static_cast<data_size_t>(tree_config.min_data_in_leaf); min_num_data_one_leaf_ = static_cast<data_size_t>(tree_config.min_data_in_leaf);
min_sum_hessian_one_leaf_ = static_cast<double>(tree_config.min_sum_hessian_in_leaf); min_sum_hessian_one_leaf_ = static_cast<double>(tree_config.min_sum_hessian_in_leaf);
lambda_l1_ = tree_config.lambda_l1;
lambda_l2_ = tree_config.lambda_l2;
min_gain_to_split_ = tree_config.min_gain_to_split;
feature_fraction_ = tree_config.feature_fraction; feature_fraction_ = tree_config.feature_fraction;
random_ = Random(tree_config.feature_fraction_seed); random_ = Random(tree_config.feature_fraction_seed);
histogram_pool_size_ = tree_config.histogram_pool_size; histogram_pool_size_ = tree_config.histogram_pool_size;
...@@ -68,7 +71,10 @@ void SerialTreeLearner::Init(const Dataset* train_data) { ...@@ -68,7 +71,10 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
for (int j = 0; j < train_data_->num_features(); ++j) { for (int j = 0; j < train_data_->num_features(); ++j) {
tmp_histogram_array[j].Init(train_data_->FeatureAt(j), tmp_histogram_array[j].Init(train_data_->FeatureAt(j),
j, min_num_data_one_leaf_, j, min_num_data_one_leaf_,
min_sum_hessian_one_leaf_); min_sum_hessian_one_leaf_,
lambda_l1_,
lambda_l2_,
min_gain_to_split_);
} }
return tmp_histogram_array; return tmp_histogram_array;
}; };
......
...@@ -111,10 +111,16 @@ protected: ...@@ -111,10 +111,16 @@ protected:
const score_t* hessians_; const score_t* hessians_;
/*! \brief number of total leaves */ /*! \brief number of total leaves */
int num_leaves_; int num_leaves_;
/*! \brief mininal data on one leaf */ /*! \brief minimal data on one leaf */
data_size_t min_num_data_one_leaf_; data_size_t min_num_data_one_leaf_;
/*! \brief mininal sum hessian on one leaf */ /*! \brief minimal sum hessian on one leaf */
double min_sum_hessian_one_leaf_; double min_sum_hessian_one_leaf_;
/*! \brief lambda of the L1 weights regularization */
double lambda_l1_;
/*! \brief lambda of the L2 weights regularization */
double lambda_l2_;
/*! \brief minimal gain (loss reduction) to split */
double min_gain_to_split_;
/*! \brief sub-feature fraction rate */ /*! \brief sub-feature fraction rate */
double feature_fraction_; double feature_fraction_;
/*! \brief training data partition on leaves */ /*! \brief training data partition on leaves */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment