Commit fb732c34 authored by Tsukasa OMOTO's avatar Tsukasa OMOTO Committed by Guolin Ke
Browse files
parent 27d3eb33
......@@ -21,6 +21,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```regression_l2```, L2 loss, alias=```mean_squared_error```,```mse```
* ```regression_l1```, L1 loss, alias=```mean_absolute_error```,```mae```
* ```huber```, [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia")
* ```fair```, [Fair loss](http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html)
* ```binary```, binary classification application
* ```lambdarank```, lambdarank application
* ```multiclass```, multi-class classification application, should set ```num_class``` as well
......@@ -170,6 +171,8 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* parameter for sigmoid function. Will be used in binary classification and lambdarank.
* ```huber_delta```, default=```1.0```, type=double
* parameter for [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia"). Will be used in regression task.
* ```fair_c```, default=```1.0```, type=double
* parameter for [Fair loss](http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html). Will be used in regression task.
* ```scale_pos_weight```, default=```1.0```, type=double
* weight of positive class in binary classification task
* ```is_unbalance```, default=```false```, type=bool
......@@ -187,6 +190,8 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* ```metric```, default={```l2``` for regression}, {```binary_logloss``` for binary classification},{```ndcg``` for lambdarank}, type=multi-enum, options=```l1```,```l2```,```ndcg```,```auc```,```binary_logloss```,```binary_error```
* ```l1```, absolute loss
* ```l2```, square loss
* ```huber```, [Huber loss](https://en.wikipedia.org/wiki/Huber_loss "Huber loss - Wikipedia")
* ```fair```, [Fair loss](http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html)
* ```ndcg```, [NDCG](https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG)
    * ```auc```, [AUC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve)
* ```binary_logloss```, [log loss](https://www.kaggle.com/wiki/LogarithmicLoss)
......
......@@ -631,6 +631,8 @@ The methods of each Class is in alphabetical order.
Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float
Only used in regression. Parameter for Huber loss function.
fair_c : float
Only used in regression. Parameter for Fair loss function.
max_position : int
Only used in lambdarank, will optimize NDCG at this position.
label_gain : list of float
......
......@@ -138,6 +138,7 @@ public:
virtual ~ObjectiveConfig() {}
double sigmoid = 1.0f;
double huber_delta = 1.0f;
double fair_c = 1.0f;
// for lambdarank
std::vector<double> label_gain;
// for lambdarank
......@@ -158,6 +159,7 @@ public:
int num_class = 1;
double sigmoid = 1.0f;
double huber_delta = 1.0f;
double fair_c = 1.0f;
std::vector<double> label_gain;
std::vector<int> eval_at;
void Set(const std::unordered_map<std::string, std::string>& params) override;
......
......@@ -153,7 +153,7 @@ class LGBMModel(LGBMModelBase):
subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
is_unbalance=False, seed=0, nthread=-1, silent=True,
sigmoid=1.0, huber_delta=1.0, max_position=20, label_gain=None,
sigmoid=1.0, huber_delta=1.0, fair_c=1.0, max_position=20, label_gain=None,
drop_rate=0.1, skip_drop=0.5, max_drop=50,
uniform_drop=False, xgboost_dart_mode=False):
"""
......@@ -210,6 +210,8 @@ class LGBMModel(LGBMModelBase):
Only used in binary classification and lambdarank. Parameter for sigmoid function.
huber_delta : float
Only used in regression. Parameter for Huber loss function.
fair_c : float
Only used in regression. Parameter for Fair loss function.
max_position : int
Only used in lambdarank, will optimize NDCG at this position.
label_gain : list of float
......@@ -275,6 +277,7 @@ class LGBMModel(LGBMModelBase):
self.silent = silent
self.sigmoid = sigmoid
self.huber_delta = huber_delta
self.fair_c = fair_c
self.max_position = max_position
self.label_gain = label_gain
self.drop_rate = drop_rate
......@@ -500,7 +503,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0,
seed=0, nthread=-1, silent=True,
huber_delta=1.0,
huber_delta=1.0, fair_c=1.0,
drop_rate=0.1, skip_drop=0.5, max_drop=50,
uniform_drop=False, xgboost_dart_mode=False):
super(LGBMRegressor, self).__init__(boosting_type=boosting_type, num_leaves=num_leaves,
......@@ -512,7 +515,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
subsample_freq=subsample_freq, colsample_bytree=colsample_bytree,
reg_alpha=reg_alpha, reg_lambda=reg_lambda,
seed=seed, nthread=nthread, silent=silent,
huber_delta=huber_delta,
huber_delta=huber_delta, fair_c=fair_c,
drop_rate=drop_rate, skip_drop=skip_drop, max_drop=max_drop,
uniform_drop=uniform_drop, xgboost_dart_mode=xgboost_dart_mode)
......
......@@ -220,6 +220,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
GetBool(params, "is_unbalance", &is_unbalance);
GetDouble(params, "sigmoid", &sigmoid);
GetDouble(params, "huber_delta", &huber_delta);
GetDouble(params, "fair_c", &fair_c);
GetInt(params, "max_position", &max_position);
CHECK(max_position > 0);
GetInt(params, "num_class", &num_class);
......@@ -243,6 +244,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
void MetricConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetDouble(params, "sigmoid", &sigmoid);
GetDouble(params, "huber_delta", &huber_delta);
GetDouble(params, "fair_c", &fair_c);
GetInt(params, "num_class", &num_class);
std::string tmp_str = "";
if (GetString(params, "label_gain", &tmp_str)) {
......
......@@ -13,6 +13,8 @@ Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config
return new L1Metric(config);
} else if (type == std::string("huber")) {
return new HuberLossMetric(config);
} else if (type == std::string("fair")) {
return new FairLossMetric(config);
} else if (type == std::string("binary_logloss")) {
return new BinaryLoglossMetric(config);
} else if (type == std::string("binary_error")) {
......
......@@ -15,7 +15,7 @@ namespace LightGBM {
template<typename PointWiseLossCalculator>
class RegressionMetric: public Metric {
public:
explicit RegressionMetric(const MetricConfig&) :huber_delta_(1.0f) {
explicit RegressionMetric(const MetricConfig&) :huber_delta_(1.0f), fair_c_(1.0f) {
}
virtual ~RegressionMetric() {
......@@ -54,13 +54,13 @@ public:
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_);
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_, fair_c_);
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_) * weights_[i];
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i], huber_delta_, fair_c_) * weights_[i];
}
}
double loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
......@@ -75,6 +75,8 @@ public:
protected:
/*! \brief delta for Huber loss */
double huber_delta_;
/*! \brief c for Fair loss */
double fair_c_;
private:
/*! \brief Number of data */
......@@ -94,7 +96,7 @@ class L2Metric: public RegressionMetric<L2Metric> {
public:
explicit L2Metric(const MetricConfig& config) :RegressionMetric<L2Metric>(config) {}
inline static score_t LossOnPoint(float label, score_t score, float) {
inline static score_t LossOnPoint(float label, score_t score, float, float) {
return (score - label)*(score - label);
}
......@@ -113,7 +115,7 @@ class L1Metric: public RegressionMetric<L1Metric> {
public:
explicit L1Metric(const MetricConfig& config) :RegressionMetric<L1Metric>(config) {}
inline static score_t LossOnPoint(float label, score_t score, float) {
inline static score_t LossOnPoint(float label, score_t score, float, float) {
return std::fabs(score - label);
}
inline static const char* Name() {
......@@ -128,7 +130,7 @@ public:
huber_delta_ = config.huber_delta;
}
inline static score_t LossOnPoint(float label, score_t score, float delta) {
inline static score_t LossOnPoint(float label, score_t score, float delta, float) {
const double diff = score - label;
if (std::abs(diff) <= delta) {
return 0.5 * diff * diff;
......@@ -142,5 +144,23 @@ public:
}
};
/*! \brief Fair loss for regression task */
// Reference: http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html
class FairLossMetric: public RegressionMetric<FairLossMetric> {
public:
  explicit FairLossMetric(const MetricConfig& config)
    : RegressionMetric<FairLossMetric>(config) {
    // Pull the Fair-loss parameter c out of the metric configuration
    // (overrides the base-class default of 1.0f).
    fair_c_ = config.fair_c;
  }
  /*!
  * \brief Fair loss on a single data point:
  *        c * |score - label| - c^2 * log(1 + |score - label| / c)
  * The unnamed third parameter is the Huber delta, unused by this metric.
  */
  inline static score_t LossOnPoint(float label, score_t score, float, float c) {
    const double abs_diff = std::abs(score - label);
    const double linear_part = c * abs_diff;
    const double log_part = c * c * std::log(1.0 + abs_diff / c);
    return linear_part - log_part;
  }
  /*! \brief Identifier reported for this metric */
  inline static const char* Name() {
    return "fair";
  }
};
} // namespace LightGBM
#endif // LightGBM_METRIC_REGRESSION_METRIC_HPP_
......@@ -14,6 +14,8 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
return new RegressionL1loss(config);
} else if (type == std::string("huber")) {
return new RegressionHuberLoss(config);
} else if (type == std::string("fair")) {
return new RegressionFairLoss(config);
} else if (type == std::string("binary")) {
return new BinaryLogloss(config);
} else if (type == std::string("lambdarank")) {
......
......@@ -175,5 +175,57 @@ private:
double delta_;
};
/*!
* \brief Objective function for regression with Fair loss:
*        L(x) = c * |x| - c^2 * log(1 + |x| / c),  where x = score - label
* Reference: http://research.microsoft.com/en-us/um/people/zhang/INRIA/Publis/Tutorial-Estim/node24.html
*/
class RegressionFairLoss: public ObjectiveFunction {
public:
  explicit RegressionFairLoss(const ObjectiveConfig& config) {
    c_ = config.fair_c;
  }
  ~RegressionFairLoss() {}
  /*!
  * \brief Bind label / optional weight pointers from the dataset metadata.
  * \param metadata Dataset metadata (labels, weights)
  * \param num_data Number of data points
  */
  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
    label_ = metadata.label();
    weights_ = metadata.weights();
  }
  /*!
  * \brief First- and second-order derivatives of the Fair loss w.r.t. score:
  *          gradient = c * x / (|x| + c)
  *          hessian  = c^2 / (|x| + c)^2
  *        Both are scaled by the per-data weight when weights are present.
  * \param score Current model scores
  * \param gradients Output buffer for first-order gradients
  * \param hessians Output buffer for second-order gradients
  */
  void GetGradients(const score_t* score, score_t* gradients,
                    score_t* hessians) const override {
    if (weights_ == nullptr) {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double x = score[i] - label_[i];
        // |x| + c appears in both derivatives; compute it once per point.
        const double denom = std::fabs(x) + c_;
        gradients[i] = c_ * x / denom;
        hessians[i] = c_ * c_ / (denom * denom);
      }
    } else {
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double x = score[i] - label_[i];
        const double denom = std::fabs(x) + c_;
        // Keep the narrow-then-scale order so weighted results match the
        // unweighted path's score_t rounding before the weight is applied.
        gradients[i] = c_ * x / denom;
        gradients[i] *= weights_[i];
        hessians[i] = c_ * c_ / (denom * denom);
        hessians[i] *= weights_[i];
      }
    }
  }
  const char* GetName() const override {
    return "fair";
  }
private:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Pointer of label */
  const float* label_;
  /*! \brief Pointer of weights */
  const float* weights_;
  /*! \brief c for Fair loss */
  double c_;
};
} // namespace LightGBM
#endif // LightGBM_OBJECTIVE_REGRESSION_OBJECTIVE_HPP_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment