Commit 99b483dd authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

add learning rate to dart (#133)

add learning rate to dart (#133)
parent 574d5342
...@@ -28,6 +28,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can ...@@ -28,6 +28,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* number of boosting iterations/trees * number of boosting iterations/trees
* ```learning_rate```, default=```0.1```, type=double, alias=```shrinkage_rate``` * ```learning_rate```, default=```0.1```, type=double, alias=```shrinkage_rate```
* shrinkage rate * shrinkage rate
* in ```dart```, it also affects normalization weights of dropped trees
* ```num_leaves```, default=```127```, type=int, alias=```num_leaf``` * ```num_leaves```, default=```127```, type=int, alias=```num_leaf```
* number of leaves in one tree * number of leaves in one tree
* ```tree_learner```, default=```serial```, type=enum, options=```serial```,```feature```,```data``` * ```tree_learner```, default=```serial```, type=enum, options=```serial```,```feature```,```data```
...@@ -56,7 +57,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can ...@@ -56,7 +57,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* Random seed for feature fraction. * Random seed for feature fraction.
* ```bagging_fraction```, default=```1.0```, type=double, , ```0.0 < bagging_fraction < 1.0```, alias=```sub_row``` * ```bagging_fraction```, default=```1.0```, type=double, , ```0.0 < bagging_fraction < 1.0```, alias=```sub_row```
* Like ```feature_fraction```, but this will random select part of data * Like ```feature_fraction```, but this will random select part of data
* Can use this to speed up training
* Can use this to deal with over-fit * Can use this to deal with over-fit
* Note: To enable bagging, should set ```bagging_freq``` to a non zero value as well * Note: To enable bagging, should set ```bagging_freq``` to a non zero value as well
* ```bagging_freq```, default=```0```, type=int * ```bagging_freq```, default=```0```, type=int
...@@ -74,6 +75,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can ...@@ -74,6 +75,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
* The minimal gain to perform split * The minimal gain to perform split
* ```drop_rate```, default=```0.01```, type=double * ```drop_rate```, default=```0.01```, type=double
* only used in ```dart```, will drop ```drop_rate*current_num_models``` before boosting. * only used in ```dart```, will drop ```drop_rate*current_num_models``` before boosting.
* If you want to use ```skip_rate``` like in xgboost, you can use [callbacks](Python-API.md#callbacks) to change ```drop_rate``` during training.
* ```drop_seed```, default=```4```, type=int * ```drop_seed```, default=```4```, type=int
* only used in ```dart```, used to random seed to choose dropping models. * only used in ```dart```, used to random seed to choose dropping models.
......
...@@ -73,7 +73,6 @@ print('Finish 10 - 20 rounds with model file...') ...@@ -73,7 +73,6 @@ print('Finish 10 - 20 rounds with model file...')
# learning_rates accepts: # learning_rates accepts:
# 1. list/tuple with length = num_boost_round # 1. list/tuple with length = num_boost_round
# 2. function(curr_iter) # 2. function(curr_iter)
# 3. function(curr_iter, total_iter)
gbm = lgb.train(params, gbm = lgb.train(params,
lgb_train, lgb_train,
num_boost_round=10, num_boost_round=10,
...@@ -83,6 +82,16 @@ gbm = lgb.train(params, ...@@ -83,6 +82,16 @@ gbm = lgb.train(params,
print('Finish 20 - 30 rounds with decay learning rates...') print('Finish 20 - 30 rounds with decay learning rates...')
# change other parameters during training
# reset_parameter accepts a list with one value per remaining boosting round
# (or a callable of the iteration number); here bagging_fraction is 0.7 for
# the first 5 of these 10 rounds, then 0.6 for the last 5
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,  # continue training from the previous booster
                valid_sets=lgb_eval,
                callbacks=[lgb.reset_parameter(bagging_fraction=[0.7]*5+[0.6]*5)])
print('Finish 30 - 40 rounds with changing bagging_fraction...')
# self-defined objective function # self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array # f(preds: array, train_data: Dataset) -> grad: array, hess: array
# log likelihood loss # log likelihood loss
...@@ -108,7 +117,7 @@ gbm = lgb.train(params, ...@@ -108,7 +117,7 @@ gbm = lgb.train(params,
feval=binary_error, feval=binary_error,
valid_sets=lgb_eval) valid_sets=lgb_eval)
print('Finish 30 - 40 rounds with self-defined objective function and eval metric...') print('Finish 40 - 50 rounds with self-defined objective function and eval metric...')
print('Start a new training job...') print('Start a new training job...')
# callback # callback
......
...@@ -218,20 +218,21 @@ except ImportError: ...@@ -218,20 +218,21 @@ except ImportError:
except ImportError: except ImportError:
SKLEARN_StratifiedKFold = False SKLEARN_StratifiedKFold = False
def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False): def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True):
""" """
Make an n-fold list of CVBooster from random indices. Make an n-fold list of CVBooster from random indices.
""" """
np.random.seed(seed) np.random.seed(seed)
if stratified: if stratified:
if SKLEARN_StratifiedKFold: if SKLEARN_StratifiedKFold:
sfk = StratifiedKFold(n_splits=nfold, shuffle=True, random_state=seed) sfk = StratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed)
idset = [x[1] for x in sfk.split(X=full_data.get_label(), y=full_data.get_label())] idset = [x[1] for x in sfk.split(X=full_data.get_label(), y=full_data.get_label())]
else: else:
raise LightGBMError('Scikit-learn is required for stratified cv') raise LightGBMError('Scikit-learn is required for stratified cv')
else: else:
full_data.construct() full_data.construct()
randidx = np.random.permutation(full_data.num_data()) if shuffle:
randidx = np.random.permutation(full_data.num_data())
kstep = int(len(randidx) / nfold) kstep = int(len(randidx) / nfold)
idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)] idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)]
...@@ -260,7 +261,7 @@ def _agg_cv_result(raw_results): ...@@ -260,7 +261,7 @@ def _agg_cv_result(raw_results):
return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()] return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]
def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
metrics=None, fobj=None, feval=None, init_model=None, shuffle=True, metrics=None, fobj=None, feval=None, init_model=None,
feature_name=None, categorical_feature=None, feature_name=None, categorical_feature=None,
early_stopping_rounds=None, fpreproc=None, early_stopping_rounds=None, fpreproc=None,
verbose_eval=None, show_stdv=True, seed=0, verbose_eval=None, show_stdv=True, seed=0,
...@@ -280,6 +281,8 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, ...@@ -280,6 +281,8 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
Number of folds in CV. Number of folds in CV.
stratified : bool stratified : bool
Perform stratified sampling. Perform stratified sampling.
shuffle: bool
Whether to shuffle the data before splitting it into folds
folds : a KFold or StratifiedKFold instance folds : a KFold or StratifiedKFold instance
Sklearn KFolds or StratifiedKFolds. Sklearn KFolds or StratifiedKFolds.
metrics : string or list of strings metrics : string or list of strings
...@@ -342,7 +345,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, ...@@ -342,7 +345,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
params['metric'].extend(metrics) params['metric'].extend(metrics)
results = collections.defaultdict(list) results = collections.defaultdict(list)
cvfolds = _make_n_folds(train_set, nfold, params, seed, fpreproc, stratified) cvfolds = _make_n_folds(train_set, nfold, params, seed, fpreproc, stratified, shuffle)
# setup callbacks # setup callbacks
if callbacks is None: if callbacks is None:
......
...@@ -35,7 +35,6 @@ public: ...@@ -35,7 +35,6 @@ public:
void Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function, void Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics) override { const std::vector<const Metric*>& training_metrics) override {
GBDT::Init(config, train_data, object_function, training_metrics); GBDT::Init(config, train_data, object_function, training_metrics);
shrinkage_rate_ = 1.0;
random_for_drop_ = Random(gbdt_config_->drop_seed); random_for_drop_ = Random(gbdt_config_->drop_seed);
} }
/*! /*!
...@@ -56,7 +55,7 @@ public: ...@@ -56,7 +55,7 @@ public:
void ResetTrainingData(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function, void ResetTrainingData(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics) { const std::vector<const Metric*>& training_metrics) {
GBDT::ResetTrainingData(config, train_data, object_function, training_metrics); GBDT::ResetTrainingData(config, train_data, object_function, training_metrics);
shrinkage_rate_ = 1.0; shrinkage_rate_ = gbdt_config_->learning_rate / (gbdt_config_->learning_rate + static_cast<double>(drop_index_.size()));
} }
/*! /*!
...@@ -106,10 +105,17 @@ private: ...@@ -106,10 +105,17 @@ private:
train_score_updater_->AddScore(models_[curr_tree].get(), curr_class); train_score_updater_->AddScore(models_[curr_tree].get(), curr_class);
} }
} }
shrinkage_rate_ = 1.0 / (1.0 + drop_index_.size()); shrinkage_rate_ = gbdt_config_->learning_rate / (gbdt_config_->learning_rate + static_cast<double>(drop_index_.size()));
} }
/*! /*!
* \brief normalize dropped trees * \brief normalize dropped trees
* NOTE: num_drop_tree(k), learning_rate(lr), shrinkage_rate_ = lr / (k + lr)
* step 1: shrink tree to -1 -> drop tree
* step 2: shrink tree to k / (k + lr) - 1 from -1
* -> normalize for valid data
* step 3: shrink tree to k / (k + lr) from k / (k + lr) - 1
* -> normalize for train data
* end with tree weight = k / (k + lr)
*/ */
void Normalize() { void Normalize() {
double k = static_cast<double>(drop_index_.size()); double k = static_cast<double>(drop_index_.size());
...@@ -122,7 +128,7 @@ private: ...@@ -122,7 +128,7 @@ private:
score_updater->AddScore(models_[curr_tree].get(), curr_class); score_updater->AddScore(models_[curr_tree].get(), curr_class);
} }
// update training score // update training score
models_[curr_tree]->Shrinkage(-k); models_[curr_tree]->Shrinkage(-k / gbdt_config_->learning_rate);
train_score_updater_->AddScore(models_[curr_tree].get(), curr_class); train_score_updater_->AddScore(models_[curr_tree].get(), curr_class);
} }
} }
......
...@@ -56,6 +56,13 @@ def test_regression(): ...@@ -56,6 +56,13 @@ def test_regression():
preds = lgb_model.predict(x_test) preds = lgb_model.predict(x_test)
assert mean_squared_error(preds, y_test) < 100 assert mean_squared_error(preds, y_test) < 100
def test_lambdarank():
    """Smoke test: fit LGBMRanker on the bundled lambdarank example data.

    Only verifies that training completes without raising; there is no
    assertion on ranking quality.
    """
    from sklearn.datasets import load_svmlight_file
    # Paths are relative to the current working directory -- assumes the
    # test is run from the test directory two levels below the repo root
    # (TODO confirm against how the test suite is invoked).
    X_train, y_train = load_svmlight_file('../../examples/lambdarank/rank.train')
    # NOTE(review): X_test/y_test are loaded but never used below; either
    # pass them as an eval_set or drop this load.
    X_test, y_test = load_svmlight_file('../../examples/lambdarank/rank.test')
    # Query/group sizes: one entry per query giving the number of rows
    # belonging to that query in the training file.
    q_train = np.loadtxt('../../examples/lambdarank/rank.train.query')
    # eval_at=[1] evaluates the ranking metric at position 1 -- presumably
    # NDCG@1; verify against the LGBMRanker API.
    lgb_model = lgb.LGBMRanker().fit(X_train, y_train, group=q_train, eval_at=[1])
def test_regression_with_custom_objective(): def test_regression_with_custom_objective():
from sklearn.metrics import mean_squared_error from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston from sklearn.datasets import load_boston
...@@ -118,6 +125,7 @@ def test_early_stopping(): ...@@ -118,6 +125,7 @@ def test_early_stopping():
test_binary_classification() test_binary_classification()
test_multiclass_classification() test_multiclass_classification()
test_regression() test_regression()
test_lambdarank()
test_regression_with_custom_objective() test_regression_with_custom_objective()
test_binary_classification_with_custom_objective() test_binary_classification_with_custom_objective()
test_early_stopping() test_early_stopping()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment