Commit 99b483dd authored by wxchan, committed by Guolin Ke

add learning rate to dart (#133)

parent 574d5342
@@ -28,6 +28,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
  * number of boosting iterations/trees
* ```learning_rate```, default=```0.1```, type=double, alias=```shrinkage_rate```
  * shrinkage rate
  * in ```dart```, it also affects the normalization weights of dropped trees
* ```num_leaves```, default=```127```, type=int, alias=```num_leaf```
  * number of leaves in one tree
* ```tree_learner```, default=```serial```, type=enum, options=```serial```,```feature```,```data```
@@ -56,7 +57,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
  * Random seed for feature fraction.
* ```bagging_fraction```, default=```1.0```, type=double, ```0.0 < bagging_fraction < 1.0```, alias=```sub_row```
  * Like ```feature_fraction```, but this will randomly select part of the data
  * can use this to speed up training
  * Can use this to speed up training
  * Can use this to deal with over-fitting
  * Note: to enable bagging, you should also set ```bagging_freq``` to a non-zero value
* ```bagging_freq```, default=```0```, type=int
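  * For example (illustrative values), to bag 80% of the data every 5 iterations, set ```bagging_fraction=0.8 bagging_freq=5```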
@@ -74,6 +75,7 @@ The parameter format is ```key1=value1 key2=value2 ... ``` . And parameters can
  * The minimal gain to perform a split
* ```drop_rate```, default=```0.01```, type=double
  * only used in ```dart```; drops ```drop_rate * current_num_models``` trees before each boosting iteration
  * If you want behavior like ```skip_rate``` in xgboost, you can use [callbacks](Python-API.md#callbacks) to change ```drop_rate``` during training, as sketched below this list
* ```drop_seed```, default=```4```, type=int
  * only used in ```dart```; random seed for choosing which trees to drop
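A minimal sketch of that callback approach (the schedule, data, and parameter values are illustrative; it assumes the ```reset_parameter``` callback accepts a function of the iteration index):

```python
import numpy as np
import lightgbm as lgb

# emulate xgboost's skip_rate: with probability skip_rate, skip dropout
# for an iteration by resetting drop_rate to 0
rng = np.random.RandomState(42)
skip_rate = 0.5
base_drop_rate = 0.01

def drop_rate_by_iter(curr_iter):
    return 0.0 if rng.rand() < skip_rate else base_drop_rate

# synthetic data, only to make the sketch self-contained
X = rng.rand(500, 10)
y = (X[:, 0] > 0.5).astype(int)
lgb_train = lgb.Dataset(X, y)
gbm = lgb.train({'boosting_type': 'dart', 'objective': 'binary'},
                lgb_train,
                num_boost_round=30,
                callbacks=[lgb.reset_parameter(drop_rate=drop_rate_by_iter)])
```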
......
@@ -73,7 +73,6 @@ print('Finish 10 - 20 rounds with model file...')
# learning_rates accepts:
# 1. list/tuple with length = num_boost_round
# 2. function(curr_iter)
# 3. function(curr_iter, total_iter)
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
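The rest of this call falls outside the hunk; a sketch of how the decay schedule might look as a ```function(curr_iter)``` (the lambda and its constants are illustrative; ```params```, ```lgb_train```, ```gbm```, and ```lgb_eval``` come from earlier in the example):

```python
# option 2 from the comment above: a function of the current iteration
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                learning_rates=lambda curr_iter: 0.05 * (0.99 ** curr_iter),
                valid_sets=lgb_eval)
```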
@@ -83,6 +82,16 @@ gbm = lgb.train(params,
print('Finish 20 - 30 rounds with decay learning rates...')
# change other parameters during training
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                valid_sets=lgb_eval,
                callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)])
print('Finish 30 - 40 rounds with changing bagging_fraction...')
# self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array
# log likelihood loss
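The function definition itself is outside the hunk; a minimal sketch matching the signature and the log-likelihood note above (the exact body in the example may differ):

```python
import numpy as np

def loglikelihood(preds, train_data):
    labels = train_data.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # raw scores -> probabilities (sigmoid)
    grad = preds - labels                 # first-order derivative of log loss
    hess = preds * (1.0 - preds)          # second-order derivative of log loss
    return grad, hess
```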
@@ -108,7 +117,7 @@ gbm = lgb.train(params,
                feval=binary_error,
                valid_sets=lgb_eval)
print('Finish 30 - 40 rounds with self-defined objective function and eval metric...')
print('Finish 40 - 50 rounds with self-defined objective function and eval metric...')
print('Start a new training job...')
# callback
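Similarly, ```binary_error``` is defined outside the hunk; a sketch matching the ```feval``` contract of returning ```(metric_name, value, is_higher_better)``` (the name comes from the call above, the body is an assumption):

```python
import numpy as np

def binary_error(preds, train_data):
    labels = train_data.get_label()
    return 'error', np.mean(labels != (preds > 0.5)), False
```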
......
@@ -218,20 +218,21 @@ except ImportError:
except ImportError:
    SKLEARN_StratifiedKFold = False

def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False):
def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True):
    """
    Make an n-fold list of CVBooster from random indices.
    """
    np.random.seed(seed)
    if stratified:
        if SKLEARN_StratifiedKFold:
            sfk = StratifiedKFold(n_splits=nfold, shuffle=True, random_state=seed)
            sfk = StratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed)
            idset = [x[1] for x in sfk.split(X=full_data.get_label(), y=full_data.get_label())]
        else:
            raise LightGBMError('Scikit-learn is required for stratified cv')
    else:
        full_data.construct()
        randidx = np.random.permutation(full_data.num_data())
        if shuffle:
            randidx = np.random.permutation(full_data.num_data())
        else:
            # without this branch randidx would be undefined when shuffle=False;
            # fall back to the original row order
            randidx = np.arange(full_data.num_data())
        kstep = int(len(randidx) / nfold)
        idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)]
@@ -260,7 +261,7 @@ def _agg_cv_result(raw_results):
    return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]

def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
       metrics=None, fobj=None, feval=None, init_model=None,
       shuffle=True, metrics=None, fobj=None, feval=None, init_model=None,
       feature_name=None, categorical_feature=None,
       early_stopping_rounds=None, fpreproc=None,
       verbose_eval=None, show_stdv=True, seed=0,
@@ -280,6 +281,8 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
        Number of folds in CV.
    stratified : bool
        Perform stratified sampling.
    shuffle : bool
        Whether to shuffle the data before splitting.
    folds : a KFold or StratifiedKFold instance
        Sklearn KFolds or StratifiedKFolds.
    metrics : string or list of strings
@@ -342,7 +345,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
        params['metric'].extend(metrics)
    results = collections.defaultdict(list)
    cvfolds = _make_n_folds(train_set, nfold, params, seed, fpreproc, stratified)
    cvfolds = _make_n_folds(train_set, nfold, params, seed, fpreproc, stratified, shuffle)
    # setup callbacks
    if callbacks is None:
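A short usage sketch of the new ```shuffle``` flag (synthetic data; all values illustrative):

```python
import numpy as np
import lightgbm as lgb

X = np.random.rand(200, 5)
y = np.random.rand(200)
train_set = lgb.Dataset(X, y)
# shuffle=False keeps each fold a contiguous slice of the original row order
cv_results = lgb.cv({'objective': 'regression'}, train_set,
                    num_boost_round=10, nfold=5, shuffle=False, seed=0)
```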
......
@@ -35,7 +35,6 @@ public:
  void Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
            const std::vector<const Metric*>& training_metrics) override {
    GBDT::Init(config, train_data, object_function, training_metrics);
    shrinkage_rate_ = 1.0;
    random_for_drop_ = Random(gbdt_config_->drop_seed);
  }
/*!
@@ -56,7 +55,7 @@ public:
  void ResetTrainingData(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
                         const std::vector<const Metric*>& training_metrics) {
    GBDT::ResetTrainingData(config, train_data, object_function, training_metrics);
    shrinkage_rate_ = 1.0;
    shrinkage_rate_ = gbdt_config_->learning_rate / (gbdt_config_->learning_rate + static_cast<double>(drop_index_.size()));
  }
/*!
@@ -106,10 +105,17 @@ private:
        train_score_updater_->AddScore(models_[curr_tree].get(), curr_class);
      }
    }
    shrinkage_rate_ = 1.0 / (1.0 + drop_index_.size());
    shrinkage_rate_ = gbdt_config_->learning_rate / (gbdt_config_->learning_rate + static_cast<double>(drop_index_.size()));
  }
  /*!
  * \brief normalize dropped trees
  * NOTE: with k = num_drop_tree and lr = learning_rate, shrinkage_rate_ = lr / (k + lr)
  * step 1: shrink the tree's weight to -1 -> the tree is dropped from the training score
  * step 2: shrink the weight from -1 to k / (k + lr) - 1
  *         -> adding the tree back normalizes the validation score
  * step 3: shrink the weight from k / (k + lr) - 1 to k / (k + lr)
  *         -> adding the tree back normalizes the training score
  * the tree ends with weight k / (k + lr)
  */
  void Normalize() {
    double k = static_cast<double>(drop_index_.size());
@@ -122,7 +128,7 @@ private:
          score_updater->AddScore(models_[curr_tree].get(), curr_class);
        }
        // update training score
        models_[curr_tree]->Shrinkage(-k);
        models_[curr_tree]->Shrinkage(-k / gbdt_config_->learning_rate);
        train_score_updater_->AddScore(models_[curr_tree].get(), curr_class);
      }
    }
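To make the weight bookkeeping concrete, here is a small numeric walk-through of the comment above (Python, illustrative values only):

```python
lr, k = 0.1, 3.0            # learning_rate and number of dropped trees
shrinkage = lr / (k + lr)   # shrinkage_rate_ set after dropping

leaf = 1.0                  # leaf output of one dropped tree (weight 1)
train = 1.0                 # the tree currently appears once in the training score
valid = 1.0                 # ... and once in the validation score

leaf *= -1.0                # step 1: Shrinkage(-1)
train += leaf               # AddScore on train -> tree weight drops to 0

leaf *= shrinkage           # step 2: weight becomes k / (k + lr) - 1
valid += leaf               # AddScore on valid -> weight k / (k + lr)

leaf *= -k / lr             # step 3: weight becomes k / (k + lr)
train += leaf               # AddScore on train -> weight k / (k + lr)

assert abs(train - k / (k + lr)) < 1e-12
assert abs(valid - k / (k + lr)) < 1e-12
```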
......
@@ -56,6 +56,13 @@ def test_regression():
    preds = lgb_model.predict(x_test)
    assert mean_squared_error(preds, y_test) < 100

def test_lambdarank():
    from sklearn.datasets import load_svmlight_file
    X_train, y_train = load_svmlight_file('../../examples/lambdarank/rank.train')
    X_test, y_test = load_svmlight_file('../../examples/lambdarank/rank.test')
    q_train = np.loadtxt('../../examples/lambdarank/rank.train.query')
    # smoke test: fitting the ranker without raising is the assertion here
    lgb_model = lgb.LGBMRanker().fit(X_train, y_train, group=q_train, eval_at=[1])
def test_regression_with_custom_objective():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
@@ -118,6 +125,7 @@ def test_early_stopping():
test_binary_classification()
test_multiclass_classification()
test_regression()
test_lambdarank()
test_regression_with_custom_objective()
test_binary_classification_with_custom_objective()
test_early_stopping()
\ No newline at end of file
test_early_stopping()