tianlh / LightGBM-DCU · Commits

Commit 99b483dd
add learning rate to dart (#133)

authored Dec 19, 2016 by wxchan, committed Dec 19, 2016 by Guolin Ke
parent 574d5342

Showing 5 changed files with 41 additions and 13 deletions (+41, −13)
docs/Parameters.md                         +3  −1
examples/python-guide/advanced_example.py  +11 −2
python-package/lightgbm/engine.py          +8  −5
src/boosting/dart.hpp                      +10 −4
tests/python_package_test/test_sklearn.py  +9  −1
docs/Parameters.md

@@ -28,6 +28,7 @@ The parameter format is ```key1=value1 key2=value2 ... ```. And parameters can
   * number of boosting iterations/trees
 * ```learning_rate```, default=```0.1```, type=double, alias=```shrinkage_rate```
   * shrinkage rate
+  * in ```dart```, it also affects the normalization weights of dropped trees
 * ```num_leaves```, default=```127```, type=int, alias=```num_leaf```
   * number of leaves in one tree
 * ```tree_learner```, default=```serial```, type=enum, options=```serial```, ```feature```, ```data```

@@ -56,7 +57,7 @@ The parameter format is ```key1=value1 key2=value2 ... ```. And parameters can
   * Random seed for feature fraction.
 * ```bagging_fraction```, default=```1.0```, type=double, ```0.0 < bagging_fraction < 1.0```, alias=```sub_row```
   * Like ```feature_fraction```, but this will randomly select part of the data
-  * can use this to speed up training
+  * Can use this to speed up training
   * Can use this to deal with over-fitting
   * Note: To enable bagging, ```bagging_freq``` should be set to a non-zero value as well
 * ```bagging_freq```, default=```0```, type=int

@@ -74,6 +75,7 @@ The parameter format is ```key1=value1 key2=value2 ... ```. And parameters can
   * The minimal gain to perform a split
 * ```drop_rate```, default=```0.01```, type=double
   * only used in ```dart```; will drop ```drop_rate * current_num_models``` trees before boosting.
+  * If you want a ```skip_rate``` like in xgboost, you can use [callbacks](Python-API.md#callbacks) to change ```drop_rate``` during training (see the sketch after this diff).
 * ```drop_seed```, default=```4```, type=int
   * only used in ```dart```; random seed used to choose which models to drop.
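The ```skip_rate``` workaround above can be sketched with the Python package's ```reset_parameter``` callback. This is a hypothetical illustration rather than code from this commit: the names ```skip_prob```, ```base_drop_rate```, and ```drop_rate_schedule``` are made up, and it assumes ```drop_rate``` can be reset mid-training like any other parameter passed to ```reset_parameter```.

```python
import random

import numpy as np
import lightgbm as lgb

# Hypothetical sketch: emulate xgboost-style skip_rate by resetting
# drop_rate each iteration. With probability skip_prob the dropout step
# is skipped (drop_rate = 0); otherwise the base rate is used.
skip_prob = 0.5        # plays the role of xgboost's skip_rate (made-up value)
base_drop_rate = 0.01  # the default drop_rate documented above

def drop_rate_schedule(curr_iter):
    # reset_parameter calls this with the current iteration number
    return 0.0 if random.random() < skip_prob else base_drop_rate

# small synthetic dataset so the sketch is self-contained
lgb_train = lgb.Dataset(np.random.rand(100, 5), np.random.rand(100))

gbm = lgb.train({'boosting_type': 'dart', 'objective': 'regression'},
                lgb_train,
                num_boost_round=10,
                callbacks=[lgb.reset_parameter(drop_rate=drop_rate_schedule)])
```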
examples/python-guide/advanced_example.py

@@ -73,7 +73,6 @@ print('Finish 10 - 20 rounds with model file...')
 # learning_rates accepts:
 # 1. list/tuple with length = num_boost_round
 # 2. function(curr_iter)
-# 3. function(curr_iter, total_iter)
 gbm = lgb.train(params,
                 lgb_train,
                 num_boost_round=10,

@@ -83,6 +82,16 @@ gbm = lgb.train(params,
 print('Finish 20 - 30 rounds with decay learning rates...')
 
+# change other parameters during training
+gbm = lgb.train(params,
+                lgb_train,
+                num_boost_round=10,
+                init_model=gbm,
+                valid_sets=lgb_eval,
+                callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)])
+
+print('Finish 30 - 40 rounds with changing bagging_fraction...')
+
 # self-defined objective function
 # f(preds: array, train_data: Dataset) -> grad: array, hess: array
 # log likelihood loss

@@ -108,7 +117,7 @@ gbm = lgb.train(params,
                 feval=binary_error,
                 valid_sets=lgb_eval)
 
-print('Finish 30 - 40 rounds with self-defined objective function and eval metric...')
+print('Finish 40 - 50 rounds with self-defined objective function and eval metric...')
 
 print('Start a new training job...')
 # callback
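For context, the ```learning_rates``` argument referenced by the first hunk accepts either a list with one rate per round or a single-argument function of the current iteration. A minimal sketch reusing the ```params```/```lgb_train```/```lgb_eval```/```gbm``` names from the example file above; the 0.05 · 0.99^i decay schedule is an arbitrary illustration, not from the commit.

```python
# Both accepted forms of learning_rates (decay constants are arbitrary):

# 1. list/tuple with length = num_boost_round
rates_as_list = [0.05 * (0.99 ** i) for i in range(10)]

# 2. function(curr_iter)
def rates_as_func(curr_iter):
    return 0.05 * (0.99 ** curr_iter)

gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                learning_rates=rates_as_func,  # or learning_rates=rates_as_list
                valid_sets=lgb_eval)
```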
python-package/lightgbm/engine.py

@@ -218,19 +218,20 @@ except ImportError:
 except ImportError:
     SKLEARN_StratifiedKFold = False
 
 
-def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False):
+def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True):
     """
     Make an n-fold list of CVBooster from random indices.
     """
     np.random.seed(seed)
     if stratified:
         if SKLEARN_StratifiedKFold:
-            sfk = StratifiedKFold(n_splits=nfold, shuffle=True, random_state=seed)
+            sfk = StratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed)
             idset = [x[1] for x in sfk.split(X=full_data.get_label(), y=full_data.get_label())]
         else:
             raise LightGBMError('Scikit-learn is required for stratified cv')
     else:
         full_data.construct()
-        randidx = np.random.permutation(full_data.num_data())
+        if shuffle:
+            randidx = np.random.permutation(full_data.num_data())
         kstep = int(len(randidx) / nfold)
         idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)]

@@ -260,7 +261,7 @@ def _agg_cv_result(raw_results):
     return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]
 
 
 def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
-       metrics=None, fobj=None, feval=None, init_model=None,
+       shuffle=True, metrics=None, fobj=None, feval=None, init_model=None,
        feature_name=None, categorical_feature=None,
        early_stopping_rounds=None, fpreproc=None,
        verbose_eval=None, show_stdv=True, seed=0,

@@ -280,6 +281,8 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
     Number of folds in CV.
     stratified : bool
         Perform stratified sampling.
+    shuffle : bool
+        Whether to shuffle the data before splitting.
     folds : a KFold or StratifiedKFold instance
         Sklearn KFolds or StratifiedKFolds.
     metrics : string or list of strings

@@ -342,7 +345,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
         params['metric'].extend(metrics)
 
     results = collections.defaultdict(list)
-    cvfolds = _make_n_folds(train_set, nfold, params, seed, fpreproc, stratified)
+    cvfolds = _make_n_folds(train_set, nfold, params, seed, fpreproc, stratified, shuffle)
 
     # setup callbacks
     if callbacks is None:
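A hedged usage sketch of the new ```shuffle``` flag (synthetic data, not part of the commit). The flag is forwarded to ```_make_n_folds```, where it drives ```StratifiedKFold```'s ```shuffle``` argument in the stratified case and the row permutation in the non-stratified case.

```python
import numpy as np
import lightgbm as lgb

# Synthetic binary-classification data so the sketch is self-contained.
X = np.random.rand(200, 5)
y = np.random.randint(0, 2, size=200)
train_set = lgb.Dataset(X, y)

# shuffle=False keeps StratifiedKFold from permuting rows, so the fold
# assignment is deterministic given the data order.
results = lgb.cv({'objective': 'binary'},
                 train_set,
                 num_boost_round=10,
                 nfold=5,
                 stratified=True,
                 shuffle=False,
                 seed=0)
```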
src/boosting/dart.hpp

@@ -35,7 +35,6 @@ public:
   void Init(const BoostingConfig* config, const Dataset* train_data,
             const ObjectiveFunction* object_function,
             const std::vector<const Metric*>& training_metrics) override {
     GBDT::Init(config, train_data, object_function, training_metrics);
-    shrinkage_rate_ = 1.0;
     random_for_drop_ = Random(gbdt_config_->drop_seed);
   }

@@ -56,7 +55,7 @@ public:
   void ResetTrainingData(const BoostingConfig* config, const Dataset* train_data,
                          const ObjectiveFunction* object_function,
                          const std::vector<const Metric*>& training_metrics) {
     GBDT::ResetTrainingData(config, train_data, object_function, training_metrics);
-    shrinkage_rate_ = 1.0;
+    shrinkage_rate_ = gbdt_config_->learning_rate / (gbdt_config_->learning_rate + static_cast<double>(drop_index_.size()));
   }

@@ -106,10 +105,17 @@ private:
         train_score_updater_->AddScore(models_[curr_tree].get(), curr_class);
       }
     }
-    shrinkage_rate_ = 1.0 / (1.0 + drop_index_.size());
+    shrinkage_rate_ = gbdt_config_->learning_rate / (gbdt_config_->learning_rate + static_cast<double>(drop_index_.size()));
   }
   /*!
   * \brief normalize dropped trees
+  * NOTE: num_drop_tree(k), learning_rate(lr), shrinkage_rate_ = lr / (k + lr)
+  * step 1: shrink tree to -1 -> drop tree
+  * step 2: shrink tree to k / (k + lr) - 1 from -1
+  *         -> normalize for valid data
+  * step 3: shrink tree to k / (k + lr) from k / (k + lr) - 1
+  *         -> normalize for train data
+  * end with tree weight = k / (k + lr)
   */
   void Normalize() {
     double k = static_cast<double>(drop_index_.size());

@@ -122,7 +128,7 @@ private:
         score_updater->AddScore(models_[curr_tree].get(), curr_class);
       }
       // update training score
-      models_[curr_tree]->Shrinkage(-k);
+      models_[curr_tree]->Shrinkage(-k / gbdt_config_->learning_rate);
       train_score_updater_->AddScore(models_[curr_tree].get(), curr_class);
     }
   }
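The added comment block pins down the arithmetic: with k dropped trees and learning rate lr, a dropped tree's weight now ends at k / (k + lr) instead of the previous k / (k + 1), so a small learning rate barely discounts the dropped trees (lr = 0.1, k = 3 gives 3/3.1 ≈ 0.97 rather than 3/4 = 0.75). A small Python replay of the three steps, plain arithmetic rather than LightGBM code: per the diff, Shrinkage(x) rescales a tree's stored output and AddScore adds that output to a score accumulator. The step-2 multiplier lives in a collapsed part of the diff, so it is applied here as a direct assignment.

```python
# Numeric replay of the three normalization steps documented above.
k = 3.0                 # number of dropped trees this iteration
lr = 0.1                # learning_rate
target = k / (k + lr)   # documented final weight of a dropped tree

w = 1.0                             # tree's stored weight; both scores include it
train_score, valid_score = 1.0, 1.0

# step 1: shrink tree to -1, AddScore on train -> tree dropped from train score
w = -1.0
train_score += w                    # 1 + (-1) = 0

# step 2: shrink tree to k/(k+lr) - 1, AddScore on valid
w = target - 1.0                    # multiplier for this step is in a collapsed hunk
valid_score += w                    # 1 + (k/(k+lr) - 1) = k/(k+lr)

# step 3: Shrinkage(-k / lr), the factor changed in the last hunk, AddScore on train
w *= -k / lr                        # (-lr/(k+lr)) * (-k/lr) = k/(k+lr)
train_score += w                    # 0 + k/(k+lr)

assert abs(w - target) < 1e-12
assert abs(train_score - target) < 1e-12
assert abs(valid_score - target) < 1e-12
```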
tests/python_package_test/test_sklearn.py

@@ -56,6 +56,13 @@ def test_regression():
     preds = lgb_model.predict(x_test)
     assert mean_squared_error(preds, y_test) < 100
 
 
+def test_lambdarank():
+    from sklearn.datasets import load_svmlight_file
+    X_train, y_train = load_svmlight_file('../../examples/lambdarank/rank.train')
+    X_test, y_test = load_svmlight_file('../../examples/lambdarank/rank.test')
+    q_train = np.loadtxt('../../examples/lambdarank/rank.train.query')
+    lgb_model = lgb.LGBMRanker().fit(X_train, y_train, group=q_train, eval_at=[1])
+
+
 def test_regression_with_custom_objective():
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import load_boston

@@ -118,6 +125,7 @@ def test_early_stopping():
     test_binary_classification()
     test_multiclass_classification()
     test_regression()
+    test_lambdarank()
     test_regression_with_custom_objective()
     test_binary_classification_with_custom_objective()
     test_early_stopping()