"git@developer.sourcefind.cn:jerrrrry/infinicore.git" did not exist on "da0cef143c7f7102a42a9752014a8539f2108683"
Commit 00a125d5 authored by Nikita Titov, committed by Qiwei Ye

[docs] added new parameters aliases (#1537)

* added new aliases for params

* run helper/parameter_generator.py

* removed useless test
parent 7ed1ed3e
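
With this change, parameter spellings familiar from scikit-learn and XGBoost (``n_estimators``, ``eta``, ``n_jobs``, ``random_state``, and others) are accepted directly in ``params``. A minimal Python sketch of the intended usage, assuming a LightGBM build that includes this commit and using made-up toy data:

    import numpy as np
    import lightgbm as lgb

    # toy regression data, purely illustrative
    X = np.random.rand(500, 10)
    y = np.random.rand(500)
    train_set = lgb.Dataset(X, label=y)

    params = {
        "objective": "regression",
        "eta": 0.05,          # alias of learning_rate
        "n_jobs": 2,          # alias of num_threads
        "random_state": 42,   # alias of seed
    }
    booster = lgb.train(params, train_set, num_boost_round=50)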
@@ -140,7 +140,7 @@ lgb.cv <- function(params = list(),
     begin_iteration <- predictor$current_iter() + 1
   }
   # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one
-  n_trees <- c("num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds")
+  n_trees <- c("num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", "num_round", "num_rounds", "num_boost_round", "n_estimators")
   if (any(names(params) %in% n_trees)) {
     end_iteration <- begin_iteration + params[[which(names(params) %in% n_trees)[1]]] - 1
   } else {
...
@@ -117,7 +117,7 @@ lgb.train <- function(params = list(),
     begin_iteration <- predictor$current_iter() + 1
   }
   # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one
-  n_rounds <- c("num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds")
+  n_rounds <- c("num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", "num_round", "num_rounds", "num_boost_round", "n_estimators")
   if (any(names(params) %in% n_rounds)) {
     end_iteration <- begin_iteration + params[[which(names(params) %in% n_rounds)[1]]] - 1
   } else {
...
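
Both R wrappers keep the existing rule: when several of these names appear in ``params``, only the first matching entry is used. A hypothetical Python sketch of that first-match rule (the helper name is illustrative, not part of the package):

    # hypothetical helper mirroring the R logic: scan params in order and
    # use the first entry whose name is one of the iteration aliases
    NUM_ITERATIONS_ALIASES = {"num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
                              "num_round", "num_rounds", "num_boost_round", "n_estimators"}

    def resolve_num_iterations(params, default=100):
        for name, value in params.items():
            if name in NUM_ITERATIONS_ALIASES:
                return int(value)
        return default

    print(resolve_num_iterations({"n_estimators": 200, "num_round": 50}))  # -> 200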
@@ -73,7 +73,7 @@ Core Parameters
 - ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Applications>`__
-- ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``xentropy`` for general probability labels in [0, 1]
+- ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
 - multi-class classification application
@@ -109,13 +109,13 @@ Core Parameters
 - ``goss``, Gradient-based One-Side Sampling
-- ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``data_filename``
+- ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
 - path of training data, LightGBM will train from this data
 - **Note**: can be used only in CLI version
-- ``valid`` :raw-html:`<a id="valid" title="Permalink to this parameter" href="#valid">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``test``, ``valid_data``, ``valid_data_file``, ``test_data``, ``valid_filenames``
+- ``valid`` :raw-html:`<a id="valid" title="Permalink to this parameter" href="#valid">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``test``, ``valid_data``, ``valid_data_file``, ``test_data``, ``test_data_file``, ``valid_filenames``
 - path(s) of validation/test data, LightGBM will output metrics for these data
@@ -123,7 +123,7 @@ Core Parameters
 - **Note**: can be used only in CLI version
-- ``num_iterations`` :raw-html:`<a id="num_iterations" title="Permalink to this parameter" href="#num_iterations">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, aliases: ``num_iteration``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, constraints: ``num_iterations >= 0``
+- ``num_iterations`` :raw-html:`<a id="num_iterations" title="Permalink to this parameter" href="#num_iterations">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, constraints: ``num_iterations >= 0``
 - number of boosting iterations
@@ -131,17 +131,17 @@ Core Parameters
 - **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
-- ``learning_rate`` :raw-html:`<a id="learning_rate" title="Permalink to this parameter" href="#learning_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.1``, type = double, aliases: ``shrinkage_rate``, constraints: ``learning_rate > 0.0``
+- ``learning_rate`` :raw-html:`<a id="learning_rate" title="Permalink to this parameter" href="#learning_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.1``, type = double, aliases: ``shrinkage_rate``, ``eta``, constraints: ``learning_rate > 0.0``
 - shrinkage rate
 - in ``dart``, it also affects on normalization weights of dropped trees
-- ``num_leaves`` :raw-html:`<a id="num_leaves" title="Permalink to this parameter" href="#num_leaves">&#x1F517;&#xFE0E;</a>`, default = ``31``, type = int, aliases: ``num_leaf``, constraints: ``num_leaves > 1``
+- ``num_leaves`` :raw-html:`<a id="num_leaves" title="Permalink to this parameter" href="#num_leaves">&#x1F517;&#xFE0E;</a>`, default = ``31``, type = int, aliases: ``num_leaf``, ``max_leaves``, ``max_leaf``, constraints: ``num_leaves > 1``
 - max number of leaves in one tree
-- ``tree_learner`` :raw-html:`<a id="tree_learner" title="Permalink to this parameter" href="#tree_learner">&#x1F517;&#xFE0E;</a>`, default = ``serial``, type = enum, options: ``serial``, ``feature``, ``data``, ``voting``, aliases: ``tree``, ``tree_learner_type``
+- ``tree_learner`` :raw-html:`<a id="tree_learner" title="Permalink to this parameter" href="#tree_learner">&#x1F517;&#xFE0E;</a>`, default = ``serial``, type = enum, options: ``serial``, ``feature``, ``data``, ``voting``, aliases: ``tree``, ``tree_type``, ``tree_learner_type``
 - ``serial``, single machine tree learner
@@ -153,7 +153,7 @@ Core Parameters
 - refer to `Parallel Learning Guide <./Parallel-Learning-Guide.rst>`__ to get more details
-- ``num_threads`` :raw-html:`<a id="num_threads" title="Permalink to this parameter" href="#num_threads">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``num_thread``, ``nthread``, ``nthreads``
+- ``num_threads`` :raw-html:`<a id="num_threads" title="Permalink to this parameter" href="#num_threads">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``num_thread``, ``nthread``, ``nthreads``, ``n_jobs``
 - number of threads for LightGBM
@@ -177,7 +177,7 @@ Core Parameters
 - **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
-- ``seed`` :raw-html:`<a id="seed" title="Permalink to this parameter" href="#seed">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``random_seed``
+- ``seed`` :raw-html:`<a id="seed" title="Permalink to this parameter" href="#seed">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``random_seed``, ``random_state``
 - this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``
@@ -252,7 +252,7 @@ Learning Control Parameters
 - L1 regularization
-- ``lambda_l2`` :raw-html:`<a id="lambda_l2" title="Permalink to this parameter" href="#lambda_l2">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_lambda``, constraints: ``lambda_l2 >= 0.0``
+- ``lambda_l2`` :raw-html:`<a id="lambda_l2" title="Permalink to this parameter" href="#lambda_l2">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_lambda``, ``lambda``, constraints: ``lambda_l2 >= 0.0``
 - L2 regularization
@@ -260,17 +260,17 @@ Learning Control Parameters
 - the minimal gain to perform split
-- ``drop_rate`` :raw-html:`<a id="drop_rate" title="Permalink to this parameter" href="#drop_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.1``, type = double, constraints: ``0.0 <= drop_rate <= 1.0``
+- ``drop_rate`` :raw-html:`<a id="drop_rate" title="Permalink to this parameter" href="#drop_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.1``, type = double, aliases: ``rate_drop``, constraints: ``0.0 <= drop_rate <= 1.0``
 - used only in ``dart``
-- dropout rate
+- dropout rate: a fraction of previous trees to drop during the dropout
 - ``max_drop`` :raw-html:`<a id="max_drop" title="Permalink to this parameter" href="#max_drop">&#x1F517;&#xFE0E;</a>`, default = ``50``, type = int
 - used only in ``dart``
-- max number of dropped trees on one iteration
+- max number of dropped trees during one boosting iteration
 - ``<=0`` means no limit
@@ -278,7 +278,7 @@ Learning Control Parameters
 - used only in ``dart``
-- probability of skipping drop
+- probability of skipping the dropout procedure during a boosting iteration
 - ``xgboost_dart_mode`` :raw-html:`<a id="xgboost_dart_mode" title="Permalink to this parameter" href="#xgboost_dart_mode">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
@@ -350,7 +350,7 @@ Learning Control Parameters
 - you need to specify all features in order. For example, ``mc=-1,0,1`` means decreasing for 1st feature, non-constraint for 2nd feature and increasing for the 3rd feature
-- ``feature_contri`` :raw-html:`<a id="feature_contri" title="Permalink to this parameter" href="#feature_contri">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-double, aliases: ``fc``, ``fp``, ``feature_penalty``
+- ``feature_contri`` :raw-html:`<a id="feature_contri" title="Permalink to this parameter" href="#feature_contri">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-double, aliases: ``feature_contrib``, ``fc``, ``fp``, ``feature_penalty``
 - used to control feature's split gain, will use ``gain[i] = max(0, feature_contri[i]) * gain[i]`` to replace the split gain of i-th feature
@@ -397,13 +397,13 @@ IO Parameters
 - set this to larger value if data is very sparse
-- ``histogram_pool_size`` :raw-html:`<a id="histogram_pool_size" title="Permalink to this parameter" href="#histogram_pool_size">&#x1F517;&#xFE0E;</a>`, default = ``-1.0``, type = double
+- ``histogram_pool_size`` :raw-html:`<a id="histogram_pool_size" title="Permalink to this parameter" href="#histogram_pool_size">&#x1F517;&#xFE0E;</a>`, default = ``-1.0``, type = double, aliases: ``hist_pool_size``
 - max cache size in MB for historical histogram
 - ``< 0`` means no limit
-- ``data_random_seed`` :raw-html:`<a id="data_random_seed" title="Permalink to this parameter" href="#data_random_seed">&#x1F517;&#xFE0E;</a>`, default = ``1``, type = int
+- ``data_random_seed`` :raw-html:`<a id="data_random_seed" title="Permalink to this parameter" href="#data_random_seed">&#x1F517;&#xFE0E;</a>`, default = ``1``, type = int, aliases: ``data_seed``
 - random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
@@ -413,7 +413,7 @@ IO Parameters
 - **Note**: can be used only in CLI version
-- ``snapshot_freq`` :raw-html:`<a id="snapshot_freq" title="Permalink to this parameter" href="#snapshot_freq">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int
+- ``snapshot_freq`` :raw-html:`<a id="snapshot_freq" title="Permalink to this parameter" href="#snapshot_freq">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int, aliases: ``save_period``
 - frequency of saving model file snapshot
@@ -431,7 +431,7 @@ IO Parameters
 - **Note**: can be used only in CLI version
-- ``output_result`` :raw-html:`<a id="output_result" title="Permalink to this parameter" href="#output_result">&#x1F517;&#xFE0E;</a>`, default = ``LightGBM_predict_result.txt``, type = string, aliases: ``predict_result``, ``prediction_result``
+- ``output_result`` :raw-html:`<a id="output_result" title="Permalink to this parameter" href="#output_result">&#x1F517;&#xFE0E;</a>`, default = ``LightGBM_predict_result.txt``, type = string, aliases: ``predict_result``, ``prediction_result``, ``predict_name``, ``prediction_name``, ``pred_name``, ``name_pred``
 - filename of prediction result in ``prediction`` task
@@ -644,7 +644,7 @@ Objective Parameters
 - used only in ``multi-class`` classification application
-- ``is_unbalance`` :raw-html:`<a id="is_unbalance" title="Permalink to this parameter" href="#is_unbalance">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``unbalanced_sets``
+- ``is_unbalance`` :raw-html:`<a id="is_unbalance" title="Permalink to this parameter" href="#is_unbalance">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``unbalance``, ``unbalanced_sets``
 - used only in ``binary`` application
@@ -729,7 +729,7 @@ Metric Parameters
 - metric(s) to be evaluated on the evaluation sets **in addition** to what is provided in the training arguments
-- ``""`` (empty string or not specific) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
+- ``""`` (empty string or not specified) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
 - ``"None"`` (string, **not** a ``None`` value) means that no metric will be registered, aliases: ``na``
...
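
For the objective wording above: ``binary`` expects hard labels in {0, 1}, while the cross-entropy application (objective ``xentropy``) accepts probability labels in [0, 1]. A small sketch with random toy data:

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(300, 5)
    y_prob = np.random.rand(300)  # "soft" targets in [0, 1]

    # the binary objective would reject these labels; the cross-entropy
    # application (objective "xentropy") accepts probabilities directly
    train_set = lgb.Dataset(X, label=y_prob)
    booster = lgb.train({"objective": "xentropy"}, train_set, num_boost_round=20)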
@@ -110,7 +110,7 @@ public:
 // descl2 = ``mape``, `MAPE loss <https://en.wikipedia.org/wiki/Mean_absolute_percentage_error>`__, aliases: ``mean_absolute_percentage_error``
 // descl2 = ``gamma``, Gamma regression with log-link. It might be useful, e.g., for modeling insurance claims severity, or for any target that might be `gamma-distributed <https://en.wikipedia.org/wiki/Gamma_distribution#Applications>`__
 // descl2 = ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Applications>`__
-// desc = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``xentropy`` for general probability labels in [0, 1]
+// desc = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
 // desc = multi-class classification application
 // descl2 = ``multiclass``, `softmax <https://en.wikipedia.org/wiki/Softmax_function>`__ objective function, aliases: ``softmax``
 // descl2 = ``multiclassova``, `One-vs-All <https://en.wikipedia.org/wiki/Multiclass_classification#One-vs.-rest>`__ binary objective function, aliases: ``multiclass_ova``, ``ova``, ``ovr``
@@ -135,33 +135,33 @@ public:
 // desc = ``goss``, Gradient-based One-Side Sampling
 std::string boosting = "gbdt";

-// alias = train, train_data, data_filename
+// alias = train, train_data, train_data_file, data_filename
 // desc = path of training data, LightGBM will train from this data
 // desc = **Note**: can be used only in CLI version
 std::string data = "";

-// alias = test, valid_data, valid_data_file, test_data, valid_filenames
+// alias = test, valid_data, valid_data_file, test_data, test_data_file, valid_filenames
 // default = ""
 // desc = path(s) of validation/test data, LightGBM will output metrics for these data
 // desc = support multiple validation data, separated by ``,``
 // desc = **Note**: can be used only in CLI version
 std::vector<std::string> valid;

-// alias = num_iteration, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators
+// alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators
 // check = >=0
 // desc = number of boosting iterations
 // desc = **Note**: for Python/R-package, **this parameter is ignored**, use ``num_boost_round`` (Python) or ``nrounds`` (R) input arguments of ``train`` and ``cv`` methods instead
 // desc = **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
 int num_iterations = 100;

-// alias = shrinkage_rate
+// alias = shrinkage_rate, eta
 // check = >0.0
 // desc = shrinkage rate
 // desc = in ``dart``, it also affects on normalization weights of dropped trees
 double learning_rate = 0.1;

 // default = 31
-// alias = num_leaf
+// alias = num_leaf, max_leaves, max_leaf
 // check = >1
 // desc = max number of leaves in one tree
 int num_leaves = kDefaultNumLeaves;
@@ -169,7 +169,7 @@ public:
 // [doc-only]
 // type = enum
 // options = serial, feature, data, voting
-// alias = tree, tree_learner_type
+// alias = tree, tree_type, tree_learner_type
 // desc = ``serial``, single machine tree learner
 // desc = ``feature``, feature parallel tree learner, aliases: ``feature_parallel``
 // desc = ``data``, data parallel tree learner, aliases: ``data_parallel``
@@ -177,7 +177,7 @@ public:
 // desc = refer to `Parallel Learning Guide <./Parallel-Learning-Guide.rst>`__ to get more details
 std::string tree_learner = "serial";

-// alias = num_thread, nthread, nthreads
+// alias = num_thread, nthread, nthreads, n_jobs
 // desc = number of threads for LightGBM
 // desc = ``0`` means default number of threads in OpenMP
 // desc = for the best speed, set this to the number of **real CPU cores**, not the number of threads (most CPUs use `hyper-threading <https://en.wikipedia.org/wiki/Hyper-threading>`__ to generate 2 threads per CPU core)
@@ -197,7 +197,7 @@ public:
 std::string device_type = "cpu";

 // [doc-only]
-// alias = random_seed
+// alias = random_seed, random_state
 // desc = this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``
 // desc = will be overridden, if you set other seeds
 int seed = 0;
@@ -266,7 +266,7 @@ public:
 // desc = L1 regularization
 double lambda_l1 = 0.0;

-// alias = reg_lambda
+// alias = reg_lambda, lambda
 // check = >=0.0
 // desc = L2 regularization
 double lambda_l2 = 0.0;
@@ -276,21 +276,22 @@ public:
 // desc = the minimal gain to perform split
 double min_gain_to_split = 0.0;

+// alias = rate_drop
 // check = >=0.0
 // check = <=1.0
 // desc = used only in ``dart``
-// desc = dropout rate
+// desc = dropout rate: a fraction of previous trees to drop during the dropout
 double drop_rate = 0.1;

 // desc = used only in ``dart``
-// desc = max number of dropped trees on one iteration
+// desc = max number of dropped trees during one boosting iteration
 // desc = ``<=0`` means no limit
 int max_drop = 50;

 // check = >=0.0
 // check = <=1.0
 // desc = used only in ``dart``
-// desc = probability of skipping drop
+// desc = probability of skipping the dropout procedure during a boosting iteration
 double skip_drop = 0.5;

 // desc = used only in ``dart``
@@ -355,7 +356,7 @@ public:
 std::vector<int8_t> monotone_constraints;

 // type = multi-double
-// alias = fc, fp, feature_penalty
+// alias = feature_contrib, fc, fp, feature_penalty
 // default = None
 // desc = used to control feature's split gain, will use ``gain[i] = max(0, feature_contri[i]) * gain[i]`` to replace the split gain of i-th feature
 // desc = you need to specify all features in order
@@ -395,10 +396,12 @@ public:
 // desc = set this to larger value if data is very sparse
 int bin_construct_sample_cnt = 200000;

+// alias = hist_pool_size
 // desc = max cache size in MB for historical histogram
 // desc = ``< 0`` means no limit
 double histogram_pool_size = -1.0;

+// alias = data_seed
 // desc = random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
 int data_random_seed = 1;
@@ -407,6 +410,7 @@ public:
 // desc = **Note**: can be used only in CLI version
 std::string output_model = "LightGBM_model.txt";

+// alias = save_period
 // desc = frequency of saving model file snapshot
 // desc = set this to positive value to enable this function. For example, the model file will be snapshotted at each iteration if ``snapshot_freq=1``
 // desc = **Note**: can be used only in CLI version
@@ -419,7 +423,7 @@ public:
 // desc = **Note**: can be used only in CLI version
 std::string input_model = "";

-// alias = predict_result, prediction_result
+// alias = predict_result, prediction_result, predict_name, prediction_name, pred_name, name_pred
 // desc = filename of prediction result in ``prediction`` task
 // desc = **Note**: can be used only in CLI version
 std::string output_result = "LightGBM_predict_result.txt";
@@ -588,7 +592,7 @@ public:
 // desc = used only in ``multi-class`` classification application
 int num_class = 1;

-// alias = unbalanced_sets
+// alias = unbalance, unbalanced_sets
 // desc = used only in ``binary`` application
 // desc = set this to ``true`` if training data are unbalance
 // desc = **Note**: this parameter cannot be used at the same time with ``scale_pos_weight``, choose only **one** of them
@@ -658,7 +662,7 @@ public:
 // default = ""
 // type = multi-enum
 // desc = metric(s) to be evaluated on the evaluation sets **in addition** to what is provided in the training arguments
-// descl2 = ``""`` (empty string or not specific) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
+// descl2 = ``""`` (empty string or not specified) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
 // descl2 = ``"None"`` (string, **not** a ``None`` value) means that no metric will be registered, aliases: ``na``
 // descl2 = ``l1``, absolute loss, aliases: ``mean_absolute_error``, ``mae``, ``regression_l1``
 // descl2 = ``l2``, square loss, aliases: ``mean_squared_error``, ``mse``, ``regression_l2``, ``regression``
...
@@ -660,12 +660,8 @@ class Dataset(object):
                 warnings.warn('{0} keyword has been found in `params` and will be ignored. '
                               'Please use {0} argument of the Dataset constructor to pass this parameter.'.format(key))
         self.predictor = predictor
-        if "verbosity" in params:
-            params.setdefault("verbose", params.pop("verbosity"))
         if silent:
             params["verbose"] = 0
-        elif "verbose" not in params:
-            params["verbose"] = 1
         # get categorical features
         if categorical_feature is not None:
             categorical_indices = set()
@@ -1340,12 +1336,8 @@ class Booster(object):
         self.best_iteration = -1
         self.best_score = {}
         params = {} if params is None else params
-        if "verbosity" in params:
-            params.setdefault("verbose", params.pop("verbosity"))
         if silent:
             params["verbose"] = 0
-        elif "verbose" not in params:
-            params["verbose"] = 1
        if train_set is not None:
             # Training task
             if not isinstance(train_set, Dataset):
...
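
The removed lines translated ``verbosity`` into ``verbose`` inside the Python wrapper; since ``verbose`` is registered as an alias of ``verbosity`` in the core parameter table, either spelling should now behave the same without that shim. A minimal sketch with toy data:

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(200, 5)
    y = np.random.randint(0, 2, size=200)

    # either spelling ends up as the same core parameter, so the wrapper
    # no longer needs to rename "verbosity" to "verbose" itself
    ds_a = lgb.Dataset(X, label=y, params={"verbosity": -1})
    ds_b = lgb.Dataset(X, label=y, params={"verbose": -1})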
@@ -98,7 +98,8 @@ def train(params, train_set, num_boost_round=100,
         The trained Booster model.
     """
     # create predictor first
-    for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
+    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
+                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
         if alias in params:
             num_boost_round = int(params.pop(alias))
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -396,7 +397,8 @@ def cv(params, train_set, num_boost_round=100,
     if not isinstance(train_set, Dataset):
         raise TypeError("Traninig only accepts Dataset object")
-    for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
+    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
+                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             num_boost_round = params.pop(alias)
...
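
As the loop above shows, any of the listed iteration aliases found in ``params`` overrides the ``num_boost_round`` argument and triggers a warning. A small sketch of that precedence with toy data:

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(200, 5)
    y = np.random.rand(200)
    train_set = lgb.Dataset(X, label=y)

    # "n_estimators" is in the alias list above, so it wins over the
    # num_boost_round argument and a warning is emitted
    booster = lgb.train({"objective": "regression", "n_estimators": 10},
                        train_set, num_boost_round=100)
    print(booster.current_iteration())  # expected: 10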
@@ -397,9 +397,6 @@ class LGBMModel(_LGBMModelBase):
         self._fobj = None
         evals_result = {}
         params = self.get_params()
-        # sklearn interface has another naming convention
-        params.setdefault('seed', params.pop('random_state'))
-        params.setdefault('nthread', params.pop('n_jobs'))
         # user can set verbose with kwargs, it has higher priority
         if 'verbose' not in params and self.silent:
             params['verbose'] = 0
...
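
With the explicit ``setdefault`` calls gone, ``random_state`` and ``n_jobs`` from the scikit-learn wrapper are expected to reach the core library through the generic alias table (``random_state`` -> ``seed``, ``n_jobs`` -> ``num_threads``). A minimal usage sketch with toy data:

    import numpy as np
    from lightgbm import LGBMClassifier

    X = np.random.rand(200, 5)
    y = np.random.randint(0, 2, size=200)

    # random_state and n_jobs are ordinary constructor arguments of the
    # sklearn wrapper; they now pass through to the core via the alias table
    clf = LGBMClassifier(n_estimators=20, random_state=42, n_jobs=2)
    clf.fit(X, y)
    print(clf.predict(X[:5]))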
@@ -11,13 +11,16 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"boost", "boosting"},
   {"train", "data"},
   {"train_data", "data"},
+  {"train_data_file", "data"},
   {"data_filename", "data"},
   {"test", "valid"},
   {"valid_data", "valid"},
   {"valid_data_file", "valid"},
   {"test_data", "valid"},
+  {"test_data_file", "valid"},
   {"valid_filenames", "valid"},
   {"num_iteration", "num_iterations"},
+  {"n_iter", "num_iterations"},
   {"num_tree", "num_iterations"},
   {"num_trees", "num_iterations"},
   {"num_round", "num_iterations"},
@@ -25,14 +28,20 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"num_boost_round", "num_iterations"},
   {"n_estimators", "num_iterations"},
   {"shrinkage_rate", "learning_rate"},
+  {"eta", "learning_rate"},
   {"num_leaf", "num_leaves"},
+  {"max_leaves", "num_leaves"},
+  {"max_leaf", "num_leaves"},
   {"tree", "tree_learner"},
+  {"tree_type", "tree_learner"},
   {"tree_learner_type", "tree_learner"},
   {"num_thread", "num_threads"},
   {"nthread", "num_threads"},
   {"nthreads", "num_threads"},
+  {"n_jobs", "num_threads"},
   {"device", "device_type"},
   {"random_seed", "seed"},
+  {"random_state", "seed"},
   {"min_data_per_leaf", "min_data_in_leaf"},
   {"min_data", "min_data_in_leaf"},
   {"min_child_samples", "min_data_in_leaf"},
@@ -53,10 +62,13 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"max_leaf_output", "max_delta_step"},
   {"reg_alpha", "lambda_l1"},
   {"reg_lambda", "lambda_l2"},
+  {"lambda", "lambda_l2"},
   {"min_split_gain", "min_gain_to_split"},
+  {"rate_drop", "drop_rate"},
   {"topk", "top_k"},
   {"mc", "monotone_constraints"},
   {"monotone_constraint", "monotone_constraints"},
+  {"feature_contrib", "feature_contri"},
   {"fc", "feature_contri"},
   {"fp", "feature_contri"},
   {"feature_penalty", "feature_contri"},
@@ -66,12 +78,19 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"forced_splits", "forcedsplits_filename"},
   {"verbose", "verbosity"},
   {"subsample_for_bin", "bin_construct_sample_cnt"},
+  {"hist_pool_size", "histogram_pool_size"},
+  {"data_seed", "data_random_seed"},
   {"model_output", "output_model"},
   {"model_out", "output_model"},
+  {"save_period", "snapshot_freq"},
   {"model_input", "input_model"},
   {"model_in", "input_model"},
   {"predict_result", "output_result"},
   {"prediction_result", "output_result"},
+  {"predict_name", "output_result"},
+  {"prediction_name", "output_result"},
+  {"pred_name", "output_result"},
+  {"name_pred", "output_result"},
   {"init_score_filename", "initscore_filename"},
   {"init_score_file", "initscore_filename"},
   {"init_score", "initscore_filename"},
@@ -114,6 +133,7 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"contrib", "predict_contrib"},
   {"convert_model_file", "convert_model"},
   {"num_classes", "num_class"},
+  {"unbalance", "is_unbalance"},
   {"unbalanced_sets", "is_unbalance"},
   {"metrics", "metric"},
   {"metric_types", "metric"},
...
@@ -179,17 +179,6 @@ class TestSklearn(unittest.TestCase):
         importance_gain_top1 = sorted(importances_gain, reverse=True)[0]
         self.assertNotEqual(importance_split_top1, importance_gain_top1)

-    def test_sklearn_backward_compatibility(self):
-        iris = load_iris()
-        X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)
-        # Tests that `seed` is the same as `random_state`
-        clf_1 = lgb.sklearn.LGBMClassifier(seed=42, subsample=0.6, colsample_bytree=0.8)
-        clf_2 = lgb.sklearn.LGBMClassifier(random_state=42, subsample=0.6, colsample_bytree=0.8)
-        y_pred_1 = clf_1.fit(X_train, y_train).predict_proba(X_test)
-        y_pred_2 = clf_2.fit(X_train, y_train).predict_proba(X_test)
-        np.testing.assert_allclose(y_pred_1, y_pred_2)
-
     # sklearn <0.19 cannot accept instance, but many tests could be passed only with min_data=1 and min_data_in_bin=1
     @unittest.skipIf(not sklearn_at_least_019, 'scikit-learn version is less than 0.19')
     def test_sklearn_integration(self):
...
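
The removed test pinned the sklearn-specific ``seed``/``random_state`` renaming, which is now covered by the generic alias table instead. A hedged sketch of an equivalent check at the ``lgb.train`` level (toy data, not part of the test suite):

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(300, 5)
    y = np.random.rand(300)

    base = {"objective": "regression", "bagging_fraction": 0.8, "bagging_freq": 1}
    pred_seed = lgb.train(dict(base, seed=42),
                          lgb.Dataset(X, label=y), num_boost_round=20).predict(X)
    pred_alias = lgb.train(dict(base, random_state=42),
                           lgb.Dataset(X, label=y), num_boost_round=20).predict(X)

    # with random_state mapped to seed, both runs should be configured
    # identically and produce the same model
    np.testing.assert_allclose(pred_seed, pred_alias)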