Unverified commit e95d5ab8 authored by Nikita Titov, committed by GitHub

add param aliases from scikit-learn (#4637)

parent 2543c8ed
@@ -18,6 +18,7 @@
 , "cat_feature"
 , "categorical_column"
 , "cat_column"
+, "categorical_features"
 )
 , "data_random_seed" = c(
 "data_random_seed"
@@ -61,7 +62,10 @@
 "linear_tree"
 , "linear_trees"
 )
-, "max_bin" = "max_bin"
+, "max_bin" = c(
+    "max_bin"
+    , "max_bins"
+)
 , "max_bin_by_feature" = "max_bin_by_feature"
 , "min_data_in_bin" = "min_data_in_bin"
 , "pre_partition" = c(
@@ -111,6 +115,7 @@
 , "num_rounds"
 , "num_boost_round"
 , "n_estimators"
+, "max_iter"
 )
 )
 return(c(learning_params, .DATASET_PARAMETERS()))
...
@@ -73,7 +73,7 @@ Core Parameters
 - **Note**: can be used only in CLI version; for language-specific packages you can use the correspondent functions
-- ``objective`` :raw-html:`<a id="objective" title="Permalink to this parameter" href="#objective">&#x1F517;&#xFE0E;</a>`, default = ``regression``, type = enum, options: ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``mape``, ``gamma``, ``tweedie``, ``binary``, ``multiclass``, ``multiclassova``, ``cross_entropy``, ``cross_entropy_lambda``, ``lambdarank``, ``rank_xendcg``, aliases: ``objective_type``, ``app``, ``application``
+- ``objective`` :raw-html:`<a id="objective" title="Permalink to this parameter" href="#objective">&#x1F517;&#xFE0E;</a>`, default = ``regression``, type = enum, options: ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``mape``, ``gamma``, ``tweedie``, ``binary``, ``multiclass``, ``multiclassova``, ``cross_entropy``, ``cross_entropy_lambda``, ``lambdarank``, ``rank_xendcg``, aliases: ``objective_type``, ``app``, ``application``, ``loss``
 - regression application
@@ -153,7 +153,7 @@ Core Parameters
 - **Note**: can be used only in CLI version
-- ``num_iterations`` :raw-html:`<a id="num_iterations" title="Permalink to this parameter" href="#num_iterations">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, constraints: ``num_iterations >= 0``
+- ``num_iterations`` :raw-html:`<a id="num_iterations" title="Permalink to this parameter" href="#num_iterations">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, ``max_iter``, constraints: ``num_iterations >= 0``
 - number of boosting iterations
@@ -165,7 +165,7 @@ Core Parameters
 - in ``dart``, it also affects on normalization weights of dropped trees
-- ``num_leaves`` :raw-html:`<a id="num_leaves" title="Permalink to this parameter" href="#num_leaves">&#x1F517;&#xFE0E;</a>`, default = ``31``, type = int, aliases: ``num_leaf``, ``max_leaves``, ``max_leaf``, constraints: ``1 < num_leaves <= 131072``
+- ``num_leaves`` :raw-html:`<a id="num_leaves" title="Permalink to this parameter" href="#num_leaves">&#x1F517;&#xFE0E;</a>`, default = ``31``, type = int, aliases: ``num_leaf``, ``max_leaves``, ``max_leaf``, ``max_leaf_nodes``, constraints: ``1 < num_leaves <= 131072``
 - max number of leaves in one tree
@@ -282,7 +282,7 @@ Learning Control Parameters
 - ``<= 0`` means no limit
-- ``min_data_in_leaf`` :raw-html:`<a id="min_data_in_leaf" title="Permalink to this parameter" href="#min_data_in_leaf">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, aliases: ``min_data_per_leaf``, ``min_data``, ``min_child_samples``, constraints: ``min_data_in_leaf >= 0``
+- ``min_data_in_leaf`` :raw-html:`<a id="min_data_in_leaf" title="Permalink to this parameter" href="#min_data_in_leaf">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, aliases: ``min_data_per_leaf``, ``min_data``, ``min_child_samples``, ``min_samples_leaf``, constraints: ``min_data_in_leaf >= 0``
 - minimal number of data in one leaf. Can be used to deal with over-fitting
@@ -402,11 +402,11 @@ Learning Control Parameters
 - the final max output of leaves is ``learning_rate * max_delta_step``
-- ``lambda_l1`` :raw-html:`<a id="lambda_l1" title="Permalink to this parameter" href="#lambda_l1">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_alpha``, constraints: ``lambda_l1 >= 0.0``
+- ``lambda_l1`` :raw-html:`<a id="lambda_l1" title="Permalink to this parameter" href="#lambda_l1">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_alpha``, ``l1_regularization``, constraints: ``lambda_l1 >= 0.0``
 - L1 regularization
-- ``lambda_l2`` :raw-html:`<a id="lambda_l2" title="Permalink to this parameter" href="#lambda_l2">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_lambda``, ``lambda``, constraints: ``lambda_l2 >= 0.0``
+- ``lambda_l2`` :raw-html:`<a id="lambda_l2" title="Permalink to this parameter" href="#lambda_l2">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_lambda``, ``lambda``, ``l2_regularization``, constraints: ``lambda_l2 >= 0.0``
 - L2 regularization
@@ -504,7 +504,7 @@ Learning Control Parameters
 - set this to larger value for more accurate result, but it will slow down the training speed
-- ``monotone_constraints`` :raw-html:`<a id="monotone_constraints" title="Permalink to this parameter" href="#monotone_constraints">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-int, aliases: ``mc``, ``monotone_constraint``
+- ``monotone_constraints`` :raw-html:`<a id="monotone_constraints" title="Permalink to this parameter" href="#monotone_constraints">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-int, aliases: ``mc``, ``monotone_constraint``, ``monotonic_cst``
 - used for constraints of monotonic features
@@ -672,7 +672,7 @@ Dataset Parameters
 - **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
-- ``max_bin`` :raw-html:`<a id="max_bin" title="Permalink to this parameter" href="#max_bin">&#x1F517;&#xFE0E;</a>`, default = ``255``, type = int, constraints: ``max_bin > 1``
+- ``max_bin`` :raw-html:`<a id="max_bin" title="Permalink to this parameter" href="#max_bin">&#x1F517;&#xFE0E;</a>`, default = ``255``, type = int, aliases: ``max_bins``, constraints: ``max_bin > 1``
 - max number of bins that feature values will be bucketed in
@@ -806,7 +806,7 @@ Dataset Parameters
 - **Note**: despite the fact that specified columns will be completely ignored during the training, they still should have a valid format allowing LightGBM to load file successfully
-- ``categorical_feature`` :raw-html:`<a id="categorical_feature" title="Permalink to this parameter" href="#categorical_feature">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = multi-int or string, aliases: ``cat_feature``, ``categorical_column``, ``cat_column``
+- ``categorical_feature`` :raw-html:`<a id="categorical_feature" title="Permalink to this parameter" href="#categorical_feature">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = multi-int or string, aliases: ``cat_feature``, ``categorical_column``, ``cat_column``, ``categorical_features``
 - used to specify categorical features
...
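Taken together, the documentation changes above add one scikit-learn-style spelling per parameter. As a rough illustrative sketch (not part of the LightGBM API), the new alias-to-canonical pairs from this diff can be written as a flat mapping and applied to a params dict:

```python
# Sketch only: the scikit-learn-style aliases added in this commit,
# mapped to their canonical LightGBM parameter names (taken from the diff).
SKLEARN_STYLE_ALIASES = {
    "loss": "objective",
    "max_iter": "num_iterations",
    "max_leaf_nodes": "num_leaves",
    "min_samples_leaf": "min_data_in_leaf",
    "l1_regularization": "lambda_l1",
    "l2_regularization": "lambda_l2",
    "monotonic_cst": "monotone_constraints",
    "max_bins": "max_bin",
    "categorical_features": "categorical_feature",
}


def canonicalize(params):
    """Rewrite alias keys to canonical names; other keys pass through.

    LightGBM performs this resolution internally; this helper is only an
    illustration and is not a function the library provides.
    """
    return {SKLEARN_STYLE_ALIASES.get(key, key): value
            for key, value in params.items()}
```

For example, `canonicalize({"max_iter": 200, "learning_rate": 0.1})` yields `{"num_iterations": 200, "learning_rate": 0.1}`.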
@@ -109,7 +109,7 @@ struct Config {
 // [doc-only]
 // type = enum
 // options = regression, regression_l1, huber, fair, poisson, quantile, mape, gamma, tweedie, binary, multiclass, multiclassova, cross_entropy, cross_entropy_lambda, lambdarank, rank_xendcg
-// alias = objective_type, app, application
+// alias = objective_type, app, application, loss
 // desc = regression application
 // descl2 = ``regression``, L2 loss, aliases: ``regression_l2``, ``l2``, ``mean_squared_error``, ``mse``, ``l2_root``, ``root_mean_squared_error``, ``rmse``
 // descl2 = ``regression_l1``, L1 loss, aliases: ``l1``, ``mean_absolute_error``, ``mae``
@@ -161,7 +161,7 @@ struct Config {
 // desc = **Note**: can be used only in CLI version
 std::vector<std::string> valid;
-// alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators
+// alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators, max_iter
 // check = >=0
 // desc = number of boosting iterations
 // desc = **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
@@ -174,7 +174,7 @@ struct Config {
 double learning_rate = 0.1;
 // default = 31
-// alias = num_leaf, max_leaves, max_leaf
+// alias = num_leaf, max_leaves, max_leaf, max_leaf_nodes
 // check = >1
 // check = <=131072
 // desc = max number of leaves in one tree
@@ -261,7 +261,7 @@ struct Config {
 // desc = ``<= 0`` means no limit
 int max_depth = -1;
-// alias = min_data_per_leaf, min_data, min_child_samples
+// alias = min_data_per_leaf, min_data, min_child_samples, min_samples_leaf
 // check = >=0
 // desc = minimal number of data in one leaf. Can be used to deal with over-fitting
 // desc = **Note**: this is an approximation based on the Hessian, so occasionally you may observe splits which produce leaf nodes that have less than this many observations
@@ -360,12 +360,12 @@ struct Config {
 // desc = the final max output of leaves is ``learning_rate * max_delta_step``
 double max_delta_step = 0.0;
-// alias = reg_alpha
+// alias = reg_alpha, l1_regularization
 // check = >=0.0
 // desc = L1 regularization
 double lambda_l1 = 0.0;
-// alias = reg_lambda, lambda
+// alias = reg_lambda, lambda, l2_regularization
 // check = >=0.0
 // desc = L2 regularization
 double lambda_l2 = 0.0;
@@ -453,7 +453,7 @@ struct Config {
 int top_k = 20;
 // type = multi-int
-// alias = mc, monotone_constraint
+// alias = mc, monotone_constraint, monotonic_cst
 // default = None
 // desc = used for constraints of monotonic features
 // desc = ``1`` means increasing, ``-1`` means decreasing, ``0`` means non-constraint
@@ -586,6 +586,7 @@ struct Config {
 // descl2 = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
 bool linear_tree = false;
+// alias = max_bins
 // check = >1
 // desc = max number of bins that feature values will be bucketed in
 // desc = small number of bins may reduce training accuracy but may increase general power (deal with over-fitting)
@@ -691,7 +692,7 @@ struct Config {
 std::string ignore_column = "";
 // type = multi-int or string
-// alias = cat_feature, categorical_column, cat_column
+// alias = cat_feature, categorical_column, cat_column, categorical_features
 // desc = used to specify categorical features
 // desc = use number for index, e.g. ``categorical_feature=0,1,2`` means column\_0, column\_1 and column\_2 are categorical features
 // desc = add a prefix ``name:`` for column name, e.g. ``categorical_feature=name:c1,c2,c3`` means c1, c2 and c3 are categorical features
...
@@ -331,7 +331,8 @@ class _ConfigAliases:
 "categorical_feature": {"categorical_feature",
 "cat_feature",
 "categorical_column",
-"cat_column"},
+"cat_column",
+"categorical_features"},
 "data_random_seed": {"data_random_seed",
 "data_seed"},
 "early_stopping_round": {"early_stopping_round",
@@ -371,6 +372,8 @@ class _ConfigAliases:
 "machines": {"machines",
 "workers",
 "nodes"},
+"max_bin": {"max_bin",
+"max_bins"},
 "metric": {"metric",
 "metrics",
 "metric_types"},
@@ -384,7 +387,8 @@ class _ConfigAliases:
 "num_round",
 "num_rounds",
 "num_boost_round",
-"n_estimators"},
+"n_estimators",
+"max_iter"},
 "num_machines": {"num_machines",
 "num_machine"},
 "num_threads": {"num_threads",
@@ -395,7 +399,8 @@ class _ConfigAliases:
 "objective": {"objective",
 "objective_type",
 "app",
-"application"},
+"application",
+"loss"},
 "pre_partition": {"pre_partition",
 "is_pre_partition"},
 "tree_learner": {"tree_learner",
...
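The `_ConfigAliases` sets above group every accepted spelling under one canonical name. A minimal sketch of how such sets can be used, e.g. to spot a params dict that supplies the same parameter under two spellings (alias subset copied from the diff; `find_conflicts` is illustrative, not a LightGBM function):

```python
# Illustrative subset of the alias sets shown in the diff above.
ALIAS_SETS = {
    "num_iterations": {"num_iterations", "num_iteration", "n_iter",
                       "num_tree", "num_trees", "num_round", "num_rounds",
                       "num_boost_round", "n_estimators", "max_iter"},
    "objective": {"objective", "objective_type", "app", "application", "loss"},
}


def find_conflicts(params):
    """Return {canonical: sorted keys used} where more than one spelling
    of the same parameter appears in ``params``."""
    conflicts = {}
    for canonical, aliases in ALIAS_SETS.items():
        used = sorted(aliases & params.keys())
        if len(used) > 1:
            conflicts[canonical] = used
    return conflicts
```

Here `find_conflicts({"n_estimators": 100, "max_iter": 50})` reports both spellings under `num_iterations`, which is the kind of ambiguity set-based alias groups make easy to detect.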
@@ -14,6 +14,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"objective_type", "objective"},
 {"app", "objective"},
 {"application", "objective"},
+{"loss", "objective"},
 {"boosting_type", "boosting"},
 {"boost", "boosting"},
 {"train", "data"},
@@ -34,11 +35,13 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"num_rounds", "num_iterations"},
 {"num_boost_round", "num_iterations"},
 {"n_estimators", "num_iterations"},
+{"max_iter", "num_iterations"},
 {"shrinkage_rate", "learning_rate"},
 {"eta", "learning_rate"},
 {"num_leaf", "num_leaves"},
 {"max_leaves", "num_leaves"},
 {"max_leaf", "num_leaves"},
+{"max_leaf_nodes", "num_leaves"},
 {"tree", "tree_learner"},
 {"tree_type", "tree_learner"},
 {"tree_learner_type", "tree_learner"},
@@ -53,6 +56,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"min_data_per_leaf", "min_data_in_leaf"},
 {"min_data", "min_data_in_leaf"},
 {"min_child_samples", "min_data_in_leaf"},
+{"min_samples_leaf", "min_data_in_leaf"},
 {"min_sum_hessian_per_leaf", "min_sum_hessian_in_leaf"},
 {"min_sum_hessian", "min_sum_hessian_in_leaf"},
 {"min_hessian", "min_sum_hessian_in_leaf"},
@@ -79,13 +83,16 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"max_tree_output", "max_delta_step"},
 {"max_leaf_output", "max_delta_step"},
 {"reg_alpha", "lambda_l1"},
+{"l1_regularization", "lambda_l1"},
 {"reg_lambda", "lambda_l2"},
 {"lambda", "lambda_l2"},
+{"l2_regularization", "lambda_l2"},
 {"min_split_gain", "min_gain_to_split"},
 {"rate_drop", "drop_rate"},
 {"topk", "top_k"},
 {"mc", "monotone_constraints"},
 {"monotone_constraint", "monotone_constraints"},
+{"monotonic_cst", "monotone_constraints"},
 {"monotone_constraining_method", "monotone_constraints_method"},
 {"mc_method", "monotone_constraints_method"},
 {"monotone_splits_penalty", "monotone_penalty"},
@@ -106,6 +113,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"model_out", "output_model"},
 {"save_period", "snapshot_freq"},
 {"linear_trees", "linear_tree"},
+{"max_bins", "max_bin"},
 {"subsample_for_bin", "bin_construct_sample_cnt"},
 {"data_seed", "data_random_seed"},
 {"is_sparse", "is_enable_sparse"},
@@ -129,6 +137,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"cat_feature", "categorical_feature"},
 {"categorical_column", "categorical_feature"},
 {"cat_column", "categorical_feature"},
+{"categorical_features", "categorical_feature"},
 {"is_save_binary", "save_binary"},
 {"is_save_binary_file", "save_binary"},
 {"is_predict_raw_score", "predict_raw_score"},
...
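The C++ `alias_table()` above stores flat alias-to-canonical pairs, whereas the Python `_ConfigAliases` stores one set of spellings per canonical name. The two representations carry the same information; a small sketch of deriving the grouped view from the flat one (entry subset from the diff; `group_by_canonical` is hypothetical, not library code):

```python
# Illustrative subset of the flat alias table shown in the diff above.
FLAT_TABLE = {
    "loss": "objective",
    "app": "objective",
    "application": "objective",
    "max_iter": "num_iterations",
    "n_estimators": "num_iterations",
}


def group_by_canonical(flat):
    """Invert a flat alias table into {canonical: {canonical, alias, ...}},
    the shape used by the set-based alias groups."""
    grouped = {}
    for alias, canonical in flat.items():
        grouped.setdefault(canonical, {canonical}).add(alias)
    return grouped
```

Keeping a single source of truth and deriving the other view like this is one way to guarantee the two alias listings cannot drift apart.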