"git@developer.sourcefind.cn:jerrrrry/infinicore.git" did not exist on "da0cef143c7f7102a42a9752014a8539f2108683"
Commit 00a125d5 authored by Nikita Titov, committed by Qiwei Ye

[docs] added new parameters aliases (#1537)

* added new aliases for params

* run helper/parameter_generator.py

* removed useless test
parent 7ed1ed3e
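
With this change, parameter spellings familiar from scikit-learn and XGBoost (``n_estimators``, ``eta``, ``n_jobs``, ``random_state``, and others) are accepted directly in ``params``. A minimal Python sketch of the intended usage, assuming a LightGBM build that includes this commit and using made-up toy data:

    import numpy as np
    import lightgbm as lgb

    # toy regression data, purely illustrative
    X = np.random.rand(500, 10)
    y = np.random.rand(500)
    train_set = lgb.Dataset(X, label=y)

    params = {
        "objective": "regression",
        "eta": 0.05,          # alias of learning_rate
        "n_jobs": 2,          # alias of num_threads
        "random_state": 42,   # alias of seed
    }
    booster = lgb.train(params, train_set, num_boost_round=50)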
@@ -140,7 +140,7 @@ lgb.cv <- function(params = list(),
     begin_iteration <- predictor$current_iter() + 1
   }
   # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one
-  n_trees <- c("num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds")
+  n_trees <- c("num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", "num_round", "num_rounds", "num_boost_round", "n_estimators")
   if (any(names(params) %in% n_trees)) {
     end_iteration <- begin_iteration + params[[which(names(params) %in% n_trees)[1]]] - 1
   } else {
...
@@ -117,7 +117,7 @@ lgb.train <- function(params = list(),
     begin_iteration <- predictor$current_iter() + 1
   }
   # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one
-  n_rounds <- c("num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds")
+  n_rounds <- c("num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", "num_round", "num_rounds", "num_boost_round", "n_estimators")
   if (any(names(params) %in% n_rounds)) {
     end_iteration <- begin_iteration + params[[which(names(params) %in% n_rounds)[1]]] - 1
   } else {
...
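
Both R wrappers keep the existing rule: when several of these names appear in ``params``, only the first matching entry is used. A hypothetical Python sketch of that first-match rule (the helper name is illustrative, not part of the package):

    # hypothetical helper mirroring the R logic: scan params in order and
    # use the first entry whose name is one of the iteration aliases
    NUM_ITERATIONS_ALIASES = {"num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
                              "num_round", "num_rounds", "num_boost_round", "n_estimators"}

    def resolve_num_iterations(params, default=100):
        for name, value in params.items():
            if name in NUM_ITERATIONS_ALIASES:
                return int(value)
        return default

    print(resolve_num_iterations({"n_estimators": 200, "num_round": 50}))  # -> 200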
@@ -73,7 +73,7 @@ Core Parameters
 - ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Applications>`__
-- ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``xentropy`` for general probability labels in [0, 1]
+- ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
 - multi-class classification application
@@ -109,13 +109,13 @@ Core Parameters
 - ``goss``, Gradient-based One-Side Sampling
-- ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``data_filename``
+- ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
 - path of training data, LightGBM will train from this data
 - **Note**: can be used only in CLI version
-- ``valid`` :raw-html:`<a id="valid" title="Permalink to this parameter" href="#valid">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``test``, ``valid_data``, ``valid_data_file``, ``test_data``, ``valid_filenames``
+- ``valid`` :raw-html:`<a id="valid" title="Permalink to this parameter" href="#valid">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``test``, ``valid_data``, ``valid_data_file``, ``test_data``, ``test_data_file``, ``valid_filenames``
 - path(s) of validation/test data, LightGBM will output metrics for these data
@@ -123,7 +123,7 @@ Core Parameters
 - **Note**: can be used only in CLI version
-- ``num_iterations`` :raw-html:`<a id="num_iterations" title="Permalink to this parameter" href="#num_iterations">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, aliases: ``num_iteration``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, constraints: ``num_iterations >= 0``
+- ``num_iterations`` :raw-html:`<a id="num_iterations" title="Permalink to this parameter" href="#num_iterations">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, constraints: ``num_iterations >= 0``
 - number of boosting iterations
@@ -131,17 +131,17 @@ Core Parameters
 - **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
-- ``learning_rate`` :raw-html:`<a id="learning_rate" title="Permalink to this parameter" href="#learning_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.1``, type = double, aliases: ``shrinkage_rate``, constraints: ``learning_rate > 0.0``
+- ``learning_rate`` :raw-html:`<a id="learning_rate" title="Permalink to this parameter" href="#learning_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.1``, type = double, aliases: ``shrinkage_rate``, ``eta``, constraints: ``learning_rate > 0.0``
 - shrinkage rate
 - in ``dart``, it also affects on normalization weights of dropped trees
-- ``num_leaves`` :raw-html:`<a id="num_leaves" title="Permalink to this parameter" href="#num_leaves">&#x1F517;&#xFE0E;</a>`, default = ``31``, type = int, aliases: ``num_leaf``, constraints: ``num_leaves > 1``
+- ``num_leaves`` :raw-html:`<a id="num_leaves" title="Permalink to this parameter" href="#num_leaves">&#x1F517;&#xFE0E;</a>`, default = ``31``, type = int, aliases: ``num_leaf``, ``max_leaves``, ``max_leaf``, constraints: ``num_leaves > 1``
 - max number of leaves in one tree
-- ``tree_learner`` :raw-html:`<a id="tree_learner" title="Permalink to this parameter" href="#tree_learner">&#x1F517;&#xFE0E;</a>`, default = ``serial``, type = enum, options: ``serial``, ``feature``, ``data``, ``voting``, aliases: ``tree``, ``tree_learner_type``
+- ``tree_learner`` :raw-html:`<a id="tree_learner" title="Permalink to this parameter" href="#tree_learner">&#x1F517;&#xFE0E;</a>`, default = ``serial``, type = enum, options: ``serial``, ``feature``, ``data``, ``voting``, aliases: ``tree``, ``tree_type``, ``tree_learner_type``
 - ``serial``, single machine tree learner
@@ -153,7 +153,7 @@ Core Parameters
 - refer to `Parallel Learning Guide <./Parallel-Learning-Guide.rst>`__ to get more details
-- ``num_threads`` :raw-html:`<a id="num_threads" title="Permalink to this parameter" href="#num_threads">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``num_thread``, ``nthread``, ``nthreads``
+- ``num_threads`` :raw-html:`<a id="num_threads" title="Permalink to this parameter" href="#num_threads">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``num_thread``, ``nthread``, ``nthreads``, ``n_jobs``
 - number of threads for LightGBM
@@ -177,7 +177,7 @@ Core Parameters
 - **Note**: refer to `Installation Guide <./Installation-Guide.rst#build-gpu-version>`__ to build LightGBM with GPU support
-- ``seed`` :raw-html:`<a id="seed" title="Permalink to this parameter" href="#seed">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``random_seed``
+- ``seed`` :raw-html:`<a id="seed" title="Permalink to this parameter" href="#seed">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = int, aliases: ``random_seed``, ``random_state``
 - this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``
@@ -252,7 +252,7 @@ Learning Control Parameters
 - L1 regularization
-- ``lambda_l2`` :raw-html:`<a id="lambda_l2" title="Permalink to this parameter" href="#lambda_l2">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_lambda``, constraints: ``lambda_l2 >= 0.0``
+- ``lambda_l2`` :raw-html:`<a id="lambda_l2" title="Permalink to this parameter" href="#lambda_l2">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_lambda``, ``lambda``, constraints: ``lambda_l2 >= 0.0``
 - L2 regularization
@@ -260,17 +260,17 @@ Learning Control Parameters
 - the minimal gain to perform split
-- ``drop_rate`` :raw-html:`<a id="drop_rate" title="Permalink to this parameter" href="#drop_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.1``, type = double, constraints: ``0.0 <= drop_rate <= 1.0``
+- ``drop_rate`` :raw-html:`<a id="drop_rate" title="Permalink to this parameter" href="#drop_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.1``, type = double, aliases: ``rate_drop``, constraints: ``0.0 <= drop_rate <= 1.0``
 - used only in ``dart``
-- dropout rate
+- dropout rate: a fraction of previous trees to drop during the dropout
 - ``max_drop`` :raw-html:`<a id="max_drop" title="Permalink to this parameter" href="#max_drop">&#x1F517;&#xFE0E;</a>`, default = ``50``, type = int
 - used only in ``dart``
-- max number of dropped trees on one iteration
+- max number of dropped trees during one boosting iteration
 - ``<=0`` means no limit
@@ -278,7 +278,7 @@ Learning Control Parameters
 - used only in ``dart``
-- probability of skipping drop
+- probability of skipping the dropout procedure during a boosting iteration
 - ``xgboost_dart_mode`` :raw-html:`<a id="xgboost_dart_mode" title="Permalink to this parameter" href="#xgboost_dart_mode">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
@@ -350,7 +350,7 @@ Learning Control Parameters
 - you need to specify all features in order. For example, ``mc=-1,0,1`` means decreasing for 1st feature, non-constraint for 2nd feature and increasing for the 3rd feature
-- ``feature_contri`` :raw-html:`<a id="feature_contri" title="Permalink to this parameter" href="#feature_contri">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-double, aliases: ``fc``, ``fp``, ``feature_penalty``
+- ``feature_contri`` :raw-html:`<a id="feature_contri" title="Permalink to this parameter" href="#feature_contri">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-double, aliases: ``feature_contrib``, ``fc``, ``fp``, ``feature_penalty``
 - used to control feature's split gain, will use ``gain[i] = max(0, feature_contri[i]) * gain[i]`` to replace the split gain of i-th feature
@@ -397,13 +397,13 @@ IO Parameters
 - set this to larger value if data is very sparse
-- ``histogram_pool_size`` :raw-html:`<a id="histogram_pool_size" title="Permalink to this parameter" href="#histogram_pool_size">&#x1F517;&#xFE0E;</a>`, default = ``-1.0``, type = double
+- ``histogram_pool_size`` :raw-html:`<a id="histogram_pool_size" title="Permalink to this parameter" href="#histogram_pool_size">&#x1F517;&#xFE0E;</a>`, default = ``-1.0``, type = double, aliases: ``hist_pool_size``
 - max cache size in MB for historical histogram
 - ``< 0`` means no limit
-- ``data_random_seed`` :raw-html:`<a id="data_random_seed" title="Permalink to this parameter" href="#data_random_seed">&#x1F517;&#xFE0E;</a>`, default = ``1``, type = int
+- ``data_random_seed`` :raw-html:`<a id="data_random_seed" title="Permalink to this parameter" href="#data_random_seed">&#x1F517;&#xFE0E;</a>`, default = ``1``, type = int, aliases: ``data_seed``
 - random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
@@ -413,7 +413,7 @@ IO Parameters
 - **Note**: can be used only in CLI version
-- ``snapshot_freq`` :raw-html:`<a id="snapshot_freq" title="Permalink to this parameter" href="#snapshot_freq">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int
+- ``snapshot_freq`` :raw-html:`<a id="snapshot_freq" title="Permalink to this parameter" href="#snapshot_freq">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int, aliases: ``save_period``
 - frequency of saving model file snapshot
@@ -431,7 +431,7 @@ IO Parameters
 - **Note**: can be used only in CLI version
-- ``output_result`` :raw-html:`<a id="output_result" title="Permalink to this parameter" href="#output_result">&#x1F517;&#xFE0E;</a>`, default = ``LightGBM_predict_result.txt``, type = string, aliases: ``predict_result``, ``prediction_result``
+- ``output_result`` :raw-html:`<a id="output_result" title="Permalink to this parameter" href="#output_result">&#x1F517;&#xFE0E;</a>`, default = ``LightGBM_predict_result.txt``, type = string, aliases: ``predict_result``, ``prediction_result``, ``predict_name``, ``prediction_name``, ``pred_name``, ``name_pred``
 - filename of prediction result in ``prediction`` task
@@ -644,7 +644,7 @@ Objective Parameters
 - used only in ``multi-class`` classification application
-- ``is_unbalance`` :raw-html:`<a id="is_unbalance" title="Permalink to this parameter" href="#is_unbalance">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``unbalanced_sets``
+- ``is_unbalance`` :raw-html:`<a id="is_unbalance" title="Permalink to this parameter" href="#is_unbalance">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``unbalance``, ``unbalanced_sets``
 - used only in ``binary`` application
@@ -729,7 +729,7 @@ Metric Parameters
 - metric(s) to be evaluated on the evaluation sets **in addition** to what is provided in the training arguments
-- ``""`` (empty string or not specific) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
+- ``""`` (empty string or not specified) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
 - ``"None"`` (string, **not** a ``None`` value) means that no metric will be registered, aliases: ``na``
...
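
For the objective wording above: ``binary`` expects hard labels in {0, 1}, while the cross-entropy application (objective ``xentropy``) accepts probability labels in [0, 1]. A small sketch with random toy data:

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(300, 5)
    y_prob = np.random.rand(300)  # "soft" targets in [0, 1]

    # the binary objective would reject these labels; the cross-entropy
    # application (objective "xentropy") accepts probabilities directly
    train_set = lgb.Dataset(X, label=y_prob)
    booster = lgb.train({"objective": "xentropy"}, train_set, num_boost_round=20)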
@@ -110,7 +110,7 @@ public:
 // descl2 = ``mape``, `MAPE loss <https://en.wikipedia.org/wiki/Mean_absolute_percentage_error>`__, aliases: ``mean_absolute_percentage_error``
 // descl2 = ``gamma``, Gamma regression with log-link. It might be useful, e.g., for modeling insurance claims severity, or for any target that might be `gamma-distributed <https://en.wikipedia.org/wiki/Gamma_distribution#Applications>`__
 // descl2 = ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Applications>`__
-// desc = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``xentropy`` for general probability labels in [0, 1]
+// desc = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
 // desc = multi-class classification application
 // descl2 = ``multiclass``, `softmax <https://en.wikipedia.org/wiki/Softmax_function>`__ objective function, aliases: ``softmax``
 // descl2 = ``multiclassova``, `One-vs-All <https://en.wikipedia.org/wiki/Multiclass_classification#One-vs.-rest>`__ binary objective function, aliases: ``multiclass_ova``, ``ova``, ``ovr``
@@ -135,33 +135,33 @@ public:
 // desc = ``goss``, Gradient-based One-Side Sampling
 std::string boosting = "gbdt";

-// alias = train, train_data, data_filename
+// alias = train, train_data, train_data_file, data_filename
 // desc = path of training data, LightGBM will train from this data
 // desc = **Note**: can be used only in CLI version
 std::string data = "";

-// alias = test, valid_data, valid_data_file, test_data, valid_filenames
+// alias = test, valid_data, valid_data_file, test_data, test_data_file, valid_filenames
 // default = ""
 // desc = path(s) of validation/test data, LightGBM will output metrics for these data
 // desc = support multiple validation data, separated by ``,``
 // desc = **Note**: can be used only in CLI version
 std::vector<std::string> valid;

-// alias = num_iteration, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators
+// alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators
 // check = >=0
 // desc = number of boosting iterations
 // desc = **Note**: for Python/R-package, **this parameter is ignored**, use ``num_boost_round`` (Python) or ``nrounds`` (R) input arguments of ``train`` and ``cv`` methods instead
 // desc = **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
 int num_iterations = 100;

-// alias = shrinkage_rate
+// alias = shrinkage_rate, eta
 // check = >0.0
 // desc = shrinkage rate
 // desc = in ``dart``, it also affects on normalization weights of dropped trees
 double learning_rate = 0.1;

 // default = 31
-// alias = num_leaf
+// alias = num_leaf, max_leaves, max_leaf
 // check = >1
 // desc = max number of leaves in one tree
 int num_leaves = kDefaultNumLeaves;
@@ -169,7 +169,7 @@ public:
 // [doc-only]
 // type = enum
 // options = serial, feature, data, voting
-// alias = tree, tree_learner_type
+// alias = tree, tree_type, tree_learner_type
 // desc = ``serial``, single machine tree learner
 // desc = ``feature``, feature parallel tree learner, aliases: ``feature_parallel``
 // desc = ``data``, data parallel tree learner, aliases: ``data_parallel``
@@ -177,7 +177,7 @@ public:
 // desc = refer to `Parallel Learning Guide <./Parallel-Learning-Guide.rst>`__ to get more details
 std::string tree_learner = "serial";

-// alias = num_thread, nthread, nthreads
+// alias = num_thread, nthread, nthreads, n_jobs
 // desc = number of threads for LightGBM
 // desc = ``0`` means default number of threads in OpenMP
 // desc = for the best speed, set this to the number of **real CPU cores**, not the number of threads (most CPUs use `hyper-threading <https://en.wikipedia.org/wiki/Hyper-threading>`__ to generate 2 threads per CPU core)
@@ -197,7 +197,7 @@ public:
 std::string device_type = "cpu";

 // [doc-only]
-// alias = random_seed
+// alias = random_seed, random_state
 // desc = this seed is used to generate other seeds, e.g. ``data_random_seed``, ``feature_fraction_seed``
 // desc = will be overridden, if you set other seeds
 int seed = 0;
@@ -266,7 +266,7 @@ public:
 // desc = L1 regularization
 double lambda_l1 = 0.0;

-// alias = reg_lambda
+// alias = reg_lambda, lambda
 // check = >=0.0
 // desc = L2 regularization
 double lambda_l2 = 0.0;
@@ -276,21 +276,22 @@ public:
 // desc = the minimal gain to perform split
 double min_gain_to_split = 0.0;

+// alias = rate_drop
 // check = >=0.0
 // check = <=1.0
 // desc = used only in ``dart``
-// desc = dropout rate
+// desc = dropout rate: a fraction of previous trees to drop during the dropout
 double drop_rate = 0.1;

 // desc = used only in ``dart``
-// desc = max number of dropped trees on one iteration
+// desc = max number of dropped trees during one boosting iteration
 // desc = ``<=0`` means no limit
 int max_drop = 50;

 // check = >=0.0
 // check = <=1.0
 // desc = used only in ``dart``
-// desc = probability of skipping drop
+// desc = probability of skipping the dropout procedure during a boosting iteration
 double skip_drop = 0.5;

 // desc = used only in ``dart``
@@ -355,7 +356,7 @@ public:
 std::vector<int8_t> monotone_constraints;

 // type = multi-double
-// alias = fc, fp, feature_penalty
+// alias = feature_contrib, fc, fp, feature_penalty
 // default = None
 // desc = used to control feature's split gain, will use ``gain[i] = max(0, feature_contri[i]) * gain[i]`` to replace the split gain of i-th feature
 // desc = you need to specify all features in order
@@ -395,10 +396,12 @@ public:
 // desc = set this to larger value if data is very sparse
 int bin_construct_sample_cnt = 200000;

+// alias = hist_pool_size
 // desc = max cache size in MB for historical histogram
 // desc = ``< 0`` means no limit
 double histogram_pool_size = -1.0;

+// alias = data_seed
 // desc = random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
 int data_random_seed = 1;
@@ -407,6 +410,7 @@ public:
 // desc = **Note**: can be used only in CLI version
 std::string output_model = "LightGBM_model.txt";

+// alias = save_period
 // desc = frequency of saving model file snapshot
 // desc = set this to positive value to enable this function. For example, the model file will be snapshotted at each iteration if ``snapshot_freq=1``
 // desc = **Note**: can be used only in CLI version
@@ -419,7 +423,7 @@ public:
 // desc = **Note**: can be used only in CLI version
 std::string input_model = "";

-// alias = predict_result, prediction_result
+// alias = predict_result, prediction_result, predict_name, prediction_name, pred_name, name_pred
 // desc = filename of prediction result in ``prediction`` task
 // desc = **Note**: can be used only in CLI version
 std::string output_result = "LightGBM_predict_result.txt";
@@ -588,7 +592,7 @@ public:
 // desc = used only in ``multi-class`` classification application
 int num_class = 1;

-// alias = unbalanced_sets
+// alias = unbalance, unbalanced_sets
 // desc = used only in ``binary`` application
 // desc = set this to ``true`` if training data are unbalance
 // desc = **Note**: this parameter cannot be used at the same time with ``scale_pos_weight``, choose only **one** of them
@@ -658,7 +662,7 @@ public:
 // default = ""
 // type = multi-enum
 // desc = metric(s) to be evaluated on the evaluation sets **in addition** to what is provided in the training arguments
-// descl2 = ``""`` (empty string or not specific) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
+// descl2 = ``""`` (empty string or not specified) means that metric corresponding to specified ``objective`` will be used (this is possible only for pre-defined objective functions, otherwise no evaluation metric will be added)
 // descl2 = ``"None"`` (string, **not** a ``None`` value) means that no metric will be registered, aliases: ``na``
 // descl2 = ``l1``, absolute loss, aliases: ``mean_absolute_error``, ``mae``, ``regression_l1``
 // descl2 = ``l2``, square loss, aliases: ``mean_squared_error``, ``mse``, ``regression_l2``, ``regression``
...
@@ -660,12 +660,8 @@ class Dataset(object):
                 warnings.warn('{0} keyword has been found in `params` and will be ignored. '
                               'Please use {0} argument of the Dataset constructor to pass this parameter.'.format(key))
         self.predictor = predictor
-        if "verbosity" in params:
-            params.setdefault("verbose", params.pop("verbosity"))
         if silent:
             params["verbose"] = 0
-        elif "verbose" not in params:
-            params["verbose"] = 1
         # get categorical features
         if categorical_feature is not None:
             categorical_indices = set()
@@ -1340,12 +1336,8 @@ class Booster(object):
         self.best_iteration = -1
         self.best_score = {}
         params = {} if params is None else params
-        if "verbosity" in params:
-            params.setdefault("verbose", params.pop("verbosity"))
         if silent:
             params["verbose"] = 0
-        elif "verbose" not in params:
-            params["verbose"] = 1
        if train_set is not None:
             # Training task
             if not isinstance(train_set, Dataset):
...
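
The removed lines translated ``verbosity`` into ``verbose`` inside the Python wrapper; since ``verbose`` is registered as an alias of ``verbosity`` in the core parameter table, either spelling should now behave the same without that shim. A minimal sketch with toy data:

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(200, 5)
    y = np.random.randint(0, 2, size=200)

    # either spelling ends up as the same core parameter, so the wrapper
    # no longer needs to rename "verbosity" to "verbose" itself
    ds_a = lgb.Dataset(X, label=y, params={"verbosity": -1})
    ds_b = lgb.Dataset(X, label=y, params={"verbose": -1})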
@@ -98,7 +98,8 @@ def train(params, train_set, num_boost_round=100,
         The trained Booster model.
     """
     # create predictor first
-    for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
+    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
+                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
         if alias in params:
             num_boost_round = int(params.pop(alias))
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
@@ -396,7 +397,8 @@ def cv(params, train_set, num_boost_round=100,
     if not isinstance(train_set, Dataset):
         raise TypeError("Traninig only accepts Dataset object")
-    for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
+    for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
+                  "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
         if alias in params:
             warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
             num_boost_round = params.pop(alias)
...
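
As the loop above shows, any of the listed iteration aliases found in ``params`` overrides the ``num_boost_round`` argument and triggers a warning. A small sketch of that precedence with toy data:

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(200, 5)
    y = np.random.rand(200)
    train_set = lgb.Dataset(X, label=y)

    # "n_estimators" is in the alias list above, so it wins over the
    # num_boost_round argument and a warning is emitted
    booster = lgb.train({"objective": "regression", "n_estimators": 10},
                        train_set, num_boost_round=100)
    print(booster.current_iteration())  # expected: 10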
@@ -397,9 +397,6 @@ class LGBMModel(_LGBMModelBase):
         self._fobj = None
         evals_result = {}
         params = self.get_params()
-        # sklearn interface has another naming convention
-        params.setdefault('seed', params.pop('random_state'))
-        params.setdefault('nthread', params.pop('n_jobs'))
         # user can set verbose with kwargs, it has higher priority
         if 'verbose' not in params and self.silent:
             params['verbose'] = 0
...
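
With the explicit ``setdefault`` calls gone, ``random_state`` and ``n_jobs`` from the scikit-learn wrapper are expected to reach the core library through the generic alias table (``random_state`` -> ``seed``, ``n_jobs`` -> ``num_threads``). A minimal usage sketch with toy data:

    import numpy as np
    from lightgbm import LGBMClassifier

    X = np.random.rand(200, 5)
    y = np.random.randint(0, 2, size=200)

    # random_state and n_jobs are ordinary constructor arguments of the
    # sklearn wrapper; they now pass through to the core via the alias table
    clf = LGBMClassifier(n_estimators=20, random_state=42, n_jobs=2)
    clf.fit(X, y)
    print(clf.predict(X[:5]))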
@@ -11,13 +11,16 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"boost", "boosting"},
   {"train", "data"},
   {"train_data", "data"},
+  {"train_data_file", "data"},
   {"data_filename", "data"},
   {"test", "valid"},
   {"valid_data", "valid"},
   {"valid_data_file", "valid"},
   {"test_data", "valid"},
+  {"test_data_file", "valid"},
   {"valid_filenames", "valid"},
   {"num_iteration", "num_iterations"},
+  {"n_iter", "num_iterations"},
   {"num_tree", "num_iterations"},
   {"num_trees", "num_iterations"},
   {"num_round", "num_iterations"},
@@ -25,14 +28,20 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"num_boost_round", "num_iterations"},
   {"n_estimators", "num_iterations"},
   {"shrinkage_rate", "learning_rate"},
+  {"eta", "learning_rate"},
   {"num_leaf", "num_leaves"},
+  {"max_leaves", "num_leaves"},
+  {"max_leaf", "num_leaves"},
   {"tree", "tree_learner"},
+  {"tree_type", "tree_learner"},
   {"tree_learner_type", "tree_learner"},
   {"num_thread", "num_threads"},
   {"nthread", "num_threads"},
   {"nthreads", "num_threads"},
+  {"n_jobs", "num_threads"},
   {"device", "device_type"},
   {"random_seed", "seed"},
+  {"random_state", "seed"},
   {"min_data_per_leaf", "min_data_in_leaf"},
   {"min_data", "min_data_in_leaf"},
   {"min_child_samples", "min_data_in_leaf"},
@@ -53,10 +62,13 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"max_leaf_output", "max_delta_step"},
   {"reg_alpha", "lambda_l1"},
   {"reg_lambda", "lambda_l2"},
+  {"lambda", "lambda_l2"},
   {"min_split_gain", "min_gain_to_split"},
+  {"rate_drop", "drop_rate"},
   {"topk", "top_k"},
   {"mc", "monotone_constraints"},
   {"monotone_constraint", "monotone_constraints"},
+  {"feature_contrib", "feature_contri"},
   {"fc", "feature_contri"},
   {"fp", "feature_contri"},
   {"feature_penalty", "feature_contri"},
@@ -66,12 +78,19 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"forced_splits", "forcedsplits_filename"},
   {"verbose", "verbosity"},
   {"subsample_for_bin", "bin_construct_sample_cnt"},
+  {"hist_pool_size", "histogram_pool_size"},
+  {"data_seed", "data_random_seed"},
   {"model_output", "output_model"},
   {"model_out", "output_model"},
+  {"save_period", "snapshot_freq"},
   {"model_input", "input_model"},
   {"model_in", "input_model"},
   {"predict_result", "output_result"},
   {"prediction_result", "output_result"},
+  {"predict_name", "output_result"},
+  {"prediction_name", "output_result"},
+  {"pred_name", "output_result"},
+  {"name_pred", "output_result"},
   {"init_score_filename", "initscore_filename"},
   {"init_score_file", "initscore_filename"},
   {"init_score", "initscore_filename"},
@@ -114,6 +133,7 @@ std::unordered_map<std::string, std::string> Config::alias_table({
   {"contrib", "predict_contrib"},
   {"convert_model_file", "convert_model"},
   {"num_classes", "num_class"},
+  {"unbalance", "is_unbalance"},
   {"unbalanced_sets", "is_unbalance"},
   {"metrics", "metric"},
   {"metric_types", "metric"},
...
@@ -179,17 +179,6 @@ class TestSklearn(unittest.TestCase):
         importance_gain_top1 = sorted(importances_gain, reverse=True)[0]
         self.assertNotEqual(importance_split_top1, importance_gain_top1)

-    def test_sklearn_backward_compatibility(self):
-        iris = load_iris()
-        X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)
-        # Tests that `seed` is the same as `random_state`
-        clf_1 = lgb.sklearn.LGBMClassifier(seed=42, subsample=0.6, colsample_bytree=0.8)
-        clf_2 = lgb.sklearn.LGBMClassifier(random_state=42, subsample=0.6, colsample_bytree=0.8)
-        y_pred_1 = clf_1.fit(X_train, y_train).predict_proba(X_test)
-        y_pred_2 = clf_2.fit(X_train, y_train).predict_proba(X_test)
-        np.testing.assert_allclose(y_pred_1, y_pred_2)
-
     # sklearn <0.19 cannot accept instance, but many tests could be passed only with min_data=1 and min_data_in_bin=1
     @unittest.skipIf(not sklearn_at_least_019, 'scikit-learn version is less than 0.19')
     def test_sklearn_integration(self):
...
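
The removed test pinned the sklearn-specific ``seed``/``random_state`` renaming, which is now covered by the generic alias table instead. A hedged sketch of an equivalent check at the ``lgb.train`` level (toy data, not part of the test suite):

    import numpy as np
    import lightgbm as lgb

    X = np.random.rand(300, 5)
    y = np.random.rand(300)

    base = {"objective": "regression", "bagging_fraction": 0.8, "bagging_freq": 1}
    pred_seed = lgb.train(dict(base, seed=42),
                          lgb.Dataset(X, label=y), num_boost_round=20).predict(X)
    pred_alias = lgb.train(dict(base, random_state=42),
                           lgb.Dataset(X, label=y), num_boost_round=20).predict(X)

    # with random_state mapped to seed, both runs should be configured
    # identically and produce the same model
    np.testing.assert_allclose(pred_seed, pred_alias)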