Commit 8fd71c01 authored by Cass, committed by Nikita Titov
Browse files

[CLI][python-package][docs] Add n_estimators as num_iteration alias (#1079)

* Add n_estimators as num_iteration alias

Scikit-Learn's ensemble methods use the term `n_estimators` for the number of
iterations of training models. To make it more accessible for newcomers who are
familiar with Scikit-Learn, it would help if the Parameters page mentioned
`n_estimators` and which parameter it maps to within LightGBM's API.

Addresses discussion brought up in #954

* Add n_estimators as num_iterations alias

Adds n_estimators as an alias for num_iterations in the CLI as well as Python
libs. Additionally bumps the default for n_estimators in the Sklearn API to 100
estimators.
parent 1572267d
...@@ -115,7 +115,7 @@ Core Parameters ...@@ -115,7 +115,7 @@ Core Parameters
- support multi validation data, separate by ``,`` - support multi validation data, separate by ``,``
- ``num_iterations``, default=\ ``100``, type=int, - ``num_iterations``, default=\ ``100``, type=int,
alias=\ ``num_iteration``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round`` alias=\ ``num_iteration``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``
- number of boosting iterations - number of boosting iterations
......
...@@ -402,6 +402,7 @@ struct ParameterAlias { ...@@ -402,6 +402,7 @@ struct ParameterAlias {
{ "num_trees", "num_iterations" }, { "num_trees", "num_iterations" },
{ "num_rounds", "num_iterations" }, { "num_rounds", "num_iterations" },
{ "num_boost_round", "num_iterations" }, { "num_boost_round", "num_iterations" },
{ "n_estimators", "num_iterations"},
{ "sub_row", "bagging_fraction" }, { "sub_row", "bagging_fraction" },
{ "subsample", "bagging_fraction" }, { "subsample", "bagging_fraction" },
{ "subsample_freq", "bagging_freq" }, { "subsample_freq", "bagging_freq" },
......
...@@ -93,7 +93,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -93,7 +93,7 @@ def train(params, train_set, num_boost_round=100,
The trained Booster model. The trained Booster model.
""" """
# create predictor first # create predictor first
for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds"]: for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
if alias in params: if alias in params:
num_boost_round = int(params.pop(alias)) num_boost_round = int(params.pop(alias))
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
...@@ -307,7 +307,7 @@ def _agg_cv_result(raw_results): ...@@ -307,7 +307,7 @@ def _agg_cv_result(raw_results):
return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()] return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]
def cv(params, train_set, num_boost_round=10, def cv(params, train_set, num_boost_round=100,
folds=None, nfold=5, stratified=True, shuffle=True, folds=None, nfold=5, stratified=True, shuffle=True,
metrics=None, fobj=None, feval=None, init_model=None, metrics=None, fobj=None, feval=None, init_model=None,
feature_name='auto', categorical_feature='auto', feature_name='auto', categorical_feature='auto',
...@@ -322,7 +322,7 @@ def cv(params, train_set, num_boost_round=10, ...@@ -322,7 +322,7 @@ def cv(params, train_set, num_boost_round=10,
Parameters for Booster. Parameters for Booster.
train_set : Dataset train_set : Dataset
Data to be trained on. Data to be trained on.
num_boost_round : int, optional (default=10) num_boost_round : int, optional (default=100)
Number of boosting iterations. Number of boosting iterations.
folds : a generator or iterator of (train_idx, test_idx) tuples or None, optional (default=None) folds : a generator or iterator of (train_idx, test_idx) tuples or None, optional (default=None)
The train and test indices for the each fold. The train and test indices for the each fold.
...@@ -383,7 +383,7 @@ def cv(params, train_set, num_boost_round=10, ...@@ -383,7 +383,7 @@ def cv(params, train_set, num_boost_round=10,
if not isinstance(train_set, Dataset): if not isinstance(train_set, Dataset):
raise TypeError("Traninig only accepts Dataset object") raise TypeError("Traninig only accepts Dataset object")
for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds"]: for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds", "n_estimators"]:
if alias in params: if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
num_boost_round = params.pop(alias) num_boost_round = params.pop(alias)
......
...@@ -133,7 +133,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -133,7 +133,7 @@ class LGBMModel(_LGBMModelBase):
"""Implementation of the scikit-learn API for LightGBM.""" """Implementation of the scikit-learn API for LightGBM."""
def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1, def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=10, learning_rate=0.1, n_estimators=100,
subsample_for_bin=200000, objective=None, subsample_for_bin=200000, objective=None,
min_split_gain=0., min_child_weight=1e-3, min_child_samples=20, min_split_gain=0., min_child_weight=1e-3, min_child_samples=20,
subsample=1., subsample_freq=1, colsample_bytree=1., subsample=1., subsample_freq=1, colsample_bytree=1.,
...@@ -154,7 +154,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -154,7 +154,7 @@ class LGBMModel(_LGBMModelBase):
Maximum tree depth for base learners, -1 means no limit. Maximum tree depth for base learners, -1 means no limit.
learning_rate : float, optional (default=0.1) learning_rate : float, optional (default=0.1)
Boosting learning rate. Boosting learning rate.
n_estimators : int, optional (default=10) n_estimators : int, optional (default=100)
Number of boosted trees to fit. Number of boosted trees to fit.
subsample_for_bin : int, optional (default=50000) subsample_for_bin : int, optional (default=50000)
Number of samples for constructing bins. Number of samples for constructing bins.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment