Unverified Commit c6199311 authored by Nikita Titov, committed by GitHub
Browse files

[python][docs] Refer to string type as `str` and add commas in `list of ...` types (#4557)

* Refer to string type as `str` and add commas in `list of ...` types

* update `libpath.py` too
parent 2067bdc5
...@@ -22,7 +22,7 @@ def get_runs(trigger_phrase): ...@@ -22,7 +22,7 @@ def get_runs(trigger_phrase):
Parameters Parameters
---------- ----------
trigger_phrase : string trigger_phrase : str
Code phrase that triggers workflow. Code phrase that triggers workflow.
Returns Returns
...@@ -55,7 +55,7 @@ def get_status(runs): ...@@ -55,7 +55,7 @@ def get_status(runs):
Returns Returns
------- -------
status : string status : str
The most recent status of workflow. The most recent status of workflow.
Can be 'success', 'failure' or 'in-progress'. Can be 'success', 'failure' or 'in-progress'.
""" """
......
...@@ -147,7 +147,7 @@ def loglikelihood(preds, train_data): ...@@ -147,7 +147,7 @@ def loglikelihood(preds, train_data):
# self-defined eval metric # self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, eval_result: float, is_higher_better: bool # f(preds: array, train_data: Dataset) -> name: str, eval_result: float, is_higher_better: bool
# binary error # binary error
# NOTE: when you do customized loss function, the default prediction value is margin # NOTE: when you do customized loss function, the default prediction value is margin
# This may make built-in evaluation metric calculate wrong results # This may make built-in evaluation metric calculate wrong results
...@@ -171,7 +171,7 @@ print('Finished 40 - 50 rounds with self-defined objective function and eval met ...@@ -171,7 +171,7 @@ print('Finished 40 - 50 rounds with self-defined objective function and eval met
# another self-defined eval metric # another self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, eval_result: float, is_higher_better: bool # f(preds: array, train_data: Dataset) -> name: str, eval_result: float, is_higher_better: bool
# accuracy # accuracy
# NOTE: when you do customized loss function, the default prediction value is margin # NOTE: when you do customized loss function, the default prediction value is margin
# This may make built-in evaluation metric calculate wrong results # This may make built-in evaluation metric calculate wrong results
......
...@@ -57,9 +57,9 @@ def experiment(objective, label_type, data): ...@@ -57,9 +57,9 @@ def experiment(objective, label_type, data):
Parameters Parameters
---------- ----------
objective : string 'binary' or 'xentropy' objective : {'binary', 'xentropy'}
Objective function. Objective function.
label_type : string 'binary' or 'probability' label_type : {'binary', 'probability'}
Type of the label. Type of the label.
data : dict data : dict
Data for training. Data for training.
......
...@@ -41,7 +41,7 @@ print(f'Feature importances: {list(gbm.feature_importances_)}') ...@@ -41,7 +41,7 @@ print(f'Feature importances: {list(gbm.feature_importances_)}')
# self-defined eval metric # self-defined eval metric
# f(y_true: array, y_pred: array) -> name: string, eval_result: float, is_higher_better: bool # f(y_true: array, y_pred: array) -> name: str, eval_result: float, is_higher_better: bool
# Root Mean Squared Logarithmic Error (RMSLE) # Root Mean Squared Logarithmic Error (RMSLE)
def rmsle(y_true, y_pred): def rmsle(y_true, y_pred):
return 'RMSLE', np.sqrt(np.mean(np.power(np.log1p(y_pred) - np.log1p(y_true), 2))), False return 'RMSLE', np.sqrt(np.mean(np.power(np.log1p(y_pred) - np.log1p(y_true), 2))), False
...@@ -56,7 +56,7 @@ gbm.fit(X_train, y_train, ...@@ -56,7 +56,7 @@ gbm.fit(X_train, y_train,
# another self-defined eval metric # another self-defined eval metric
# f(y_true: array, y_pred: array) -> name: string, eval_result: float, is_higher_better: bool # f(y_true: array, y_pred: array) -> name: str, eval_result: float, is_higher_better: bool
# Relative Absolute Error (RAE) # Relative Absolute Error (RAE)
def rae(y_true, y_pred): def rae(y_true, y_pred):
return 'RAE', np.sum(np.abs(y_pred - y_true)) / np.sum(np.abs(np.mean(y_true) - y_true)), False return 'RAE', np.sum(np.abs(y_pred - y_true)) / np.sum(np.abs(np.mean(y_true) - y_true)), False
......
...@@ -13,7 +13,7 @@ def check_dependicies(objdump_string: str) -> None: ...@@ -13,7 +13,7 @@ def check_dependicies(objdump_string: str) -> None:
Parameters Parameters
---------- ----------
objdump_string : string objdump_string : str
The dynamic symbol table entries of the file (result of `objdump -T` command). The dynamic symbol table entries of the file (result of `objdump -T` command).
""" """
GLIBC_version = re.compile(r'0{16}[ \t]+GLIBC_(\d{1,2})[.](\d{1,3})[.]?\d{,3}[ \t]+') GLIBC_version = re.compile(r'0{16}[ \t]+GLIBC_(\d{1,2})[.](\d{1,3})[.]?\d{,3}[ \t]+')
......
...@@ -139,7 +139,7 @@ def parse_check( ...@@ -139,7 +139,7 @@ def parse_check(
Parameters Parameters
---------- ----------
check : string check : str
String representation of the constraint. String representation of the constraint.
reverse : bool, optional (default=False) reverse : bool, optional (default=False)
Whether to reverse the sign of the constraint. Whether to reverse the sign of the constraint.
...@@ -171,16 +171,16 @@ def set_one_var_from_string( ...@@ -171,16 +171,16 @@ def set_one_var_from_string(
Parameters Parameters
---------- ----------
name : string name : str
Name of the parameter. Name of the parameter.
param_type : string param_type : str
Type of the parameter. Type of the parameter.
checks : list checks : list
Constraints of the parameter. Constraints of the parameter.
Returns Returns
------- -------
ret : string ret : str
Lines of auto config file with getting and checks of one parameter value. Lines of auto config file with getting and checks of one parameter value.
""" """
ret = "" ret = ""
......
...@@ -684,7 +684,7 @@ class _InnerPredictor: ...@@ -684,7 +684,7 @@ class _InnerPredictor:
Parameters Parameters
---------- ----------
model_file : string, pathlib.Path or None, optional (default=None) model_file : str, pathlib.Path or None, optional (default=None)
Path to the model file. Path to the model file.
booster_handle : object or None, optional (default=None) booster_handle : object or None, optional (default=None)
Handle of Booster. Handle of Booster.
...@@ -742,9 +742,9 @@ class _InnerPredictor: ...@@ -742,9 +742,9 @@ class _InnerPredictor:
Parameters Parameters
---------- ----------
data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse
Data source for prediction. Data source for prediction.
When data type is string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration to predict. Start index of the iteration to predict.
num_iteration : int, optional (default=-1) num_iteration : int, optional (default=-1)
...@@ -1130,9 +1130,9 @@ class Dataset: ...@@ -1130,9 +1130,9 @@ class Dataset:
Parameters Parameters
---------- ----------
data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays
Data source of Dataset. Data source of Dataset.
If string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file.
label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None) label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None)
Label of the data. Label of the data.
reference : Dataset or None, optional (default=None) reference : Dataset or None, optional (default=None)
...@@ -1149,13 +1149,13 @@ class Dataset: ...@@ -1149,13 +1149,13 @@ class Dataset:
Init score for Dataset. Init score for Dataset.
silent : bool, optional (default=False) silent : bool, optional (default=False)
Whether to print messages during construction. Whether to print messages during construction.
feature_name : list of strings or 'auto', optional (default="auto") feature_name : list of str, or 'auto', optional (default="auto")
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used. If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of strings or int, or 'auto', optional (default="auto") categorical_feature : list of str or int, or 'auto', optional (default="auto")
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
...@@ -1774,9 +1774,9 @@ class Dataset: ...@@ -1774,9 +1774,9 @@ class Dataset:
Parameters Parameters
---------- ----------
data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays
Data source of Dataset. Data source of Dataset.
If string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file.
label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None) label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None)
Label of the data. Label of the data.
weight : list, numpy 1-D array, pandas Series or None, optional (default=None) weight : list, numpy 1-D array, pandas Series or None, optional (default=None)
...@@ -1841,7 +1841,7 @@ class Dataset: ...@@ -1841,7 +1841,7 @@ class Dataset:
Parameters Parameters
---------- ----------
filename : string or pathlib.Path filename : str or pathlib.Path
Name of the output file. Name of the output file.
Returns Returns
...@@ -1892,7 +1892,7 @@ class Dataset: ...@@ -1892,7 +1892,7 @@ class Dataset:
Parameters Parameters
---------- ----------
field_name : string field_name : str
The field name of the information. The field name of the information.
data : list, numpy 1-D array, pandas Series or None data : list, numpy 1-D array, pandas Series or None
The array of data to be set. The array of data to be set.
...@@ -1941,7 +1941,7 @@ class Dataset: ...@@ -1941,7 +1941,7 @@ class Dataset:
Parameters Parameters
---------- ----------
field_name : string field_name : str
The field name of the information. The field name of the information.
Returns Returns
...@@ -1978,7 +1978,7 @@ class Dataset: ...@@ -1978,7 +1978,7 @@ class Dataset:
Parameters Parameters
---------- ----------
categorical_feature : list of int or strings categorical_feature : list of int or str
Names or indices of categorical features. Names or indices of categorical features.
Returns Returns
...@@ -2056,7 +2056,7 @@ class Dataset: ...@@ -2056,7 +2056,7 @@ class Dataset:
Parameters Parameters
---------- ----------
feature_name : list of strings feature_name : list of str
Feature names. Feature names.
Returns Returns
...@@ -2241,7 +2241,7 @@ class Dataset: ...@@ -2241,7 +2241,7 @@ class Dataset:
Returns Returns
------- -------
data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays or None data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays or None
Raw data used in the Dataset construction. Raw data used in the Dataset construction.
""" """
if self.handle is None: if self.handle is None:
...@@ -2445,7 +2445,7 @@ class Dataset: ...@@ -2445,7 +2445,7 @@ class Dataset:
Parameters Parameters
---------- ----------
filename : string or pathlib.Path filename : str or pathlib.Path
Name of the output file. Name of the output file.
Returns Returns
...@@ -2471,9 +2471,9 @@ class Booster: ...@@ -2471,9 +2471,9 @@ class Booster:
Parameters for Booster. Parameters for Booster.
train_set : Dataset or None, optional (default=None) train_set : Dataset or None, optional (default=None)
Training dataset. Training dataset.
model_file : string, pathlib.Path or None, optional (default=None) model_file : str, pathlib.Path or None, optional (default=None)
Path to the model file. Path to the model file.
model_str : string or None, optional (default=None) model_str : str or None, optional (default=None)
Model will be loaded from this string. Model will be loaded from this string.
silent : bool, optional (default=False) silent : bool, optional (default=False)
Whether to print messages during construction. Whether to print messages during construction.
...@@ -2650,7 +2650,7 @@ class Booster: ...@@ -2650,7 +2650,7 @@ class Booster:
Parameters Parameters
---------- ----------
machines : list, set or string machines : list, set or str
Names of machines. Names of machines.
local_listen_port : int, optional (default=12400) local_listen_port : int, optional (default=12400)
TCP listen port for local machines. TCP listen port for local machines.
...@@ -2692,18 +2692,18 @@ class Booster: ...@@ -2692,18 +2692,18 @@ class Booster:
- ``tree_index`` : int64, which tree a node belongs to. 0-based, so a value of ``6``, for example, means "this node is in the 7th tree". - ``tree_index`` : int64, which tree a node belongs to. 0-based, so a value of ``6``, for example, means "this node is in the 7th tree".
- ``node_depth`` : int64, how far a node is from the root of the tree. The root node has a value of ``1``, its direct children are ``2``, etc. - ``node_depth`` : int64, how far a node is from the root of the tree. The root node has a value of ``1``, its direct children are ``2``, etc.
- ``node_index`` : string, unique identifier for a node. - ``node_index`` : str, unique identifier for a node.
- ``left_child`` : string, ``node_index`` of the child node to the left of a split. ``None`` for leaf nodes. - ``left_child`` : str, ``node_index`` of the child node to the left of a split. ``None`` for leaf nodes.
- ``right_child`` : string, ``node_index`` of the child node to the right of a split. ``None`` for leaf nodes. - ``right_child`` : str, ``node_index`` of the child node to the right of a split. ``None`` for leaf nodes.
- ``parent_index`` : string, ``node_index`` of this node's parent. ``None`` for the root node. - ``parent_index`` : str, ``node_index`` of this node's parent. ``None`` for the root node.
- ``split_feature`` : string, name of the feature used for splitting. ``None`` for leaf nodes. - ``split_feature`` : str, name of the feature used for splitting. ``None`` for leaf nodes.
- ``split_gain`` : float64, gain from adding this split to the tree. ``NaN`` for leaf nodes. - ``split_gain`` : float64, gain from adding this split to the tree. ``NaN`` for leaf nodes.
- ``threshold`` : float64, value of the feature used to decide which side of the split a record will go down. ``NaN`` for leaf nodes. - ``threshold`` : float64, value of the feature used to decide which side of the split a record will go down. ``NaN`` for leaf nodes.
- ``decision_type`` : string, logical operator describing how to compare a value to ``threshold``. - ``decision_type`` : str, logical operator describing how to compare a value to ``threshold``.
For example, ``split_feature = "Column_10", threshold = 15, decision_type = "<="`` means that For example, ``split_feature = "Column_10", threshold = 15, decision_type = "<="`` means that
records where ``Column_10 <= 15`` follow the left side of the split, otherwise follows the right side of the split. ``None`` for leaf nodes. records where ``Column_10 <= 15`` follow the left side of the split, otherwise follows the right side of the split. ``None`` for leaf nodes.
- ``missing_direction`` : string, split direction that missing values should go to. ``None`` for leaf nodes. - ``missing_direction`` : str, split direction that missing values should go to. ``None`` for leaf nodes.
- ``missing_type`` : string, describes what types of values are treated as missing. - ``missing_type`` : str, describes what types of values are treated as missing.
- ``value`` : float64, predicted value for this leaf node, multiplied by the learning rate. - ``value`` : float64, predicted value for this leaf node, multiplied by the learning rate.
- ``weight`` : float64 or int64, sum of hessian (second-order derivative of objective), summed over observations that fall in this node. - ``weight`` : float64 or int64, sum of hessian (second-order derivative of objective), summed over observations that fall in this node.
- ``count`` : int64, number of records in the training data that fall into this node. - ``count`` : int64, number of records in the training data that fall into this node.
...@@ -2826,7 +2826,7 @@ class Booster: ...@@ -2826,7 +2826,7 @@ class Booster:
Parameters Parameters
---------- ----------
name : string name : str
Name for the training Dataset. Name for the training Dataset.
Returns Returns
...@@ -2844,7 +2844,7 @@ class Booster: ...@@ -2844,7 +2844,7 @@ class Booster:
---------- ----------
data : Dataset data : Dataset
Validation data. Validation data.
name : string name : str
Name of validation data. Name of validation data.
Returns Returns
...@@ -3086,7 +3086,7 @@ class Booster: ...@@ -3086,7 +3086,7 @@ class Booster:
---------- ----------
data : Dataset data : Dataset
Data for the evaluating. Data for the evaluating.
name : string name : str
Name of the data. Name of the data.
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
...@@ -3099,7 +3099,7 @@ class Booster: ...@@ -3099,7 +3099,7 @@ class Booster:
e.g. they are raw margin instead of probability of positive class for binary task in this case. e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset eval_data : Dataset
The evaluation dataset. The evaluation dataset.
eval_name : string eval_name : str
The name of evaluation function (without whitespace). The name of evaluation function (without whitespace).
eval_result : float eval_result : float
The eval result. The eval result.
...@@ -3147,7 +3147,7 @@ class Booster: ...@@ -3147,7 +3147,7 @@ class Booster:
e.g. they are raw margin instead of probability of positive class for binary task in this case. e.g. they are raw margin instead of probability of positive class for binary task in this case.
train_data : Dataset train_data : Dataset
The training dataset. The training dataset.
eval_name : string eval_name : str
The name of evaluation function (without whitespace). The name of evaluation function (without whitespace).
eval_result : float eval_result : float
The eval result. The eval result.
...@@ -3180,7 +3180,7 @@ class Booster: ...@@ -3180,7 +3180,7 @@ class Booster:
e.g. they are raw margin instead of probability of positive class for binary task in this case. e.g. they are raw margin instead of probability of positive class for binary task in this case.
valid_data : Dataset valid_data : Dataset
The validation dataset. The validation dataset.
eval_name : string eval_name : str
The name of evaluation function (without whitespace). The name of evaluation function (without whitespace).
eval_result : float eval_result : float
The eval result. The eval result.
...@@ -3203,7 +3203,7 @@ class Booster: ...@@ -3203,7 +3203,7 @@ class Booster:
Parameters Parameters
---------- ----------
filename : string or pathlib.Path filename : str or pathlib.Path
Filename to save Booster. Filename to save Booster.
num_iteration : int or None, optional (default=None) num_iteration : int or None, optional (default=None)
Index of the iteration that should be saved. Index of the iteration that should be saved.
...@@ -3211,7 +3211,7 @@ class Booster: ...@@ -3211,7 +3211,7 @@ class Booster:
If <= 0, all iterations are saved. If <= 0, all iterations are saved.
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration that should be saved. Start index of the iteration that should be saved.
importance_type : string, optional (default="split") importance_type : str, optional (default="split")
What type of feature importance should be saved. What type of feature importance should be saved.
If "split", result contains numbers of times the feature is used in a model. If "split", result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If "gain", result contains total gains of splits which use the feature.
...@@ -3260,7 +3260,7 @@ class Booster: ...@@ -3260,7 +3260,7 @@ class Booster:
Parameters Parameters
---------- ----------
model_str : string model_str : str
Model will be loaded from this string. Model will be loaded from this string.
verbose : bool, optional (default=True) verbose : bool, optional (default=True)
Whether to print messages while loading model. Whether to print messages while loading model.
...@@ -3300,14 +3300,14 @@ class Booster: ...@@ -3300,14 +3300,14 @@ class Booster:
If <= 0, all iterations are saved. If <= 0, all iterations are saved.
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration that should be saved. Start index of the iteration that should be saved.
importance_type : string, optional (default="split") importance_type : str, optional (default="split")
What type of feature importance should be saved. What type of feature importance should be saved.
If "split", result contains numbers of times the feature is used in a model. If "split", result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If "gain", result contains total gains of splits which use the feature.
Returns Returns
------- -------
str_repr : string str_repr : str
String representation of Booster. String representation of Booster.
""" """
if num_iteration is None: if num_iteration is None:
...@@ -3353,7 +3353,7 @@ class Booster: ...@@ -3353,7 +3353,7 @@ class Booster:
If <= 0, all iterations are dumped. If <= 0, all iterations are dumped.
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration that should be dumped. Start index of the iteration that should be dumped.
importance_type : string, optional (default="split") importance_type : str, optional (default="split")
What type of feature importance should be dumped. What type of feature importance should be dumped.
If "split", result contains numbers of times the feature is used in a model. If "split", result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If "gain", result contains total gains of splits which use the feature.
...@@ -3412,9 +3412,9 @@ class Booster: ...@@ -3412,9 +3412,9 @@ class Booster:
Parameters Parameters
---------- ----------
data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse
Data source for prediction. Data source for prediction.
If string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration to predict. Start index of the iteration to predict.
If <= 0, starts from the first iteration. If <= 0, starts from the first iteration.
...@@ -3440,7 +3440,7 @@ class Booster: ...@@ -3440,7 +3440,7 @@ class Booster:
data_has_header : bool, optional (default=False) data_has_header : bool, optional (default=False)
Whether the data has header. Whether the data has header.
Used only if data is string. Used only if data is str.
is_reshape : bool, optional (default=True) is_reshape : bool, optional (default=True)
If True, result is reshaped to [nrow, ncol]. If True, result is reshaped to [nrow, ncol].
**kwargs **kwargs
...@@ -3467,9 +3467,9 @@ class Booster: ...@@ -3467,9 +3467,9 @@ class Booster:
Parameters Parameters
---------- ----------
data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse
Data source for refit. Data source for refit.
If string or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
label : list, numpy 1-D array or pandas Series / one-column DataFrame label : list, numpy 1-D array or pandas Series / one-column DataFrame
Label for refit. Label for refit.
decay_rate : float, optional (default=0.9) decay_rate : float, optional (default=0.9)
...@@ -3603,7 +3603,7 @@ class Booster: ...@@ -3603,7 +3603,7 @@ class Booster:
Parameters Parameters
---------- ----------
importance_type : string, optional (default="split") importance_type : str, optional (default="split")
How the importance is calculated. How the importance is calculated.
If "split", result contains numbers of times the feature is used in a model. If "split", result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If "gain", result contains total gains of splits which use the feature.
...@@ -3636,20 +3636,20 @@ class Booster: ...@@ -3636,20 +3636,20 @@ class Booster:
Parameters Parameters
---------- ----------
feature : int or string feature : int or str
The feature name or index the histogram is calculated for. The feature name or index the histogram is calculated for.
If int, interpreted as index. If int, interpreted as index.
If string, interpreted as name. If str, interpreted as name.
.. warning:: .. warning::
Categorical features are not supported. Categorical features are not supported.
bins : int, string or None, optional (default=None) bins : int, str or None, optional (default=None)
The maximum number of bins. The maximum number of bins.
If None, or int and > number of unique split values and ``xgboost_style=True``, If None, or int and > number of unique split values and ``xgboost_style=True``,
the number of bins equals number of unique split values. the number of bins equals number of unique split values.
If string, it should be one from the list of the supported values by ``numpy.histogram()`` function. If str, it should be one from the list of the supported values by ``numpy.histogram()`` function.
xgboost_style : bool, optional (default=False) xgboost_style : bool, optional (default=False)
Whether the returned result should be in the same form as it is in XGBoost. Whether the returned result should be in the same form as it is in XGBoost.
If False, the returned value is tuple of 2 numpy arrays as it is in ``numpy.histogram()`` function. If False, the returned value is tuple of 2 numpy arrays as it is in ``numpy.histogram()`` function.
...@@ -3816,12 +3816,12 @@ class Booster: ...@@ -3816,12 +3816,12 @@ class Booster:
Parameters Parameters
---------- ----------
key : string key : str
The name of the attribute. The name of the attribute.
Returns Returns
------- -------
value : string or None value : str or None
The attribute value. The attribute value.
Returns None if attribute does not exist. Returns None if attribute does not exist.
""" """
......
...@@ -428,12 +428,12 @@ def _train( ...@@ -428,12 +428,12 @@ def _train(
sum(group) = n_samples. sum(group) = n_samples.
For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups,
where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
eval_set : list of (X, y) tuples of Dask data collections or None, optional (default=None) eval_set : list of (X, y) tuples of Dask data collections, or None, optional (default=None)
List of (X, y) tuple pairs to use as validation sets. List of (X, y) tuple pairs to use as validation sets.
Note, that not all workers may receive chunks of every eval set within ``eval_set``. When the returned Note, that not all workers may receive chunks of every eval set within ``eval_set``. When the returned
lightgbm estimator is not trained using any chunks of a particular eval set, its corresponding component lightgbm estimator is not trained using any chunks of a particular eval set, its corresponding component
of evals_result_ and best_score_ will be 'not_evaluated'. of evals_result_ and best_score_ will be 'not_evaluated'.
eval_names : list of strings or None, optional (default=None) eval_names : list of str, or None, optional (default=None)
Names of eval_set. Names of eval_set.
eval_sample_weight : list of Dask Arrays, Dask Series or None, optional (default=None) eval_sample_weight : list of Dask Arrays, Dask Series or None, optional (default=None)
Weights for each validation set in eval_set. Weights for each validation set in eval_set.
......
...@@ -50,9 +50,9 @@ def train( ...@@ -50,9 +50,9 @@ def train(
Data to be trained on. Data to be trained on.
num_boost_round : int, optional (default=100) num_boost_round : int, optional (default=100)
Number of boosting iterations. Number of boosting iterations.
valid_sets : list of Datasets or None, optional (default=None) valid_sets : list of Datasets, or None, optional (default=None)
List of data to be evaluated on during training. List of data to be evaluated on during training.
valid_names : list of strings or None, optional (default=None) valid_names : list of str, or None, optional (default=None)
Names of ``valid_sets``. Names of ``valid_sets``.
fobj : callable or None, optional (default=None) fobj : callable or None, optional (default=None)
Customized objective function. Customized objective function.
...@@ -76,7 +76,7 @@ def train( ...@@ -76,7 +76,7 @@ def train(
If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i] If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well. and you should group grad and hess in this way as well.
feval : callable, list of callable functions or None, optional (default=None) feval : callable, list of callable functions, or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Each evaluation function should accept two parameters: preds, train_data, Each evaluation function should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples. and return (eval_name, eval_result, is_higher_better) or list of such tuples.
...@@ -87,7 +87,7 @@ def train( ...@@ -87,7 +87,7 @@ def train(
e.g. they are raw margin instead of probability of positive class for binary task in this case. e.g. they are raw margin instead of probability of positive class for binary task in this case.
train_data : Dataset train_data : Dataset
The training dataset. The training dataset.
eval_name : string eval_name : str
The name of evaluation function (without whitespace). The name of evaluation function (without whitespace).
eval_result : float eval_result : float
The eval result. The eval result.
...@@ -98,15 +98,15 @@ def train( ...@@ -98,15 +98,15 @@ def train(
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set the ``metric`` parameter to the string ``"None"`` in ``params``. set the ``metric`` parameter to the string ``"None"`` in ``params``.
init_model : string, pathlib.Path, Booster or None, optional (default=None) init_model : str, pathlib.Path, Booster or None, optional (default=None)
Filename of LightGBM model or Booster instance used for continued training. Filename of LightGBM model or Booster instance used for continued training.
feature_name : list of strings or 'auto', optional (default="auto") feature_name : list of str, or 'auto', optional (default="auto")
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data column names are used. If 'auto' and data is pandas DataFrame, data column names are used.
categorical_feature : list of strings or int, or 'auto', optional (default="auto") categorical_feature : list of str or int, or 'auto', optional (default="auto")
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
...@@ -156,7 +156,7 @@ def train( ...@@ -156,7 +156,7 @@ def train(
When your model is very large and causes a memory error, When your model is very large and causes a memory error,
you can try to set this param to ``True`` to avoid the model conversion performed during the internal call of ``model_to_string``. you can try to set this param to ``True`` to avoid the model conversion performed during the internal call of ``model_to_string``.
You can still use _InnerPredictor as ``init_model`` to continue training in the future. You can still use _InnerPredictor as ``init_model`` to continue training in the future.
callbacks : list of callables or None, optional (default=None) callbacks : list of callables, or None, optional (default=None)
List of callback functions that are applied at each iteration. List of callback functions that are applied at each iteration.
See Callbacks in Python API for more information. See Callbacks in Python API for more information.
...@@ -447,7 +447,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -447,7 +447,7 @@ def cv(params, train_set, num_boost_round=100,
Whether to perform stratified sampling. Whether to perform stratified sampling.
shuffle : bool, optional (default=True) shuffle : bool, optional (default=True)
Whether to shuffle before splitting data. Whether to shuffle before splitting data.
metrics : string, list of strings or None, optional (default=None) metrics : str, list of str, or None, optional (default=None)
Evaluation metrics to be monitored during CV. Evaluation metrics to be monitored during CV.
If not None, the metric in ``params`` will be overridden. If not None, the metric in ``params`` will be overridden.
fobj : callable or None, optional (default=None) fobj : callable or None, optional (default=None)
...@@ -472,7 +472,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -472,7 +472,7 @@ def cv(params, train_set, num_boost_round=100,
If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i] If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well. and you should group grad and hess in this way as well.
feval : callable, list of callable functions or None, optional (default=None) feval : callable, list of callable functions, or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Each evaluation function should accept two parameters: preds, train_data, Each evaluation function should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples. and return (eval_name, eval_result, is_higher_better) or list of such tuples.
...@@ -483,7 +483,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -483,7 +483,7 @@ def cv(params, train_set, num_boost_round=100,
e.g. they are raw margin instead of probability of positive class for binary task in this case. e.g. they are raw margin instead of probability of positive class for binary task in this case.
train_data : Dataset train_data : Dataset
The training dataset. The training dataset.
eval_name : string eval_name : str
The name of evaluation function (without whitespace). The name of evaluation function (without whitespace).
eval_result : float eval_result : float
The eval result. The eval result.
...@@ -494,15 +494,15 @@ def cv(params, train_set, num_boost_round=100, ...@@ -494,15 +494,15 @@ def cv(params, train_set, num_boost_round=100,
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set ``metrics`` to the string ``"None"``. set ``metrics`` to the string ``"None"``.
init_model : string, pathlib.Path, Booster or None, optional (default=None) init_model : str, pathlib.Path, Booster or None, optional (default=None)
Filename of LightGBM model or Booster instance used for continued training. Filename of LightGBM model or Booster instance used for continued training.
feature_name : list of strings or 'auto', optional (default="auto") feature_name : list of str, or 'auto', optional (default="auto")
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data column names are used. If 'auto' and data is pandas DataFrame, data column names are used.
categorical_feature : list of strings or int, or 'auto', optional (default="auto") categorical_feature : list of str or int, or 'auto', optional (default="auto")
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
...@@ -528,7 +528,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -528,7 +528,7 @@ def cv(params, train_set, num_boost_round=100,
Results are not affected by this parameter, and always contain std. Results are not affected by this parameter, and always contain std.
seed : int, optional (default=0) seed : int, optional (default=0)
Seed used to generate the folds (passed to numpy.random.seed). Seed used to generate the folds (passed to numpy.random.seed).
callbacks : list of callables or None, optional (default=None) callbacks : list of callables, or None, optional (default=None)
List of callback functions that are applied at each iteration. List of callback functions that are applied at each iteration.
See Callbacks in Python API for more information. See Callbacks in Python API for more information.
eval_train_metric : bool, optional (default=False) eval_train_metric : bool, optional (default=False)
......
...@@ -11,7 +11,7 @@ def find_lib_path() -> List[str]: ...@@ -11,7 +11,7 @@ def find_lib_path() -> List[str]:
Returns Returns
------- -------
lib_path: list of strings lib_path: list of str
List of all found library paths to LightGBM. List of all found library paths to LightGBM.
""" """
if environ.get('LIGHTGBM_BUILD_DOC', False): if environ.get('LIGHTGBM_BUILD_DOC', False):
......
...@@ -56,16 +56,16 @@ def plot_importance( ...@@ -56,16 +56,16 @@ def plot_importance(
Tuple passed to ``ax.xlim()``. Tuple passed to ``ax.xlim()``.
ylim : tuple of 2 elements or None, optional (default=None) ylim : tuple of 2 elements or None, optional (default=None)
Tuple passed to ``ax.ylim()``. Tuple passed to ``ax.ylim()``.
title : string or None, optional (default="Feature importance") title : str or None, optional (default="Feature importance")
Axes title. Axes title.
If None, title is disabled. If None, title is disabled.
xlabel : string or None, optional (default="Feature importance") xlabel : str or None, optional (default="Feature importance")
X-axis title label. X-axis title label.
If None, title is disabled. If None, title is disabled.
ylabel : string or None, optional (default="Features") ylabel : str or None, optional (default="Features")
Y-axis title label. Y-axis title label.
If None, title is disabled. If None, title is disabled.
importance_type : string, optional (default="split") importance_type : str, optional (default="split")
How the importance is calculated. How the importance is calculated.
If "split", result contains numbers of times the feature is used in a model. If "split", result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If "gain", result contains total gains of splits which use the feature.
...@@ -173,14 +173,14 @@ def plot_split_value_histogram( ...@@ -173,14 +173,14 @@ def plot_split_value_histogram(
---------- ----------
booster : Booster or LGBMModel booster : Booster or LGBMModel
Booster or LGBMModel instance of which feature split value histogram should be plotted. Booster or LGBMModel instance of which feature split value histogram should be plotted.
feature : int or string feature : int or str
The feature name or index the histogram is plotted for. The feature name or index the histogram is plotted for.
If int, interpreted as index. If int, interpreted as index.
If string, interpreted as name. If str, interpreted as name.
bins : int, string or None, optional (default=None) bins : int, str or None, optional (default=None)
The maximum number of bins. The maximum number of bins.
If None, the number of bins equals number of unique split values. If None, the number of bins equals number of unique split values.
If string, it should be one of the values supported by the ``numpy.histogram()`` function. If str, it should be one of the values supported by the ``numpy.histogram()`` function.
ax : matplotlib.axes.Axes or None, optional (default=None) ax : matplotlib.axes.Axes or None, optional (default=None)
Target axes instance. Target axes instance.
If None, new figure and axes will be created. If None, new figure and axes will be created.
...@@ -190,17 +190,17 @@ def plot_split_value_histogram( ...@@ -190,17 +190,17 @@ def plot_split_value_histogram(
Tuple passed to ``ax.xlim()``. Tuple passed to ``ax.xlim()``.
ylim : tuple of 2 elements or None, optional (default=None) ylim : tuple of 2 elements or None, optional (default=None)
Tuple passed to ``ax.ylim()``. Tuple passed to ``ax.ylim()``.
title : string or None, optional (default="Split value histogram for feature with @index/name@ @feature@") title : str or None, optional (default="Split value histogram for feature with @index/name@ @feature@")
Axes title. Axes title.
If None, title is disabled. If None, title is disabled.
@feature@ placeholder can be used, and it will be replaced with the value of ``feature`` parameter. @feature@ placeholder can be used, and it will be replaced with the value of ``feature`` parameter.
@index/name@ placeholder can be used, @index/name@ placeholder can be used,
and it will be replaced with ``index`` word in case of ``int`` type ``feature`` parameter and it will be replaced with ``index`` word in case of ``int`` type ``feature`` parameter
or ``name`` word in case of ``string`` type ``feature`` parameter. or ``name`` word in case of ``str`` type ``feature`` parameter.
xlabel : string or None, optional (default="Feature split value") xlabel : str or None, optional (default="Feature split value")
X-axis title label. X-axis title label.
If None, title is disabled. If None, title is disabled.
ylabel : string or None, optional (default="Count") ylabel : str or None, optional (default="Count")
Y-axis title label. Y-axis title label.
If None, title is disabled. If None, title is disabled.
figsize : tuple of 2 elements or None, optional (default=None) figsize : tuple of 2 elements or None, optional (default=None)
...@@ -288,11 +288,11 @@ def plot_metric( ...@@ -288,11 +288,11 @@ def plot_metric(
---------- ----------
booster : dict or LGBMModel booster : dict or LGBMModel
Dictionary returned from ``lightgbm.train()`` or LGBMModel instance. Dictionary returned from ``lightgbm.train()`` or LGBMModel instance.
metric : string or None, optional (default=None) metric : str or None, optional (default=None)
The metric name to plot. The metric name to plot.
Only one metric supported because different metrics have various scales. Only one metric supported because different metrics have various scales.
If None, the first metric is picked from the dictionary (according to hashcode). If None, the first metric is picked from the dictionary (according to hashcode).
dataset_names : list of strings or None, optional (default=None) dataset_names : list of str, or None, optional (default=None)
List of the dataset names which are used to calculate metric to plot. List of the dataset names which are used to calculate metric to plot.
If None, all datasets are used. If None, all datasets are used.
ax : matplotlib.axes.Axes or None, optional (default=None) ax : matplotlib.axes.Axes or None, optional (default=None)
...@@ -302,13 +302,13 @@ def plot_metric( ...@@ -302,13 +302,13 @@ def plot_metric(
Tuple passed to ``ax.xlim()``. Tuple passed to ``ax.xlim()``.
ylim : tuple of 2 elements or None, optional (default=None) ylim : tuple of 2 elements or None, optional (default=None)
Tuple passed to ``ax.ylim()``. Tuple passed to ``ax.ylim()``.
title : string or None, optional (default="Metric during training") title : str or None, optional (default="Metric during training")
Axes title. Axes title.
If None, title is disabled. If None, title is disabled.
xlabel : string or None, optional (default="Iterations") xlabel : str or None, optional (default="Iterations")
X-axis title label. X-axis title label.
If None, title is disabled. If None, title is disabled.
ylabel : string or None, optional (default="auto") ylabel : str or None, optional (default="auto")
Y-axis title label. Y-axis title label.
If 'auto', metric name is used. If 'auto', metric name is used.
If None, title is disabled. If None, title is disabled.
...@@ -536,7 +536,7 @@ def create_tree_digraph( ...@@ -536,7 +536,7 @@ def create_tree_digraph(
Booster or LGBMModel instance to be converted. Booster or LGBMModel instance to be converted.
tree_index : int, optional (default=0) tree_index : int, optional (default=0)
The index of a target tree to convert. The index of a target tree to convert.
show_info : list of strings or None, optional (default=None) show_info : list of str, or None, optional (default=None)
What information should be shown in nodes. What information should be shown in nodes.
- ``'split_gain'`` : gain from adding this split to the model - ``'split_gain'`` : gain from adding this split to the model
...@@ -548,7 +548,7 @@ def create_tree_digraph( ...@@ -548,7 +548,7 @@ def create_tree_digraph(
- ``'data_percentage'`` : percentage of training data that fall into this node - ``'data_percentage'`` : percentage of training data that fall into this node
precision : int or None, optional (default=3) precision : int or None, optional (default=3)
Used to restrict the display of floating point values to a certain precision. Used to restrict the display of floating point values to a certain precision.
orientation : string, optional (default='horizontal') orientation : str, optional (default='horizontal')
Orientation of the tree. Orientation of the tree.
Can be 'horizontal' or 'vertical'. Can be 'horizontal' or 'vertical'.
**kwargs **kwargs
...@@ -629,7 +629,7 @@ def plot_tree( ...@@ -629,7 +629,7 @@ def plot_tree(
Figure size. Figure size.
dpi : int or None, optional (default=None) dpi : int or None, optional (default=None)
Resolution of the figure. Resolution of the figure.
show_info : list of strings or None, optional (default=None) show_info : list of str, or None, optional (default=None)
What information should be shown in nodes. What information should be shown in nodes.
- ``'split_gain'`` : gain from adding this split to the model - ``'split_gain'`` : gain from adding this split to the model
...@@ -641,7 +641,7 @@ def plot_tree( ...@@ -641,7 +641,7 @@ def plot_tree(
- ``'data_percentage'`` : percentage of training data that fall into this node - ``'data_percentage'`` : percentage of training data that fall into this node
precision : int or None, optional (default=3) precision : int or None, optional (default=3)
Used to restrict the display of floating point values to a certain precision. Used to restrict the display of floating point values to a certain precision.
orientation : string, optional (default='horizontal') orientation : str, optional (default='horizontal')
Orientation of the tree. Orientation of the tree.
Can be 'horizontal' or 'vertical'. Can be 'horizontal' or 'vertical'.
**kwargs **kwargs
......
...@@ -136,7 +136,7 @@ class _EvalFunctionWrapper: ...@@ -136,7 +136,7 @@ class _EvalFunctionWrapper:
sum(group) = n_samples. sum(group) = n_samples.
For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups,
where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
eval_name : string eval_name : str
The name of evaluation function (without whitespace). The name of evaluation function (without whitespace).
eval_result : float eval_result : float
The eval result. The eval result.
...@@ -162,7 +162,7 @@ class _EvalFunctionWrapper: ...@@ -162,7 +162,7 @@ class _EvalFunctionWrapper:
Returns Returns
------- -------
eval_name : string eval_name : str
The name of evaluation function (without whitespace). The name of evaluation function (without whitespace).
eval_result : float eval_result : float
The eval result. The eval result.
...@@ -206,7 +206,7 @@ _lgbmmodel_doc_fit = ( ...@@ -206,7 +206,7 @@ _lgbmmodel_doc_fit = (
where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
eval_set : list or None, optional (default=None) eval_set : list or None, optional (default=None)
A list of (X, y) tuple pairs to use as validation sets. A list of (X, y) tuple pairs to use as validation sets.
eval_names : list of strings or None, optional (default=None) eval_names : list of str, or None, optional (default=None)
Names of eval_set. Names of eval_set.
eval_sample_weight : {eval_sample_weight_shape} eval_sample_weight : {eval_sample_weight_shape}
Weights of eval data. Weights of eval data.
...@@ -216,8 +216,8 @@ _lgbmmodel_doc_fit = ( ...@@ -216,8 +216,8 @@ _lgbmmodel_doc_fit = (
Init score of eval data. Init score of eval data.
eval_group : {eval_group_shape} eval_group : {eval_group_shape}
Group data of eval data. Group data of eval data.
eval_metric : string, callable, list or None, optional (default=None) eval_metric : str, callable, list or None, optional (default=None)
If string, it should be a built-in evaluation metric to use. If str, it should be a built-in evaluation metric to use.
If callable, it should be a custom evaluation metric, see note below for more details. If callable, it should be a custom evaluation metric, see note below for more details.
If list, it can be a list of built-in metrics, a list of custom evaluation metrics, or a mix of both. If list, it can be a list of built-in metrics, a list of custom evaluation metrics, or a mix of both.
In either case, the ``metric`` from the model parameters will be evaluated and used as well. In either case, the ``metric`` from the model parameters will be evaluated and used as well.
...@@ -241,22 +241,22 @@ _lgbmmodel_doc_fit = ( ...@@ -241,22 +241,22 @@ _lgbmmodel_doc_fit = (
With ``verbose`` = 4 and at least one item in ``eval_set``, With ``verbose`` = 4 and at least one item in ``eval_set``,
an evaluation metric is printed every 4 (instead of 1) boosting stages. an evaluation metric is printed every 4 (instead of 1) boosting stages.
feature_name : list of strings or 'auto', optional (default='auto') feature_name : list of str, or 'auto', optional (default='auto')
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data column names are used. If 'auto' and data is pandas DataFrame, data column names are used.
categorical_feature : list of strings or int, or 'auto', optional (default='auto') categorical_feature : list of str or int, or 'auto', optional (default='auto')
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used. If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
The output cannot be monotonically constrained with respect to a categorical feature. The output cannot be monotonically constrained with respect to a categorical feature.
callbacks : list of callback functions or None, optional (default=None) callbacks : list of callback functions, or None, optional (default=None)
List of callback functions that are applied at each iteration. List of callback functions that are applied at each iteration.
See Callbacks in Python API for more information. See Callbacks in Python API for more information.
init_model : string, pathlib.Path, Booster, LGBMModel or None, optional (default=None) init_model : str, pathlib.Path, Booster, LGBMModel or None, optional (default=None)
Filename of LightGBM model, Booster instance or LGBMModel instance used for continued training. Filename of LightGBM model, Booster instance or LGBMModel instance used for continued training.
Returns Returns
...@@ -289,7 +289,7 @@ _lgbmmodel_doc_custom_eval_note = """ ...@@ -289,7 +289,7 @@ _lgbmmodel_doc_custom_eval_note = """
sum(group) = n_samples. sum(group) = n_samples.
For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups,
where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc. where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
eval_name : string eval_name : str
The name of evaluation function (without whitespace). The name of evaluation function (without whitespace).
eval_result : float eval_result : float
The eval result. The eval result.
...@@ -377,7 +377,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -377,7 +377,7 @@ class LGBMModel(_LGBMModelBase):
Parameters Parameters
---------- ----------
boosting_type : string, optional (default='gbdt') boosting_type : str, optional (default='gbdt')
'gbdt', traditional Gradient Boosting Decision Tree. 'gbdt', traditional Gradient Boosting Decision Tree.
'dart', Dropouts meet Multiple Additive Regression Trees. 'dart', Dropouts meet Multiple Additive Regression Trees.
'goss', Gradient-based One-Side Sampling. 'goss', Gradient-based One-Side Sampling.
...@@ -395,7 +395,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -395,7 +395,7 @@ class LGBMModel(_LGBMModelBase):
Number of boosted trees to fit. Number of boosted trees to fit.
subsample_for_bin : int, optional (default=200000) subsample_for_bin : int, optional (default=200000)
Number of samples for constructing bins. Number of samples for constructing bins.
objective : string, callable or None, optional (default=None) objective : str, callable or None, optional (default=None)
Specify the learning task and the corresponding learning objective or Specify the learning task and the corresponding learning objective or
a custom objective function to be used (see note below). a custom objective function to be used (see note below).
Default: 'regression' for LGBMRegressor, 'binary' or 'multiclass' for LGBMClassifier, 'lambdarank' for LGBMRanker. Default: 'regression' for LGBMRegressor, 'binary' or 'multiclass' for LGBMClassifier, 'lambdarank' for LGBMRanker.
...@@ -436,7 +436,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -436,7 +436,7 @@ class LGBMModel(_LGBMModelBase):
Number of parallel threads. Number of parallel threads.
silent : bool, optional (default=True) silent : bool, optional (default=True)
Whether to print messages while running boosting. Whether to print messages while running boosting.
importance_type : string, optional (default='split') importance_type : str, optional (default='split')
The type of feature importance to be filled into ``feature_importances_``. The type of feature importance to be filled into ``feature_importances_``.
If 'split', result contains numbers of times the feature is used in a model. If 'split', result contains numbers of times the feature is used in a model.
If 'gain', result contains total gains of splits which use the feature. If 'gain', result contains total gains of splits which use the feature.
...@@ -737,9 +737,9 @@ class LGBMModel(_LGBMModelBase): ...@@ -737,9 +737,9 @@ class LGBMModel(_LGBMModelBase):
sample_weight_shape="array-like of shape = [n_samples] or None, optional (default=None)", sample_weight_shape="array-like of shape = [n_samples] or None, optional (default=None)",
init_score_shape="array-like of shape = [n_samples] or None, optional (default=None)", init_score_shape="array-like of shape = [n_samples] or None, optional (default=None)",
group_shape="array-like or None, optional (default=None)", group_shape="array-like or None, optional (default=None)",
eval_sample_weight_shape="list of arrays or None, optional (default=None)", eval_sample_weight_shape="list of arrays, or None, optional (default=None)",
eval_init_score_shape="list of arrays or None, optional (default=None)", eval_init_score_shape="list of arrays, or None, optional (default=None)",
eval_group_shape="list of arrays or None, optional (default=None)" eval_group_shape="list of arrays, or None, optional (default=None)"
) + "\n\n" + _lgbmmodel_doc_custom_eval_note ) + "\n\n" + _lgbmmodel_doc_custom_eval_note
def predict(self, X, raw_score=False, start_iteration=0, num_iteration=None, def predict(self, X, raw_score=False, start_iteration=0, num_iteration=None,
...@@ -796,7 +796,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -796,7 +796,7 @@ class LGBMModel(_LGBMModelBase):
@property @property
def objective_(self): def objective_(self):
""":obj:`string` or :obj:`callable`: The concrete objective used while fitting this model.""" """:obj:`str` or :obj:`callable`: The concrete objective used while fitting this model."""
if self._n_features is None: if self._n_features is None:
raise LGBMNotFittedError('No objective found. Need to call fit beforehand.') raise LGBMNotFittedError('No objective found. Need to call fit beforehand.')
return self._objective return self._objective
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment