Commit de51990e authored by Nikita Titov's avatar Nikita Titov Committed by Guolin Ke
Browse files

[docs] [python] docstrings improving (#894)

* fixed description of Dataset

* fixed description of Booster

* fixed description of train

* fixed description of cv

* fixed description of callbacks

* fixed description of plotting

* hotfixes
parent a494d7b7
...@@ -551,36 +551,37 @@ class Dataset(object): ...@@ -551,36 +551,37 @@ class Dataset(object):
weight=None, group=None, silent=False, weight=None, group=None, silent=False,
feature_name='auto', categorical_feature='auto', params=None, feature_name='auto', categorical_feature='auto', params=None,
free_raw_data=True): free_raw_data=True):
""" """Constract Dataset.
Parameters Parameters
---------- ----------
data : string/numpy array/scipy.sparse data : string, numpy array or scipy.sparse
Data source of Dataset. Data source of Dataset.
When data type is string, it represents the path of txt file If string, it represents the path to txt file.
label : list or numpy 1-D array, optional label : list or numpy 1-D array, optional (default=None)
Label of the data Label of the data.
max_bin : int, required max_bin : int, optional (default=255)
Max number of discrete bin for features Max number of discrete bins for features.
reference : Other Dataset, optional reference : Dataset or None, optional (default=None)
If this dataset validation, need to use training data as reference If this is Dataset for validation, training data should be used as reference.
weight : list or numpy 1-D array , optional weight : list, numpy 1-D array or None, optional (default=None)
Weight for each instance. Weight for each instance.
group : list or numpy 1-D array , optional group : list, numpy 1-D array or None, optional (default=None)
Group/query size for dataset Group/query size for Dataset.
silent : boolean, optional silent : bool, optional (default=False)
Whether print messages during construction Whether to print messages during construction.
feature_name : list of str, or 'auto' feature_name : list of strings or 'auto', optional (default="auto")
Feature names Feature names.
If 'auto' and data is pandas DataFrame, use data columns name If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of str or int, or 'auto' categorical_feature : list of strings or int, or 'auto', optional (default="auto")
Categorical features, Categorical features.
type int represents index, If list of int, interpreted as indices.
type str represents feature names (need to specify feature_name as well) If list of strings, interpreted as feature names (need to specify ``feature_name`` as well).
If 'auto' and data is pandas DataFrame, use pandas categorical columns If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
params: dict, optional params: dict or None, optional (default=None)
Other parameters Other parameters.
free_raw_data: Bool free_raw_data: bool, optional (default=True)
True if need to free raw data after construct inner dataset If True, raw data is freed after constructing inner Dataset.
""" """
self.handle = None self.handle = None
self.data = data self.data = data
...@@ -778,7 +779,13 @@ class Dataset(object): ...@@ -778,7 +779,13 @@ class Dataset(object):
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
def construct(self): def construct(self):
"""Lazy init""" """Lazy init.
Returns
-------
self : Dataset
Returns self.
"""
if self.handle is None: if self.handle is None:
if self.reference is not None: if self.reference is not None:
if self.used_indices is None: if self.used_indices is None:
...@@ -811,24 +818,28 @@ class Dataset(object): ...@@ -811,24 +818,28 @@ class Dataset(object):
def create_valid(self, data, label=None, weight=None, group=None, def create_valid(self, data, label=None, weight=None, group=None,
silent=False, params=None): silent=False, params=None):
""" """Create validation data align with current Dataset.
Create validation data align with current dataset
Parameters Parameters
---------- ----------
data : string/numpy array/scipy.sparse data : string, numpy array or scipy.sparse
Data source of Dataset. Data source of Dataset.
When data type is string, it represents the path of txt file If string, it represents the path to txt file.
label : list or numpy 1-D array, optional label : list or numpy 1-D array, optional (default=None)
Label of the training data. Label of the training data.
weight : list or numpy 1-D array , optional weight : list, numpy 1-D array or None, optional (default=None)
Weight for each instance. Weight for each instance.
group : list or numpy 1-D array , optional group : list, numpy 1-D array or None, optional (default=None)
Group/query size for dataset Group/query size for Dataset.
silent : boolean, optional silent : bool, optional (default=False)
Whether print messages during construction Whether to print messages during construction.
params: dict, optional params: dict or None, optional (default=None)
Other parameters Other parameters.
Returns
-------
self : Dataset
Returns self.
""" """
ret = Dataset(data, label=label, max_bin=self.max_bin, reference=self, ret = Dataset(data, label=label, max_bin=self.max_bin, reference=self,
weight=weight, group=group, silent=silent, params=params, weight=weight, group=group, silent=silent, params=params,
...@@ -838,15 +849,19 @@ class Dataset(object): ...@@ -838,15 +849,19 @@ class Dataset(object):
return ret return ret
def subset(self, used_indices, params=None): def subset(self, used_indices, params=None):
""" """Get subset of current Dataset.
Get subset of current dataset
Parameters Parameters
---------- ----------
used_indices : list of int used_indices : list of int
Used indices of this subset Indices used to create the subset.
params : dict params: dict or None, optional (default=None)
Other parameters Other parameters.
Returns
-------
subset : Dataset
Subset of the current Dataset.
""" """
if params is None: if params is None:
params = self.params params = self.params
...@@ -858,8 +873,7 @@ class Dataset(object): ...@@ -858,8 +873,7 @@ class Dataset(object):
return ret return ret
def save_binary(self, filename): def save_binary(self, filename):
""" """Save Dataset to binary file.
Save Dataset to binary file
Parameters Parameters
---------- ----------
...@@ -886,11 +900,10 @@ class Dataset(object): ...@@ -886,11 +900,10 @@ class Dataset(object):
Parameters Parameters
---------- ----------
field_name: str field_name: string
The field name of the information The field name of the information.
data: list, numpy array or None
data: numpy array or list or None The array of data to be set.
The array ofdata to be set
""" """
if self.handle is None: if self.handle is None:
raise Exception("Cannot set %s before construct dataset" % field_name) raise Exception("Cannot set %s before construct dataset" % field_name)
...@@ -934,13 +947,13 @@ class Dataset(object): ...@@ -934,13 +947,13 @@ class Dataset(object):
Parameters Parameters
---------- ----------
field_name: str field_name: string
The field name of the information The field name of the information.
Returns Returns
------- -------
info : array info : numpy array
A numpy array of information of the data A numpy array with information from the Dataset.
""" """
if self.handle is None: if self.handle is None:
raise Exception("Cannot get %s before construct dataset" % field_name) raise Exception("Cannot get %s before construct dataset" % field_name)
...@@ -967,14 +980,12 @@ class Dataset(object): ...@@ -967,14 +980,12 @@ class Dataset(object):
raise TypeError("Unknown type") raise TypeError("Unknown type")
def set_categorical_feature(self, categorical_feature): def set_categorical_feature(self, categorical_feature):
""" """Set categorical features.
Set categorical features
Parameters Parameters
---------- ----------
categorical_feature : list of int or str categorical_feature : list of int or strings
Name/index of categorical features Names or indices of categorical features.
""" """
if self.categorical_feature == categorical_feature: if self.categorical_feature == categorical_feature:
return return
...@@ -1005,13 +1016,12 @@ class Dataset(object): ...@@ -1005,13 +1016,12 @@ class Dataset(object):
raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.") raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def set_reference(self, reference): def set_reference(self, reference):
""" """Set reference Dataset.
Set reference dataset
Parameters Parameters
---------- ----------
reference : Dataset reference : Dataset
Will use reference as template to consturct current dataset Reference that is used as a template to consturct the current Dataset.
""" """
self.set_categorical_feature(reference.categorical_feature) self.set_categorical_feature(reference.categorical_feature)
self.set_feature_name(reference.feature_name) self.set_feature_name(reference.feature_name)
...@@ -1026,13 +1036,12 @@ class Dataset(object): ...@@ -1026,13 +1036,12 @@ class Dataset(object):
raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.") raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def set_feature_name(self, feature_name): def set_feature_name(self, feature_name):
""" """Set feature name.
Set feature name
Parameters Parameters
---------- ----------
feature_name : list of str feature_name : list of strings
Feature names Feature names.
""" """
if feature_name != 'auto': if feature_name != 'auto':
self.feature_name = feature_name self.feature_name = feature_name
...@@ -1046,13 +1055,12 @@ class Dataset(object): ...@@ -1046,13 +1055,12 @@ class Dataset(object):
ctypes.c_int(len(feature_name)))) ctypes.c_int(len(feature_name))))
def set_label(self, label): def set_label(self, label):
""" """Set label of Dataset
Set label of Dataset
Parameters Parameters
---------- ----------
label: numpy array or list or None label: list, numpy array or None
The label information to be set into Dataset The label information to be set into Dataset.
""" """
self.label = label self.label = label
if self.handle is not None: if self.handle is not None:
...@@ -1060,13 +1068,12 @@ class Dataset(object): ...@@ -1060,13 +1068,12 @@ class Dataset(object):
self.set_field('label', label) self.set_field('label', label)
def set_weight(self, weight): def set_weight(self, weight):
""" """Set weight of each instance.
Set weight of each instance.
Parameters Parameters
---------- ----------
weight : numpy array or list or None weight : list, numpy array or None
Weight for each data point Weight to be set for each data point.
""" """
self.weight = weight self.weight = weight
if self.handle is not None and weight is not None: if self.handle is not None and weight is not None:
...@@ -1074,13 +1081,12 @@ class Dataset(object): ...@@ -1074,13 +1081,12 @@ class Dataset(object):
self.set_field('weight', weight) self.set_field('weight', weight)
def set_init_score(self, init_score): def set_init_score(self, init_score):
""" """Set init score of Booster to start from.
Set init score of booster to start from.
Parameters Parameters
---------- ----------
init_score: numpy array or list or None init_score : list, numpy array or None
Init score for booster Init score for Booster.
""" """
self.init_score = init_score self.init_score = init_score
if self.handle is not None and init_score is not None: if self.handle is not None and init_score is not None:
...@@ -1088,13 +1094,12 @@ class Dataset(object): ...@@ -1088,13 +1094,12 @@ class Dataset(object):
self.set_field('init_score', init_score) self.set_field('init_score', init_score)
def set_group(self, group): def set_group(self, group):
""" """Set group size of Dataset (used for ranking).
Set group size of Dataset (used for ranking).
Parameters Parameters
---------- ----------
group : numpy array or list or None group : list, numpy array or None
Group size of each group Group size of each group.
""" """
self.group = group self.group = group
if self.handle is not None and group is not None: if self.handle is not None and group is not None:
...@@ -1102,48 +1107,48 @@ class Dataset(object): ...@@ -1102,48 +1107,48 @@ class Dataset(object):
self.set_field('group', group) self.set_field('group', group)
def get_label(self): def get_label(self):
""" """Get the label of the Dataset.
Get the label of the Dataset.
Returns Returns
------- -------
label : array label : numpy array
The label information from the Dataset.
""" """
if self.label is None and self.handle is not None: if self.label is None and self.handle is not None:
self.label = self.get_field('label') self.label = self.get_field('label')
return self.label return self.label
def get_weight(self): def get_weight(self):
""" """Get the weight of the Dataset.
Get the weight of the Dataset.
Returns Returns
------- -------
weight : array weight : numpy array
Weight for each data point from the Dataset.
""" """
if self.weight is None and self.handle is not None: if self.weight is None and self.handle is not None:
self.weight = self.get_field('weight') self.weight = self.get_field('weight')
return self.weight return self.weight
def get_init_score(self): def get_init_score(self):
""" """Get the initial score of the Dataset.
Get the initial score of the Dataset.
Returns Returns
------- -------
init_score : array init_score : numpy array
Init score of Booster.
""" """
if self.init_score is None and self.handle is not None: if self.init_score is None and self.handle is not None:
self.init_score = self.get_field('init_score') self.init_score = self.get_field('init_score')
return self.init_score return self.init_score
def get_group(self): def get_group(self):
""" """Get the group of the Dataset.
Get the group of the Dataset.
Returns Returns
------- -------
init_score : array init_score : numpy array
Group size of each group.
""" """
if self.group is None and self.handle is not None: if self.group is None and self.handle is not None:
self.group = self.get_field('group') self.group = self.get_field('group')
...@@ -1156,12 +1161,12 @@ class Dataset(object): ...@@ -1156,12 +1161,12 @@ class Dataset(object):
return self.group return self.group
def num_data(self): def num_data(self):
""" """Get the number of rows in the Dataset.
Get the number of rows in the Dataset.
Returns Returns
------- -------
number of rows : int number_of_rows : int
The number of rows in the Dataset.
""" """
if self.handle is not None: if self.handle is not None:
ret = ctypes.c_int() ret = ctypes.c_int()
...@@ -1172,12 +1177,12 @@ class Dataset(object): ...@@ -1172,12 +1177,12 @@ class Dataset(object):
raise LightGBMError("Cannot get num_data before construct dataset") raise LightGBMError("Cannot get num_data before construct dataset")
def num_feature(self): def num_feature(self):
""" """Get the number of columns (features) in the Dataset.
Get the number of columns (features) in the Dataset.
Returns Returns
------- -------
number of columns : int number_of_columns : int
The number of columns (features) in the Dataset.
""" """
if self.handle is not None: if self.handle is not None:
ret = ctypes.c_int() ret = ctypes.c_int()
...@@ -1188,14 +1193,19 @@ class Dataset(object): ...@@ -1188,14 +1193,19 @@ class Dataset(object):
raise LightGBMError("Cannot get num_feature before construct dataset") raise LightGBMError("Cannot get num_feature before construct dataset")
def get_ref_chain(self, ref_limit=100): def get_ref_chain(self, ref_limit=100):
''' """Get a chain of Dataset objects, starting with r, then going to r.reference if exists,
Gets a chain of Dataset objects, starting with r, then going to r.reference if exists, then to r.reference.reference, etc. until we hit ``ref_limit`` or a reference loop.
then to r.reference.reference, etc. until we hit ref_limit or a reference loop
Parameters
----------
ref_limit : int, optional (default=100)
The limit number of references.
Returns Returns
------- -------
chain of references of self : set of Dataset objects ref_chain : set of Dataset
''' Chain of references of the Datasets.
"""
head = self head = self
ref_chain = set() ref_chain = set()
while len(ref_chain) < ref_limit: while len(ref_chain) < ref_limit:
...@@ -1211,21 +1221,20 @@ class Dataset(object): ...@@ -1211,21 +1221,20 @@ class Dataset(object):
class Booster(object): class Booster(object):
""""Booster in LightGBM.""" """Booster in LightGBM."""
def __init__(self, params=None, train_set=None, model_file=None, silent=False): def __init__(self, params=None, train_set=None, model_file=None, silent=False):
""" """Initialize the Booster.
Initialize the Booster.
Parameters Parameters
---------- ----------
params : dict params: dict or None, optional (default=None)
Parameters for boosters. Parameters for Booster.
train_set : Dataset train_set : Dataset or None, optional (default=None)
Training dataset Training dataset.
model_file : string model_file : string or None, optional (default=None)
Path to the model file. Path to the model file.
silent : boolean, optional silent : bool, optional (default=False)
Whether print messages during construction Whether to print messages during construction.
""" """
self.handle = None self.handle = None
self.__need_reload_eval_info = True self.__need_reload_eval_info = True
...@@ -1325,6 +1334,7 @@ class Booster(object): ...@@ -1325,6 +1334,7 @@ class Booster(object):
self.__dict__.update(state) self.__dict__.update(state)
def free_dataset(self): def free_dataset(self):
"""Free Booster's Datasets."""
self.__dict__.pop('train_set', None) self.__dict__.pop('train_set', None)
self.__dict__.pop('valid_sets', None) self.__dict__.pop('valid_sets', None)
self.__num_dataset = 0 self.__num_dataset = 0
...@@ -1334,21 +1344,27 @@ class Booster(object): ...@@ -1334,21 +1344,27 @@ class Booster(object):
self.__is_predicted_cur_iter = [] self.__is_predicted_cur_iter = []
def set_train_data_name(self, name): def set_train_data_name(self, name):
"""Set the name to the training Dataset.
Parameters
----------
name: string
Name for training Dataset.
"""
self.__train_data_name = name self.__train_data_name = name
def add_valid(self, data, name): def add_valid(self, data, name):
""" """Add validation data.
Add an validation data
Parameters Parameters
---------- ----------
data : Dataset data : Dataset
Validation data Validation data.
name : String name : string
Name of validation data Name of validation data.
""" """
if not isinstance(data, Dataset): if not isinstance(data, Dataset):
raise TypeError('valid data should be Dataset instance, met {}'.format(type(data).__name__)) raise TypeError('Validation data should be Dataset instance, met {}'.format(type(data).__name__))
if data._predictor is not self.__init_predictor: if data._predictor is not self.__init_predictor:
raise LightGBMError("Add validation data failed, you should use same predictor for these data") raise LightGBMError("Add validation data failed, you should use same predictor for these data")
_safe_call(_LIB.LGBM_BoosterAddValidData( _safe_call(_LIB.LGBM_BoosterAddValidData(
...@@ -1361,15 +1377,12 @@ class Booster(object): ...@@ -1361,15 +1377,12 @@ class Booster(object):
self.__is_predicted_cur_iter.append(False) self.__is_predicted_cur_iter.append(False)
def reset_parameter(self, params): def reset_parameter(self, params):
""" """Reset parameters of Booster.
Reset parameters for booster
Parameters Parameters
---------- ----------
params : dict params : dict
New parameters for boosters New parameters for Booster.
silent : boolean, optional
Whether print messages during construction
""" """
if 'metric' in params: if 'metric' in params:
self.__need_reload_eval_info = True self.__need_reload_eval_info = True
...@@ -1380,22 +1393,24 @@ class Booster(object): ...@@ -1380,22 +1393,24 @@ class Booster(object):
c_str(params_str))) c_str(params_str)))
def update(self, train_set=None, fobj=None): def update(self, train_set=None, fobj=None):
""" """Update for one iteration.
Update for one iteration
Note: for multi-class task, the score is group by class_id first, then group by row_id
if you want to get i-th row score in j-th class, the access way is score[j*num_data+i]
and you should group grad and hess in this way as well
Parameters Parameters
---------- ----------
train_set : train_set : Dataset or None, optional (default=None)
Training data, None means use last training data Training data.
fobj : function If None, last training data is used.
fobj : callable or None, optional (default=None)
Customized objective function. Customized objective function.
For multi-class task, the score is group by class_id first, then group by row_id.
If you want to get i-th row score in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well.
Returns Returns
------- -------
is_finished, bool is_finished : bool
Whether the update was successfully finished.
""" """
"""need reset training data""" """need reset training data"""
...@@ -1452,14 +1467,19 @@ class Booster(object): ...@@ -1452,14 +1467,19 @@ class Booster(object):
return is_finished.value == 1 return is_finished.value == 1
def rollback_one_iter(self): def rollback_one_iter(self):
""" """Rollback one iteration."""
Rollback one iteration
"""
_safe_call(_LIB.LGBM_BoosterRollbackOneIter( _safe_call(_LIB.LGBM_BoosterRollbackOneIter(
self.handle)) self.handle))
self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)]
def current_iteration(self): def current_iteration(self):
"""Get the index of the current iteration.
Returns
-------
cur_iter : int
The index of the current iteration.
"""
out_cur_iter = ctypes.c_int(0) out_cur_iter = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetCurrentIteration( _safe_call(_LIB.LGBM_BoosterGetCurrentIteration(
self.handle, self.handle,
...@@ -1467,20 +1487,21 @@ class Booster(object): ...@@ -1467,20 +1487,21 @@ class Booster(object):
return out_cur_iter.value return out_cur_iter.value
def eval(self, data, name, feval=None): def eval(self, data, name, feval=None):
""" """Evaluate for data.
Evaluate for data
Parameters Parameters
---------- ----------
data : Dataset object data : Dataset
name : Data for the evaluating.
Name of data name : string
feval : function Name of the data.
feval : callable or None, optional (default=None)
Custom evaluation function. Custom evaluation function.
Returns Returns
------- -------
result: list result: list
Evaluation result list. List with evaluation results.
""" """
if not isinstance(data, Dataset): if not isinstance(data, Dataset):
raise TypeError("Can only eval for Dataset instance") raise TypeError("Can only eval for Dataset instance")
...@@ -1500,48 +1521,46 @@ class Booster(object): ...@@ -1500,48 +1521,46 @@ class Booster(object):
return self.__inner_eval(name, data_idx, feval) return self.__inner_eval(name, data_idx, feval)
def eval_train(self, feval=None): def eval_train(self, feval=None):
""" """Evaluate for training data.
Evaluate for training data
Parameters Parameters
---------- ----------
feval : function feval : callable or None, optional (default=None)
Custom evaluation function. Custom evaluation function.
Returns Returns
------- -------
result: str result: list
Evaluation result list. List with evaluation results.
""" """
return self.__inner_eval(self.__train_data_name, 0, feval) return self.__inner_eval(self.__train_data_name, 0, feval)
def eval_valid(self, feval=None): def eval_valid(self, feval=None):
""" """Evaluate for validation data.
Evaluate for validation data
Parameters Parameters
---------- ----------
feval : function feval : callable or None, optional (default=None)
Custom evaluation function. Custom evaluation function.
Returns Returns
------- -------
result: str result: list
Evaluation result list. List with evaluation results.
""" """
return [item for i in range_(1, self.__num_dataset) return [item for i in range_(1, self.__num_dataset)
for item in self.__inner_eval(self.name_valid_sets[i - 1], i, feval)] for item in self.__inner_eval(self.name_valid_sets[i - 1], i, feval)]
def save_model(self, filename, num_iteration=-1): def save_model(self, filename, num_iteration=-1):
""" """Save Booster to file.
Save model of booster to file
Parameters Parameters
---------- ----------
filename : str filename : string
Filename to save Filename to save Booster.
num_iteration: int num_iteration: int, optional (default=-1)
Number of iteration that want to save. < 0 means save the best iteration(if have) Index of the iteration that should to saved.
If <0, the best iteration (if exists) is saved.
""" """
if num_iteration <= 0: if num_iteration <= 0:
num_iteration = self.best_iteration num_iteration = self.best_iteration
...@@ -1596,17 +1615,18 @@ class Booster(object): ...@@ -1596,17 +1615,18 @@ class Booster(object):
return string_buffer.value.decode() return string_buffer.value.decode()
def dump_model(self, num_iteration=-1): def dump_model(self, num_iteration=-1):
""" """Dump Booster to json format.
Dump model to json format
Parameters Parameters
---------- ----------
num_iteration: int num_iteration: int, optional (default=-1)
Number of iteration that want to dump. < 0 means dump to best iteration(if have) Index of the iteration that should to dumped.
If <0, the best iteration (if exists) is dumped.
Returns Returns
------- -------
Json format of model json_repr : dict
Json format of Booster.
""" """
if num_iteration <= 0: if num_iteration <= 0:
num_iteration = self.best_iteration num_iteration = self.best_iteration
...@@ -1633,32 +1653,34 @@ class Booster(object): ...@@ -1633,32 +1653,34 @@ class Booster(object):
ptr_string_buffer)) ptr_string_buffer))
return json.loads(string_buffer.value.decode()) return json.loads(string_buffer.value.decode())
def predict(self, data, num_iteration=-1, raw_score=False, pred_leaf=False, data_has_header=False, is_reshape=True, def predict(self, data, num_iteration=-1, raw_score=False, pred_leaf=False,
pred_parameter=None): data_has_header=False, is_reshape=True, pred_parameter=None):
""" """Make a prediction.
Predict logic
Parameters Parameters
---------- ----------
data : string/numpy array/scipy.sparse data : string, numpy array or scipy.sparse
Data source for prediction Data source for prediction.
When data type is string, it represents the path of txt file If string, it represents the path to txt file.
num_iteration : int num_iteration : int, optional (default=-1)
Used iteration for prediction, < 0 means predict for best iteration(if have) Iteration used for prediction.
raw_score : bool If <0, the best iteration (if exists) is used for prediction.
True for predict raw score raw_score : bool, optional (default=False)
pred_leaf : bool Whether to predict raw scores.
True for predict leaf index pred_leaf : bool, optional (default=False)
data_has_header : bool Whether to predict leaf index.
Used for txt data data_has_header : bool, optional (default=False)
is_reshape : bool Whether the data has header.
Reshape to (nrow, ncol) if true Used only if data is string.
pred_parameter: dict is_reshape : bool, optional (default=True)
Other parameters for the prediction If True, result is reshaped to [nrow, ncol].
pred_parameter: dict or None, optional (default=None)
Other parameters for the prediction.
Returns Returns
------- -------
Prediction result result : numpy array
Prediction result.
""" """
predictor = self._to_predictor(pred_parameter) predictor = self._to_predictor(pred_parameter)
if num_iteration <= 0: if num_iteration <= 0:
...@@ -1666,6 +1688,20 @@ class Booster(object): ...@@ -1666,6 +1688,20 @@ class Booster(object):
return predictor.predict(data, num_iteration, raw_score, pred_leaf, data_has_header, is_reshape) return predictor.predict(data, num_iteration, raw_score, pred_leaf, data_has_header, is_reshape)
def get_leaf_output(self, tree_id, leaf_id): def get_leaf_output(self, tree_id, leaf_id):
"""Get the output of a leaf.
Parameters
----------
tree_id : int
The index of the tree.
leaf_id : int
The index of the leaf in the tree.
Returns
-------
result : float
The output of the leaf.
"""
ret = ctypes.c_double(0) ret = ctypes.c_double(0)
_safe_call(_LIB.LGBM_BoosterGetLeafValue( _safe_call(_LIB.LGBM_BoosterGetLeafValue(
self.handle, self.handle,
...@@ -1681,7 +1717,13 @@ class Booster(object): ...@@ -1681,7 +1717,13 @@ class Booster(object):
return predictor return predictor
def num_feature(self): def num_feature(self):
"""Get num of features""" """Get number of features.
Returns
-------
num_feature : int
The number of features.
"""
out_num_feature = ctypes.c_int(0) out_num_feature = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetNumFeature( _safe_call(_LIB.LGBM_BoosterGetNumFeature(
self.handle, self.handle,
...@@ -1689,13 +1731,12 @@ class Booster(object): ...@@ -1689,13 +1731,12 @@ class Booster(object):
return out_num_feature.value return out_num_feature.value
def feature_name(self): def feature_name(self):
""" """Get names of features.
Get feature names.
Returns Returns
------- -------
result : array result : list
Array of feature names. List with names of features.
""" """
num_feature = self.num_feature() num_feature = self.num_feature()
"""Get name of features""" """Get name of features"""
...@@ -1711,20 +1752,19 @@ class Booster(object): ...@@ -1711,20 +1752,19 @@ class Booster(object):
return [string_buffers[i].value.decode() for i in range_(num_feature)] return [string_buffers[i].value.decode() for i in range_(num_feature)]
def feature_importance(self, importance_type='split', iteration=-1): def feature_importance(self, importance_type='split', iteration=-1):
""" """Get feature importances.
Get feature importances
Parameters Parameters
---------- ----------
importance_type : str, default "split" importance_type : string, optional (default="split")
How the importance is calculated: "split" or "gain" How the importance is calculated.
"split" is the number of times a feature is used in a model If "split", result contains numbers of times the feature is used in a model.
"gain" is the total gain of splits which use the feature If "gain", result contains total gains of splits which use the feature.
Returns Returns
------- -------
result : array result : numpy array
Array of feature importances. Array with feature importances.
""" """
if importance_type == "split": if importance_type == "split":
importance_type_int = 0 importance_type_int = 0
...@@ -1834,29 +1874,29 @@ class Booster(object): ...@@ -1834,29 +1874,29 @@ class Booster(object):
[name.startswith(('auc', 'ndcg', 'map')) for name in self.__name_inner_eval] [name.startswith(('auc', 'ndcg', 'map')) for name in self.__name_inner_eval]
def attr(self, key): def attr(self, key):
""" """Get attribute string from the Booster.
Get attribute string from the Booster.
Parameters Parameters
---------- ----------
key : str key : string
The key to get attribute from. The name of the attribute.
Returns Returns
------- -------
value : str value : string or None
The attribute value of the key, returns None if attribute do not exist. The attribute value.
Returns None if attribute do not exist.
""" """
return self.__attr.get(key, None) return self.__attr.get(key, None)
def set_attr(self, **kwargs): def set_attr(self, **kwargs):
""" """Set the attribute of the Booster.
Set the attribute of the Booster.
Parameters Parameters
---------- ----------
**kwargs **kwargs
The attributes to set. Setting a value to None deletes an attribute. The attributes to set.
Setting a value to None deletes an attribute.
""" """
for key, value in kwargs.items(): for key, value in kwargs.items():
if value is not None: if value is not None:
......
...@@ -10,6 +10,7 @@ from .compat import range_ ...@@ -10,6 +10,7 @@ from .compat import range_
class EarlyStopException(Exception): class EarlyStopException(Exception):
"""Exception of early stopping. """Exception of early stopping.
Parameters Parameters
---------- ----------
best_iteration : int best_iteration : int
...@@ -46,20 +47,19 @@ def _format_eval_result(value, show_stdv=True): ...@@ -46,20 +47,19 @@ def _format_eval_result(value, show_stdv=True):
def print_evaluation(period=1, show_stdv=True): def print_evaluation(period=1, show_stdv=True):
"""Create a callback that print evaluation result. """Create a callback that prints the evaluation results.
Parameters Parameters
---------- ----------
period : int period : int, optional (default=1)
The period to log the evaluation results The period to print the evaluation results.
show_stdv : bool, optional (default=True)
show_stdv : bool, optional Whether to show stdv (if provided).
Whether show stdv if provided
Returns Returns
------- -------
callback : function callback : function
A callback that print evaluation every period iterations. The callback that prints the evaluation results every ``period`` iteration(s).
""" """
def callback(env): def callback(env):
"""internal function""" """internal function"""
...@@ -71,7 +71,7 @@ def print_evaluation(period=1, show_stdv=True): ...@@ -71,7 +71,7 @@ def print_evaluation(period=1, show_stdv=True):
def record_evaluation(eval_result): def record_evaluation(eval_result):
"""Create a call back that records the evaluation history into eval_result. """Create a callback that records the evaluation history into ``eval_result``.
Parameters Parameters
---------- ----------
...@@ -81,7 +81,7 @@ def record_evaluation(eval_result): ...@@ -81,7 +81,7 @@ def record_evaluation(eval_result):
Returns Returns
------- -------
callback : function callback : function
The requested callback function. The callback that records the evaluation history into the passed dictionary.
""" """
if not isinstance(eval_result, dict): if not isinstance(eval_result, dict):
raise TypeError('Eval_result should be a dictionary') raise TypeError('Eval_result should be a dictionary')
...@@ -103,22 +103,25 @@ def record_evaluation(eval_result): ...@@ -103,22 +103,25 @@ def record_evaluation(eval_result):
def reset_parameter(**kwargs): def reset_parameter(**kwargs):
"""Reset parameter after first iteration """Create a callback that resets the parameter after the first iteration.
NOTE: the initial parameter will still take in-effect on first iteration. Note
----
The initial parameter will still take in-effect on first iteration.
Parameters Parameters
---------- ----------
**kwargs: value should be list or function **kwargs: value should be list or function
List of parameters for each boosting round List of parameters for each boosting round
or a customized function that calculates learning_rate in terms of or a customized function that calculates the parameter in terms of
current number of round (e.g. yields learning rate decay) current number of round (e.g. yields learning rate decay).
- list l: parameter = l[current_round] If list lst, parameter = lst[current_round].
- function f: parameter = f(current_round) If function func, parameter = func(current_round).
Returns Returns
------- -------
callback : function callback : function
The requested callback function. The callback that resets the parameter after the first iteration.
""" """
def callback(env): def callback(env):
"""internal function""" """internal function"""
...@@ -144,22 +147,25 @@ def reset_parameter(**kwargs): ...@@ -144,22 +147,25 @@ def reset_parameter(**kwargs):
def early_stopping(stopping_rounds, verbose=True): def early_stopping(stopping_rounds, verbose=True):
"""Create a callback that activates early stopping. """Create a callback that activates early stopping.
Note
----
Activates early stopping. Activates early stopping.
Requires at least one validation data and one metric Requires at least one validation data and one metric.
If there's more than one, will check all of them If there's more than one, will check all of them.
Parameters Parameters
---------- ----------
stopping_rounds : int stopping_rounds : int
The stopping rounds before the trend occur. The maximum number of rounds without improvement before training is stopped.
verbose : optional, bool verbose : bool, optional (default=True)
Whether to print message about early stopping information. Whether to print message with early stopping information.
Returns Returns
------- -------
callback : function callback : function
The requested callback function. The callback that activates early stopping.
""" """
best_score = [] best_score = []
best_iter = [] best_iter = []
......
...@@ -22,8 +22,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -22,8 +22,7 @@ def train(params, train_set, num_boost_round=100,
early_stopping_rounds=None, evals_result=None, early_stopping_rounds=None, evals_result=None,
verbose_eval=True, learning_rates=None, verbose_eval=True, learning_rates=None,
keep_training_booster=False, callbacks=None): keep_training_booster=False, callbacks=None):
""" """Perform the training with given parameters.
Train with given parameters.
Parameters Parameters
---------- ----------
...@@ -31,68 +30,67 @@ def train(params, train_set, num_boost_round=100, ...@@ -31,68 +30,67 @@ def train(params, train_set, num_boost_round=100,
Parameters for training. Parameters for training.
train_set : Dataset train_set : Dataset
Data to be trained. Data to be trained.
num_boost_round: int num_boost_round: int, optional (default=100)
Number of boosting iterations. Number of boosting iterations.
valid_sets: list of Datasets valid_sets: list of Datasets or None, optional (default=None)
List of data to be evaluated during training List of data to be evaluated during training.
valid_names: list of string valid_names: list of string or None, optional (default=None)
Names of valid_sets Names of ``valid_sets``.
fobj : function fobj : callable or None, optional (default=None)
Customized objective function. Customized objective function.
feval : function feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Note: should return (eval_name, eval_result, is_higher_better) of list of this Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
init_model : file name of lightgbm model or 'Booster' instance init_model : string or None, optional (default=None)
model used for continued train Filename of LightGBM model or Booster instance used for continue training.
feature_name : list of str, or 'auto' feature_name : list of strings or 'auto', optional (default="auto")
Feature names Feature names.
If 'auto' and data is pandas DataFrame, use data columns name If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of str or int, or 'auto' categorical_feature : list of strings or int, or 'auto', optional (default="auto")
Categorical features, Categorical features.
type int represents index, If list of int, interpreted as indices.
type str represents feature names (need to specify feature_name as well) If list of strings, interpreted as feature names (need to specify ``feature_name`` as well).
If 'auto' and data is pandas DataFrame, use pandas categorical columns If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
early_stopping_rounds: int early_stopping_rounds: int or None, optional (default=None)
Activates early stopping. Activates early stopping. The model will train until the validation score stops improving.
Requires at least one validation data and one metric Requires at least one validation data and one metric. If there's more than one, will check all of them.
If there's more than one, will check all of them If early stopping occurs, the model will add ``best_iteration`` field.
Returns the model with (best_iter + early_stopping_rounds) evals_result: dict or None, optional (default=None)
If early stopping occurs, the model will add 'best_iteration' field This dictionary used to store all evaluation results of all the items in ``valid_sets``.
evals_result: dict or None
This dictionary used to store all evaluation results of all the items in valid_sets. Example
Example: with a valid_sets containing [valid_set, train_set] -------
and valid_names containing ['eval', 'train'] With a ``valid_sets`` = [valid_set, train_set],
and a parameter containing ('metric':'logloss') ``valid_names`` = ['eval', 'train']
Returns: {'train': {'logloss': ['0.48253', '0.35953', ...]}, and a ``params`` = {'metric': 'logloss'}
'eval': {'logloss': ['0.480385', '0.357756', ...]}} returns: {'train': {'logloss': ['0.48253', '0.35953', ...]},
passed with None means no using this function 'eval': {'logloss': ['0.480385', '0.357756', ...]}}.
verbose_eval : bool or int verbose_eval : bool or int, optional (default=True)
Requires at least one item in evals. Requires at least one validation data.
If `verbose_eval` is True, If True, the eval metric on the valid set is printed at each boosting stage.
the eval metric on the valid set is printed at each boosting stage. If int, the eval metric on the valid set is printed at every ``verbose_eval`` boosting stage.
If `verbose_eval` is int, The last boosting stage or the boosting stage found by using ``early_stopping_rounds`` is also printed.
the eval metric on the valid set is printed at every `verbose_eval` boosting stage.
The last boosting stage Example
or the boosting stage found by using `early_stopping_rounds` is also printed. -------
Example: with verbose_eval=4 and at least one item in evals, With ``verbose_eval`` = 4 and at least one item in evals,
an evaluation metric is printed every 4 (instead of 1) boosting stages. an evaluation metric is printed every 4 (instead of 1) boosting stages.
learning_rates: list or function learning_rates: list, callable or None, optional (default=None)
List of learning rate for each boosting round List of learning rates for each boosting round
or a customized function that calculates learning_rate or a customized function that calculates ``learning_rate``
in terms of current number of round (e.g. yields learning rate decay) in terms of current number of round (e.g. yields learning rate decay).
- list l: learning_rate = l[current_round] keep_training_booster : bool, optional (default=False)
- function f: learning_rate = f(current_round) Whether the returned Booster will be used to keep training.
keep_training_booster : boolean If False, the returned value will be converted into _InnerPredictor before returning.
Whether the return booster will be used to keep training. You can still use _InnerPredictor as ``init_model`` for future continue training.
If false, will convert into _InnerPredictor before return. callbacks : list of callables or None, optional (default=None)
You can still use _InnerPredictor as init_model for future continue training.
callbacks : list of callback functions
List of callback functions that are applied at each iteration. List of callback functions that are applied at each iteration.
See Callbacks in Python-API.md for more information. See Callbacks in Python-API.md for more information.
Returns Returns
------- -------
booster : a trained booster model booster : Booster
The trained Booster model.
""" """
"""create predictor first""" """create predictor first"""
for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds"]: for alias in ["num_boost_round", "num_iterations", "num_iteration", "num_tree", "num_trees", "num_round", "num_rounds"]:
...@@ -316,68 +314,71 @@ def cv(params, train_set, num_boost_round=10, ...@@ -316,68 +314,71 @@ def cv(params, train_set, num_boost_round=10,
early_stopping_rounds=None, fpreproc=None, early_stopping_rounds=None, fpreproc=None,
verbose_eval=None, show_stdv=True, seed=0, verbose_eval=None, show_stdv=True, seed=0,
callbacks=None): callbacks=None):
""" """Perform the cross-validation with given paramaters.
Cross-validation with given paramaters.
Parameters Parameters
---------- ----------
params : dict params : dict
Booster params. Parameters for Booster.
train_set : Dataset train_set : Dataset
Data to be trained. Data to be trained on.
num_boost_round : int num_boost_round : int, optional (default=10)
Number of boosting iterations. Number of boosting iterations.
folds : a generator or iterator of (train_idx, test_idx) tuples folds : a generator or iterator of (train_idx, test_idx) tuples or None, optional (default=None)
The train indices and test indices for each folds. The train and test indices for the each fold.
This argument has highest priority over other data split arguments. This argument has highest priority over other data split arguments.
nfold : int nfold : int, optional (default=5)
Number of folds in CV. Number of folds in CV.
stratified : bool stratified : bool, optional (default=True)
Perform stratified sampling. Whether to perform stratified sampling.
shuffle: bool shuffle: bool, optional (default=True)
Whether shuffle before split data Whether to shuffle before splitting data.
metrics : string or list of strings metrics : string, list of strings or None, optional (default=None)
Evaluation metrics to be watched in CV. Evaluation metrics to be monitored while CV.
If `metrics` is not None, the metric in `params` will be overridden. If not None, the metric in ``params`` will be overridden.
fobj : function fobj : callable or None, optional (default=None)
Custom objective function. Custom objective function.
feval : function feval : callable or None, optional (default=None)
Custom evaluation function. Custom evaluation function.
init_model : file name of lightgbm model or 'Booster' instance init_model : string or None, optional (default=None)
model used for continued train Filename of LightGBM model or Booster instance used for continue training.
feature_name : list of str, or 'auto' feature_name : list of strings or 'auto', optional (default="auto")
Feature names Feature names.
If 'auto' and data is pandas DataFrame, use data columns name If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of str or int, or 'auto' categorical_feature : list of strings or int, or 'auto', optional (default="auto")
Categorical features, Categorical features.
type int represents index, If list of int, interpreted as indices.
type str represents feature names (need to specify feature_name as well) If list of strings, interpreted as feature names (need to specify ``feature_name`` as well).
If 'auto' and data is pandas DataFrame, use pandas categorical columns If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
early_stopping_rounds: int early_stopping_rounds: int or None, optional (default=None)
Activates early stopping. CV error needs to decrease at least Activates early stopping. CV error needs to decrease at least
every <early_stopping_rounds> round(s) to continue. every ``early_stopping_rounds`` round(s) to continue.
Last entry in evaluation history is the one from best iteration. Last entry in evaluation history is the one from best iteration.
fpreproc : function fpreproc : callable or None, optional (default=None)
Preprocessing function that takes (dtrain, dtest, param) Preprocessing function that takes (dtrain, dtest, params)
and returns transformed versions of those. and returns transformed versions of those.
verbose_eval : bool, int, or None, default None verbose_eval : bool, int, or None, optional (default=None)
Whether to display the progress. Whether to display the progress.
If None, progress will be displayed when np.ndarray is returned. If None, progress will be displayed when np.ndarray is returned.
If True, progress will be displayed at boosting stage. If True, progress will be displayed at every boosting stage.
If an integer is given, If int, progress will be displayed at every given ``verbose_eval`` boosting stage.
progress will be displayed at every given `verbose_eval` boosting stage. show_stdv : bool, optional (default=True)
show_stdv : bool, default True
Whether to display the standard deviation in progress. Whether to display the standard deviation in progress.
Results are not affected, and always contains std. Results are not affected by this parameter, and always contains std.
seed : int seed : int, optional (default=0)
Seed used to generate the folds (passed to numpy.random.seed). Seed used to generate the folds (passed to numpy.random.seed).
callbacks : list of callback functions callbacks : list of callables or None, optional (default=None)
List of callback functions that are applied at each iteration. List of callback functions that are applied at each iteration.
See Callbacks in Python-API.md for more information. See Callbacks in Python-API.md for more information.
Returns Returns
------- -------
evaluation history : list(string) eval_hist : dict
Evaluation history.
The dictionary has the following format:
{'metric1-mean': [values], 'metric1-stdv': [values],
'metric2-mean': [values], 'metric2-stdv': [values],
...}.
""" """
if not isinstance(train_set, Dataset): if not isinstance(train_set, Dataset):
raise TypeError("Traninig only accepts Dataset object") raise TypeError("Traninig only accepts Dataset object")
......
...@@ -24,45 +24,50 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -24,45 +24,50 @@ def plot_importance(booster, ax=None, height=0.2,
xlabel='Feature importance', ylabel='Features', xlabel='Feature importance', ylabel='Features',
importance_type='split', max_num_features=None, importance_type='split', max_num_features=None,
ignore_zero=True, figsize=None, grid=True, **kwargs): ignore_zero=True, figsize=None, grid=True, **kwargs):
"""Plot model feature importances. """Plot model's feature importances.
Parameters Parameters
---------- ----------
booster : Booster or LGBMModel booster : Booster or LGBMModel
Booster or LGBMModel instance Booster or LGBMModel instance which feature importance should be plotted.
ax : matplotlib Axes ax : matplotlib.axes.Axes or None, optional (default=None)
Target axes instance. If None, new figure and axes will be created. Target axes instance.
height : float If None, new figure and axes will be created.
Bar height, passed to ax.barh() height : float, optional (default=0.2)
xlim : tuple of 2 elements Bar height, passed to ``ax.barh()``.
Tuple passed to axes.xlim() xlim : tuple of 2 elements or None, optional (default=None)
ylim : tuple of 2 elements Tuple passed to ``ax.xlim()``.
Tuple passed to axes.ylim() ylim : tuple of 2 elements or None, optional (default=None)
title : str Tuple passed to ``ax.ylim()``.
Axes title. Pass None to disable. title : string or None, optional (default="Feature importance")
xlabel : str Axes title.
X axis title label. Pass None to disable. If None, title is disabled.
ylabel : str xlabel : string or None, optional (default="Feature importance")
Y axis title label. Pass None to disable. X-axis title label.
importance_type : str If None, label is disabled.
How the importance is calculated: "split" or "gain" ylabel : string or None, optional (default="Features")
"split" is the number of times a feature is used in a model Y-axis title label.
"gain" is the total gain of splits which use the feature If None, title is disabled.
max_num_features : int importance_type : string, optional (default="split")
How the importance is calculated.
If "split", result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature.
max_num_features : int or None, optional (default=None)
Max number of top features displayed on plot. Max number of top features displayed on plot.
If None or smaller than 1, all features will be displayed. If None or <1, all features will be displayed.
ignore_zero : bool ignore_zero : bool, optional (default=True)
Ignore features with zero importance Whether to ignore features with zero importance.
figsize : tuple of 2 elements figsize : tuple of 2 elements or None, optional (default=None)
Figure size Figure size.
grid : bool grid : bool, optional (default=True)
Whether add grid for axes Whether to add a grid for axes.
**kwargs : **kwargs : other parameters
Other keywords passed to ax.barh() Other parameters passed to ``ax.barh()``.
Returns Returns
------- -------
ax : matplotlib Axes ax : matplotlib.axes.Axes
The plot with model's feature importances.
""" """
try: try:
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
...@@ -133,34 +138,40 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -133,34 +138,40 @@ def plot_metric(booster, metric=None, dataset_names=None,
Parameters Parameters
---------- ----------
booster : dict or LGBMModel booster : dict or LGBMModel
Evals_result recorded by lightgbm.train() or LGBMModel instance Dictionary returned from ``lightgbm.train()`` or LGBMModel instance.
metric : str or None metric : string or None, optional (default=None)
The metric name to plot. The metric name to plot.
Only one metric supported because different metrics have various scales. Only one metric supported because different metrics have various scales.
Pass None to pick `first` one (according to dict hashcode). If None, first metric picked from dictionary (according to hashcode).
dataset_names : None or list of str dataset_names : list of strings or None, optional (default=None)
List of the dataset names to plot. List of the dataset names which are used to calculate metric to plot.
Pass None to plot all datasets. If None, all datasets are used.
ax : matplotlib Axes ax : matplotlib.axes.Axes or None, optional (default=None)
Target axes instance. If None, new figure and axes will be created. Target axes instance.
xlim : tuple of 2 elements If None, new figure and axes will be created.
Tuple passed to axes.xlim() xlim : tuple of 2 elements or None, optional (default=None)
ylim : tuple of 2 elements Tuple passed to ``ax.xlim()``.
Tuple passed to axes.ylim() ylim : tuple of 2 elements or None, optional (default=None)
title : str Tuple passed to ``ax.ylim()``.
Axes title. Pass None to disable. title : string or None, optional (default="Metric during training")
xlabel : str Axes title.
X axis title label. Pass None to disable. If None, title is disabled.
ylabel : str xlabel : string or None, optional (default="Iterations")
Y axis title label. Pass None to disable. Pass 'auto' to use `metric`. X-axis title label.
figsize : tuple of 2 elements If None, label is disabled.
Figure size ylabel : string or None, optional (default="auto")
grid : bool Y-axis title label.
Whether add grid for axes If 'auto', metric name is used.
If None, label is disabled.
figsize : tuple of 2 elements or None, optional (default=None)
Figure size.
grid : bool, optional (default=True)
Whether to add a grid for axes.
Returns Returns
------- -------
ax : matplotlib Axes ax : matplotlib.axes.Axes
The plot with metric's history over the training.
""" """
try: try:
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
...@@ -298,48 +309,52 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, ...@@ -298,48 +309,52 @@ def create_tree_digraph(booster, tree_index=0, show_info=None,
name=None, comment=None, filename=None, directory=None, name=None, comment=None, filename=None, directory=None,
format=None, engine=None, encoding=None, graph_attr=None, format=None, engine=None, encoding=None, graph_attr=None,
node_attr=None, edge_attr=None, body=None, strict=False): node_attr=None, edge_attr=None, body=None, strict=False):
"""Create a digraph of specified tree. """Create a digraph representation of specified tree.
See: Note
- http://graphviz.readthedocs.io/en/stable/api.html#digraph ----
For more information please visit
http://graphviz.readthedocs.io/en/stable/api.html#digraph.
Parameters Parameters
---------- ----------
booster : Booster, LGBMModel booster : Booster or LGBMModel
Booster or LGBMModel instance. Booster or LGBMModel instance.
tree_index : int, default 0 tree_index : int, optional (default=0)
Specify tree index of target tree. The index of a target tree to convert.
show_info : list show_info : list or None, optional (default=None)
Information shows on nodes. What information should be showed on nodes.
options: 'split_gain', 'internal_value', 'internal_count' or 'leaf_count'. Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'.
name : str name : string or None, optional (default=None)
Graph name used in the source code. Graph name used in the source code.
comment : str comment : string or None, optional (default=None)
Comment added to the first line of the source. Comment added to the first line of the source.
filename : str filename : string or None, optional (default=None)
Filename for saving the source (defaults to name + '.gv'). Filename for saving the source.
directory : str If None, ``name`` + '.gv' is used.
directory : string or None, optional (default=None)
(Sub)directory for source saving and rendering. (Sub)directory for source saving and rendering.
format : str format : string or None, optional (default=None)
Rendering output format ('pdf', 'png', ...). Rendering output format ('pdf', 'png', ...).
engine : str engine : string or None, optional (default=None)
Layout command used ('dot', 'neato', ...). Layout command used ('dot', 'neato', ...).
encoding : str encoding : string or None, optional (default=None)
Encoding for saving the source. Encoding for saving the source.
graph_attr : dict graph_attr : dict or None, optional (default=None)
Mapping of (attribute, value) pairs for the graph. Mapping of (attribute, value) pairs set for the graph.
node_attr : dict node_attr : dict or None, optional (default=None)
Mapping of (attribute, value) pairs set for all nodes. Mapping of (attribute, value) pairs set for all nodes.
edge_attr : dict edge_attr : dict or None, optional (default=None)
Mapping of (attribute, value) pairs set for all edges. Mapping of (attribute, value) pairs set for all edges.
body : list of str body : list of strings or None, optional (default=None)
Iterable of lines to add to the graph body. Lines to add to the graph body.
strict : bool strict : bool, optional (default=False)
Iterable of lines to add to the graph body. Whether rendering should merge multi-edges.
Returns Returns
------- -------
graph : graphviz Digraph graph : graphviz.Digraph
The digraph representation of specified tree.
""" """
if isinstance(booster, LGBMModel): if isinstance(booster, LGBMModel):
booster = booster.booster_ booster = booster.booster_
...@@ -376,27 +391,29 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, ...@@ -376,27 +391,29 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None,
Parameters Parameters
---------- ----------
booster : Booster, LGBMModel booster : Booster or LGBMModel
Booster or LGBMModel instance. Booster or LGBMModel instance to be plotted.
ax : matplotlib Axes ax : matplotlib.axes.Axes or None, optional (default=None)
Target axes instance. If None, new figure and axes will be created. Target axes instance.
tree_index : int, default 0 If None, new figure and axes will be created.
Specify tree index of target tree. tree_index : int, optional (default=0)
figsize : tuple of 2 elements The index of a target tree to plot.
figsize : tuple of 2 elements or None, optional (default=None)
Figure size. Figure size.
graph_attr : dict graph_attr : dict or None, optional (default=None)
Mapping of (attribute, value) pairs for the graph. Mapping of (attribute, value) pairs set for the graph.
node_attr : dict node_attr : dict or None, optional (default=None)
Mapping of (attribute, value) pairs set for all nodes. Mapping of (attribute, value) pairs set for all nodes.
edge_attr : dict edge_attr : dict or None, optional (default=None)
Mapping of (attribute, value) pairs set for all edges. Mapping of (attribute, value) pairs set for all edges.
show_info : list show_info : list or None, optional (default=None)
Information shows on nodes. What information should be showed on nodes.
options: 'split_gain', 'internal_value', 'internal_count' or 'leaf_count'. Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'.
Returns Returns
------- -------
ax : matplotlib Axes ax : matplotlib.axes.Axes
The plot with single tree.
""" """
try: try:
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
......
...@@ -188,7 +188,10 @@ class LGBMModel(_LGBMModelBase): ...@@ -188,7 +188,10 @@ class LGBMModel(_LGBMModelBase):
Whether to print messages while running boosting. Whether to print messages while running boosting.
**kwargs : other parameters **kwargs : other parameters
Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters. Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters.
Note: **kwargs is not supported in sklearn, it may cause unexpected issues.
Note
----
**kwargs is not supported in sklearn, it may cause unexpected issues.
Attributes Attributes
---------- ----------
...@@ -201,13 +204,13 @@ class LGBMModel(_LGBMModelBase): ...@@ -201,13 +204,13 @@ class LGBMModel(_LGBMModelBase):
best_score_ : dict or None best_score_ : dict or None
The best score of fitted model. The best score of fitted model.
best_iteration_ : int or None best_iteration_ : int or None
The best iteration of fitted model if `early_stopping_rounds` has been specified. The best iteration of fitted model if ``early_stopping_rounds`` has been specified.
objective_ : string or callable objective_ : string or callable
The concrete objective used while fitting this model. The concrete objective used while fitting this model.
booster_ : Booster booster_ : Booster
The underlying Booster of this model. The underlying Booster of this model.
evals_result_ : dict or None evals_result_ : dict or None
The evaluation results if `early_stopping_rounds` has been specified. The evaluation results if ``early_stopping_rounds`` has been specified.
feature_importances_ : array of shape = [n_features] feature_importances_ : array of shape = [n_features]
The feature importances (the higher, the more important the feature). The feature importances (the higher, the more important the feature).
...@@ -319,7 +322,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -319,7 +322,7 @@ class LGBMModel(_LGBMModelBase):
If callable, it should be a custom evaluation metric, see note for more details. If callable, it should be a custom evaluation metric, see note for more details.
early_stopping_rounds : int or None, optional (default=None) early_stopping_rounds : int or None, optional (default=None)
Activates early stopping. The model will train until the validation score stops improving. Activates early stopping. The model will train until the validation score stops improving.
Validation error needs to decrease at least every `early_stopping_rounds` round(s) Validation error needs to decrease at least every ``early_stopping_rounds`` round(s)
to continue training. to continue training.
verbose : bool, optional (default=True) verbose : bool, optional (default=True)
If True and an evaluation set is used, writes the evaluation progress. If True and an evaluation set is used, writes the evaluation progress.
...@@ -560,7 +563,9 @@ class LGBMModel(_LGBMModelBase): ...@@ -560,7 +563,9 @@ class LGBMModel(_LGBMModelBase):
def feature_importances_(self): def feature_importances_(self):
"""Get feature importances. """Get feature importances.
Note: feature importance in sklearn interface used to normalize to 1, Note
----
Feature importance in sklearn interface used to normalize to 1,
it's deprecated after 2.0.4 and same as Booster.feature_importance() now. it's deprecated after 2.0.4 and same as Booster.feature_importance() now.
""" """
if self._n_features is None: if self._n_features is None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment