"include/git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "a8dc03250e235a00baf3ecefe4155023624dd239"
Commit 244db078 authored by Nikita Titov's avatar Nikita Titov Committed by Qiwei Ye
Browse files

return self (#1602)

parent dcf9ad2e
...@@ -64,7 +64,7 @@ print('7th feature name is:', repr(lgb_train.feature_name[6])) ...@@ -64,7 +64,7 @@ print('7th feature name is:', repr(lgb_train.feature_name[6]))
# save model to file # save model to file
gbm.save_model('model.txt') gbm.save_model('model.txt')
# dump model to json (and save to file) # dump model to JSON (and save to file)
print('Dump model to JSON...') print('Dump model to JSON...')
model_json = gbm.dump_model() model_json = gbm.dump_model()
......
...@@ -184,7 +184,7 @@ def convert_from_sliced_object(data): ...@@ -184,7 +184,7 @@ def convert_from_sliced_object(data):
"""fix the memory of multi-dimensional sliced object""" """fix the memory of multi-dimensional sliced object"""
if data.base is not None and isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray): if data.base is not None and isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray):
if not data.flags.c_contiguous: if not data.flags.c_contiguous:
warnings.warn("Usage subset(sliced data) of np.ndarray is not recommended due to it will double the peak memory cost in LightGBM.") warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended due to it will double the peak memory cost in LightGBM.")
return np.copy(data) return np.copy(data)
return data return data
...@@ -607,9 +607,9 @@ class Dataset(object): ...@@ -607,9 +607,9 @@ class Dataset(object):
If 'auto' and data is pandas DataFrame, pandas categorical columns are used. If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
params: dict or None, optional (default=None) params : dict or None, optional (default=None)
Other parameters. Other parameters.
free_raw_data: bool, optional (default=True) free_raw_data : bool, optional (default=True)
If True, raw data is freed after constructing inner Dataset. If True, raw data is freed after constructing inner Dataset.
""" """
self.handle = None self.handle = None
...@@ -639,6 +639,7 @@ class Dataset(object): ...@@ -639,6 +639,7 @@ class Dataset(object):
if self.handle is not None: if self.handle is not None:
_safe_call(_LIB.LGBM_DatasetFree(self.handle)) _safe_call(_LIB.LGBM_DatasetFree(self.handle))
self.handle = None self.handle = None
return self
def _lazy_init(self, data, label=None, reference=None, def _lazy_init(self, data, label=None, reference=None,
weight=None, group=None, init_score=None, predictor=None, weight=None, group=None, init_score=None, predictor=None,
...@@ -646,7 +647,7 @@ class Dataset(object): ...@@ -646,7 +647,7 @@ class Dataset(object):
categorical_feature='auto', params=None): categorical_feature='auto', params=None):
if data is None: if data is None:
self.handle = None self.handle = None
return return self
if reference is not None: if reference is not None:
self.pandas_categorical = reference.pandas_categorical self.pandas_categorical = reference.pandas_categorical
categorical_feature = reference.categorical_feature categorical_feature = reference.categorical_feature
...@@ -747,7 +748,7 @@ class Dataset(object): ...@@ -747,7 +748,7 @@ class Dataset(object):
elif self.predictor is not None: elif self.predictor is not None:
raise TypeError('wrong predictor type {}'.format(type(self.predictor).__name__)) raise TypeError('wrong predictor type {}'.format(type(self.predictor).__name__))
# set feature names # set feature names
self.set_feature_name(feature_name) return self.set_feature_name(feature_name)
def __init_from_np2d(self, mat, params_str, ref_dataset): def __init_from_np2d(self, mat, params_str, ref_dataset):
""" """
...@@ -773,6 +774,7 @@ class Dataset(object): ...@@ -773,6 +774,7 @@ class Dataset(object):
c_str(params_str), c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
return self
def __init_from_list_np2d(self, mats, params_str, ref_dataset): def __init_from_list_np2d(self, mats, params_str, ref_dataset):
""" """
...@@ -821,6 +823,7 @@ class Dataset(object): ...@@ -821,6 +823,7 @@ class Dataset(object):
c_str(params_str), c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
return self
def __init_from_csr(self, csr, params_str, ref_dataset): def __init_from_csr(self, csr, params_str, ref_dataset):
""" """
...@@ -845,6 +848,7 @@ class Dataset(object): ...@@ -845,6 +848,7 @@ class Dataset(object):
c_str(params_str), c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
return self
def __init_from_csc(self, csc, params_str, ref_dataset): def __init_from_csc(self, csc, params_str, ref_dataset):
""" """
...@@ -869,6 +873,7 @@ class Dataset(object): ...@@ -869,6 +873,7 @@ class Dataset(object):
c_str(params_str), c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
return self
def construct(self): def construct(self):
"""Lazy init. """Lazy init.
...@@ -876,7 +881,7 @@ class Dataset(object): ...@@ -876,7 +881,7 @@ class Dataset(object):
Returns Returns
------- -------
self : Dataset self : Dataset
Returns self. Constructed Dataset object.
""" """
if self.handle is None: if self.handle is None:
if self.reference is not None: if self.reference is not None:
...@@ -928,13 +933,13 @@ class Dataset(object): ...@@ -928,13 +933,13 @@ class Dataset(object):
Init score for Dataset. Init score for Dataset.
silent : bool, optional (default=False) silent : bool, optional (default=False)
Whether to print messages during construction. Whether to print messages during construction.
params: dict or None, optional (default=None) params : dict or None, optional (default=None)
Other parameters. Other parameters.
Returns Returns
------- -------
self : Dataset valid : Dataset
Returns self. Validation Dataset with reference to self.
""" """
ret = Dataset(data, label=label, reference=self, ret = Dataset(data, label=label, reference=self,
weight=weight, group=group, init_score=init_score, weight=weight, group=group, init_score=init_score,
...@@ -950,7 +955,7 @@ class Dataset(object): ...@@ -950,7 +955,7 @@ class Dataset(object):
---------- ----------
used_indices : list of int used_indices : list of int
Indices used to create the subset. Indices used to create the subset.
params: dict or None, optional (default=None) params : dict or None, optional (default=None)
Other parameters. Other parameters.
Returns Returns
...@@ -974,10 +979,16 @@ class Dataset(object): ...@@ -974,10 +979,16 @@ class Dataset(object):
---------- ----------
filename : string filename : string
Name of the output file. Name of the output file.
Returns
-------
self : Dataset
Returns self.
""" """
_safe_call(_LIB.LGBM_DatasetSaveBinary( _safe_call(_LIB.LGBM_DatasetSaveBinary(
self.construct().handle, self.construct().handle,
c_str(filename))) c_str(filename)))
return self
def _update_params(self, params): def _update_params(self, params):
if not self.params: if not self.params:
...@@ -985,20 +996,27 @@ class Dataset(object): ...@@ -985,20 +996,27 @@ class Dataset(object):
else: else:
self.params_back_up = copy.deepcopy(self.params) self.params_back_up = copy.deepcopy(self.params)
self.params.update(params) self.params.update(params)
return self
def _reverse_update_params(self): def _reverse_update_params(self):
self.params = copy.deepcopy(self.params_back_up) self.params = copy.deepcopy(self.params_back_up)
self.params_back_up = None self.params_back_up = None
return self
def set_field(self, field_name, data): def set_field(self, field_name, data):
"""Set property into the Dataset. """Set property into the Dataset.
Parameters Parameters
---------- ----------
field_name: string field_name : string
The field name of the information. The field name of the information.
data: list, numpy array or None data : list, numpy array or None
The array of data to be set. The array of data to be set.
Returns
-------
self : Dataset
Dataset with set property.
""" """
if self.handle is None: if self.handle is None:
raise Exception("Cannot set %s before construct dataset" % field_name) raise Exception("Cannot set %s before construct dataset" % field_name)
...@@ -1010,7 +1028,7 @@ class Dataset(object): ...@@ -1010,7 +1028,7 @@ class Dataset(object):
None, None,
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(FIELD_TYPE_MAPPER[field_name]))) ctypes.c_int(FIELD_TYPE_MAPPER[field_name])))
return return self
dtype = np.float32 dtype = np.float32
if field_name == 'group': if field_name == 'group':
dtype = np.int32 dtype = np.int32
...@@ -1031,13 +1049,14 @@ class Dataset(object): ...@@ -1031,13 +1049,14 @@ class Dataset(object):
ptr_data, ptr_data,
ctypes.c_int(len(data)), ctypes.c_int(len(data)),
ctypes.c_int(type_data))) ctypes.c_int(type_data)))
return self
def get_field(self, field_name): def get_field(self, field_name):
"""Get property from the Dataset. """Get property from the Dataset.
Parameters Parameters
---------- ----------
field_name: string field_name : string
The field name of the information. The field name of the information.
Returns Returns
...@@ -1076,19 +1095,25 @@ class Dataset(object): ...@@ -1076,19 +1095,25 @@ class Dataset(object):
---------- ----------
categorical_feature : list of int or strings categorical_feature : list of int or strings
Names or indices of categorical features. Names or indices of categorical features.
Returns
-------
self : Dataset
Dataset with set categorical features.
""" """
if self.categorical_feature == categorical_feature: if self.categorical_feature == categorical_feature:
return return self
if self.data is not None: if self.data is not None:
if self.categorical_feature is None: if self.categorical_feature is None:
self.categorical_feature = categorical_feature self.categorical_feature = categorical_feature
self._free_handle() return self._free_handle()
elif categorical_feature == 'auto': elif categorical_feature == 'auto':
warnings.warn('Using categorical_feature in Dataset.') warnings.warn('Using categorical_feature in Dataset.')
return self
else: else:
warnings.warn('categorical_feature in Dataset is overridden. New categorical_feature is {}'.format(sorted(list(categorical_feature)))) warnings.warn('categorical_feature in Dataset is overridden. New categorical_feature is {}'.format(sorted(list(categorical_feature))))
self.categorical_feature = categorical_feature self.categorical_feature = categorical_feature
self._free_handle() return self._free_handle()
else: else:
raise LightGBMError("Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.") raise LightGBMError("Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
...@@ -1098,10 +1123,10 @@ class Dataset(object): ...@@ -1098,10 +1123,10 @@ class Dataset(object):
Please set init_model in engine.train or engine.cv Please set init_model in engine.train or engine.cv
""" """
if predictor is self._predictor: if predictor is self._predictor:
return return self
if self.data is not None: if self.data is not None:
self._predictor = predictor self._predictor = predictor
self._free_handle() return self._free_handle()
else: else:
raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.") raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
...@@ -1112,16 +1137,19 @@ class Dataset(object): ...@@ -1112,16 +1137,19 @@ class Dataset(object):
---------- ----------
reference : Dataset reference : Dataset
Reference that is used as a template to construct the current Dataset. Reference that is used as a template to construct the current Dataset.
Returns
-------
self : Dataset
Dataset with set reference.
""" """
self.set_categorical_feature(reference.categorical_feature) self.set_categorical_feature(reference.categorical_feature).set_feature_name(reference.feature_name)._set_predictor(reference._predictor)
self.set_feature_name(reference.feature_name)
self._set_predictor(reference._predictor)
# we're done if self and reference share a common upstream reference # we're done if self and reference share a common upstream reference
if self.get_ref_chain().intersection(reference.get_ref_chain()): if self.get_ref_chain().intersection(reference.get_ref_chain()):
return return self
if self.data is not None: if self.data is not None:
self.reference = reference self.reference = reference
self._free_handle() return self._free_handle()
else: else:
raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.") raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
...@@ -1132,6 +1160,11 @@ class Dataset(object): ...@@ -1132,6 +1160,11 @@ class Dataset(object):
---------- ----------
feature_name : list of strings feature_name : list of strings
Feature names. Feature names.
Returns
-------
self : Dataset
Dataset with set feature name.
""" """
if feature_name != 'auto': if feature_name != 'auto':
self.feature_name = feature_name self.feature_name = feature_name
...@@ -1143,19 +1176,26 @@ class Dataset(object): ...@@ -1143,19 +1176,26 @@ class Dataset(object):
self.handle, self.handle,
c_array(ctypes.c_char_p, c_feature_name), c_array(ctypes.c_char_p, c_feature_name),
ctypes.c_int(len(feature_name)))) ctypes.c_int(len(feature_name))))
return self
def set_label(self, label): def set_label(self, label):
"""Set label of Dataset """Set label of Dataset
Parameters Parameters
---------- ----------
label: list, numpy array or None label : list, numpy array or None
The label information to be set into Dataset. The label information to be set into Dataset.
Returns
-------
self : Dataset
Dataset with set label.
""" """
self.label = label self.label = label
if self.handle is not None: if self.handle is not None:
label = list_to_1d_numpy(label, name='label') label = list_to_1d_numpy(label, name='label')
self.set_field('label', label) self.set_field('label', label)
return self
def set_weight(self, weight): def set_weight(self, weight):
"""Set weight of each instance. """Set weight of each instance.
...@@ -1164,6 +1204,11 @@ class Dataset(object): ...@@ -1164,6 +1204,11 @@ class Dataset(object):
---------- ----------
weight : list, numpy array or None weight : list, numpy array or None
Weight to be set for each data point. Weight to be set for each data point.
Returns
-------
self : Dataset
Dataset with set weight.
""" """
if weight is not None and np.all(weight == 1): if weight is not None and np.all(weight == 1):
weight = None weight = None
...@@ -1171,6 +1216,7 @@ class Dataset(object): ...@@ -1171,6 +1216,7 @@ class Dataset(object):
if self.handle is not None and weight is not None: if self.handle is not None and weight is not None:
weight = list_to_1d_numpy(weight, name='weight') weight = list_to_1d_numpy(weight, name='weight')
self.set_field('weight', weight) self.set_field('weight', weight)
return self
def set_init_score(self, init_score): def set_init_score(self, init_score):
"""Set init score of Booster to start from. """Set init score of Booster to start from.
...@@ -1179,11 +1225,17 @@ class Dataset(object): ...@@ -1179,11 +1225,17 @@ class Dataset(object):
---------- ----------
init_score : list, numpy array or None init_score : list, numpy array or None
Init score for Booster. Init score for Booster.
Returns
-------
self : Dataset
Dataset with set init score.
""" """
self.init_score = init_score self.init_score = init_score
if self.handle is not None and init_score is not None: if self.handle is not None and init_score is not None:
init_score = list_to_1d_numpy(init_score, np.float64, name='init_score') init_score = list_to_1d_numpy(init_score, np.float64, name='init_score')
self.set_field('init_score', init_score) self.set_field('init_score', init_score)
return self
def set_group(self, group): def set_group(self, group):
"""Set group size of Dataset (used for ranking). """Set group size of Dataset (used for ranking).
...@@ -1192,18 +1244,24 @@ class Dataset(object): ...@@ -1192,18 +1244,24 @@ class Dataset(object):
---------- ----------
group : list, numpy array or None group : list, numpy array or None
Group size of each group. Group size of each group.
Returns
-------
self : Dataset
Dataset with set group.
""" """
self.group = group self.group = group
if self.handle is not None and group is not None: if self.handle is not None and group is not None:
group = list_to_1d_numpy(group, np.int32, name='group') group = list_to_1d_numpy(group, np.int32, name='group')
self.set_field('group', group) self.set_field('group', group)
return self
def get_label(self): def get_label(self):
"""Get the label of the Dataset. """Get the label of the Dataset.
Returns Returns
------- -------
label : numpy array label : numpy array or None
The label information from the Dataset. The label information from the Dataset.
""" """
if self.label is None: if self.label is None:
...@@ -1215,7 +1273,7 @@ class Dataset(object): ...@@ -1215,7 +1273,7 @@ class Dataset(object):
Returns Returns
------- -------
weight : numpy array weight : numpy array or None
Weight for each data point from the Dataset. Weight for each data point from the Dataset.
""" """
if self.weight is None: if self.weight is None:
...@@ -1227,7 +1285,7 @@ class Dataset(object): ...@@ -1227,7 +1285,7 @@ class Dataset(object):
Returns Returns
------- -------
init_score : numpy array init_score : numpy array or None
Init score of Booster. Init score of Booster.
""" """
if self.init_score is None: if self.init_score is None:
...@@ -1239,17 +1297,14 @@ class Dataset(object): ...@@ -1239,17 +1297,14 @@ class Dataset(object):
Returns Returns
------- -------
group : numpy array group : numpy array or None
Group size of each group. Group size of each group.
""" """
if self.group is None: if self.group is None:
self.group = self.get_field('group') self.group = self.get_field('group')
if self.group is not None: if self.group is not None:
# group data from LightGBM is boundaries data, need to convert to group size # group data from LightGBM is boundaries data, need to convert to group size
new_group = [] self.group = np.diff(self.group)
for i in range_(len(self.group) - 1):
new_group.append(self.group[i + 1] - self.group[i])
self.group = new_group
return self.group return self.group
def num_data(self): def num_data(self):
...@@ -1309,7 +1364,7 @@ class Dataset(object): ...@@ -1309,7 +1364,7 @@ class Dataset(object):
break break
else: else:
break break
return(ref_chain) return ref_chain
class Booster(object): class Booster(object):
...@@ -1319,7 +1374,7 @@ class Booster(object): ...@@ -1319,7 +1374,7 @@ class Booster(object):
Parameters Parameters
---------- ----------
params: dict or None, optional (default=None) params : dict or None, optional (default=None)
Parameters for Booster. Parameters for Booster.
train_set : Dataset or None, optional (default=None) train_set : Dataset or None, optional (default=None)
Training dataset. Training dataset.
...@@ -1448,14 +1503,22 @@ class Booster(object): ...@@ -1448,14 +1503,22 @@ class Booster(object):
self.__dict__.update(state) self.__dict__.update(state)
def free_dataset(self): def free_dataset(self):
"""Free Booster's Datasets.""" """Free Booster's Datasets.
Returns
-------
self : Booster
Booster without Datasets.
"""
self.__dict__.pop('train_set', None) self.__dict__.pop('train_set', None)
self.__dict__.pop('valid_sets', None) self.__dict__.pop('valid_sets', None)
self.__num_dataset = 0 self.__num_dataset = 0
return self
def _free_buffer(self): def _free_buffer(self):
self.__inner_predict_buffer = [] self.__inner_predict_buffer = []
self.__is_predicted_cur_iter = [] self.__is_predicted_cur_iter = []
return self
def set_network(self, machines, local_listen_port=12400, def set_network(self, machines, local_listen_port=12400,
listen_time_out=120, num_machines=1): listen_time_out=120, num_machines=1):
...@@ -1463,35 +1526,54 @@ class Booster(object): ...@@ -1463,35 +1526,54 @@ class Booster(object):
Parameters Parameters
---------- ----------
machines: list, set or string machines : list, set or string
Names of machines. Names of machines.
local_listen_port: int, optional (default=12400) local_listen_port : int, optional (default=12400)
TCP listen port for local machines. TCP listen port for local machines.
listen_time_out: int, optional (default=120) listen_time_out : int, optional (default=120)
Socket time-out in minutes. Socket time-out in minutes.
num_machines: int, optional (default=1) num_machines : int, optional (default=1)
The number of machines for parallel learning application. The number of machines for parallel learning application.
Returns
-------
self : Booster
Booster with set network.
""" """
_safe_call(_LIB.LGBM_NetworkInit(c_str(machines), _safe_call(_LIB.LGBM_NetworkInit(c_str(machines),
ctypes.c_int(local_listen_port), ctypes.c_int(local_listen_port),
ctypes.c_int(listen_time_out), ctypes.c_int(listen_time_out),
ctypes.c_int(num_machines))) ctypes.c_int(num_machines)))
self.network = True self.network = True
return self
def free_network(self): def free_network(self):
"""Free network.""" """Free Booster's network.
Returns
-------
self : Booster
Booster with freed network.
"""
_safe_call(_LIB.LGBM_NetworkFree()) _safe_call(_LIB.LGBM_NetworkFree())
self.network = False self.network = False
return self
def set_train_data_name(self, name): def set_train_data_name(self, name):
"""Set the name to the training Dataset. """Set the name to the training Dataset.
Parameters Parameters
---------- ----------
name: string name : string
Name for training Dataset. Name for the training Dataset.
Returns
-------
self : Booster
Booster with set training Dataset name.
""" """
self.__train_data_name = name self.__train_data_name = name
return self
def add_valid(self, data, name): def add_valid(self, data, name):
"""Add validation data. """Add validation data.
...@@ -1502,6 +1584,11 @@ class Booster(object): ...@@ -1502,6 +1584,11 @@ class Booster(object):
Validation data. Validation data.
name : string name : string
Name of validation data. Name of validation data.
Returns
-------
self : Booster
Booster with set validation data.
""" """
if not isinstance(data, Dataset): if not isinstance(data, Dataset):
raise TypeError('Validation data should be Dataset instance, met {}'.format(type(data).__name__)) raise TypeError('Validation data should be Dataset instance, met {}'.format(type(data).__name__))
...@@ -1515,6 +1602,7 @@ class Booster(object): ...@@ -1515,6 +1602,7 @@ class Booster(object):
self.__num_dataset += 1 self.__num_dataset += 1
self.__inner_predict_buffer.append(None) self.__inner_predict_buffer.append(None)
self.__is_predicted_cur_iter.append(False) self.__is_predicted_cur_iter.append(False)
return self
def reset_parameter(self, params): def reset_parameter(self, params):
"""Reset parameters of Booster. """Reset parameters of Booster.
...@@ -1523,6 +1611,11 @@ class Booster(object): ...@@ -1523,6 +1611,11 @@ class Booster(object):
---------- ----------
params : dict params : dict
New parameters for Booster. New parameters for Booster.
Returns
-------
self : Booster
Booster with new parameters.
""" """
if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')): if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')):
self.__need_reload_eval_info = True self.__need_reload_eval_info = True
...@@ -1531,9 +1624,10 @@ class Booster(object): ...@@ -1531,9 +1624,10 @@ class Booster(object):
_safe_call(_LIB.LGBM_BoosterResetParameter( _safe_call(_LIB.LGBM_BoosterResetParameter(
self.handle, self.handle,
c_str(params_str))) c_str(params_str)))
return self
def update(self, train_set=None, fobj=None): def update(self, train_set=None, fobj=None):
"""Update for one iteration. """Update Booster for one iteration.
Parameters Parameters
---------- ----------
...@@ -1575,28 +1669,29 @@ class Booster(object): ...@@ -1575,28 +1669,29 @@ class Booster(object):
return is_finished.value == 1 return is_finished.value == 1
else: else:
if not self.__set_objective_to_none: if not self.__set_objective_to_none:
self.reset_parameter({"objective": "none"}) self.reset_parameter({"objective": "none"}).__set_objective_to_none = True
self.__set_objective_to_none = True
grad, hess = fobj(self.__inner_predict(0), self.train_set) grad, hess = fobj(self.__inner_predict(0), self.train_set)
return self.__boost(grad, hess) return self.__boost(grad, hess)
def __boost(self, grad, hess): def __boost(self, grad, hess):
""" """
Boost the booster for one iteration, with customized gradient statistics. Boost Booster for one iteration with customized gradient statistics.
Note: for multi-class task, the score is group by class_id first, then group by row_id
if you want to get i-th row score in j-th class, the access way is score[j*num_data+i] Note: For multi-class task, the score is group by class_id first, then group by row_id.
and you should group grad and hess in this way as well If you want to get i-th row score in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well.
Parameters Parameters
---------- ----------
grad : 1d numpy or 1d list grad : 1d numpy array or list
The first order of gradient. The first order derivative (gradient).
hess : 1d numpy or 1d list hess : 1d numpy or 1d list
The second order of gradient. The second order derivative (Hessian).
Returns Returns
------- -------
is_finished, bool is_finished : bool
Whether the boost was successfully finished.
""" """
grad = list_to_1d_numpy(grad, name='gradient') grad = list_to_1d_numpy(grad, name='gradient')
hess = list_to_1d_numpy(hess, name='hessian') hess = list_to_1d_numpy(hess, name='hessian')
...@@ -1614,10 +1709,17 @@ class Booster(object): ...@@ -1614,10 +1709,17 @@ class Booster(object):
return is_finished.value == 1 return is_finished.value == 1
def rollback_one_iter(self): def rollback_one_iter(self):
"""Rollback one iteration.""" """Rollback one iteration.
Returns
-------
self : Booster
Booster with rolled back one iteration.
"""
_safe_call(_LIB.LGBM_BoosterRollbackOneIter( _safe_call(_LIB.LGBM_BoosterRollbackOneIter(
self.handle)) self.handle))
self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)]
return self
def current_iteration(self): def current_iteration(self):
"""Get the index of the current iteration. """Get the index of the current iteration.
...@@ -1651,7 +1753,7 @@ class Booster(object): ...@@ -1651,7 +1753,7 @@ class Booster(object):
Returns Returns
------- -------
result: list result : list
List with evaluation results. List with evaluation results.
""" """
if not isinstance(data, Dataset): if not isinstance(data, Dataset):
...@@ -1685,7 +1787,7 @@ class Booster(object): ...@@ -1685,7 +1787,7 @@ class Booster(object):
Returns Returns
------- -------
result: list result : list
List with evaluation results. List with evaluation results.
""" """
return self.__inner_eval(self.__train_data_name, 0, feval) return self.__inner_eval(self.__train_data_name, 0, feval)
...@@ -1704,7 +1806,7 @@ class Booster(object): ...@@ -1704,7 +1806,7 @@ class Booster(object):
Returns Returns
------- -------
result: list result : list
List with evaluation results. List with evaluation results.
""" """
return [item for i in range_(1, self.__num_dataset) return [item for i in range_(1, self.__num_dataset)
...@@ -1721,8 +1823,13 @@ class Booster(object): ...@@ -1721,8 +1823,13 @@ class Booster(object):
Index of the iteration that should be saved. Index of the iteration that should be saved.
If None, if the best iteration exists, it is saved; otherwise, all iterations are saved. If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
If <= 0, all iterations are saved. If <= 0, all iterations are saved.
start_iteration: int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration that should be saved. Start index of the iteration that should be saved.
Returns
-------
self : Booster
Returns self.
""" """
if num_iteration is None: if num_iteration is None:
num_iteration = self.best_iteration num_iteration = self.best_iteration
...@@ -1732,25 +1839,32 @@ class Booster(object): ...@@ -1732,25 +1839,32 @@ class Booster(object):
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
c_str(filename))) c_str(filename)))
_save_pandas_categorical(filename, self.pandas_categorical) _save_pandas_categorical(filename, self.pandas_categorical)
return self
def shuffle_models(self): def shuffle_models(self):
"""Shuffle models. """Shuffle models.
Returns
-------
self : Booster
Booster with shuffled models.
""" """
_safe_call(_LIB.LGBM_BoosterShuffleModels(self.handle)) _safe_call(_LIB.LGBM_BoosterShuffleModels(self.handle))
return self
def model_from_string(self, model_str, verbose=True): def model_from_string(self, model_str, verbose=True):
"""Load Booster from a string. """Load Booster from a string.
Parameters Parameters
---------- ----------
model_str: string model_str : string
Model will be loaded from this string. Model will be loaded from this string.
verbose: bool, optional (default=True) verbose : bool, optional (default=True)
Set to False to disable log when loading model. Whether to print messages while loading model.
Returns Returns
------- -------
result: Booster self : Booster
Loaded Booster object. Loaded Booster object.
""" """
if self.handle is not None: if self.handle is not None:
...@@ -1767,7 +1881,7 @@ class Booster(object): ...@@ -1767,7 +1881,7 @@ class Booster(object):
self.handle, self.handle,
ctypes.byref(out_num_class))) ctypes.byref(out_num_class)))
if verbose: if verbose:
print('Finished loading model, total used %d iterations' % (int(out_num_iterations.value))) print('Finished loading model, total used %d iterations' % int(out_num_iterations.value))
self.__num_class = out_num_class.value self.__num_class = out_num_class.value
return self return self
...@@ -1780,12 +1894,12 @@ class Booster(object): ...@@ -1780,12 +1894,12 @@ class Booster(object):
Index of the iteration that should be saved. Index of the iteration that should be saved.
If None, if the best iteration exists, it is saved; otherwise, all iterations are saved. If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
If <= 0, all iterations are saved. If <= 0, all iterations are saved.
start_iteration: int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration that should be saved. Start index of the iteration that should be saved.
Returns Returns
------- -------
result: string str_repr : string
String representation of Booster. String representation of Booster.
""" """
if num_iteration is None: if num_iteration is None:
...@@ -1816,7 +1930,7 @@ class Booster(object): ...@@ -1816,7 +1930,7 @@ class Booster(object):
return string_buffer.value.decode() return string_buffer.value.decode()
def dump_model(self, num_iteration=None, start_iteration=0): def dump_model(self, num_iteration=None, start_iteration=0):
"""Dump Booster to json format. """Dump Booster to JSON format.
Parameters Parameters
---------- ----------
...@@ -1824,13 +1938,13 @@ class Booster(object): ...@@ -1824,13 +1938,13 @@ class Booster(object):
Index of the iteration that should be dumped. Index of the iteration that should be dumped.
If None, if the best iteration exists, it is dumped; otherwise, all iterations are dumped. If None, if the best iteration exists, it is dumped; otherwise, all iterations are dumped.
If <= 0, all iterations are dumped. If <= 0, all iterations are dumped.
start_iteration: int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration that should be dumped. Start index of the iteration that should be dumped.
Returns Returns
------- -------
json_repr : dict json_repr : dict
Json format of Booster. JSON format of Booster.
""" """
if num_iteration is None: if num_iteration is None:
num_iteration = self.best_iteration num_iteration = self.best_iteration
...@@ -1990,8 +2104,7 @@ class Booster(object): ...@@ -1990,8 +2104,7 @@ class Booster(object):
importance_type_int = 1 importance_type_int = 1
else: else:
importance_type_int = -1 importance_type_int = -1
num_feature = self.num_feature() result = np.zeros(self.num_feature(), dtype=np.float64)
result = np.array([0 for _ in range_(num_feature)], dtype=np.float64)
_safe_call(_LIB.LGBM_BoosterFeatureImportance( _safe_call(_LIB.LGBM_BoosterFeatureImportance(
self.handle, self.handle,
ctypes.c_int(iteration), ctypes.c_int(iteration),
...@@ -2004,7 +2117,7 @@ class Booster(object): ...@@ -2004,7 +2117,7 @@ class Booster(object):
def __inner_eval(self, data_name, data_idx, feval=None): def __inner_eval(self, data_name, data_idx, feval=None):
""" """
Evaulate training or validation data Evaluate training or validation data
""" """
if data_idx >= self.__num_dataset: if data_idx >= self.__num_dataset:
raise ValueError("Data_idx should be smaller than number of dataset") raise ValueError("Data_idx should be smaller than number of dataset")
...@@ -2102,7 +2215,7 @@ class Booster(object): ...@@ -2102,7 +2215,7 @@ class Booster(object):
------- -------
value : string or None value : string or None
The attribute value. The attribute value.
Returns None if attribute do not exist. Returns None if attribute does not exist.
""" """
return self.__attr.get(key, None) return self.__attr.get(key, None)
...@@ -2114,11 +2227,17 @@ class Booster(object): ...@@ -2114,11 +2227,17 @@ class Booster(object):
**kwargs **kwargs
The attributes to set. The attributes to set.
Setting a value to None deletes an attribute. Setting a value to None deletes an attribute.
Returns
-------
self : Booster
Booster with set attribute.
""" """
for key, value in kwargs.items(): for key, value in kwargs.items():
if value is not None: if value is not None:
if not isinstance(value, string_type): if not isinstance(value, string_type):
raise ValueError("Set attr only accepts strings") raise ValueError("Only string values are accepted")
self.__attr[key] = value self.__attr[key] = value
else: else:
self.__attr.pop(key, None) self.__attr.pop(key, None)
return self
...@@ -125,10 +125,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -125,10 +125,7 @@ def train(params, train_set, num_boost_round=100,
if not isinstance(train_set, Dataset): if not isinstance(train_set, Dataset):
raise TypeError("Training only accepts Dataset object") raise TypeError("Training only accepts Dataset object")
train_set._update_params(params) train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature(categorical_feature)
train_set._set_predictor(predictor)
train_set.set_feature_name(feature_name)
train_set.set_categorical_feature(categorical_feature)
is_valid_contain_train = False is_valid_contain_train = False
train_data_name = "training" train_data_name = "training"
...@@ -148,9 +145,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -148,9 +145,7 @@ def train(params, train_set, num_boost_round=100,
continue continue
if not isinstance(valid_data, Dataset): if not isinstance(valid_data, Dataset):
raise TypeError("Traninig only accepts Dataset object") raise TypeError("Traninig only accepts Dataset object")
valid_data._update_params(params) reduced_valid_sets.append(valid_data._update_params(params).set_reference(train_set))
valid_data.set_reference(train_set)
reduced_valid_sets.append(valid_data)
if valid_names is not None and len(valid_names) > i: if valid_names is not None and len(valid_names) > i:
name_valid_sets.append(valid_names[i]) name_valid_sets.append(valid_names[i])
else: else:
...@@ -230,8 +225,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -230,8 +225,7 @@ def train(params, train_set, num_boost_round=100,
for dataset_name, eval_name, score, _ in evaluation_result_list: for dataset_name, eval_name, score, _ in evaluation_result_list:
booster.best_score[dataset_name][eval_name] = score booster.best_score[dataset_name][eval_name] = score
if not keep_training_booster: if not keep_training_booster:
booster.model_from_string(booster.model_to_string(), False) booster.model_from_string(booster.model_to_string(), False).free_dataset()
booster.free_dataset()
return booster return booster
...@@ -421,10 +415,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -421,10 +415,7 @@ def cv(params, train_set, num_boost_round=100,
predictor = init_model._to_predictor() predictor = init_model._to_predictor()
else: else:
predictor = None predictor = None
train_set._update_params(params) train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature(categorical_feature)
train_set._set_predictor(predictor)
train_set.set_feature_name(feature_name)
train_set.set_categorical_feature(categorical_feature)
if metrics is not None: if metrics is not None:
params['metric'] = metrics params['metric'] = metrics
......
...@@ -23,24 +23,24 @@ def _objective_function_wrapper(func): ...@@ -23,24 +23,24 @@ def _objective_function_wrapper(func):
Parameters Parameters
---------- ----------
func: callable func : callable
Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group)``: Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group)``:
y_true: array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values. The predicted values.
group: array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
Returns Returns
------- -------
new_func: callable new_func : callable
The new objective function as expected by ``lightgbm.engine.train``. The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``: The signature is ``new_func(preds, dataset)``:
preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes] preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes]
The predicted values. The predicted values.
dataset: ``dataset`` dataset : ``dataset``
The training set from which the labels will be extracted using The training set from which the labels will be extracted using
``dataset.get_label()``. ``dataset.get_label()``.
""" """
...@@ -82,31 +82,31 @@ def _eval_function_wrapper(func): ...@@ -82,31 +82,31 @@ def _eval_function_wrapper(func):
Parameters Parameters
---------- ----------
func: callable func : callable
Expects a callable with following functions: Expects a callable with following functions:
``func(y_true, y_pred)``, ``func(y_true, y_pred)``,
``func(y_true, y_pred, weight)`` ``func(y_true, y_pred, weight)``
or ``func(y_true, y_pred, weight, group)`` or ``func(y_true, y_pred, weight, group)``
and return (eval_name->str, eval_result->float, is_bigger_better->Bool): and return (eval_name->str, eval_result->float, is_bigger_better->Bool):
y_true: array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values. The predicted values.
weight: array_like of shape = [n_samples] weight : array_like of shape = [n_samples]
The weight of samples. The weight of samples.
group: array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
Returns Returns
------- -------
new_func: callable new_func : callable
The new eval function as expected by ``lightgbm.engine.train``. The new eval function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``: The signature is ``new_func(preds, dataset)``:
preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes] preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes]
The predicted values. The predicted values.
dataset: ``dataset`` dataset : ``dataset``
The training set from which the labels will be extracted using The training set from which the labels will be extracted using
``dataset.get_label()``. ``dataset.get_label()``.
""" """
...@@ -232,15 +232,15 @@ class LGBMModel(_LGBMModelBase): ...@@ -232,15 +232,15 @@ class LGBMModel(_LGBMModelBase):
``objective(y_true, y_pred) -> grad, hess`` or ``objective(y_true, y_pred) -> grad, hess`` or
``objective(y_true, y_pred, group) -> grad, hess``: ``objective(y_true, y_pred, group) -> grad, hess``:
y_true: array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
group: array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
grad: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task) grad : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The value of the gradient for each sample point. The value of the gradient for each sample point.
hess: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task) hess : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The value of the second derivative for each sample point. The value of the second derivative for each sample point.
For multi-class task, the y_pred is grouped by class_id first, then by row_id. For multi-class task, the y_pred is grouped by class_id first, then by row_id.
...@@ -365,19 +365,19 @@ class LGBMModel(_LGBMModelBase): ...@@ -365,19 +365,19 @@ class LGBMModel(_LGBMModelBase):
Returns (eval_name, eval_result, is_bigger_better) or Returns (eval_name, eval_result, is_bigger_better) or
list of (eval_name, eval_result, is_bigger_better) list of (eval_name, eval_result, is_bigger_better)
y_true: array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values. The predicted values.
weight: array-like of shape = [n_samples] weight : array-like of shape = [n_samples]
The weight of samples. The weight of samples.
group: array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
eval_name: string eval_name : string
The name of evaluation. The name of evaluation.
eval_result: float eval_result : float
The eval result. The eval result.
is_bigger_better: bool is_bigger_better : bool
Whether a bigger eval result is better, e.g. AUC is bigger_better. Whether a bigger eval result is better, e.g. AUC is bigger_better.
For multi-class task, the y_pred is grouped by class_id first, then by row_id. For multi-class task, the y_pred is grouped by class_id first, then by row_id.
...@@ -434,8 +434,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -434,8 +434,7 @@ class LGBMModel(_LGBMModelBase):
def _construct_dataset(X, y, sample_weight, init_score, group, params): def _construct_dataset(X, y, sample_weight, init_score, group, params):
ret = Dataset(X, label=y, weight=sample_weight, group=group, params=params) ret = Dataset(X, label=y, weight=sample_weight, group=group, params=params)
ret.set_init_score(init_score) return ret.set_init_score(init_score)
return ret
train_set = _construct_dataset(X, y, sample_weight, init_score, group, params) train_set = _construct_dataset(X, y, sample_weight, init_score, group, params)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment