Commit b8cc8738 (unverified), authored by James Lamb, committed by GitHub
Browse files

[python-package] make Booster and Dataset 'handle' attributes private (fixes #5313) (#5947)

parent 9edea60e
...@@ -852,7 +852,7 @@ class _InnerPredictor: ...@@ -852,7 +852,7 @@ class _InnerPredictor:
pred_parameter: dict or None, optional (default=None) pred_parameter: dict or None, optional (default=None)
Other parameters for the prediction. Other parameters for the prediction.
""" """
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
self.__is_manage_handle = True self.__is_manage_handle = True
if model_file is not None: if model_file is not None:
"""Prediction task""" """Prediction task"""
...@@ -860,20 +860,20 @@ class _InnerPredictor: ...@@ -860,20 +860,20 @@ class _InnerPredictor:
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
_c_str(str(model_file)), _c_str(str(model_file)),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses( _safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle, self._handle,
ctypes.byref(out_num_class))) ctypes.byref(out_num_class)))
self.num_class = out_num_class.value self.num_class = out_num_class.value
self.num_total_iteration = out_num_iterations.value self.num_total_iteration = out_num_iterations.value
self.pandas_categorical = _load_pandas_categorical(file_name=model_file) self.pandas_categorical = _load_pandas_categorical(file_name=model_file)
elif booster_handle is not None: elif booster_handle is not None:
self.__is_manage_handle = False self.__is_manage_handle = False
self.handle = booster_handle self._handle = booster_handle
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses( _safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle, self._handle,
ctypes.byref(out_num_class))) ctypes.byref(out_num_class)))
self.num_class = out_num_class.value self.num_class = out_num_class.value
self.num_total_iteration = self.current_iteration() self.num_total_iteration = self.current_iteration()
...@@ -887,13 +887,14 @@ class _InnerPredictor: ...@@ -887,13 +887,14 @@ class _InnerPredictor:
def __del__(self) -> None: def __del__(self) -> None:
try: try:
if self.__is_manage_handle: if self.__is_manage_handle:
_safe_call(_LIB.LGBM_BoosterFree(self.handle)) _safe_call(_LIB.LGBM_BoosterFree(self._handle))
except AttributeError: except AttributeError:
pass pass
def __getstate__(self) -> Dict[str, Any]: def __getstate__(self) -> Dict[str, Any]:
this = self.__dict__.copy() this = self.__dict__.copy()
this.pop('handle', None) this.pop('handle', None)
this.pop('_handle', None)
return this return this
def predict( def predict(
...@@ -945,7 +946,7 @@ class _InnerPredictor: ...@@ -945,7 +946,7 @@ class _InnerPredictor:
ptr_names[:] = [x.encode('utf-8') for x in data_names] ptr_names[:] = [x.encode('utf-8') for x in data_names]
_safe_call( _safe_call(
_LIB.LGBM_BoosterValidateFeatureNames( _LIB.LGBM_BoosterValidateFeatureNames(
self.handle, self._handle,
ptr_names, ptr_names,
ctypes.c_int(len(data_names)), ctypes.c_int(len(data_names)),
) )
...@@ -963,7 +964,7 @@ class _InnerPredictor: ...@@ -963,7 +964,7 @@ class _InnerPredictor:
if isinstance(data, (str, Path)): if isinstance(data, (str, Path)):
with _TempFile() as f: with _TempFile() as f:
_safe_call(_LIB.LGBM_BoosterPredictForFile( _safe_call(_LIB.LGBM_BoosterPredictForFile(
self.handle, self._handle,
_c_str(str(data)), _c_str(str(data)),
ctypes.c_int(int_data_has_header), ctypes.c_int(int_data_has_header),
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
...@@ -1049,7 +1050,7 @@ class _InnerPredictor: ...@@ -1049,7 +1050,7 @@ class _InnerPredictor:
'and then concatenate predictions for them') 'and then concatenate predictions for them')
n_preds = ctypes.c_int64(0) n_preds = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterCalcNumPredict( _safe_call(_LIB.LGBM_BoosterCalcNumPredict(
self.handle, self._handle,
ctypes.c_int(nrow), ctypes.c_int(nrow),
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
...@@ -1082,7 +1083,7 @@ class _InnerPredictor: ...@@ -1082,7 +1083,7 @@ class _InnerPredictor:
raise ValueError("Wrong length of pre-allocated predict array") raise ValueError("Wrong length of pre-allocated predict array")
out_num_preds = ctypes.c_int64(0) out_num_preds = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterPredictForMat( _safe_call(_LIB.LGBM_BoosterPredictForMat(
self.handle, self._handle,
ptr_data, ptr_data,
ctypes.c_int(type_ptr_data), ctypes.c_int(type_ptr_data),
ctypes.c_int32(mat.shape[0]), ctypes.c_int32(mat.shape[0]),
...@@ -1219,7 +1220,7 @@ class _InnerPredictor: ...@@ -1219,7 +1220,7 @@ class _InnerPredictor:
csr_indices = csr.indices.astype(np.int32, copy=False) csr_indices = csr.indices.astype(np.int32, copy=False)
_safe_call(_LIB.LGBM_BoosterPredictForCSR( _safe_call(_LIB.LGBM_BoosterPredictForCSR(
self.handle, self._handle,
ptr_indptr, ptr_indptr,
ctypes.c_int(type_ptr_indptr), ctypes.c_int(type_ptr_indptr),
csr_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), csr_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
...@@ -1262,7 +1263,7 @@ class _InnerPredictor: ...@@ -1262,7 +1263,7 @@ class _InnerPredictor:
out_ptr_data = ctypes.POINTER(ctypes.c_double)() out_ptr_data = ctypes.POINTER(ctypes.c_double)()
out_shape = np.empty(2, dtype=np.int64) out_shape = np.empty(2, dtype=np.int64)
_safe_call(_LIB.LGBM_BoosterPredictSparseOutput( _safe_call(_LIB.LGBM_BoosterPredictSparseOutput(
self.handle, self._handle,
ptr_indptr, ptr_indptr,
ctypes.c_int(type_ptr_indptr), ctypes.c_int(type_ptr_indptr),
csr_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), csr_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
...@@ -1359,7 +1360,7 @@ class _InnerPredictor: ...@@ -1359,7 +1360,7 @@ class _InnerPredictor:
out_ptr_data = ctypes.POINTER(ctypes.c_double)() out_ptr_data = ctypes.POINTER(ctypes.c_double)()
out_shape = np.empty(2, dtype=np.int64) out_shape = np.empty(2, dtype=np.int64)
_safe_call(_LIB.LGBM_BoosterPredictSparseOutput( _safe_call(_LIB.LGBM_BoosterPredictSparseOutput(
self.handle, self._handle,
ptr_indptr, ptr_indptr,
ctypes.c_int(type_ptr_indptr), ctypes.c_int(type_ptr_indptr),
csc_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), csc_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
...@@ -1429,7 +1430,7 @@ class _InnerPredictor: ...@@ -1429,7 +1430,7 @@ class _InnerPredictor:
csc_indices = csc.indices.astype(np.int32, copy=False) csc_indices = csc.indices.astype(np.int32, copy=False)
_safe_call(_LIB.LGBM_BoosterPredictForCSC( _safe_call(_LIB.LGBM_BoosterPredictForCSC(
self.handle, self._handle,
ptr_indptr, ptr_indptr,
ctypes.c_int(type_ptr_indptr), ctypes.c_int(type_ptr_indptr),
csc_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), csc_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
...@@ -1458,7 +1459,7 @@ class _InnerPredictor: ...@@ -1458,7 +1459,7 @@ class _InnerPredictor:
""" """
out_cur_iter = ctypes.c_int(0) out_cur_iter = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetCurrentIteration( _safe_call(_LIB.LGBM_BoosterGetCurrentIteration(
self.handle, self._handle,
ctypes.byref(out_cur_iter))) ctypes.byref(out_cur_iter)))
return out_cur_iter.value return out_cur_iter.value
...@@ -1518,7 +1519,7 @@ class Dataset: ...@@ -1518,7 +1519,7 @@ class Dataset:
free_raw_data : bool, optional (default=True) free_raw_data : bool, optional (default=True)
If True, raw data is freed after constructing inner Dataset. If True, raw data is freed after constructing inner Dataset.
""" """
self.handle: Optional[_DatasetHandle] = None self._handle: Optional[_DatasetHandle] = None
self.data = data self.data = data
self.label = label self.label = label
self.reference = reference self.reference = reference
...@@ -1594,11 +1595,11 @@ class Dataset: ...@@ -1594,11 +1595,11 @@ class Dataset:
self : Dataset self : Dataset
Constructed Dataset object. Constructed Dataset object.
""" """
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_DatasetCreateByReference( _safe_call(_LIB.LGBM_DatasetCreateByReference(
ref_dataset, ref_dataset,
ctypes.c_int64(total_nrow), ctypes.c_int64(total_nrow),
ctypes.byref(self.handle), ctypes.byref(self._handle),
)) ))
return self return self
...@@ -1649,7 +1650,7 @@ class Dataset: ...@@ -1649,7 +1650,7 @@ class Dataset:
num_per_col = np.array([len(d) for d in sample_indices], dtype=np.int32) num_per_col = np.array([len(d) for d in sample_indices], dtype=np.int32)
num_per_col_ptr, _, _ = _c_int_array(num_per_col) num_per_col_ptr, _, _ = _c_int_array(num_per_col)
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
params_str = _param_dict_to_str(self.get_params()) params_str = _param_dict_to_str(self.get_params())
_safe_call(_LIB.LGBM_DatasetCreateFromSampledColumn( _safe_call(_LIB.LGBM_DatasetCreateFromSampledColumn(
ctypes.cast(sample_col_ptr, ctypes.POINTER(ctypes.POINTER(ctypes.c_double))), ctypes.cast(sample_col_ptr, ctypes.POINTER(ctypes.POINTER(ctypes.c_double))),
...@@ -1660,7 +1661,7 @@ class Dataset: ...@@ -1660,7 +1661,7 @@ class Dataset:
ctypes.c_int32(total_nrow), ctypes.c_int32(total_nrow),
ctypes.c_int64(total_nrow), ctypes.c_int64(total_nrow),
_c_str(params_str), _c_str(params_str),
ctypes.byref(self.handle), ctypes.byref(self._handle),
)) ))
return self return self
...@@ -1682,7 +1683,7 @@ class Dataset: ...@@ -1682,7 +1683,7 @@ class Dataset:
data_ptr, data_type, _ = _c_float_array(data) data_ptr, data_type, _ = _c_float_array(data)
_safe_call(_LIB.LGBM_DatasetPushRows( _safe_call(_LIB.LGBM_DatasetPushRows(
self.handle, self._handle,
data_ptr, data_ptr,
data_type, data_type,
ctypes.c_int32(nrow), ctypes.c_int32(nrow),
...@@ -1728,9 +1729,9 @@ class Dataset: ...@@ -1728,9 +1729,9 @@ class Dataset:
return {} return {}
def _free_handle(self) -> "Dataset": def _free_handle(self) -> "Dataset":
if self.handle is not None: if self._handle is not None:
_safe_call(_LIB.LGBM_DatasetFree(self.handle)) _safe_call(_LIB.LGBM_DatasetFree(self._handle))
self.handle = None self._handle = None
self._need_slice = True self._need_slice = True
if self.used_indices is not None: if self.used_indices is not None:
self.data = None self.data = None
...@@ -1791,7 +1792,7 @@ class Dataset: ...@@ -1791,7 +1792,7 @@ class Dataset:
params: Optional[Dict[str, Any]] params: Optional[Dict[str, Any]]
) -> "Dataset": ) -> "Dataset":
if data is None: if data is None:
self.handle = None self._handle = None
return self return self
if reference is not None: if reference is not None:
self.pandas_categorical = reference.pandas_categorical self.pandas_categorical = reference.pandas_categorical
...@@ -1835,17 +1836,17 @@ class Dataset: ...@@ -1835,17 +1836,17 @@ class Dataset:
# process for reference dataset # process for reference dataset
ref_dataset = None ref_dataset = None
if isinstance(reference, Dataset): if isinstance(reference, Dataset):
ref_dataset = reference.construct().handle ref_dataset = reference.construct()._handle
elif reference is not None: elif reference is not None:
raise TypeError('Reference dataset should be None or dataset instance') raise TypeError('Reference dataset should be None or dataset instance')
# start construct data # start construct data
if isinstance(data, (str, Path)): if isinstance(data, (str, Path)):
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_DatasetCreateFromFile( _safe_call(_LIB.LGBM_DatasetCreateFromFile(
_c_str(str(data)), _c_str(str(data)),
_c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
elif isinstance(data, scipy.sparse.csr_matrix): elif isinstance(data, scipy.sparse.csr_matrix):
self.__init_from_csr(data, params_str, ref_dataset) self.__init_from_csr(data, params_str, ref_dataset)
elif isinstance(data, scipy.sparse.csc_matrix): elif isinstance(data, scipy.sparse.csc_matrix):
...@@ -1978,7 +1979,7 @@ class Dataset: ...@@ -1978,7 +1979,7 @@ class Dataset:
if len(mat.shape) != 2: if len(mat.shape) != 2:
raise ValueError('Input numpy.ndarray must be 2 dimensional') raise ValueError('Input numpy.ndarray must be 2 dimensional')
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
if mat.dtype == np.float32 or mat.dtype == np.float64: if mat.dtype == np.float32 or mat.dtype == np.float64:
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False) data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
else: # change non-float data to float data, need to copy else: # change non-float data to float data, need to copy
...@@ -1993,7 +1994,7 @@ class Dataset: ...@@ -1993,7 +1994,7 @@ class Dataset:
ctypes.c_int(_C_API_IS_ROW_MAJOR), ctypes.c_int(_C_API_IS_ROW_MAJOR),
_c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
return self return self
def __init_from_list_np2d( def __init_from_list_np2d(
...@@ -2035,7 +2036,7 @@ class Dataset: ...@@ -2035,7 +2036,7 @@ class Dataset:
type_ptr_data = chunk_type_ptr_data type_ptr_data = chunk_type_ptr_data
holders.append(holder) holders.append(holder)
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_DatasetCreateFromMats( _safe_call(_LIB.LGBM_DatasetCreateFromMats(
ctypes.c_int32(len(mats)), ctypes.c_int32(len(mats)),
ctypes.cast(ptr_data, ctypes.POINTER(ctypes.POINTER(ctypes.c_double))), ctypes.cast(ptr_data, ctypes.POINTER(ctypes.POINTER(ctypes.c_double))),
...@@ -2045,7 +2046,7 @@ class Dataset: ...@@ -2045,7 +2046,7 @@ class Dataset:
ctypes.c_int(_C_API_IS_ROW_MAJOR), ctypes.c_int(_C_API_IS_ROW_MAJOR),
_c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
return self return self
def __init_from_csr( def __init_from_csr(
...@@ -2057,7 +2058,7 @@ class Dataset: ...@@ -2057,7 +2058,7 @@ class Dataset:
"""Initialize data from a CSR matrix.""" """Initialize data from a CSR matrix."""
if len(csr.indices) != len(csr.data): if len(csr.indices) != len(csr.data):
raise ValueError(f'Length mismatch: {len(csr.indices)} vs {len(csr.data)}') raise ValueError(f'Length mismatch: {len(csr.indices)} vs {len(csr.data)}')
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csr.data) ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
...@@ -2076,7 +2077,7 @@ class Dataset: ...@@ -2076,7 +2077,7 @@ class Dataset:
ctypes.c_int64(csr.shape[1]), ctypes.c_int64(csr.shape[1]),
_c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
return self return self
def __init_from_csc( def __init_from_csc(
...@@ -2088,7 +2089,7 @@ class Dataset: ...@@ -2088,7 +2089,7 @@ class Dataset:
"""Initialize data from a CSC matrix.""" """Initialize data from a CSC matrix."""
if len(csc.indices) != len(csc.data): if len(csc.indices) != len(csc.data):
raise ValueError(f'Length mismatch: {len(csc.indices)} vs {len(csc.data)}') raise ValueError(f'Length mismatch: {len(csc.indices)} vs {len(csc.data)}')
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csc.data) ptr_data, type_ptr_data, _ = _c_float_array(csc.data)
...@@ -2107,7 +2108,7 @@ class Dataset: ...@@ -2107,7 +2108,7 @@ class Dataset:
ctypes.c_int64(csc.shape[0]), ctypes.c_int64(csc.shape[0]),
_c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
return self return self
@staticmethod @staticmethod
...@@ -2156,7 +2157,7 @@ class Dataset: ...@@ -2156,7 +2157,7 @@ class Dataset:
self : Dataset self : Dataset
Constructed Dataset object. Constructed Dataset object.
""" """
if self.handle is None: if self._handle is None:
if self.reference is not None: if self.reference is not None:
reference_params = self.reference.get_params() reference_params = self.reference.get_params()
params = self.get_params() params = self.get_params()
...@@ -2182,14 +2183,14 @@ class Dataset: ...@@ -2182,14 +2183,14 @@ class Dataset:
group_info = np.array(self.reference.group).astype(np.int32, copy=False) group_info = np.array(self.reference.group).astype(np.int32, copy=False)
_, self.group = np.unique(np.repeat(range(len(group_info)), repeats=group_info)[self.used_indices], _, self.group = np.unique(np.repeat(range(len(group_info)), repeats=group_info)[self.used_indices],
return_counts=True) return_counts=True)
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
params_str = _param_dict_to_str(self.params) params_str = _param_dict_to_str(self.params)
_safe_call(_LIB.LGBM_DatasetGetSubset( _safe_call(_LIB.LGBM_DatasetGetSubset(
self.reference.construct().handle, self.reference.construct()._handle,
used_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), used_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
ctypes.c_int32(used_indices.shape[0]), ctypes.c_int32(used_indices.shape[0]),
_c_str(params_str), _c_str(params_str),
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
if not self.free_raw_data: if not self.free_raw_data:
self.get_data() self.get_data()
if self.group is not None: if self.group is not None:
...@@ -2305,7 +2306,7 @@ class Dataset: ...@@ -2305,7 +2306,7 @@ class Dataset:
Returns self. Returns self.
""" """
_safe_call(_LIB.LGBM_DatasetSaveBinary( _safe_call(_LIB.LGBM_DatasetSaveBinary(
self.construct().handle, self.construct()._handle,
_c_str(str(filename)))) _c_str(str(filename))))
return self return self
...@@ -2321,7 +2322,7 @@ class Dataset: ...@@ -2321,7 +2322,7 @@ class Dataset:
self._params_back_up = deepcopy(self.params) self._params_back_up = deepcopy(self.params)
self.params.update(params) self.params.update(params)
if self.handle is None: if self._handle is None:
update() update()
elif params is not None: elif params is not None:
ret = _LIB.LGBM_DatasetUpdateParamChecking( ret = _LIB.LGBM_DatasetUpdateParamChecking(
...@@ -2337,7 +2338,7 @@ class Dataset: ...@@ -2337,7 +2338,7 @@ class Dataset:
return self return self
def _reverse_update_params(self) -> "Dataset": def _reverse_update_params(self) -> "Dataset":
if self.handle is None: if self._handle is None:
self.params = deepcopy(self._params_back_up) self.params = deepcopy(self._params_back_up)
self._params_back_up = None self._params_back_up = None
return self return self
...@@ -2361,12 +2362,12 @@ class Dataset: ...@@ -2361,12 +2362,12 @@ class Dataset:
self : Dataset self : Dataset
Dataset with set property. Dataset with set property.
""" """
if self.handle is None: if self._handle is None:
raise Exception(f"Cannot set {field_name} before construct dataset") raise Exception(f"Cannot set {field_name} before construct dataset")
if data is None: if data is None:
# set to None # set to None
_safe_call(_LIB.LGBM_DatasetSetField( _safe_call(_LIB.LGBM_DatasetSetField(
self.handle, self._handle,
_c_str(field_name), _c_str(field_name),
None, None,
ctypes.c_int(0), ctypes.c_int(0),
...@@ -2399,7 +2400,7 @@ class Dataset: ...@@ -2399,7 +2400,7 @@ class Dataset:
if type_data != _FIELD_TYPE_MAPPER[field_name]: if type_data != _FIELD_TYPE_MAPPER[field_name]:
raise TypeError("Input type error for set_field") raise TypeError("Input type error for set_field")
_safe_call(_LIB.LGBM_DatasetSetField( _safe_call(_LIB.LGBM_DatasetSetField(
self.handle, self._handle,
_c_str(field_name), _c_str(field_name),
ptr_data, ptr_data,
ctypes.c_int(len(data)), ctypes.c_int(len(data)),
...@@ -2420,13 +2421,13 @@ class Dataset: ...@@ -2420,13 +2421,13 @@ class Dataset:
info : numpy array or None info : numpy array or None
A numpy array with information from the Dataset. A numpy array with information from the Dataset.
""" """
if self.handle is None: if self._handle is None:
raise Exception(f"Cannot get {field_name} before construct Dataset") raise Exception(f"Cannot get {field_name} before construct Dataset")
tmp_out_len = ctypes.c_int(0) tmp_out_len = ctypes.c_int(0)
out_type = ctypes.c_int(0) out_type = ctypes.c_int(0)
ret = ctypes.POINTER(ctypes.c_void_p)() ret = ctypes.POINTER(ctypes.c_void_p)()
_safe_call(_LIB.LGBM_DatasetGetField( _safe_call(_LIB.LGBM_DatasetGetField(
self.handle, self._handle,
_c_str(field_name), _c_str(field_name),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.byref(ret), ctypes.byref(ret),
...@@ -2498,7 +2499,7 @@ class Dataset: ...@@ -2498,7 +2499,7 @@ class Dataset:
elif isinstance(predictor, _InnerPredictor) and isinstance(self._predictor, _InnerPredictor): elif isinstance(predictor, _InnerPredictor) and isinstance(self._predictor, _InnerPredictor):
if (predictor == self._predictor) and (predictor.current_iteration() == self._predictor.current_iteration()): if (predictor == self._predictor) and (predictor.current_iteration() == self._predictor.current_iteration()):
return self return self
if self.handle is None: if self._handle is None:
self._predictor = predictor self._predictor = predictor
elif self.data is not None: elif self.data is not None:
self._predictor = predictor self._predictor = predictor
...@@ -2560,12 +2561,12 @@ class Dataset: ...@@ -2560,12 +2561,12 @@ class Dataset:
""" """
if feature_name != 'auto': if feature_name != 'auto':
self.feature_name = feature_name self.feature_name = feature_name
if self.handle is not None and feature_name is not None and feature_name != 'auto': if self._handle is not None and feature_name is not None and feature_name != 'auto':
if len(feature_name) != self.num_feature(): if len(feature_name) != self.num_feature():
raise ValueError(f"Length of feature_name({len(feature_name)}) and num_feature({self.num_feature()}) don't match") raise ValueError(f"Length of feature_name({len(feature_name)}) and num_feature({self.num_feature()}) don't match")
c_feature_name = [_c_str(name) for name in feature_name] c_feature_name = [_c_str(name) for name in feature_name]
_safe_call(_LIB.LGBM_DatasetSetFeatureNames( _safe_call(_LIB.LGBM_DatasetSetFeatureNames(
self.handle, self._handle,
_c_array(ctypes.c_char_p, c_feature_name), _c_array(ctypes.c_char_p, c_feature_name),
ctypes.c_int(len(feature_name)))) ctypes.c_int(len(feature_name))))
return self return self
...@@ -2584,7 +2585,7 @@ class Dataset: ...@@ -2584,7 +2585,7 @@ class Dataset:
Dataset with set label. Dataset with set label.
""" """
self.label = label self.label = label
if self.handle is not None: if self._handle is not None:
if isinstance(label, pd_DataFrame): if isinstance(label, pd_DataFrame):
if len(label.columns) > 1: if len(label.columns) > 1:
raise ValueError('DataFrame for label cannot have multiple columns') raise ValueError('DataFrame for label cannot have multiple columns')
...@@ -2625,7 +2626,7 @@ class Dataset: ...@@ -2625,7 +2626,7 @@ class Dataset:
if weight is not None and np.all(weight == 1): if weight is not None and np.all(weight == 1):
weight = None weight = None
self.weight = weight self.weight = weight
if self.handle is not None and weight is not None: if self._handle is not None and weight is not None:
weight = _list_to_1d_numpy(weight, dtype=np.float32, name='weight') weight = _list_to_1d_numpy(weight, dtype=np.float32, name='weight')
self.set_field('weight', weight) self.set_field('weight', weight)
self.weight = self.get_field('weight') # original values can be modified at cpp side self.weight = self.get_field('weight') # original values can be modified at cpp side
...@@ -2648,7 +2649,7 @@ class Dataset: ...@@ -2648,7 +2649,7 @@ class Dataset:
Dataset with set init score. Dataset with set init score.
""" """
self.init_score = init_score self.init_score = init_score
if self.handle is not None and init_score is not None: if self._handle is not None and init_score is not None:
self.set_field('init_score', init_score) self.set_field('init_score', init_score)
self.init_score = self.get_field('init_score') # original values can be modified at cpp side self.init_score = self.get_field('init_score') # original values can be modified at cpp side
return self return self
...@@ -2674,7 +2675,7 @@ class Dataset: ...@@ -2674,7 +2675,7 @@ class Dataset:
Dataset with set group. Dataset with set group.
""" """
self.group = group self.group = group
if self.handle is not None and group is not None: if self._handle is not None and group is not None:
group = _list_to_1d_numpy(group, dtype=np.int32, name='group') group = _list_to_1d_numpy(group, dtype=np.int32, name='group')
self.set_field('group', group) self.set_field('group', group)
return self return self
...@@ -2687,7 +2688,7 @@ class Dataset: ...@@ -2687,7 +2688,7 @@ class Dataset:
feature_names : list of str feature_names : list of str
The names of columns (features) in the Dataset. The names of columns (features) in the Dataset.
""" """
if self.handle is None: if self._handle is None:
raise LightGBMError("Cannot get feature_name before construct dataset") raise LightGBMError("Cannot get feature_name before construct dataset")
num_feature = self.num_feature() num_feature = self.num_feature()
tmp_out_len = ctypes.c_int(0) tmp_out_len = ctypes.c_int(0)
...@@ -2696,7 +2697,7 @@ class Dataset: ...@@ -2696,7 +2697,7 @@ class Dataset:
string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(num_feature)] string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(num_feature)]
ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_DatasetGetFeatureNames( _safe_call(_LIB.LGBM_DatasetGetFeatureNames(
self.handle, self._handle,
ctypes.c_int(num_feature), ctypes.c_int(num_feature),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.c_size_t(reserved_string_buffer_size), ctypes.c_size_t(reserved_string_buffer_size),
...@@ -2710,7 +2711,7 @@ class Dataset: ...@@ -2710,7 +2711,7 @@ class Dataset:
string_buffers = [ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(num_feature)] string_buffers = [ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(num_feature)]
ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_DatasetGetFeatureNames( _safe_call(_LIB.LGBM_DatasetGetFeatureNames(
self.handle, self._handle,
ctypes.c_int(num_feature), ctypes.c_int(num_feature),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.c_size_t(actual_string_buffer_size), ctypes.c_size_t(actual_string_buffer_size),
...@@ -2762,7 +2763,7 @@ class Dataset: ...@@ -2762,7 +2763,7 @@ class Dataset:
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array or None data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array or None
Raw data used in the Dataset construction. Raw data used in the Dataset construction.
""" """
if self.handle is None: if self._handle is None:
raise Exception("Cannot get data before construct Dataset") raise Exception("Cannot get data before construct Dataset")
if self._need_slice and self.used_indices is not None and self.reference is not None: if self._need_slice and self.used_indices is not None and self.reference is not None:
self.data = self.reference.data self.data = self.reference.data
...@@ -2813,9 +2814,9 @@ class Dataset: ...@@ -2813,9 +2814,9 @@ class Dataset:
number_of_rows : int number_of_rows : int
The number of rows in the Dataset. The number of rows in the Dataset.
""" """
if self.handle is not None: if self._handle is not None:
ret = ctypes.c_int(0) ret = ctypes.c_int(0)
_safe_call(_LIB.LGBM_DatasetGetNumData(self.handle, _safe_call(_LIB.LGBM_DatasetGetNumData(self._handle,
ctypes.byref(ret))) ctypes.byref(ret)))
return ret.value return ret.value
else: else:
...@@ -2829,9 +2830,9 @@ class Dataset: ...@@ -2829,9 +2830,9 @@ class Dataset:
number_of_columns : int number_of_columns : int
The number of columns (features) in the Dataset. The number of columns (features) in the Dataset.
""" """
if self.handle is not None: if self._handle is not None:
ret = ctypes.c_int(0) ret = ctypes.c_int(0)
_safe_call(_LIB.LGBM_DatasetGetNumFeature(self.handle, _safe_call(_LIB.LGBM_DatasetGetNumFeature(self._handle,
ctypes.byref(ret))) ctypes.byref(ret)))
return ret.value return ret.value
else: else:
...@@ -2850,13 +2851,13 @@ class Dataset: ...@@ -2850,13 +2851,13 @@ class Dataset:
number_of_bins : int number_of_bins : int
The number of constructed bins for the feature in the Dataset. The number of constructed bins for the feature in the Dataset.
""" """
if self.handle is not None: if self._handle is not None:
if isinstance(feature, str): if isinstance(feature, str):
feature_index = self.feature_name.index(feature) feature_index = self.feature_name.index(feature)
else: else:
feature_index = feature feature_index = feature
ret = ctypes.c_int(0) ret = ctypes.c_int(0)
_safe_call(_LIB.LGBM_DatasetGetFeatureNumBin(self.handle, _safe_call(_LIB.LGBM_DatasetGetFeatureNumBin(self._handle,
ctypes.c_int(feature_index), ctypes.c_int(feature_index),
ctypes.byref(ret))) ctypes.byref(ret)))
return ret.value return ret.value
...@@ -2908,9 +2909,9 @@ class Dataset: ...@@ -2908,9 +2909,9 @@ class Dataset:
self : Dataset self : Dataset
Dataset with the new features added. Dataset with the new features added.
""" """
if self.handle is None or other.handle is None: if self._handle is None or other._handle is None:
raise ValueError('Both source and target Datasets must be constructed before adding features') raise ValueError('Both source and target Datasets must be constructed before adding features')
_safe_call(_LIB.LGBM_DatasetAddFeaturesFrom(self.handle, other.handle)) _safe_call(_LIB.LGBM_DatasetAddFeaturesFrom(self._handle, other._handle))
was_none = self.data is None was_none = self.data is None
old_self_data_type = type(self.data).__name__ old_self_data_type = type(self.data).__name__
if other.data is None: if other.data is None:
...@@ -2998,7 +2999,7 @@ class Dataset: ...@@ -2998,7 +2999,7 @@ class Dataset:
Returns self. Returns self.
""" """
_safe_call(_LIB.LGBM_DatasetDumpText( _safe_call(_LIB.LGBM_DatasetDumpText(
self.construct().handle, self.construct()._handle,
_c_str(str(filename)))) _c_str(str(filename))))
return self return self
...@@ -3042,7 +3043,7 @@ class Booster: ...@@ -3042,7 +3043,7 @@ class Booster:
model_str : str or None, optional (default=None) model_str : str or None, optional (default=None)
Model will be loaded from this string. Model will be loaded from this string.
""" """
self.handle = None self._handle = None
self._network = False self._network = False
self.__need_reload_eval_info = True self.__need_reload_eval_info = True
self._train_data_name = "training" self._train_data_name = "training"
...@@ -3093,11 +3094,11 @@ class Booster: ...@@ -3093,11 +3094,11 @@ class Booster:
# copy the parameters from train_set # copy the parameters from train_set
params.update(train_set.get_params()) params.update(train_set.get_params())
params_str = _param_dict_to_str(params) params_str = _param_dict_to_str(params)
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_BoosterCreate( _safe_call(_LIB.LGBM_BoosterCreate(
train_set.handle, train_set._handle,
_c_str(params_str), _c_str(params_str),
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
# save reference to data # save reference to data
self.train_set = train_set self.train_set = train_set
self.valid_sets: List[Dataset] = [] self.valid_sets: List[Dataset] = []
...@@ -3106,11 +3107,11 @@ class Booster: ...@@ -3106,11 +3107,11 @@ class Booster:
self.__init_predictor = train_set._predictor self.__init_predictor = train_set._predictor
if self.__init_predictor is not None: if self.__init_predictor is not None:
_safe_call(_LIB.LGBM_BoosterMerge( _safe_call(_LIB.LGBM_BoosterMerge(
self.handle, self._handle,
self.__init_predictor.handle)) self.__init_predictor._handle))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses( _safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle, self._handle,
ctypes.byref(out_num_class))) ctypes.byref(out_num_class)))
self.__num_class = out_num_class.value self.__num_class = out_num_class.value
# buffer for inner predict # buffer for inner predict
...@@ -3122,14 +3123,14 @@ class Booster: ...@@ -3122,14 +3123,14 @@ class Booster:
elif model_file is not None: elif model_file is not None:
# Prediction task # Prediction task
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
_c_str(str(model_file)), _c_str(str(model_file)),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses( _safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle, self._handle,
ctypes.byref(out_num_class))) ctypes.byref(out_num_class)))
self.__num_class = out_num_class.value self.__num_class = out_num_class.value
self.pandas_categorical = _load_pandas_categorical(file_name=model_file) self.pandas_categorical = _load_pandas_categorical(file_name=model_file)
...@@ -3150,8 +3151,8 @@ class Booster: ...@@ -3150,8 +3151,8 @@ class Booster:
except AttributeError: except AttributeError:
pass pass
try: try:
if self.handle is not None: if self._handle is not None:
_safe_call(_LIB.LGBM_BoosterFree(self.handle)) _safe_call(_LIB.LGBM_BoosterFree(self._handle))
except AttributeError: except AttributeError:
pass pass
...@@ -3165,15 +3166,15 @@ class Booster: ...@@ -3165,15 +3166,15 @@ class Booster:
def __getstate__(self) -> Dict[str, Any]: def __getstate__(self) -> Dict[str, Any]:
this = self.__dict__.copy() this = self.__dict__.copy()
handle = this['handle'] handle = this['_handle']
this.pop('train_set', None) this.pop('train_set', None)
this.pop('valid_sets', None) this.pop('valid_sets', None)
if handle is not None: if handle is not None:
this["handle"] = self.model_to_string(num_iteration=-1) this["_handle"] = self.model_to_string(num_iteration=-1)
return this return this
def __setstate__(self, state: Dict[str, Any]) -> None: def __setstate__(self, state: Dict[str, Any]) -> None:
model_str = state.get('handle', None) model_str = state.get('_handle', state.get('handle', None))
if model_str is not None: if model_str is not None:
handle = ctypes.c_void_p() handle = ctypes.c_void_p()
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
...@@ -3181,7 +3182,7 @@ class Booster: ...@@ -3181,7 +3182,7 @@ class Booster:
_c_str(model_str), _c_str(model_str),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(handle))) ctypes.byref(handle)))
state['handle'] = handle state['_handle'] = handle
self.__dict__.update(state) self.__dict__.update(state)
def _get_loaded_param(self) -> Dict[str, Any]: def _get_loaded_param(self) -> Dict[str, Any]:
...@@ -3190,7 +3191,7 @@ class Booster: ...@@ -3190,7 +3191,7 @@ class Booster:
string_buffer = ctypes.create_string_buffer(buffer_len) string_buffer = ctypes.create_string_buffer(buffer_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
_safe_call(_LIB.LGBM_BoosterGetLoadedParam( _safe_call(_LIB.LGBM_BoosterGetLoadedParam(
self.handle, self._handle,
ctypes.c_int64(buffer_len), ctypes.c_int64(buffer_len),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ptr_string_buffer)) ptr_string_buffer))
...@@ -3200,7 +3201,7 @@ class Booster: ...@@ -3200,7 +3201,7 @@ class Booster:
string_buffer = ctypes.create_string_buffer(actual_len) string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
_safe_call(_LIB.LGBM_BoosterGetLoadedParam( _safe_call(_LIB.LGBM_BoosterGetLoadedParam(
self.handle, self._handle,
ctypes.c_int64(actual_len), ctypes.c_int64(actual_len),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ptr_string_buffer)) ptr_string_buffer))
...@@ -3460,8 +3461,8 @@ class Booster: ...@@ -3460,8 +3461,8 @@ class Booster:
raise LightGBMError("Add validation data failed, " raise LightGBMError("Add validation data failed, "
"you should use same predictor for these data") "you should use same predictor for these data")
_safe_call(_LIB.LGBM_BoosterAddValidData( _safe_call(_LIB.LGBM_BoosterAddValidData(
self.handle, self._handle,
data.construct().handle)) data.construct()._handle))
self.valid_sets.append(data) self.valid_sets.append(data)
self.name_valid_sets.append(name) self.name_valid_sets.append(name)
self.__num_dataset += 1 self.__num_dataset += 1
...@@ -3485,7 +3486,7 @@ class Booster: ...@@ -3485,7 +3486,7 @@ class Booster:
params_str = _param_dict_to_str(params) params_str = _param_dict_to_str(params)
if params_str: if params_str:
_safe_call(_LIB.LGBM_BoosterResetParameter( _safe_call(_LIB.LGBM_BoosterResetParameter(
self.handle, self._handle,
_c_str(params_str))) _c_str(params_str)))
self.params.update(params) self.params.update(params)
return self return self
...@@ -3542,8 +3543,8 @@ class Booster: ...@@ -3542,8 +3543,8 @@ class Booster:
"you should use same predictor for these data") "you should use same predictor for these data")
self.train_set = train_set self.train_set = train_set
_safe_call(_LIB.LGBM_BoosterResetTrainingData( _safe_call(_LIB.LGBM_BoosterResetTrainingData(
self.handle, self._handle,
self.train_set.construct().handle)) self.train_set.construct()._handle))
self.__inner_predict_buffer[0] = None self.__inner_predict_buffer[0] = None
self.train_set_version = self.train_set.version self.train_set_version = self.train_set.version
is_finished = ctypes.c_int(0) is_finished = ctypes.c_int(0)
...@@ -3551,7 +3552,7 @@ class Booster: ...@@ -3551,7 +3552,7 @@ class Booster:
if self.__set_objective_to_none: if self.__set_objective_to_none:
raise LightGBMError('Cannot update due to null objective function.') raise LightGBMError('Cannot update due to null objective function.')
_safe_call(_LIB.LGBM_BoosterUpdateOneIter( _safe_call(_LIB.LGBM_BoosterUpdateOneIter(
self.handle, self._handle,
ctypes.byref(is_finished))) ctypes.byref(is_finished)))
self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)]
return is_finished.value == 1 return is_finished.value == 1
...@@ -3607,7 +3608,7 @@ class Booster: ...@@ -3607,7 +3608,7 @@ class Booster:
) )
is_finished = ctypes.c_int(0) is_finished = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom( _safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom(
self.handle, self._handle,
grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
ctypes.byref(is_finished))) ctypes.byref(is_finished)))
...@@ -3623,7 +3624,7 @@ class Booster: ...@@ -3623,7 +3624,7 @@ class Booster:
Booster with rolled back one iteration. Booster with rolled back one iteration.
""" """
_safe_call(_LIB.LGBM_BoosterRollbackOneIter( _safe_call(_LIB.LGBM_BoosterRollbackOneIter(
self.handle)) self._handle))
self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)]
return self return self
...@@ -3637,7 +3638,7 @@ class Booster: ...@@ -3637,7 +3638,7 @@ class Booster:
""" """
out_cur_iter = ctypes.c_int(0) out_cur_iter = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetCurrentIteration( _safe_call(_LIB.LGBM_BoosterGetCurrentIteration(
self.handle, self._handle,
ctypes.byref(out_cur_iter))) ctypes.byref(out_cur_iter)))
return out_cur_iter.value return out_cur_iter.value
...@@ -3651,7 +3652,7 @@ class Booster: ...@@ -3651,7 +3652,7 @@ class Booster:
""" """
model_per_iter = ctypes.c_int(0) model_per_iter = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterNumModelPerIteration( _safe_call(_LIB.LGBM_BoosterNumModelPerIteration(
self.handle, self._handle,
ctypes.byref(model_per_iter))) ctypes.byref(model_per_iter)))
return model_per_iter.value return model_per_iter.value
...@@ -3665,7 +3666,7 @@ class Booster: ...@@ -3665,7 +3666,7 @@ class Booster:
""" """
num_trees = ctypes.c_int(0) num_trees = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterNumberOfTotalModel( _safe_call(_LIB.LGBM_BoosterNumberOfTotalModel(
self.handle, self._handle,
ctypes.byref(num_trees))) ctypes.byref(num_trees)))
return num_trees.value return num_trees.value
...@@ -3679,7 +3680,7 @@ class Booster: ...@@ -3679,7 +3680,7 @@ class Booster:
""" """
ret = ctypes.c_double(0) ret = ctypes.c_double(0)
_safe_call(_LIB.LGBM_BoosterGetUpperBoundValue( _safe_call(_LIB.LGBM_BoosterGetUpperBoundValue(
self.handle, self._handle,
ctypes.byref(ret))) ctypes.byref(ret)))
return ret.value return ret.value
...@@ -3693,7 +3694,7 @@ class Booster: ...@@ -3693,7 +3694,7 @@ class Booster:
""" """
ret = ctypes.c_double(0) ret = ctypes.c_double(0)
_safe_call(_LIB.LGBM_BoosterGetLowerBoundValue( _safe_call(_LIB.LGBM_BoosterGetLowerBoundValue(
self.handle, self._handle,
ctypes.byref(ret))) ctypes.byref(ret)))
return ret.value return ret.value
...@@ -3854,7 +3855,7 @@ class Booster: ...@@ -3854,7 +3855,7 @@ class Booster:
num_iteration = self.best_iteration num_iteration = self.best_iteration
importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type] importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type]
_safe_call(_LIB.LGBM_BoosterSaveModel( _safe_call(_LIB.LGBM_BoosterSaveModel(
self.handle, self._handle,
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
...@@ -3883,7 +3884,7 @@ class Booster: ...@@ -3883,7 +3884,7 @@ class Booster:
Booster with shuffled models. Booster with shuffled models.
""" """
_safe_call(_LIB.LGBM_BoosterShuffleModels( _safe_call(_LIB.LGBM_BoosterShuffleModels(
self.handle, self._handle,
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(end_iteration))) ctypes.c_int(end_iteration)))
return self return self
...@@ -3901,18 +3902,18 @@ class Booster: ...@@ -3901,18 +3902,18 @@ class Booster:
self : Booster self : Booster
Loaded Booster object. Loaded Booster object.
""" """
if self.handle is not None: if self._handle is not None:
_safe_call(_LIB.LGBM_BoosterFree(self.handle)) _safe_call(_LIB.LGBM_BoosterFree(self._handle))
self._free_buffer() self._free_buffer()
self.handle = ctypes.c_void_p() self._handle = ctypes.c_void_p()
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterLoadModelFromString( _safe_call(_LIB.LGBM_BoosterLoadModelFromString(
_c_str(model_str), _c_str(model_str),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self._handle)))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses( _safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle, self._handle,
ctypes.byref(out_num_class))) ctypes.byref(out_num_class)))
self.__num_class = out_num_class.value self.__num_class = out_num_class.value
self.pandas_categorical = _load_pandas_categorical(model_str=model_str) self.pandas_categorical = _load_pandas_categorical(model_str=model_str)
...@@ -3952,7 +3953,7 @@ class Booster: ...@@ -3952,7 +3953,7 @@ class Booster:
string_buffer = ctypes.create_string_buffer(buffer_len) string_buffer = ctypes.create_string_buffer(buffer_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
_safe_call(_LIB.LGBM_BoosterSaveModelToString( _safe_call(_LIB.LGBM_BoosterSaveModelToString(
self.handle, self._handle,
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
...@@ -3965,7 +3966,7 @@ class Booster: ...@@ -3965,7 +3966,7 @@ class Booster:
string_buffer = ctypes.create_string_buffer(actual_len) string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
_safe_call(_LIB.LGBM_BoosterSaveModelToString( _safe_call(_LIB.LGBM_BoosterSaveModelToString(
self.handle, self._handle,
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
...@@ -4020,7 +4021,7 @@ class Booster: ...@@ -4020,7 +4021,7 @@ class Booster:
string_buffer = ctypes.create_string_buffer(buffer_len) string_buffer = ctypes.create_string_buffer(buffer_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
_safe_call(_LIB.LGBM_BoosterDumpModel( _safe_call(_LIB.LGBM_BoosterDumpModel(
self.handle, self._handle,
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
...@@ -4033,7 +4034,7 @@ class Booster: ...@@ -4033,7 +4034,7 @@ class Booster:
string_buffer = ctypes.create_string_buffer(actual_len) string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
_safe_call(_LIB.LGBM_BoosterDumpModel( _safe_call(_LIB.LGBM_BoosterDumpModel(
self.handle, self._handle,
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
...@@ -4202,7 +4203,7 @@ class Booster: ...@@ -4202,7 +4203,7 @@ class Booster:
nrow, ncol = leaf_preds.shape nrow, ncol = leaf_preds.shape
out_is_linear = ctypes.c_int(0) out_is_linear = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetLinear( _safe_call(_LIB.LGBM_BoosterGetLinear(
self.handle, self._handle,
ctypes.byref(out_is_linear))) ctypes.byref(out_is_linear)))
new_params = _choose_param_value( new_params = _choose_param_value(
main_param_name="linear_tree", main_param_name="linear_tree",
...@@ -4227,12 +4228,12 @@ class Booster: ...@@ -4227,12 +4228,12 @@ class Booster:
new_booster = Booster(new_params, train_set) new_booster = Booster(new_params, train_set)
# Copy models # Copy models
_safe_call(_LIB.LGBM_BoosterMerge( _safe_call(_LIB.LGBM_BoosterMerge(
new_booster.handle, new_booster._handle,
predictor.handle)) predictor._handle))
leaf_preds = leaf_preds.reshape(-1) leaf_preds = leaf_preds.reshape(-1)
ptr_data, _, _ = _c_int_array(leaf_preds) ptr_data, _, _ = _c_int_array(leaf_preds)
_safe_call(_LIB.LGBM_BoosterRefit( _safe_call(_LIB.LGBM_BoosterRefit(
new_booster.handle, new_booster._handle,
ptr_data, ptr_data,
ctypes.c_int32(nrow), ctypes.c_int32(nrow),
ctypes.c_int32(ncol))) ctypes.c_int32(ncol)))
...@@ -4256,7 +4257,7 @@ class Booster: ...@@ -4256,7 +4257,7 @@ class Booster:
""" """
ret = ctypes.c_double(0) ret = ctypes.c_double(0)
_safe_call(_LIB.LGBM_BoosterGetLeafValue( _safe_call(_LIB.LGBM_BoosterGetLeafValue(
self.handle, self._handle,
ctypes.c_int(tree_id), ctypes.c_int(tree_id),
ctypes.c_int(leaf_id), ctypes.c_int(leaf_id),
ctypes.byref(ret))) ctypes.byref(ret)))
...@@ -4286,7 +4287,7 @@ class Booster: ...@@ -4286,7 +4287,7 @@ class Booster:
""" """
_safe_call( _safe_call(
_LIB.LGBM_BoosterSetLeafValue( _LIB.LGBM_BoosterSetLeafValue(
self.handle, self._handle,
ctypes.c_int(tree_id), ctypes.c_int(tree_id),
ctypes.c_int(leaf_id), ctypes.c_int(leaf_id),
ctypes.c_double(value) ctypes.c_double(value)
...@@ -4299,7 +4300,7 @@ class Booster: ...@@ -4299,7 +4300,7 @@ class Booster:
pred_parameter: Dict[str, Any] pred_parameter: Dict[str, Any]
) -> _InnerPredictor: ) -> _InnerPredictor:
"""Convert to predictor.""" """Convert to predictor."""
predictor = _InnerPredictor(booster_handle=self.handle, pred_parameter=pred_parameter) predictor = _InnerPredictor(booster_handle=self._handle, pred_parameter=pred_parameter)
predictor.pandas_categorical = self.pandas_categorical predictor.pandas_categorical = self.pandas_categorical
return predictor return predictor
...@@ -4313,7 +4314,7 @@ class Booster: ...@@ -4313,7 +4314,7 @@ class Booster:
""" """
out_num_feature = ctypes.c_int(0) out_num_feature = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetNumFeature( _safe_call(_LIB.LGBM_BoosterGetNumFeature(
self.handle, self._handle,
ctypes.byref(out_num_feature))) ctypes.byref(out_num_feature)))
return out_num_feature.value return out_num_feature.value
...@@ -4333,7 +4334,7 @@ class Booster: ...@@ -4333,7 +4334,7 @@ class Booster:
string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(num_feature)] string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for _ in range(num_feature)]
ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetFeatureNames( _safe_call(_LIB.LGBM_BoosterGetFeatureNames(
self.handle, self._handle,
ctypes.c_int(num_feature), ctypes.c_int(num_feature),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.c_size_t(reserved_string_buffer_size), ctypes.c_size_t(reserved_string_buffer_size),
...@@ -4347,7 +4348,7 @@ class Booster: ...@@ -4347,7 +4348,7 @@ class Booster:
string_buffers = [ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(num_feature)] string_buffers = [ctypes.create_string_buffer(actual_string_buffer_size) for _ in range(num_feature)]
ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetFeatureNames( _safe_call(_LIB.LGBM_BoosterGetFeatureNames(
self.handle, self._handle,
ctypes.c_int(num_feature), ctypes.c_int(num_feature),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.c_size_t(actual_string_buffer_size), ctypes.c_size_t(actual_string_buffer_size),
...@@ -4383,7 +4384,7 @@ class Booster: ...@@ -4383,7 +4384,7 @@ class Booster:
importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type] importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type]
result = np.empty(self.num_feature(), dtype=np.float64) result = np.empty(self.num_feature(), dtype=np.float64)
_safe_call(_LIB.LGBM_BoosterFeatureImportance( _safe_call(_LIB.LGBM_BoosterFeatureImportance(
self.handle, self._handle,
ctypes.c_int(iteration), ctypes.c_int(iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
...@@ -4481,7 +4482,7 @@ class Booster: ...@@ -4481,7 +4482,7 @@ class Booster:
result = np.empty(self.__num_inner_eval, dtype=np.float64) result = np.empty(self.__num_inner_eval, dtype=np.float64)
tmp_out_len = ctypes.c_int(0) tmp_out_len = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetEval( _safe_call(_LIB.LGBM_BoosterGetEval(
self.handle, self._handle,
ctypes.c_int(data_idx), ctypes.c_int(data_idx),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
...@@ -4524,7 +4525,7 @@ class Booster: ...@@ -4524,7 +4525,7 @@ class Booster:
tmp_out_len = ctypes.c_int64(0) tmp_out_len = ctypes.c_int64(0)
data_ptr = self.__inner_predict_buffer[data_idx].ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # type: ignore[union-attr] data_ptr = self.__inner_predict_buffer[data_idx].ctypes.data_as(ctypes.POINTER(ctypes.c_double)) # type: ignore[union-attr]
_safe_call(_LIB.LGBM_BoosterGetPredict( _safe_call(_LIB.LGBM_BoosterGetPredict(
self.handle, self._handle,
ctypes.c_int(data_idx), ctypes.c_int(data_idx),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
data_ptr)) data_ptr))
...@@ -4544,7 +4545,7 @@ class Booster: ...@@ -4544,7 +4545,7 @@ class Booster:
out_num_eval = ctypes.c_int(0) out_num_eval = ctypes.c_int(0)
# Get num of inner evals # Get num of inner evals
_safe_call(_LIB.LGBM_BoosterGetEvalCounts( _safe_call(_LIB.LGBM_BoosterGetEvalCounts(
self.handle, self._handle,
ctypes.byref(out_num_eval))) ctypes.byref(out_num_eval)))
self.__num_inner_eval = out_num_eval.value self.__num_inner_eval = out_num_eval.value
if self.__num_inner_eval > 0: if self.__num_inner_eval > 0:
...@@ -4557,7 +4558,7 @@ class Booster: ...@@ -4557,7 +4558,7 @@ class Booster:
] ]
ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetEvalNames( _safe_call(_LIB.LGBM_BoosterGetEvalNames(
self.handle, self._handle,
ctypes.c_int(self.__num_inner_eval), ctypes.c_int(self.__num_inner_eval),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.c_size_t(reserved_string_buffer_size), ctypes.c_size_t(reserved_string_buffer_size),
...@@ -4573,7 +4574,7 @@ class Booster: ...@@ -4573,7 +4574,7 @@ class Booster:
] ]
ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetEvalNames( _safe_call(_LIB.LGBM_BoosterGetEvalNames(
self.handle, self._handle,
ctypes.c_int(self.__num_inner_eval), ctypes.c_int(self.__num_inner_eval),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.c_size_t(actual_string_buffer_size), ctypes.c_size_t(actual_string_buffer_size),
......
...@@ -393,11 +393,11 @@ def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_feat ...@@ -393,11 +393,11 @@ def test_add_features_does_not_fail_if_initial_dataset_has_zero_informative_feat
dataset_b = lgb.Dataset(arr_b).construct() dataset_b = lgb.Dataset(arr_b).construct()
original_handle = dataset_a.handle.value original_handle = dataset_a._handle.value
dataset_a.add_features_from(dataset_b) dataset_a.add_features_from(dataset_b)
assert dataset_a.num_feature() == 6 assert dataset_a.num_feature() == 6
assert dataset_a.num_data() == 100 assert dataset_a.num_data() == 100
assert dataset_a.handle.value == original_handle assert dataset_a._handle.value == original_handle
def test_cegb_affects_behavior(tmp_path): def test_cegb_affects_behavior(tmp_path):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment