Unverified commit 80f5666c, authored by James Lamb, committed by GitHub
Browse files

[python-package] prefix internal objects with '_' (#5654)

parent f84bfcf9
...@@ -304,7 +304,7 @@ def _c_array(ctype: type, values: List[Any]) -> ctypes.Array: ...@@ -304,7 +304,7 @@ def _c_array(ctype: type, values: List[Any]) -> ctypes.Array:
return (ctype * len(values))(*values) return (ctype * len(values))(*values)
def json_default_with_numpy(obj: Any) -> Any: def _json_default_with_numpy(obj: Any) -> Any:
"""Convert numpy classes to JSON serializable objects.""" """Convert numpy classes to JSON serializable objects."""
if isinstance(obj, (np.integer, np.floating, np.bool_)): if isinstance(obj, (np.integer, np.floating, np.bool_)):
return obj.item() return obj.item()
...@@ -314,7 +314,7 @@ def json_default_with_numpy(obj: Any) -> Any: ...@@ -314,7 +314,7 @@ def json_default_with_numpy(obj: Any) -> Any:
return obj return obj
def param_dict_to_str(data: Optional[Dict[str, Any]]) -> str: def _param_dict_to_str(data: Optional[Dict[str, Any]]) -> str:
"""Convert Python dictionary to string, which is passed to C API.""" """Convert Python dictionary to string, which is passed to C API."""
if data is None or not data: if data is None or not data:
return "" return ""
...@@ -465,40 +465,44 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va ...@@ -465,40 +465,44 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va
return params return params
MAX_INT32 = (1 << 31) - 1 _MAX_INT32 = (1 << 31) - 1
"""Macro definition of data type in C API of LightGBM""" """Macro definition of data type in C API of LightGBM"""
C_API_DTYPE_FLOAT32 = 0 _C_API_DTYPE_FLOAT32 = 0
C_API_DTYPE_FLOAT64 = 1 _C_API_DTYPE_FLOAT64 = 1
C_API_DTYPE_INT32 = 2 _C_API_DTYPE_INT32 = 2
C_API_DTYPE_INT64 = 3 _C_API_DTYPE_INT64 = 3
"""Matrix is row major in Python""" """Matrix is row major in Python"""
C_API_IS_ROW_MAJOR = 1 _C_API_IS_ROW_MAJOR = 1
"""Macro definition of prediction type in C API of LightGBM""" """Macro definition of prediction type in C API of LightGBM"""
C_API_PREDICT_NORMAL = 0 _C_API_PREDICT_NORMAL = 0
C_API_PREDICT_RAW_SCORE = 1 _C_API_PREDICT_RAW_SCORE = 1
C_API_PREDICT_LEAF_INDEX = 2 _C_API_PREDICT_LEAF_INDEX = 2
C_API_PREDICT_CONTRIB = 3 _C_API_PREDICT_CONTRIB = 3
"""Macro definition of sparse matrix type""" """Macro definition of sparse matrix type"""
C_API_MATRIX_TYPE_CSR = 0 _C_API_MATRIX_TYPE_CSR = 0
C_API_MATRIX_TYPE_CSC = 1 _C_API_MATRIX_TYPE_CSC = 1
"""Macro definition of feature importance type""" """Macro definition of feature importance type"""
C_API_FEATURE_IMPORTANCE_SPLIT = 0 _C_API_FEATURE_IMPORTANCE_SPLIT = 0
C_API_FEATURE_IMPORTANCE_GAIN = 1 _C_API_FEATURE_IMPORTANCE_GAIN = 1
"""Data type of data field""" """Data type of data field"""
FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32, _FIELD_TYPE_MAPPER = {
"weight": C_API_DTYPE_FLOAT32, "label": _C_API_DTYPE_FLOAT32,
"init_score": C_API_DTYPE_FLOAT64, "weight": _C_API_DTYPE_FLOAT32,
"group": C_API_DTYPE_INT32} "init_score": _C_API_DTYPE_FLOAT64,
"group": _C_API_DTYPE_INT32
}
"""String name to int feature importance type mapper""" """String name to int feature importance type mapper"""
FEATURE_IMPORTANCE_TYPE_MAPPER = {"split": C_API_FEATURE_IMPORTANCE_SPLIT, _FEATURE_IMPORTANCE_TYPE_MAPPER = {
"gain": C_API_FEATURE_IMPORTANCE_GAIN} "split": _C_API_FEATURE_IMPORTANCE_SPLIT,
"gain": _C_API_FEATURE_IMPORTANCE_GAIN
}
def _convert_from_sliced_object(data): def _convert_from_sliced_object(data):
...@@ -520,10 +524,10 @@ def _c_float_array(data): ...@@ -520,10 +524,10 @@ def _c_float_array(data):
assert data.flags.c_contiguous assert data.flags.c_contiguous
if data.dtype == np.float32: if data.dtype == np.float32:
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)) ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
type_data = C_API_DTYPE_FLOAT32 type_data = _C_API_DTYPE_FLOAT32
elif data.dtype == np.float64: elif data.dtype == np.float64:
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
type_data = C_API_DTYPE_FLOAT64 type_data = _C_API_DTYPE_FLOAT64
else: else:
raise TypeError(f"Expected np.float32 or np.float64, met type({data.dtype})") raise TypeError(f"Expected np.float32 or np.float64, met type({data.dtype})")
else: else:
...@@ -540,10 +544,10 @@ def _c_int_array(data): ...@@ -540,10 +544,10 @@ def _c_int_array(data):
assert data.flags.c_contiguous assert data.flags.c_contiguous
if data.dtype == np.int32: if data.dtype == np.int32:
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)) ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int32))
type_data = C_API_DTYPE_INT32 type_data = _C_API_DTYPE_INT32
elif data.dtype == np.int64: elif data.dtype == np.int64:
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)) ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int64))
type_data = C_API_DTYPE_INT64 type_data = _C_API_DTYPE_INT64
else: else:
raise TypeError(f"Expected np.int32 or np.int64, met type({data.dtype})") raise TypeError(f"Expected np.int32 or np.int64, met type({data.dtype})")
else: else:
...@@ -621,7 +625,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica ...@@ -621,7 +625,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica
def _dump_pandas_categorical(pandas_categorical, file_name=None): def _dump_pandas_categorical(pandas_categorical, file_name=None):
categorical_json = json.dumps(pandas_categorical, default=json_default_with_numpy) categorical_json = json.dumps(pandas_categorical, default=_json_default_with_numpy)
pandas_str = f'\npandas_categorical:{categorical_json}\n' pandas_str = f'\npandas_categorical:{categorical_json}\n'
if file_name is not None: if file_name is not None:
with open(file_name, 'a') as f: with open(file_name, 'a') as f:
...@@ -782,7 +786,7 @@ class _InnerPredictor: ...@@ -782,7 +786,7 @@ class _InnerPredictor:
raise TypeError('Need model_file or booster_handle to create a predictor') raise TypeError('Need model_file or booster_handle to create a predictor')
pred_parameter = {} if pred_parameter is None else pred_parameter pred_parameter = {} if pred_parameter is None else pred_parameter
self.pred_parameter = param_dict_to_str(pred_parameter) self.pred_parameter = _param_dict_to_str(pred_parameter)
def __del__(self) -> None: def __del__(self) -> None:
try: try:
...@@ -851,13 +855,13 @@ class _InnerPredictor: ...@@ -851,13 +855,13 @@ class _InnerPredictor:
) )
) )
data = _data_from_pandas(data, None, None, self.pandas_categorical)[0] data = _data_from_pandas(data, None, None, self.pandas_categorical)[0]
predict_type = C_API_PREDICT_NORMAL predict_type = _C_API_PREDICT_NORMAL
if raw_score: if raw_score:
predict_type = C_API_PREDICT_RAW_SCORE predict_type = _C_API_PREDICT_RAW_SCORE
if pred_leaf: if pred_leaf:
predict_type = C_API_PREDICT_LEAF_INDEX predict_type = _C_API_PREDICT_LEAF_INDEX
if pred_contrib: if pred_contrib:
predict_type = C_API_PREDICT_CONTRIB predict_type = _C_API_PREDICT_CONTRIB
int_data_has_header = 1 if data_has_header else 0 int_data_has_header = 1 if data_has_header else 0
if isinstance(data, (str, Path)): if isinstance(data, (str, Path)):
...@@ -906,9 +910,9 @@ class _InnerPredictor: ...@@ -906,9 +910,9 @@ class _InnerPredictor:
def __get_num_preds(self, start_iteration, num_iteration, nrow, predict_type): def __get_num_preds(self, start_iteration, num_iteration, nrow, predict_type):
"""Get size of prediction result.""" """Get size of prediction result."""
if nrow > MAX_INT32: if nrow > _MAX_INT32:
raise LightGBMError('LightGBM cannot perform prediction for data ' raise LightGBMError('LightGBM cannot perform prediction for data '
f'with number of rows greater than MAX_INT32 ({MAX_INT32}).\n' f'with number of rows greater than MAX_INT32 ({_MAX_INT32}).\n'
'You can split your data into chunks ' 'You can split your data into chunks '
'and then concatenate predictions for them') 'and then concatenate predictions for them')
n_preds = ctypes.c_int64(0) n_preds = ctypes.c_int64(0)
...@@ -944,7 +948,7 @@ class _InnerPredictor: ...@@ -944,7 +948,7 @@ class _InnerPredictor:
ctypes.c_int(type_ptr_data), ctypes.c_int(type_ptr_data),
ctypes.c_int32(mat.shape[0]), ctypes.c_int32(mat.shape[0]),
ctypes.c_int32(mat.shape[1]), ctypes.c_int32(mat.shape[1]),
ctypes.c_int(C_API_IS_ROW_MAJOR), ctypes.c_int(_C_API_IS_ROW_MAJOR),
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
...@@ -956,8 +960,8 @@ class _InnerPredictor: ...@@ -956,8 +960,8 @@ class _InnerPredictor:
return preds, mat.shape[0] return preds, mat.shape[0]
nrow = mat.shape[0] nrow = mat.shape[0]
if nrow > MAX_INT32: if nrow > _MAX_INT32:
sections = np.arange(start=MAX_INT32, stop=nrow, step=MAX_INT32) sections = np.arange(start=_MAX_INT32, stop=nrow, step=_MAX_INT32)
# __get_num_preds() cannot work with nrow > MAX_INT32, so calculate overall number of predictions piecemeal # __get_num_preds() cannot work with nrow > MAX_INT32, so calculate overall number of predictions piecemeal
n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff([0] + list(sections) + [nrow])] n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff([0] + list(sections) + [nrow])]
n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum() n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum()
...@@ -975,15 +979,15 @@ class _InnerPredictor: ...@@ -975,15 +979,15 @@ class _InnerPredictor:
# create numpy array from output arrays # create numpy array from output arrays
data_indices_len = out_shape[0] data_indices_len = out_shape[0]
indptr_len = out_shape[1] indptr_len = out_shape[1]
if indptr_type == C_API_DTYPE_INT32: if indptr_type == _C_API_DTYPE_INT32:
out_indptr = _cint32_array_to_numpy(out_ptr_indptr, indptr_len) out_indptr = _cint32_array_to_numpy(out_ptr_indptr, indptr_len)
elif indptr_type == C_API_DTYPE_INT64: elif indptr_type == _C_API_DTYPE_INT64:
out_indptr = _cint64_array_to_numpy(out_ptr_indptr, indptr_len) out_indptr = _cint64_array_to_numpy(out_ptr_indptr, indptr_len)
else: else:
raise TypeError("Expected int32 or int64 type for indptr") raise TypeError("Expected int32 or int64 type for indptr")
if data_type == C_API_DTYPE_FLOAT32: if data_type == _C_API_DTYPE_FLOAT32:
out_data = _cfloat32_array_to_numpy(out_ptr_data, data_indices_len) out_data = _cfloat32_array_to_numpy(out_ptr_data, data_indices_len)
elif data_type == C_API_DTYPE_FLOAT64: elif data_type == _C_API_DTYPE_FLOAT64:
out_data = _cfloat64_array_to_numpy(out_ptr_data, data_indices_len) out_data = _cfloat64_array_to_numpy(out_ptr_data, data_indices_len)
else: else:
raise TypeError("Expected float32 or float64 type for data") raise TypeError("Expected float32 or float64 type for data")
...@@ -1030,7 +1034,7 @@ class _InnerPredictor: ...@@ -1030,7 +1034,7 @@ class _InnerPredictor:
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csr.data) ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
assert csr.shape[1] <= MAX_INT32 assert csr.shape[1] <= _MAX_INT32
csr_indices = csr.indices.astype(np.int32, copy=False) csr_indices = csr.indices.astype(np.int32, copy=False)
_safe_call(_LIB.LGBM_BoosterPredictForCSR( _safe_call(_LIB.LGBM_BoosterPredictForCSR(
...@@ -1057,13 +1061,13 @@ class _InnerPredictor: ...@@ -1057,13 +1061,13 @@ class _InnerPredictor:
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csr.data) ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
csr_indices = csr.indices.astype(np.int32, copy=False) csr_indices = csr.indices.astype(np.int32, copy=False)
matrix_type = C_API_MATRIX_TYPE_CSR matrix_type = _C_API_MATRIX_TYPE_CSR
if type_ptr_indptr == C_API_DTYPE_INT32: if type_ptr_indptr == _C_API_DTYPE_INT32:
out_ptr_indptr = ctypes.POINTER(ctypes.c_int32)() out_ptr_indptr = ctypes.POINTER(ctypes.c_int32)()
else: else:
out_ptr_indptr = ctypes.POINTER(ctypes.c_int64)() out_ptr_indptr = ctypes.POINTER(ctypes.c_int64)()
out_ptr_indices = ctypes.POINTER(ctypes.c_int32)() out_ptr_indices = ctypes.POINTER(ctypes.c_int32)()
if type_ptr_data == C_API_DTYPE_FLOAT32: if type_ptr_data == _C_API_DTYPE_FLOAT32:
out_ptr_data = ctypes.POINTER(ctypes.c_float)() out_ptr_data = ctypes.POINTER(ctypes.c_float)()
else: else:
out_ptr_data = ctypes.POINTER(ctypes.c_double)() out_ptr_data = ctypes.POINTER(ctypes.c_double)()
...@@ -1092,11 +1096,11 @@ class _InnerPredictor: ...@@ -1092,11 +1096,11 @@ class _InnerPredictor:
nrow = len(csr.indptr) - 1 nrow = len(csr.indptr) - 1
return matrices, nrow return matrices, nrow
if predict_type == C_API_PREDICT_CONTRIB: if predict_type == _C_API_PREDICT_CONTRIB:
return inner_predict_sparse(csr, start_iteration, num_iteration, predict_type) return inner_predict_sparse(csr, start_iteration, num_iteration, predict_type)
nrow = len(csr.indptr) - 1 nrow = len(csr.indptr) - 1
if nrow > MAX_INT32: if nrow > _MAX_INT32:
sections = [0] + list(np.arange(start=MAX_INT32, stop=nrow, step=MAX_INT32)) + [nrow] sections = [0] + list(np.arange(start=_MAX_INT32, stop=nrow, step=_MAX_INT32)) + [nrow]
# __get_num_preds() cannot work with nrow > MAX_INT32, so calculate overall number of predictions piecemeal # __get_num_preds() cannot work with nrow > MAX_INT32, so calculate overall number of predictions piecemeal
n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff(sections)] n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff(sections)]
n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum() n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum()
...@@ -1115,13 +1119,13 @@ class _InnerPredictor: ...@@ -1115,13 +1119,13 @@ class _InnerPredictor:
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csc.data) ptr_data, type_ptr_data, _ = _c_float_array(csc.data)
csc_indices = csc.indices.astype(np.int32, copy=False) csc_indices = csc.indices.astype(np.int32, copy=False)
matrix_type = C_API_MATRIX_TYPE_CSC matrix_type = _C_API_MATRIX_TYPE_CSC
if type_ptr_indptr == C_API_DTYPE_INT32: if type_ptr_indptr == _C_API_DTYPE_INT32:
out_ptr_indptr = ctypes.POINTER(ctypes.c_int32)() out_ptr_indptr = ctypes.POINTER(ctypes.c_int32)()
else: else:
out_ptr_indptr = ctypes.POINTER(ctypes.c_int64)() out_ptr_indptr = ctypes.POINTER(ctypes.c_int64)()
out_ptr_indices = ctypes.POINTER(ctypes.c_int32)() out_ptr_indices = ctypes.POINTER(ctypes.c_int32)()
if type_ptr_data == C_API_DTYPE_FLOAT32: if type_ptr_data == _C_API_DTYPE_FLOAT32:
out_ptr_data = ctypes.POINTER(ctypes.c_float)() out_ptr_data = ctypes.POINTER(ctypes.c_float)()
else: else:
out_ptr_data = ctypes.POINTER(ctypes.c_double)() out_ptr_data = ctypes.POINTER(ctypes.c_double)()
...@@ -1151,9 +1155,9 @@ class _InnerPredictor: ...@@ -1151,9 +1155,9 @@ class _InnerPredictor:
return matrices, nrow return matrices, nrow
nrow = csc.shape[0] nrow = csc.shape[0]
if nrow > MAX_INT32: if nrow > _MAX_INT32:
return self.__pred_for_csr(csc.tocsr(), start_iteration, num_iteration, predict_type) return self.__pred_for_csr(csc.tocsr(), start_iteration, num_iteration, predict_type)
if predict_type == C_API_PREDICT_CONTRIB: if predict_type == _C_API_PREDICT_CONTRIB:
return inner_predict_sparse(csc, start_iteration, num_iteration, predict_type) return inner_predict_sparse(csc, start_iteration, num_iteration, predict_type)
n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type) n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
preds = np.empty(n_preds, dtype=np.float64) preds = np.empty(n_preds, dtype=np.float64)
...@@ -1162,7 +1166,7 @@ class _InnerPredictor: ...@@ -1162,7 +1166,7 @@ class _InnerPredictor:
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csc.data) ptr_data, type_ptr_data, _ = _c_float_array(csc.data)
assert csc.shape[0] <= MAX_INT32 assert csc.shape[0] <= _MAX_INT32
csc_indices = csc.indices.astype(np.int32, copy=False) csc_indices = csc.indices.astype(np.int32, copy=False)
_safe_call(_LIB.LGBM_BoosterPredictForCSC( _safe_call(_LIB.LGBM_BoosterPredictForCSC(
...@@ -1299,7 +1303,7 @@ class Dataset: ...@@ -1299,7 +1303,7 @@ class Dataset:
indices : numpy array indices : numpy array
Indices for sampled data. Indices for sampled data.
""" """
param_str = param_dict_to_str(self.get_params()) param_str = _param_dict_to_str(self.get_params())
sample_cnt = _get_sample_count(total_nrow, param_str) sample_cnt = _get_sample_count(total_nrow, param_str)
indices = np.empty(sample_cnt, dtype=np.int32) indices = np.empty(sample_cnt, dtype=np.int32)
ptr_data, _, _ = _c_int_array(indices) ptr_data, _, _ = _c_int_array(indices)
...@@ -1389,7 +1393,7 @@ class Dataset: ...@@ -1389,7 +1393,7 @@ class Dataset:
num_per_col_ptr, _, _ = _c_int_array(num_per_col) num_per_col_ptr, _, _ = _c_int_array(num_per_col)
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
params_str = param_dict_to_str(self.get_params()) params_str = _param_dict_to_str(self.get_params())
_safe_call(_LIB.LGBM_DatasetCreateFromSampledColumn( _safe_call(_LIB.LGBM_DatasetCreateFromSampledColumn(
ctypes.cast(sample_col_ptr, ctypes.POINTER(ctypes.POINTER(ctypes.c_double))), ctypes.cast(sample_col_ptr, ctypes.POINTER(ctypes.POINTER(ctypes.c_double))),
ctypes.cast(indices_col_ptr, ctypes.POINTER(ctypes.POINTER(ctypes.c_int32))), ctypes.cast(indices_col_ptr, ctypes.POINTER(ctypes.POINTER(ctypes.c_int32))),
...@@ -1563,7 +1567,7 @@ class Dataset: ...@@ -1563,7 +1567,7 @@ class Dataset:
params.pop(cat_alias, None) params.pop(cat_alias, None)
params['categorical_column'] = sorted(categorical_indices) params['categorical_column'] = sorted(categorical_indices)
params_str = param_dict_to_str(params) params_str = _param_dict_to_str(params)
self.params = params self.params = params
# process for reference dataset # process for reference dataset
ref_dataset = None ref_dataset = None
...@@ -1683,7 +1687,7 @@ class Dataset: ...@@ -1683,7 +1687,7 @@ class Dataset:
if ref_dataset is not None: if ref_dataset is not None:
self._init_from_ref_dataset(total_nrow, ref_dataset) self._init_from_ref_dataset(total_nrow, ref_dataset)
else: else:
param_str = param_dict_to_str(self.get_params()) param_str = _param_dict_to_str(self.get_params())
sample_cnt = _get_sample_count(total_nrow, param_str) sample_cnt = _get_sample_count(total_nrow, param_str)
sample_data, col_indices = self.__sample(seqs, total_nrow) sample_data, col_indices = self.__sample(seqs, total_nrow)
...@@ -1719,7 +1723,7 @@ class Dataset: ...@@ -1719,7 +1723,7 @@ class Dataset:
ctypes.c_int(type_ptr_data), ctypes.c_int(type_ptr_data),
ctypes.c_int32(mat.shape[0]), ctypes.c_int32(mat.shape[0]),
ctypes.c_int32(mat.shape[1]), ctypes.c_int32(mat.shape[1]),
ctypes.c_int(C_API_IS_ROW_MAJOR), ctypes.c_int(_C_API_IS_ROW_MAJOR),
_c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
...@@ -1770,7 +1774,7 @@ class Dataset: ...@@ -1770,7 +1774,7 @@ class Dataset:
ctypes.c_int(type_ptr_data), ctypes.c_int(type_ptr_data),
nrow.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), nrow.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
ctypes.c_int32(ncol), ctypes.c_int32(ncol),
ctypes.c_int(C_API_IS_ROW_MAJOR), ctypes.c_int(_C_API_IS_ROW_MAJOR),
_c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
...@@ -1790,7 +1794,7 @@ class Dataset: ...@@ -1790,7 +1794,7 @@ class Dataset:
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csr.data) ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
assert csr.shape[1] <= MAX_INT32 assert csr.shape[1] <= _MAX_INT32
csr_indices = csr.indices.astype(np.int32, copy=False) csr_indices = csr.indices.astype(np.int32, copy=False)
_safe_call(_LIB.LGBM_DatasetCreateFromCSR( _safe_call(_LIB.LGBM_DatasetCreateFromCSR(
...@@ -1821,7 +1825,7 @@ class Dataset: ...@@ -1821,7 +1825,7 @@ class Dataset:
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csc.data) ptr_data, type_ptr_data, _ = _c_float_array(csc.data)
assert csc.shape[0] <= MAX_INT32 assert csc.shape[0] <= _MAX_INT32
csc_indices = csc.indices.astype(np.int32, copy=False) csc_indices = csc.indices.astype(np.int32, copy=False)
_safe_call(_LIB.LGBM_DatasetCreateFromCSC( _safe_call(_LIB.LGBM_DatasetCreateFromCSC(
...@@ -1911,7 +1915,7 @@ class Dataset: ...@@ -1911,7 +1915,7 @@ class Dataset:
_, self.group = np.unique(np.repeat(range(len(group_info)), repeats=group_info)[self.used_indices], _, self.group = np.unique(np.repeat(range(len(group_info)), repeats=group_info)[self.used_indices],
return_counts=True) return_counts=True)
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
params_str = param_dict_to_str(self.params) params_str = _param_dict_to_str(self.params)
_safe_call(_LIB.LGBM_DatasetGetSubset( _safe_call(_LIB.LGBM_DatasetGetSubset(
self.reference.construct().handle, self.reference.construct().handle,
used_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), used_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
...@@ -2049,8 +2053,8 @@ class Dataset: ...@@ -2049,8 +2053,8 @@ class Dataset:
update() update()
elif params is not None: elif params is not None:
ret = _LIB.LGBM_DatasetUpdateParamChecking( ret = _LIB.LGBM_DatasetUpdateParamChecking(
_c_str(param_dict_to_str(self.params)), _c_str(_param_dict_to_str(self.params)),
_c_str(param_dict_to_str(params))) _c_str(_param_dict_to_str(params)))
if ret != 0: if ret != 0:
# could be updated if data is not freed # could be updated if data is not freed
if self.data is not None: if self.data is not None:
...@@ -2094,7 +2098,7 @@ class Dataset: ...@@ -2094,7 +2098,7 @@ class Dataset:
_c_str(field_name), _c_str(field_name),
None, None,
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(FIELD_TYPE_MAPPER[field_name]))) ctypes.c_int(_FIELD_TYPE_MAPPER[field_name])))
return self return self
if field_name == 'init_score': if field_name == 'init_score':
dtype = np.float64 dtype = np.float64
...@@ -2118,7 +2122,7 @@ class Dataset: ...@@ -2118,7 +2122,7 @@ class Dataset:
ptr_data, type_data, _ = _c_int_array(data) ptr_data, type_data, _ = _c_int_array(data)
else: else:
raise TypeError(f"Expected np.float32/64 or np.int32, met type({data.dtype})") raise TypeError(f"Expected np.float32/64 or np.int32, met type({data.dtype})")
if type_data != FIELD_TYPE_MAPPER[field_name]: if type_data != _FIELD_TYPE_MAPPER[field_name]:
raise TypeError("Input type error for set_field") raise TypeError("Input type error for set_field")
_safe_call(_LIB.LGBM_DatasetSetField( _safe_call(_LIB.LGBM_DatasetSetField(
self.handle, self.handle,
...@@ -2153,15 +2157,15 @@ class Dataset: ...@@ -2153,15 +2157,15 @@ class Dataset:
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.byref(ret), ctypes.byref(ret),
ctypes.byref(out_type))) ctypes.byref(out_type)))
if out_type.value != FIELD_TYPE_MAPPER[field_name]: if out_type.value != _FIELD_TYPE_MAPPER[field_name]:
raise TypeError("Return type error for get_field") raise TypeError("Return type error for get_field")
if tmp_out_len.value == 0: if tmp_out_len.value == 0:
return None return None
if out_type.value == C_API_DTYPE_INT32: if out_type.value == _C_API_DTYPE_INT32:
arr = _cint32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_int32)), tmp_out_len.value) arr = _cint32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_int32)), tmp_out_len.value)
elif out_type.value == C_API_DTYPE_FLOAT32: elif out_type.value == _C_API_DTYPE_FLOAT32:
arr = _cfloat32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_float)), tmp_out_len.value) arr = _cfloat32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_float)), tmp_out_len.value)
elif out_type.value == C_API_DTYPE_FLOAT64: elif out_type.value == _C_API_DTYPE_FLOAT64:
arr = _cfloat64_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_double)), tmp_out_len.value) arr = _cfloat64_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_double)), tmp_out_len.value)
else: else:
raise TypeError("Unknown type") raise TypeError("Unknown type")
...@@ -2794,7 +2798,7 @@ class Booster: ...@@ -2794,7 +2798,7 @@ class Booster:
train_set.construct() train_set.construct()
# copy the parameters from train_set # copy the parameters from train_set
params.update(train_set.get_params()) params.update(train_set.get_params())
params_str = param_dict_to_str(params) params_str = _param_dict_to_str(params)
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_BoosterCreate( _safe_call(_LIB.LGBM_BoosterCreate(
train_set.handle, train_set.handle,
...@@ -3167,7 +3171,7 @@ class Booster: ...@@ -3167,7 +3171,7 @@ class Booster:
self : Booster self : Booster
Booster with new parameters. Booster with new parameters.
""" """
params_str = param_dict_to_str(params) params_str = _param_dict_to_str(params)
if params_str: if params_str:
_safe_call(_LIB.LGBM_BoosterResetParameter( _safe_call(_LIB.LGBM_BoosterResetParameter(
self.handle, self.handle,
...@@ -3537,7 +3541,7 @@ class Booster: ...@@ -3537,7 +3541,7 @@ class Booster:
""" """
if num_iteration is None: if num_iteration is None:
num_iteration = self.best_iteration num_iteration = self.best_iteration
importance_type_int = FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type] importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type]
_safe_call(_LIB.LGBM_BoosterSaveModel( _safe_call(_LIB.LGBM_BoosterSaveModel(
self.handle, self.handle,
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
...@@ -3631,7 +3635,7 @@ class Booster: ...@@ -3631,7 +3635,7 @@ class Booster:
""" """
if num_iteration is None: if num_iteration is None:
num_iteration = self.best_iteration num_iteration = self.best_iteration
importance_type_int = FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type] importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type]
buffer_len = 1 << 20 buffer_len = 1 << 20
tmp_out_len = ctypes.c_int64(0) tmp_out_len = ctypes.c_int64(0)
string_buffer = ctypes.create_string_buffer(buffer_len) string_buffer = ctypes.create_string_buffer(buffer_len)
...@@ -3699,7 +3703,7 @@ class Booster: ...@@ -3699,7 +3703,7 @@ class Booster:
""" """
if num_iteration is None: if num_iteration is None:
num_iteration = self.best_iteration num_iteration = self.best_iteration
importance_type_int = FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type] importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type]
buffer_len = 1 << 20 buffer_len = 1 << 20
tmp_out_len = ctypes.c_int64(0) tmp_out_len = ctypes.c_int64(0)
string_buffer = ctypes.create_string_buffer(buffer_len) string_buffer = ctypes.create_string_buffer(buffer_len)
...@@ -3727,7 +3731,7 @@ class Booster: ...@@ -3727,7 +3731,7 @@ class Booster:
ptr_string_buffer)) ptr_string_buffer))
ret = json.loads(string_buffer.value.decode('utf-8'), object_hook=object_hook) ret = json.loads(string_buffer.value.decode('utf-8'), object_hook=object_hook)
ret['pandas_categorical'] = json.loads(json.dumps(self.pandas_categorical, ret['pandas_categorical'] = json.loads(json.dumps(self.pandas_categorical,
default=json_default_with_numpy)) default=_json_default_with_numpy))
return ret return ret
def predict( def predict(
...@@ -4021,14 +4025,14 @@ class Booster: ...@@ -4021,14 +4025,14 @@ class Booster:
""" """
if iteration is None: if iteration is None:
iteration = self.best_iteration iteration = self.best_iteration
importance_type_int = FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type] importance_type_int = _FEATURE_IMPORTANCE_TYPE_MAPPER[importance_type]
result = np.empty(self.num_feature(), dtype=np.float64) result = np.empty(self.num_feature(), dtype=np.float64)
_safe_call(_LIB.LGBM_BoosterFeatureImportance( _safe_call(_LIB.LGBM_BoosterFeatureImportance(
self.handle, self.handle,
ctypes.c_int(iteration), ctypes.c_int(iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if importance_type_int == C_API_FEATURE_IMPORTANCE_SPLIT: if importance_type_int == _C_API_FEATURE_IMPORTANCE_SPLIT:
return result.astype(np.int32) return result.astype(np.int32)
else: else:
return result return result
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment