Unverified commit 06a1ee25, authored by Madnex, committed by GitHub
Browse files

[python-package] prefix several internal functions with _ (#5545)

parent 0c5f46f8
...@@ -41,7 +41,7 @@ def _get_sample_count(total_nrow: int, params: str) -> int: ...@@ -41,7 +41,7 @@ def _get_sample_count(total_nrow: int, params: str) -> int:
sample_cnt = ctypes.c_int(0) sample_cnt = ctypes.c_int(0)
_safe_call(_LIB.LGBM_GetSampleCount( _safe_call(_LIB.LGBM_GetSampleCount(
ctypes.c_int32(total_nrow), ctypes.c_int32(total_nrow),
c_str(params), _c_str(params),
ctypes.byref(sample_cnt), ctypes.byref(sample_cnt),
)) ))
return sample_cnt.value return sample_cnt.value
...@@ -197,7 +197,7 @@ def _cast_numpy_array_to_dtype(array, dtype): ...@@ -197,7 +197,7 @@ def _cast_numpy_array_to_dtype(array, dtype):
return array.astype(dtype=dtype, copy=False) return array.astype(dtype=dtype, copy=False)
def is_1d_list(data: Any) -> bool: def _is_1d_list(data: Any) -> bool:
"""Check whether data is a 1-D list.""" """Check whether data is a 1-D list."""
return isinstance(data, list) and (not data or _is_numeric(data[0])) return isinstance(data, list) and (not data or _is_numeric(data[0]))
...@@ -207,12 +207,12 @@ def _is_1d_collection(data: Any) -> bool: ...@@ -207,12 +207,12 @@ def _is_1d_collection(data: Any) -> bool:
return ( return (
_is_numpy_1d_array(data) _is_numpy_1d_array(data)
or _is_numpy_column_array(data) or _is_numpy_column_array(data)
or is_1d_list(data) or _is_1d_list(data)
or isinstance(data, pd_Series) or isinstance(data, pd_Series)
) )
def list_to_1d_numpy(data, dtype=np.float32, name='list'): def _list_to_1d_numpy(data, dtype=np.float32, name='list'):
"""Convert data to numpy 1-D array.""" """Convert data to numpy 1-D array."""
if _is_numpy_1d_array(data): if _is_numpy_1d_array(data):
return _cast_numpy_array_to_dtype(data, dtype) return _cast_numpy_array_to_dtype(data, dtype)
...@@ -220,7 +220,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'): ...@@ -220,7 +220,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'):
_log_warning('Converting column-vector to 1d array') _log_warning('Converting column-vector to 1d array')
array = data.ravel() array = data.ravel()
return _cast_numpy_array_to_dtype(array, dtype) return _cast_numpy_array_to_dtype(array, dtype)
elif is_1d_list(data): elif _is_1d_list(data):
return np.array(data, dtype=dtype, copy=False) return np.array(data, dtype=dtype, copy=False)
elif isinstance(data, pd_Series): elif isinstance(data, pd_Series):
_check_for_bad_pandas_dtypes(data.to_frame().dtypes) _check_for_bad_pandas_dtypes(data.to_frame().dtypes)
...@@ -237,7 +237,7 @@ def _is_numpy_2d_array(data: Any) -> bool: ...@@ -237,7 +237,7 @@ def _is_numpy_2d_array(data: Any) -> bool:
def _is_2d_list(data: Any) -> bool: def _is_2d_list(data: Any) -> bool:
"""Check whether data is a 2-D list.""" """Check whether data is a 2-D list."""
return isinstance(data, list) and len(data) > 0 and is_1d_list(data[0]) return isinstance(data, list) and len(data) > 0 and _is_1d_list(data[0])
def _is_2d_collection(data: Any) -> bool: def _is_2d_collection(data: Any) -> bool:
...@@ -262,7 +262,7 @@ def _data_to_2d_numpy(data: Any, dtype: type = np.float32, name: str = 'list') - ...@@ -262,7 +262,7 @@ def _data_to_2d_numpy(data: Any, dtype: type = np.float32, name: str = 'list') -
"It should be list of lists, numpy 2-D array or pandas DataFrame") "It should be list of lists, numpy 2-D array or pandas DataFrame")
def cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray: def _cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
"""Convert a ctypes float pointer array to a numpy array.""" """Convert a ctypes float pointer array to a numpy array."""
if isinstance(cptr, ctypes.POINTER(ctypes.c_float)): if isinstance(cptr, ctypes.POINTER(ctypes.c_float)):
return np.ctypeslib.as_array(cptr, shape=(length,)).copy() return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
...@@ -270,7 +270,7 @@ def cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray: ...@@ -270,7 +270,7 @@ def cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise RuntimeError('Expected float pointer') raise RuntimeError('Expected float pointer')
def cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray: def _cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
"""Convert a ctypes double pointer array to a numpy array.""" """Convert a ctypes double pointer array to a numpy array."""
if isinstance(cptr, ctypes.POINTER(ctypes.c_double)): if isinstance(cptr, ctypes.POINTER(ctypes.c_double)):
return np.ctypeslib.as_array(cptr, shape=(length,)).copy() return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
...@@ -278,7 +278,7 @@ def cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray: ...@@ -278,7 +278,7 @@ def cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise RuntimeError('Expected double pointer') raise RuntimeError('Expected double pointer')
def cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray: def _cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
"""Convert a ctypes int pointer array to a numpy array.""" """Convert a ctypes int pointer array to a numpy array."""
if isinstance(cptr, ctypes.POINTER(ctypes.c_int32)): if isinstance(cptr, ctypes.POINTER(ctypes.c_int32)):
return np.ctypeslib.as_array(cptr, shape=(length,)).copy() return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
...@@ -286,7 +286,7 @@ def cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray: ...@@ -286,7 +286,7 @@ def cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise RuntimeError('Expected int32 pointer') raise RuntimeError('Expected int32 pointer')
def cint64_array_to_numpy(cptr: Any, length: int) -> np.ndarray: def _cint64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
"""Convert a ctypes int pointer array to a numpy array.""" """Convert a ctypes int pointer array to a numpy array."""
if isinstance(cptr, ctypes.POINTER(ctypes.c_int64)): if isinstance(cptr, ctypes.POINTER(ctypes.c_int64)):
return np.ctypeslib.as_array(cptr, shape=(length,)).copy() return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
...@@ -294,12 +294,12 @@ def cint64_array_to_numpy(cptr: Any, length: int) -> np.ndarray: ...@@ -294,12 +294,12 @@ def cint64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise RuntimeError('Expected int64 pointer') raise RuntimeError('Expected int64 pointer')
def c_str(string: str) -> ctypes.c_char_p: def _c_str(string: str) -> ctypes.c_char_p:
"""Convert a Python string to C string.""" """Convert a Python string to C string."""
return ctypes.c_char_p(string.encode('utf-8')) return ctypes.c_char_p(string.encode('utf-8'))
def c_array(ctype: type, values: List[Any]) -> ctypes.Array: def _c_array(ctype: type, values: List[Any]) -> ctypes.Array:
"""Convert a Python array to C array.""" """Convert a Python array to C array."""
return (ctype * len(values))(*values) return (ctype * len(values))(*values)
...@@ -513,7 +513,7 @@ def convert_from_sliced_object(data): ...@@ -513,7 +513,7 @@ def convert_from_sliced_object(data):
def c_float_array(data): def c_float_array(data):
"""Get pointer of float numpy array / list.""" """Get pointer of float numpy array / list."""
if is_1d_list(data): if _is_1d_list(data):
data = np.array(data, copy=False) data = np.array(data, copy=False)
if _is_numpy_1d_array(data): if _is_numpy_1d_array(data):
data = convert_from_sliced_object(data) data = convert_from_sliced_object(data)
...@@ -533,7 +533,7 @@ def c_float_array(data): ...@@ -533,7 +533,7 @@ def c_float_array(data):
def c_int_array(data): def c_int_array(data):
"""Get pointer of int numpy array / list.""" """Get pointer of int numpy array / list."""
if is_1d_list(data): if _is_1d_list(data):
data = np.array(data, copy=False) data = np.array(data, copy=False)
if _is_numpy_1d_array(data): if _is_numpy_1d_array(data):
data = convert_from_sliced_object(data) data = convert_from_sliced_object(data)
...@@ -749,7 +749,7 @@ class _InnerPredictor: ...@@ -749,7 +749,7 @@ class _InnerPredictor:
"""Prediction task""" """Prediction task"""
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
c_str(str(model_file)), _c_str(str(model_file)),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
...@@ -855,13 +855,13 @@ class _InnerPredictor: ...@@ -855,13 +855,13 @@ class _InnerPredictor:
with _TempFile() as f: with _TempFile() as f:
_safe_call(_LIB.LGBM_BoosterPredictForFile( _safe_call(_LIB.LGBM_BoosterPredictForFile(
self.handle, self.handle,
c_str(str(data)), _c_str(str(data)),
ctypes.c_int(int_data_has_header), ctypes.c_int(int_data_has_header),
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
c_str(self.pred_parameter), _c_str(self.pred_parameter),
c_str(f.name))) _c_str(f.name)))
preds = np.loadtxt(f.name, dtype=np.float64) preds = np.loadtxt(f.name, dtype=np.float64)
nrow = preds.shape[0] nrow = preds.shape[0]
elif isinstance(data, scipy.sparse.csr_matrix): elif isinstance(data, scipy.sparse.csr_matrix):
...@@ -939,7 +939,7 @@ class _InnerPredictor: ...@@ -939,7 +939,7 @@ class _InnerPredictor:
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
c_str(self.pred_parameter), _c_str(self.pred_parameter),
ctypes.byref(out_num_preds), ctypes.byref(out_num_preds),
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if n_preds != out_num_preds.value: if n_preds != out_num_preds.value:
...@@ -967,18 +967,18 @@ class _InnerPredictor: ...@@ -967,18 +967,18 @@ class _InnerPredictor:
data_indices_len = out_shape[0] data_indices_len = out_shape[0]
indptr_len = out_shape[1] indptr_len = out_shape[1]
if indptr_type == C_API_DTYPE_INT32: if indptr_type == C_API_DTYPE_INT32:
out_indptr = cint32_array_to_numpy(out_ptr_indptr, indptr_len) out_indptr = _cint32_array_to_numpy(out_ptr_indptr, indptr_len)
elif indptr_type == C_API_DTYPE_INT64: elif indptr_type == C_API_DTYPE_INT64:
out_indptr = cint64_array_to_numpy(out_ptr_indptr, indptr_len) out_indptr = _cint64_array_to_numpy(out_ptr_indptr, indptr_len)
else: else:
raise TypeError("Expected int32 or int64 type for indptr") raise TypeError("Expected int32 or int64 type for indptr")
if data_type == C_API_DTYPE_FLOAT32: if data_type == C_API_DTYPE_FLOAT32:
out_data = cfloat32_array_to_numpy(out_ptr_data, data_indices_len) out_data = _cfloat32_array_to_numpy(out_ptr_data, data_indices_len)
elif data_type == C_API_DTYPE_FLOAT64: elif data_type == C_API_DTYPE_FLOAT64:
out_data = cfloat64_array_to_numpy(out_ptr_data, data_indices_len) out_data = _cfloat64_array_to_numpy(out_ptr_data, data_indices_len)
else: else:
raise TypeError("Expected float32 or float64 type for data") raise TypeError("Expected float32 or float64 type for data")
out_indices = cint32_array_to_numpy(out_ptr_indices, data_indices_len) out_indices = _cint32_array_to_numpy(out_ptr_indices, data_indices_len)
# break up indptr based on number of rows (note more than one matrix in multiclass case) # break up indptr based on number of rows (note more than one matrix in multiclass case)
per_class_indptr_shape = cs.indptr.shape[0] per_class_indptr_shape = cs.indptr.shape[0]
# for CSC there is extra column added # for CSC there is extra column added
...@@ -1037,7 +1037,7 @@ class _InnerPredictor: ...@@ -1037,7 +1037,7 @@ class _InnerPredictor:
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
c_str(self.pred_parameter), _c_str(self.pred_parameter),
ctypes.byref(out_num_preds), ctypes.byref(out_num_preds),
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if n_preds != out_num_preds.value: if n_preds != out_num_preds.value:
...@@ -1072,7 +1072,7 @@ class _InnerPredictor: ...@@ -1072,7 +1072,7 @@ class _InnerPredictor:
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
c_str(self.pred_parameter), _c_str(self.pred_parameter),
ctypes.c_int(matrix_type), ctypes.c_int(matrix_type),
out_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)), out_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)),
ctypes.byref(out_ptr_indptr), ctypes.byref(out_ptr_indptr),
...@@ -1130,7 +1130,7 @@ class _InnerPredictor: ...@@ -1130,7 +1130,7 @@ class _InnerPredictor:
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
c_str(self.pred_parameter), _c_str(self.pred_parameter),
ctypes.c_int(matrix_type), ctypes.c_int(matrix_type),
out_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)), out_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)),
ctypes.byref(out_ptr_indptr), ctypes.byref(out_ptr_indptr),
...@@ -1169,7 +1169,7 @@ class _InnerPredictor: ...@@ -1169,7 +1169,7 @@ class _InnerPredictor:
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
c_str(self.pred_parameter), _c_str(self.pred_parameter),
ctypes.byref(out_num_preds), ctypes.byref(out_num_preds),
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if n_preds != out_num_preds.value: if n_preds != out_num_preds.value:
...@@ -1298,7 +1298,7 @@ class Dataset: ...@@ -1298,7 +1298,7 @@ class Dataset:
_safe_call(_LIB.LGBM_SampleIndices( _safe_call(_LIB.LGBM_SampleIndices(
ctypes.c_int32(total_nrow), ctypes.c_int32(total_nrow),
c_str(param_str), _c_str(param_str),
ptr_data, ptr_data,
ctypes.byref(actual_sample_cnt), ctypes.byref(actual_sample_cnt),
)) ))
...@@ -1389,7 +1389,7 @@ class Dataset: ...@@ -1389,7 +1389,7 @@ class Dataset:
ctypes.c_int32(sample_cnt), ctypes.c_int32(sample_cnt),
ctypes.c_int32(total_nrow), ctypes.c_int32(total_nrow),
ctypes.c_int64(total_nrow), ctypes.c_int64(total_nrow),
c_str(params_str), _c_str(params_str),
ctypes.byref(self.handle), ctypes.byref(self.handle),
)) ))
return self return self
...@@ -1566,8 +1566,8 @@ class Dataset: ...@@ -1566,8 +1566,8 @@ class Dataset:
if isinstance(data, (str, Path)): if isinstance(data, (str, Path)):
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_DatasetCreateFromFile( _safe_call(_LIB.LGBM_DatasetCreateFromFile(
c_str(str(data)), _c_str(str(data)),
c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
elif isinstance(data, scipy.sparse.csr_matrix): elif isinstance(data, scipy.sparse.csr_matrix):
...@@ -1711,7 +1711,7 @@ class Dataset: ...@@ -1711,7 +1711,7 @@ class Dataset:
ctypes.c_int32(mat.shape[0]), ctypes.c_int32(mat.shape[0]),
ctypes.c_int32(mat.shape[1]), ctypes.c_int32(mat.shape[1]),
ctypes.c_int(C_API_IS_ROW_MAJOR), ctypes.c_int(C_API_IS_ROW_MAJOR),
c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
return self return self
...@@ -1762,7 +1762,7 @@ class Dataset: ...@@ -1762,7 +1762,7 @@ class Dataset:
nrow.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), nrow.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
ctypes.c_int32(ncol), ctypes.c_int32(ncol),
ctypes.c_int(C_API_IS_ROW_MAJOR), ctypes.c_int(C_API_IS_ROW_MAJOR),
c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
return self return self
...@@ -1793,7 +1793,7 @@ class Dataset: ...@@ -1793,7 +1793,7 @@ class Dataset:
ctypes.c_int64(len(csr.indptr)), ctypes.c_int64(len(csr.indptr)),
ctypes.c_int64(len(csr.data)), ctypes.c_int64(len(csr.data)),
ctypes.c_int64(csr.shape[1]), ctypes.c_int64(csr.shape[1]),
c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
return self return self
...@@ -1824,7 +1824,7 @@ class Dataset: ...@@ -1824,7 +1824,7 @@ class Dataset:
ctypes.c_int64(len(csc.indptr)), ctypes.c_int64(len(csc.indptr)),
ctypes.c_int64(len(csc.data)), ctypes.c_int64(len(csc.data)),
ctypes.c_int64(csc.shape[0]), ctypes.c_int64(csc.shape[0]),
c_str(params_str), _c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
return self return self
...@@ -1895,7 +1895,7 @@ class Dataset: ...@@ -1895,7 +1895,7 @@ class Dataset:
feature_name=self.feature_name, params=self.params) feature_name=self.feature_name, params=self.params)
else: else:
# construct subset # construct subset
used_indices = list_to_1d_numpy(self.used_indices, np.int32, name='used_indices') used_indices = _list_to_1d_numpy(self.used_indices, np.int32, name='used_indices')
assert used_indices.flags.c_contiguous assert used_indices.flags.c_contiguous
if self.reference.group is not None: if self.reference.group is not None:
group_info = np.array(self.reference.group).astype(np.int32, copy=False) group_info = np.array(self.reference.group).astype(np.int32, copy=False)
...@@ -1907,7 +1907,7 @@ class Dataset: ...@@ -1907,7 +1907,7 @@ class Dataset:
self.reference.construct().handle, self.reference.construct().handle,
used_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)), used_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
ctypes.c_int32(used_indices.shape[0]), ctypes.c_int32(used_indices.shape[0]),
c_str(params_str), _c_str(params_str),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
if not self.free_raw_data: if not self.free_raw_data:
self.get_data() self.get_data()
...@@ -2021,7 +2021,7 @@ class Dataset: ...@@ -2021,7 +2021,7 @@ class Dataset:
""" """
_safe_call(_LIB.LGBM_DatasetSaveBinary( _safe_call(_LIB.LGBM_DatasetSaveBinary(
self.construct().handle, self.construct().handle,
c_str(str(filename)))) _c_str(str(filename))))
return self return self
def _update_params(self, params: Optional[Dict[str, Any]]) -> "Dataset": def _update_params(self, params: Optional[Dict[str, Any]]) -> "Dataset":
...@@ -2040,8 +2040,8 @@ class Dataset: ...@@ -2040,8 +2040,8 @@ class Dataset:
update() update()
elif params is not None: elif params is not None:
ret = _LIB.LGBM_DatasetUpdateParamChecking( ret = _LIB.LGBM_DatasetUpdateParamChecking(
c_str(param_dict_to_str(self.params)), _c_str(param_dict_to_str(self.params)),
c_str(param_dict_to_str(params))) _c_str(param_dict_to_str(params)))
if ret != 0: if ret != 0:
# could be updated if data is not freed # could be updated if data is not freed
if self.data is not None: if self.data is not None:
...@@ -2082,7 +2082,7 @@ class Dataset: ...@@ -2082,7 +2082,7 @@ class Dataset:
# set to None # set to None
_safe_call(_LIB.LGBM_DatasetSetField( _safe_call(_LIB.LGBM_DatasetSetField(
self.handle, self.handle,
c_str(field_name), _c_str(field_name),
None, None,
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(FIELD_TYPE_MAPPER[field_name]))) ctypes.c_int(FIELD_TYPE_MAPPER[field_name])))
...@@ -2090,7 +2090,7 @@ class Dataset: ...@@ -2090,7 +2090,7 @@ class Dataset:
if field_name == 'init_score': if field_name == 'init_score':
dtype = np.float64 dtype = np.float64
if _is_1d_collection(data): if _is_1d_collection(data):
data = list_to_1d_numpy(data, dtype, name=field_name) data = _list_to_1d_numpy(data, dtype, name=field_name)
elif _is_2d_collection(data): elif _is_2d_collection(data):
data = _data_to_2d_numpy(data, dtype, name=field_name) data = _data_to_2d_numpy(data, dtype, name=field_name)
data = data.ravel(order='F') data = data.ravel(order='F')
...@@ -2101,7 +2101,7 @@ class Dataset: ...@@ -2101,7 +2101,7 @@ class Dataset:
) )
else: else:
dtype = np.int32 if field_name == 'group' else np.float32 dtype = np.int32 if field_name == 'group' else np.float32
data = list_to_1d_numpy(data, dtype, name=field_name) data = _list_to_1d_numpy(data, dtype, name=field_name)
if data.dtype == np.float32 or data.dtype == np.float64: if data.dtype == np.float32 or data.dtype == np.float64:
ptr_data, type_data, _ = c_float_array(data) ptr_data, type_data, _ = c_float_array(data)
...@@ -2113,7 +2113,7 @@ class Dataset: ...@@ -2113,7 +2113,7 @@ class Dataset:
raise TypeError("Input type error for set_field") raise TypeError("Input type error for set_field")
_safe_call(_LIB.LGBM_DatasetSetField( _safe_call(_LIB.LGBM_DatasetSetField(
self.handle, self.handle,
c_str(field_name), _c_str(field_name),
ptr_data, ptr_data,
ctypes.c_int(len(data)), ctypes.c_int(len(data)),
ctypes.c_int(type_data))) ctypes.c_int(type_data)))
...@@ -2140,7 +2140,7 @@ class Dataset: ...@@ -2140,7 +2140,7 @@ class Dataset:
ret = ctypes.POINTER(ctypes.c_void_p)() ret = ctypes.POINTER(ctypes.c_void_p)()
_safe_call(_LIB.LGBM_DatasetGetField( _safe_call(_LIB.LGBM_DatasetGetField(
self.handle, self.handle,
c_str(field_name), _c_str(field_name),
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ctypes.byref(ret), ctypes.byref(ret),
ctypes.byref(out_type))) ctypes.byref(out_type)))
...@@ -2149,11 +2149,11 @@ class Dataset: ...@@ -2149,11 +2149,11 @@ class Dataset:
if tmp_out_len.value == 0: if tmp_out_len.value == 0:
return None return None
if out_type.value == C_API_DTYPE_INT32: if out_type.value == C_API_DTYPE_INT32:
arr = cint32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_int32)), tmp_out_len.value) arr = _cint32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_int32)), tmp_out_len.value)
elif out_type.value == C_API_DTYPE_FLOAT32: elif out_type.value == C_API_DTYPE_FLOAT32:
arr = cfloat32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_float)), tmp_out_len.value) arr = _cfloat32_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_float)), tmp_out_len.value)
elif out_type.value == C_API_DTYPE_FLOAT64: elif out_type.value == C_API_DTYPE_FLOAT64:
arr = cfloat64_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_double)), tmp_out_len.value) arr = _cfloat64_array_to_numpy(ctypes.cast(ret, ctypes.POINTER(ctypes.c_double)), tmp_out_len.value)
else: else:
raise TypeError("Unknown type") raise TypeError("Unknown type")
if field_name == 'init_score': if field_name == 'init_score':
...@@ -2265,10 +2265,10 @@ class Dataset: ...@@ -2265,10 +2265,10 @@ class Dataset:
if self.handle is not None and feature_name is not None and feature_name != 'auto': if self.handle is not None and feature_name is not None and feature_name != 'auto':
if len(feature_name) != self.num_feature(): if len(feature_name) != self.num_feature():
raise ValueError(f"Length of feature_name({len(feature_name)}) and num_feature({self.num_feature()}) don't match") raise ValueError(f"Length of feature_name({len(feature_name)}) and num_feature({self.num_feature()}) don't match")
c_feature_name = [c_str(name) for name in feature_name] c_feature_name = [_c_str(name) for name in feature_name]
_safe_call(_LIB.LGBM_DatasetSetFeatureNames( _safe_call(_LIB.LGBM_DatasetSetFeatureNames(
self.handle, self.handle,
c_array(ctypes.c_char_p, c_feature_name), _c_array(ctypes.c_char_p, c_feature_name),
ctypes.c_int(len(feature_name)))) ctypes.c_int(len(feature_name))))
return self return self
...@@ -2293,7 +2293,7 @@ class Dataset: ...@@ -2293,7 +2293,7 @@ class Dataset:
_check_for_bad_pandas_dtypes(label.dtypes) _check_for_bad_pandas_dtypes(label.dtypes)
label_array = np.ravel(label.values.astype(np.float32, copy=False)) label_array = np.ravel(label.values.astype(np.float32, copy=False))
else: else:
label_array = list_to_1d_numpy(label, name='label') label_array = _list_to_1d_numpy(label, name='label')
self.set_field('label', label_array) self.set_field('label', label_array)
self.label = self.get_field('label') # original values can be modified at cpp side self.label = self.get_field('label') # original values can be modified at cpp side
return self return self
...@@ -2315,7 +2315,7 @@ class Dataset: ...@@ -2315,7 +2315,7 @@ class Dataset:
weight = None weight = None
self.weight = weight self.weight = weight
if self.handle is not None and weight is not None: if self.handle is not None and weight is not None:
weight = list_to_1d_numpy(weight, name='weight') weight = _list_to_1d_numpy(weight, name='weight')
self.set_field('weight', weight) self.set_field('weight', weight)
self.weight = self.get_field('weight') # original values can be modified at cpp side self.weight = self.get_field('weight') # original values can be modified at cpp side
return self return self
...@@ -2358,7 +2358,7 @@ class Dataset: ...@@ -2358,7 +2358,7 @@ class Dataset:
""" """
self.group = group self.group = group
if self.handle is not None and group is not None: if self.handle is not None and group is not None:
group = list_to_1d_numpy(group, np.int32, name='group') group = _list_to_1d_numpy(group, np.int32, name='group')
self.set_field('group', group) self.set_field('group', group)
return self return self
...@@ -2682,7 +2682,7 @@ class Dataset: ...@@ -2682,7 +2682,7 @@ class Dataset:
""" """
_safe_call(_LIB.LGBM_DatasetDumpText( _safe_call(_LIB.LGBM_DatasetDumpText(
self.construct().handle, self.construct().handle,
c_str(str(filename)))) _c_str(str(filename))))
return self return self
...@@ -2779,7 +2779,7 @@ class Booster: ...@@ -2779,7 +2779,7 @@ class Booster:
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_BoosterCreate( _safe_call(_LIB.LGBM_BoosterCreate(
train_set.handle, train_set.handle,
c_str(params_str), _c_str(params_str),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
# save reference to data # save reference to data
self.train_set = train_set self.train_set = train_set
...@@ -2807,7 +2807,7 @@ class Booster: ...@@ -2807,7 +2807,7 @@ class Booster:
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
c_str(str(model_file)), _c_str(str(model_file)),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
...@@ -2861,7 +2861,7 @@ class Booster: ...@@ -2861,7 +2861,7 @@ class Booster:
handle = ctypes.c_void_p() handle = ctypes.c_void_p()
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterLoadModelFromString( _safe_call(_LIB.LGBM_BoosterLoadModelFromString(
c_str(model_str), _c_str(model_str),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(handle))) ctypes.byref(handle)))
state['handle'] = handle state['handle'] = handle
...@@ -2934,7 +2934,7 @@ class Booster: ...@@ -2934,7 +2934,7 @@ class Booster:
""" """
if isinstance(machines, (list, set)): if isinstance(machines, (list, set)):
machines = ','.join(machines) machines = ','.join(machines)
_safe_call(_LIB.LGBM_NetworkInit(c_str(machines), _safe_call(_LIB.LGBM_NetworkInit(_c_str(machines),
ctypes.c_int(local_listen_port), ctypes.c_int(local_listen_port),
ctypes.c_int(listen_time_out), ctypes.c_int(listen_time_out),
ctypes.c_int(num_machines))) ctypes.c_int(num_machines)))
...@@ -3152,7 +3152,7 @@ class Booster: ...@@ -3152,7 +3152,7 @@ class Booster:
if params_str: if params_str:
_safe_call(_LIB.LGBM_BoosterResetParameter( _safe_call(_LIB.LGBM_BoosterResetParameter(
self.handle, self.handle,
c_str(params_str))) _c_str(params_str)))
self.params.update(params) self.params.update(params)
return self return self
...@@ -3258,8 +3258,8 @@ class Booster: ...@@ -3258,8 +3258,8 @@ class Booster:
if self.__num_class > 1: if self.__num_class > 1:
grad = grad.ravel(order='F') grad = grad.ravel(order='F')
hess = hess.ravel(order='F') hess = hess.ravel(order='F')
grad = list_to_1d_numpy(grad, name='gradient') grad = _list_to_1d_numpy(grad, name='gradient')
hess = list_to_1d_numpy(hess, name='hessian') hess = _list_to_1d_numpy(hess, name='hessian')
assert grad.flags.c_contiguous assert grad.flags.c_contiguous
assert hess.flags.c_contiguous assert hess.flags.c_contiguous
if len(grad) != len(hess): if len(grad) != len(hess):
...@@ -3524,7 +3524,7 @@ class Booster: ...@@ -3524,7 +3524,7 @@ class Booster:
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
c_str(str(filename)))) _c_str(str(filename))))
_dump_pandas_categorical(self.pandas_categorical, filename) _dump_pandas_categorical(self.pandas_categorical, filename)
return self return self
...@@ -3573,7 +3573,7 @@ class Booster: ...@@ -3573,7 +3573,7 @@ class Booster:
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterLoadModelFromString( _safe_call(_LIB.LGBM_BoosterLoadModelFromString(
c_str(model_str), _c_str(model_str),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
......
...@@ -609,17 +609,17 @@ def test_list_to_1d_numpy(collection, dtype): ...@@ -609,17 +609,17 @@ def test_list_to_1d_numpy(collection, dtype):
y = pd_Series(y) y = pd_Series(y)
if isinstance(y, np.ndarray) and len(y.shape) == 2: if isinstance(y, np.ndarray) and len(y.shape) == 2:
with pytest.warns(UserWarning, match='column-vector'): with pytest.warns(UserWarning, match='column-vector'):
lgb.basic.list_to_1d_numpy(y) lgb.basic._list_to_1d_numpy(y)
return return
elif isinstance(y, list) and isinstance(y[0], list): elif isinstance(y, list) and isinstance(y[0], list):
with pytest.raises(TypeError): with pytest.raises(TypeError):
lgb.basic.list_to_1d_numpy(y) lgb.basic._list_to_1d_numpy(y)
return return
elif isinstance(y, pd_Series) and y.dtype == object: elif isinstance(y, pd_Series) and y.dtype == object:
with pytest.raises(ValueError): with pytest.raises(ValueError):
lgb.basic.list_to_1d_numpy(y) lgb.basic._list_to_1d_numpy(y)
return return
result = lgb.basic.list_to_1d_numpy(y, dtype=dtype) result = lgb.basic._list_to_1d_numpy(y, dtype=dtype)
assert result.size == 10 assert result.size == 10
assert result.dtype == dtype assert result.dtype == dtype
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment