Unverified commit e8cdc2c9, authored by James Lamb, committed by GitHub
Browse files

[python-package] add more type hints in basic.py (#5729)

parent 77132aa7
...@@ -275,7 +275,7 @@ def _data_to_2d_numpy(data: Any, dtype: type = np.float32, name: str = 'list') - ...@@ -275,7 +275,7 @@ def _data_to_2d_numpy(data: Any, dtype: type = np.float32, name: str = 'list') -
"It should be list of lists, numpy 2-D array or pandas DataFrame") "It should be list of lists, numpy 2-D array or pandas DataFrame")
def _cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray: def _cfloat32_array_to_numpy(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
"""Convert a ctypes float pointer array to a numpy array.""" """Convert a ctypes float pointer array to a numpy array."""
if isinstance(cptr, ctypes.POINTER(ctypes.c_float)): if isinstance(cptr, ctypes.POINTER(ctypes.c_float)):
return np.ctypeslib.as_array(cptr, shape=(length,)).copy() return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
...@@ -283,7 +283,7 @@ def _cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray: ...@@ -283,7 +283,7 @@ def _cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise RuntimeError('Expected float pointer') raise RuntimeError('Expected float pointer')
def _cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray: def _cfloat64_array_to_numpy(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
"""Convert a ctypes double pointer array to a numpy array.""" """Convert a ctypes double pointer array to a numpy array."""
if isinstance(cptr, ctypes.POINTER(ctypes.c_double)): if isinstance(cptr, ctypes.POINTER(ctypes.c_double)):
return np.ctypeslib.as_array(cptr, shape=(length,)).copy() return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
...@@ -291,7 +291,7 @@ def _cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray: ...@@ -291,7 +291,7 @@ def _cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise RuntimeError('Expected double pointer') raise RuntimeError('Expected double pointer')
def _cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray: def _cint32_array_to_numpy(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
"""Convert a ctypes int pointer array to a numpy array.""" """Convert a ctypes int pointer array to a numpy array."""
if isinstance(cptr, ctypes.POINTER(ctypes.c_int32)): if isinstance(cptr, ctypes.POINTER(ctypes.c_int32)):
return np.ctypeslib.as_array(cptr, shape=(length,)).copy() return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
...@@ -299,7 +299,7 @@ def _cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray: ...@@ -299,7 +299,7 @@ def _cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise RuntimeError('Expected int32 pointer') raise RuntimeError('Expected int32 pointer')
def _cint64_array_to_numpy(cptr: Any, length: int) -> np.ndarray: def _cint64_array_to_numpy(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
"""Convert a ctypes int pointer array to a numpy array.""" """Convert a ctypes int pointer array to a numpy array."""
if isinstance(cptr, ctypes.POINTER(ctypes.c_int64)): if isinstance(cptr, ctypes.POINTER(ctypes.c_int64)):
return np.ctypeslib.as_array(cptr, shape=(length,)).copy() return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
...@@ -902,26 +902,56 @@ class _InnerPredictor: ...@@ -902,26 +902,56 @@ class _InnerPredictor:
preds = np.loadtxt(f.name, dtype=np.float64) preds = np.loadtxt(f.name, dtype=np.float64)
nrow = preds.shape[0] nrow = preds.shape[0]
elif isinstance(data, scipy.sparse.csr_matrix): elif isinstance(data, scipy.sparse.csr_matrix):
preds, nrow = self.__pred_for_csr(data, start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_csr(
csr=data,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
elif isinstance(data, scipy.sparse.csc_matrix): elif isinstance(data, scipy.sparse.csc_matrix):
preds, nrow = self.__pred_for_csc(data, start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_csc(
csc=data,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
elif isinstance(data, np.ndarray): elif isinstance(data, np.ndarray):
preds, nrow = self.__pred_for_np2d(data, start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_np2d(
mat=data,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
elif isinstance(data, list): elif isinstance(data, list):
try: try:
data = np.array(data) data = np.array(data)
except BaseException: except BaseException:
raise ValueError('Cannot convert data list to numpy array.') raise ValueError('Cannot convert data list to numpy array.')
preds, nrow = self.__pred_for_np2d(data, start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_np2d(
mat=data,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
elif isinstance(data, dt_DataTable): elif isinstance(data, dt_DataTable):
preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_np2d(
mat=data.to_numpy(),
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
else: else:
try: try:
_log_warning('Converting data to scipy sparse matrix.') _log_warning('Converting data to scipy sparse matrix.')
csr = scipy.sparse.csr_matrix(data) csr = scipy.sparse.csr_matrix(data)
except BaseException: except BaseException:
raise TypeError(f'Cannot predict data for type {type(data).__name__}') raise TypeError(f'Cannot predict data for type {type(data).__name__}')
preds, nrow = self.__pred_for_csr(csr, start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_csr(
csr=csr,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
if pred_leaf: if pred_leaf:
preds = preds.astype(np.int32) preds = preds.astype(np.int32)
is_sparse = scipy.sparse.issparse(preds) or isinstance(preds, list) is_sparse = scipy.sparse.issparse(preds) or isinstance(preds, list)
...@@ -932,7 +962,13 @@ class _InnerPredictor: ...@@ -932,7 +962,13 @@ class _InnerPredictor:
raise ValueError(f'Length of predict result ({preds.size}) cannot be divide nrow ({nrow})') raise ValueError(f'Length of predict result ({preds.size}) cannot be divide nrow ({nrow})')
return preds return preds
def __get_num_preds(self, start_iteration, num_iteration, nrow, predict_type): def __get_num_preds(
self,
start_iteration: int,
num_iteration: int,
nrow: int,
predict_type: int
) -> int:
"""Get size of prediction result.""" """Get size of prediction result."""
if nrow > _MAX_INT32: if nrow > _MAX_INT32:
raise LightGBMError('LightGBM cannot perform prediction for data ' raise LightGBMError('LightGBM cannot perform prediction for data '
...@@ -962,7 +998,12 @@ class _InnerPredictor: ...@@ -962,7 +998,12 @@ class _InnerPredictor:
else: # change non-float data to float data, need to copy else: # change non-float data to float data, need to copy
data = np.array(mat.reshape(mat.size), dtype=np.float32) data = np.array(mat.reshape(mat.size), dtype=np.float32)
ptr_data, type_ptr_data, _ = _c_float_array(data) ptr_data, type_ptr_data, _ = _c_float_array(data)
n_preds = self.__get_num_preds(start_iteration, num_iteration, mat.shape[0], predict_type) n_preds = self.__get_num_preds(
start_iteration=start_iteration,
num_iteration=num_iteration,
nrow=mat.shape[0],
predict_type=predict_type
)
if preds is None: if preds is None:
preds = np.empty(n_preds, dtype=np.float64) preds = np.empty(n_preds, dtype=np.float64)
elif len(preds.shape) != 1 or len(preds) != n_preds: elif len(preds.shape) != 1 or len(preds) != n_preds:
...@@ -1026,14 +1067,14 @@ class _InnerPredictor: ...@@ -1026,14 +1067,14 @@ class _InnerPredictor:
def __create_sparse_native( def __create_sparse_native(
self, self,
cs: Union[scipy.sparse.csc_matrix, scipy.sparse.csr_matrix], cs: Union[scipy.sparse.csc_matrix, scipy.sparse.csr_matrix],
out_shape, out_shape: np.ndarray,
out_ptr_indptr, out_ptr_indptr: "ctypes._Pointer",
out_ptr_indices, out_ptr_indices: "ctypes._Pointer",
out_ptr_data, out_ptr_data: "ctypes._Pointer",
indptr_type, indptr_type: int,
data_type, data_type: int,
is_csr: bool is_csr: bool
): ) -> Union[List[scipy.sparse.csc_matrix], List[scipy.sparse.csr_matrix]]:
# create numpy array from output arrays # create numpy array from output arrays
data_indices_len = out_shape[0] data_indices_len = out_shape[0]
indptr_len = out_shape[1] indptr_len = out_shape[1]
...@@ -1087,7 +1128,12 @@ class _InnerPredictor: ...@@ -1087,7 +1128,12 @@ class _InnerPredictor:
preds: Optional[np.ndarray] preds: Optional[np.ndarray]
) -> Tuple[np.ndarray, int]: ) -> Tuple[np.ndarray, int]:
nrow = len(csr.indptr) - 1 nrow = len(csr.indptr) - 1
n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type) n_preds = self.__get_num_preds(
start_iteration=start_iteration,
num_iteration=num_iteration,
nrow=nrow,
predict_type=predict_type
)
if preds is None: if preds is None:
preds = np.empty(n_preds, dtype=np.float64) preds = np.empty(n_preds, dtype=np.float64)
elif len(preds.shape) != 1 or len(preds) != n_preds: elif len(preds.shape) != 1 or len(preds) != n_preds:
...@@ -1126,7 +1172,7 @@ class _InnerPredictor: ...@@ -1126,7 +1172,7 @@ class _InnerPredictor:
start_iteration: int, start_iteration: int,
num_iteration: int, num_iteration: int,
predict_type: int predict_type: int
): ) -> Tuple[Union[List[scipy.sparse.csc_matrix], List[scipy.sparse.csr_matrix]], int]:
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csr.data) ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
csr_indices = csr.indices.astype(np.int32, copy=False) csr_indices = csr.indices.astype(np.int32, copy=False)
...@@ -1173,7 +1219,13 @@ class _InnerPredictor: ...@@ -1173,7 +1219,13 @@ class _InnerPredictor:
nrow = len(csr.indptr) - 1 nrow = len(csr.indptr) - 1
return matrices, nrow return matrices, nrow
def __pred_for_csr(self, csr, start_iteration, num_iteration, predict_type): def __pred_for_csr(
self,
csr: scipy.sparse.csr_matrix,
start_iteration: int,
num_iteration: int,
predict_type: int
) -> Tuple[np.ndarray, int]:
"""Predict for a CSR data.""" """Predict for a CSR data."""
if predict_type == _C_API_PREDICT_CONTRIB: if predict_type == _C_API_PREDICT_CONTRIB:
return self.__inner_predict_csr_sparse( return self.__inner_predict_csr_sparse(
...@@ -1211,10 +1263,10 @@ class _InnerPredictor: ...@@ -1211,10 +1263,10 @@ class _InnerPredictor:
def __inner_predict_sparse_csc( def __inner_predict_sparse_csc(
self, self,
csc, csc: scipy.sparse.csc_matrix,
start_iteration, start_iteration: int,
num_iteration, num_iteration: int,
predict_type predict_type: int
): ):
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csc.data) ptr_data, type_ptr_data, _ = _c_float_array(csc.data)
...@@ -1262,11 +1314,22 @@ class _InnerPredictor: ...@@ -1262,11 +1314,22 @@ class _InnerPredictor:
nrow = csc.shape[0] nrow = csc.shape[0]
return matrices, nrow return matrices, nrow
def __pred_for_csc(self, csc, start_iteration, num_iteration, predict_type): def __pred_for_csc(
self,
csc: scipy.sparse.csc_matrix,
start_iteration: int,
num_iteration: int,
predict_type: int
) -> Tuple[np.ndarray, int]:
"""Predict for a CSC data.""" """Predict for a CSC data."""
nrow = csc.shape[0] nrow = csc.shape[0]
if nrow > _MAX_INT32: if nrow > _MAX_INT32:
return self.__pred_for_csr(csc.tocsr(), start_iteration, num_iteration, predict_type) return self.__pred_for_csr(
csr=csc.tocsr(),
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
if predict_type == _C_API_PREDICT_CONTRIB: if predict_type == _C_API_PREDICT_CONTRIB:
return self.__inner_predict_sparse_csc( return self.__inner_predict_sparse_csc(
csc=csc, csc=csc,
...@@ -1274,7 +1337,12 @@ class _InnerPredictor: ...@@ -1274,7 +1337,12 @@ class _InnerPredictor:
num_iteration=num_iteration, num_iteration=num_iteration,
predict_type=predict_type predict_type=predict_type
) )
n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type) n_preds = self.__get_num_preds(
start_iteration=start_iteration,
num_iteration=num_iteration,
nrow=nrow,
predict_type=predict_type
)
preds = np.empty(n_preds, dtype=np.float64) preds = np.empty(n_preds, dtype=np.float64)
out_num_preds = ctypes.c_int64(0) out_num_preds = ctypes.c_int64(0)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment