[python-package] add more type hints in basic.py (#5729)

e8cdc2c9 · James Lamb · GitHub · 77132aa7 · e8cdc2c9
Unverified Commit e8cdc2c9 authored Feb 26, 2023 by James Lamb Committed by GitHub Feb 26, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 97 additions and 29 deletions

python-package/lightgbm/basic.py python-package/lightgbm/basic.py +97 -29

No files found.
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -275,7 +275,7 @@ def _data_to_2d_numpy(data: Any, dtype: type = np.float32, name: str = 'list') -
                    "It should be list of lists, numpy 2-D array or pandas DataFrame")
-def _cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
+def _cfloat32_array_to_numpy(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
    """Convert a ctypes float pointer array to a numpy array."""
    if isinstance(cptr, ctypes.POINTER(ctypes.c_float)):
        return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
@@ -283,7 +283,7 @@ def _cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
        raise RuntimeError('Expected float pointer')
-def _cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
+def _cfloat64_array_to_numpy(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
    """Convert a ctypes double pointer array to a numpy array."""
    if isinstance(cptr, ctypes.POINTER(ctypes.c_double)):
        return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
@@ -291,7 +291,7 @@ def _cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
        raise RuntimeError('Expected double pointer')
-def _cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
+def _cint32_array_to_numpy(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
    """Convert a ctypes int pointer array to a numpy array."""
    if isinstance(cptr, ctypes.POINTER(ctypes.c_int32)):
        return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
@@ -299,7 +299,7 @@ def _cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
        raise RuntimeError('Expected int32 pointer')
-def _cint64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
+def _cint64_array_to_numpy(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
    """Convert a ctypes int pointer array to a numpy array."""
    if isinstance(cptr, ctypes.POINTER(ctypes.c_int64)):
        return np.ctypeslib.as_array(cptr, shape=(length,)).copy()
@@ -902,26 +902,56 @@ class _InnerPredictor:
                preds = np.loadtxt(f.name, dtype=np.float64)
                nrow = preds.shape[0]
        elif isinstance(data, scipy.sparse.csr_matrix):
-            preds, nrow = self.__pred_for_csr(data, start_iteration, num_iteration, predict_type)
+            preds, nrow = self.__pred_for_csr(
+                csr=data,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        elif isinstance(data, scipy.sparse.csc_matrix):
-            preds, nrow = self.__pred_for_csc(data, start_iteration, num_iteration, predict_type)
+            preds, nrow = self.__pred_for_csc(
+                csc=data,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        elif isinstance(data, np.ndarray):
-            preds, nrow = self.__pred_for_np2d(data, start_iteration, num_iteration, predict_type)
+            preds, nrow = self.__pred_for_np2d(
+                mat=data,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        elif isinstance(data, list):
            try:
                data = np.array(data)
            except BaseException:
                raise ValueError('Cannot convert data list to numpy array.')
-            preds, nrow = self.__pred_for_np2d(data, start_iteration, num_iteration, predict_type)
+            preds, nrow = self.__pred_for_np2d(
+                mat=data,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        elif isinstance(data, dt_DataTable):
-            preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type)
+            preds, nrow = self.__pred_for_np2d(
+                mat=data.to_numpy(),
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        else:
            try:
                _log_warning('Converting data to scipy sparse matrix.')
                csr = scipy.sparse.csr_matrix(data)
            except BaseException:
                raise TypeError(f'Cannot predict data for type {type(data).__name__}')
-            preds, nrow = self.__pred_for_csr(csr, start_iteration, num_iteration, predict_type)
+            preds, nrow = self.__pred_for_csr(
+                csr=csr,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        if pred_leaf:
            preds = preds.astype(np.int32)
        is_sparse = scipy.sparse.issparse(preds) or isinstance(preds, list)
@@ -932,7 +962,13 @@ class _InnerPredictor:
                raise ValueError(f'Length of predict result ({preds.size}) cannot be divide nrow ({nrow})')
        return preds
-    def __get_num_preds(self, start_iteration, num_iteration, nrow, predict_type):
+    def __get_num_preds(
+        self,
+        start_iteration: int,
+        num_iteration: int,
+        nrow: int,
+        predict_type: int
+    ) -> int:
        """Get size of prediction result."""
        if nrow > _MAX_INT32:
            raise LightGBMError('LightGBM cannot perform prediction for data '
@@ -962,7 +998,12 @@ class _InnerPredictor:
        else:  # change non-float data to float data, need to copy
            data = np.array(mat.reshape(mat.size), dtype=np.float32)
        ptr_data, type_ptr_data, _ = _c_float_array(data)
-        n_preds = self.__get_num_preds(start_iteration, num_iteration, mat.shape[0], predict_type)
+        n_preds = self.__get_num_preds(
+            start_iteration=start_iteration,
+            num_iteration=num_iteration,
+            nrow=mat.shape[0],
+            predict_type=predict_type
+        )
        if preds is None:
            preds = np.empty(n_preds, dtype=np.float64)
        elif len(preds.shape) != 1 or len(preds) != n_preds:
@@ -1026,14 +1067,14 @@ class _InnerPredictor:
    def __create_sparse_native(
        self,
        cs: Union[scipy.sparse.csc_matrix, scipy.sparse.csr_matrix],
-        out_shape,
+        out_shape: np.ndarray,
-        out_ptr_indptr,
+        out_ptr_indptr: "ctypes._Pointer",
-        out_ptr_indices,
+        out_ptr_indices: "ctypes._Pointer",
-        out_ptr_data,
+        out_ptr_data: "ctypes._Pointer",
-        indptr_type,
+        indptr_type: int,
-        data_type,
+        data_type: int,
        is_csr: bool
-    ):
+    ) -> Union[List[scipy.sparse.csc_matrix], List[scipy.sparse.csr_matrix]]:
        # create numpy array from output arrays
        data_indices_len = out_shape[0]
        indptr_len = out_shape[1]
@@ -1087,7 +1128,12 @@ class _InnerPredictor:
        preds: Optional[np.ndarray]
    ) -> Tuple[np.ndarray, int]:
        nrow = len(csr.indptr) - 1
-        n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
+        n_preds = self.__get_num_preds(
+            start_iteration=start_iteration,
+            num_iteration=num_iteration,
+            nrow=nrow,
+            predict_type=predict_type
+        )
        if preds is None:
            preds = np.empty(n_preds, dtype=np.float64)
        elif len(preds.shape) != 1 or len(preds) != n_preds:
@@ -1126,7 +1172,7 @@ class _InnerPredictor:
        start_iteration: int,
        num_iteration: int,
        predict_type: int
-    ):
+    ) -> Tuple[Union[List[scipy.sparse.csc_matrix], List[scipy.sparse.csr_matrix]], int]:
        ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
        ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
        csr_indices = csr.indices.astype(np.int32, copy=False)
@@ -1173,7 +1219,13 @@ class _InnerPredictor:
        nrow = len(csr.indptr) - 1
        return matrices, nrow
-    def __pred_for_csr(self, csr, start_iteration, num_iteration, predict_type):
+    def __pred_for_csr(
+        self,
+        csr: scipy.sparse.csr_matrix,
+        start_iteration: int,
+        num_iteration: int,
+        predict_type: int
+    ) -> Tuple[np.ndarray, int]:
        """Predict for a CSR data."""
        if predict_type == _C_API_PREDICT_CONTRIB:
            return self.__inner_predict_csr_sparse(
@@ -1211,10 +1263,10 @@ class _InnerPredictor:
    def __inner_predict_sparse_csc(
        self,
-        csc,
+        csc: scipy.sparse.csc_matrix,
-        start_iteration,
+        start_iteration: int,
-        num_iteration,
+        num_iteration: int,
-        predict_type
+        predict_type: int
    ):
        ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr)
        ptr_data, type_ptr_data, _ = _c_float_array(csc.data)
@@ -1262,11 +1314,22 @@ class _InnerPredictor:
        nrow = csc.shape[0]
        return matrices, nrow
-    def __pred_for_csc(self, csc, start_iteration, num_iteration, predict_type):
+    def __pred_for_csc(
+        self,
+        csc: scipy.sparse.csc_matrix,
+        start_iteration: int,
+        num_iteration: int,
+        predict_type: int
+    ) -> Tuple[np.ndarray, int]:
        """Predict for a CSC data."""
        nrow = csc.shape[0]
        if nrow > _MAX_INT32:
-            return self.__pred_for_csr(csc.tocsr(), start_iteration, num_iteration, predict_type)
+            return self.__pred_for_csr(
+                csr=csc.tocsr(),
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        if predict_type == _C_API_PREDICT_CONTRIB:
            return self.__inner_predict_sparse_csc(
                csc=csc,
@@ -1274,7 +1337,12 @@ class _InnerPredictor:
                num_iteration=num_iteration,
                predict_type=predict_type
            )
-        n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
+        n_preds = self.__get_num_preds(
+            start_iteration=start_iteration,
+            num_iteration=num_iteration,
+            nrow=nrow,
+            predict_type=predict_type
+        )
        preds = np.empty(n_preds, dtype=np.float64)
        out_num_preds = ctypes.c_int64(0)