[python-package] remove some inner function definitions (#5704)

76c0077a · James Lamb · GitHub · 771bad8c · 76c0077a
Unverified commit 76c0077a, authored Feb 12, 2023 by James Lamb; committed by GitHub on Feb 12, 2023.
Show whitespace changes
Inline Side-by-side

Showing with 263 additions and 168 deletions

python-package/lightgbm/basic.py python-package/lightgbm/basic.py +263 -168

No files found.
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -75,6 +75,10 @@ _INFO_METHOD_NAME = "info"
 _WARNING_METHOD_NAME = "warning"
+def _has_method(logger: Any, method_name: str) -> bool:
+    return callable(getattr(logger, method_name, None))
 def register_logger(
    logger: Any, info_method_name: str = "info", warning_method_name: str = "warning"
 ) -> None:
@@ -89,9 +93,6 @@ def register_logger(
    warning_method_name : str, optional (default="warning")
        Method used to log warning messages.
    """
-    def _has_method(logger: Any, method_name: str) -> bool:
-        return callable(getattr(logger, method_name, None))
    if not _has_method(logger, info_method_name) or not _has_method(logger, warning_method_name):
        raise TypeError(
            f"Logger must provide '{info_method_name}' and '{warning_method_name}' method"
@@ -323,6 +324,14 @@ def _json_default_with_numpy(obj: Any) -> Any:
        return obj
+def _to_string(x: Union[int, float, str, List]) -> str:
+    if isinstance(x, list):
+        val_list = ",".join(str(val) for val in x)
+        return f"[{val_list}]"
+    else:
+        return str(x)
 def _param_dict_to_str(data: Optional[Dict[str, Any]]) -> str:
    """Convert Python dictionary to string, which is passed to C API."""
    if data is None or not data:
@@ -330,12 +339,7 @@ def _param_dict_to_str(data: Optional[Dict[str, Any]]) -> str:
    pairs = []
    for key, val in data.items():
        if isinstance(val, (list, tuple, set)) or _is_numpy_1d_array(val):
-            def to_string(x):
+            pairs.append(f"{key}={','.join(map(_to_string, val))}")
-                if isinstance(x, list):
-                    return f"[{','.join(map(str, x))}]"
-                else:
-                    return str(x)
-            pairs.append(f"{key}={','.join(map(to_string, val))}")
        elif isinstance(val, (str, Path, _NUMERIC_TYPES)) or _is_numeric(val):
            pairs.append(f"{key}={val}")
        elif val is not None:
@@ -564,19 +568,19 @@ def _c_int_array(data):
    return (ptr_data, type_data, data)  # return `data` to avoid the temporary copy is freed
-def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None:
+def _is_allowed_numpy_dtype(dtype) -> bool:
    float128 = getattr(np, 'float128', type(None))
-    def is_allowed_numpy_dtype(dtype):
    return (
        issubclass(dtype, (np.integer, np.floating, np.bool_))
        and not issubclass(dtype, (np.timedelta64, float128))
    )
+def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None:
    bad_pandas_dtypes = [
        f'{column_name}: {pandas_dtype}'
        for column_name, pandas_dtype in pandas_dtypes_series.items()
-        if not is_allowed_numpy_dtype(pandas_dtype.type)
+        if not _is_allowed_numpy_dtype(pandas_dtype.type)
    ]
    if bad_pandas_dtypes:
        raise ValueError('pandas dtypes must be int, float or bool.\n'
@@ -934,12 +938,14 @@ class _InnerPredictor:
            ctypes.byref(n_preds)))
        return n_preds.value
-    def __pred_for_np2d(self, mat, start_iteration, num_iteration, predict_type):
+    def __inner_predict_np2d(
-        """Predict for a 2-D numpy matrix."""
+        self,
-        if len(mat.shape) != 2:
+        mat: np.ndarray,
-            raise ValueError('Input numpy.ndarray or list must be 2 dimensional')
+        start_iteration: int,
+        num_iteration: int,
-        def inner_predict(mat, start_iteration, num_iteration, predict_type, preds=None):
+        predict_type: int,
+        preds: Optional[np.ndarray]
+    ) -> Tuple[np.ndarray, int]:
        if mat.dtype == np.float32 or mat.dtype == np.float64:
            data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
        else:  # change non-float data to float data, need to copy
@@ -968,6 +974,17 @@ class _InnerPredictor:
            raise ValueError("Wrong length for predict results")
        return preds, mat.shape[0]
+    def __pred_for_np2d(
+        self,
+        mat: np.ndarray,
+        start_iteration: int,
+        num_iteration: int,
+        predict_type: int
+    ) -> Tuple[np.ndarray, int]:
+        """Predict for a 2-D numpy matrix."""
+        if len(mat.shape) != 2:
+            raise ValueError('Input numpy.ndarray or list must be 2 dimensional')
        nrow = mat.shape[0]
        if nrow > _MAX_INT32:
            sections = np.arange(start=_MAX_INT32, stop=nrow, step=_MAX_INT32)
@@ -978,13 +995,34 @@ class _InnerPredictor:
            for chunk, (start_idx_pred, end_idx_pred) in zip(np.array_split(mat, sections),
                                                             zip(n_preds_sections, n_preds_sections[1:])):
                # avoid memory consumption by arrays concatenation operations
-                inner_predict(chunk, start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred])
+                self.__inner_predict_np2d(
+                    mat=chunk,
+                    start_iteration=start_iteration,
+                    num_iteration=num_iteration,
+                    predict_type=predict_type,
+                    preds=preds[start_idx_pred:end_idx_pred]
+                )
            return preds, nrow
        else:
-            return inner_predict(mat, start_iteration, num_iteration, predict_type)
+            return self.__inner_predict_np2d(
+                mat=mat,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type,
+                preds=None
+            )
-    def __create_sparse_native(self, cs, out_shape, out_ptr_indptr, out_ptr_indices, out_ptr_data,
+    def __create_sparse_native(
-                               indptr_type, data_type, is_csr=True):
+        self,
+        cs: Union[scipy.sparse.csc_matrix, scipy.sparse.csr_matrix],
+        out_shape,
+        out_ptr_indptr,
+        out_ptr_indices,
+        out_ptr_data,
+        indptr_type,
+        data_type,
+        is_csr: bool
+    ):
        # create numpy array from output arrays
        data_indices_len = out_shape[0]
        indptr_len = out_shape[1]
@@ -1029,9 +1067,14 @@ class _InnerPredictor:
            return cs_output_matrices[0]
        return cs_output_matrices
-    def __pred_for_csr(self, csr, start_iteration, num_iteration, predict_type):
+    def __inner_predict_csr(
-        """Predict for a CSR data."""
+        self,
-        def inner_predict(csr, start_iteration, num_iteration, predict_type, preds=None):
+        csr: scipy.sparse.csr_matrix,
+        start_iteration: int,
+        num_iteration: int,
+        predict_type: int,
+        preds: Optional[np.ndarray]
+    ) -> Tuple[np.ndarray, int]:
        nrow = len(csr.indptr) - 1
        n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
        if preds is None:
@@ -1040,7 +1083,7 @@ class _InnerPredictor:
            raise ValueError("Wrong length of pre-allocated predict array")
        out_num_preds = ctypes.c_int64(0)
-            ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
+        ptr_indptr, type_ptr_indptr, _ = _c_int_array(csr.indptr)
        ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
        assert csr.shape[1] <= _MAX_INT32
@@ -1066,7 +1109,13 @@ class _InnerPredictor:
            raise ValueError("Wrong length for predict results")
        return preds, nrow
-        def inner_predict_sparse(csr, start_iteration, num_iteration, predict_type):
+    def __inner_predict_csr_sparse(
+        self,
+        csr: scipy.sparse.csr_matrix,
+        start_iteration: int,
+        num_iteration: int,
+        predict_type: int
+    ):
        ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
        ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
        csr_indices = csr.indices.astype(np.int32, copy=False)
@@ -1100,13 +1149,28 @@ class _InnerPredictor:
            ctypes.byref(out_ptr_indptr),
            ctypes.byref(out_ptr_indices),
            ctypes.byref(out_ptr_data)))
-            matrices = self.__create_sparse_native(csr, out_shape, out_ptr_indptr, out_ptr_indices, out_ptr_data,
+        matrices = self.__create_sparse_native(
-                                                   type_ptr_indptr, type_ptr_data, is_csr=True)
+            cs=csr,
+            out_shape=out_shape,
+            out_ptr_indptr=out_ptr_indptr,
+            out_ptr_indices=out_ptr_indices,
+            out_ptr_data=out_ptr_data,
+            indptr_type=type_ptr_indptr,
+            data_type=type_ptr_data,
+            is_csr=True
+        )
        nrow = len(csr.indptr) - 1
        return matrices, nrow
+    def __pred_for_csr(self, csr, start_iteration, num_iteration, predict_type):
+        """Predict for a CSR data."""
        if predict_type == _C_API_PREDICT_CONTRIB:
-            return inner_predict_sparse(csr, start_iteration, num_iteration, predict_type)
+            return self.__inner_predict_csr_sparse(
+                csr=csr,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        nrow = len(csr.indptr) - 1
        if nrow > _MAX_INT32:
            sections = [0] + list(np.arange(start=_MAX_INT32, stop=nrow, step=_MAX_INT32)) + [nrow]
@@ -1117,14 +1181,30 @@ class _InnerPredictor:
            for (start_idx, end_idx), (start_idx_pred, end_idx_pred) in zip(zip(sections, sections[1:]),
                                                                            zip(n_preds_sections, n_preds_sections[1:])):
                # avoid memory consumption by arrays concatenation operations
-                inner_predict(csr[start_idx:end_idx], start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred])
+                self.__inner_predict_csr(
+                    csr=csr[start_idx:end_idx],
+                    start_iteration=start_iteration,
+                    num_iteration=num_iteration,
+                    predict_type=predict_type,
+                    preds=preds[start_idx_pred:end_idx_pred]
+                )
            return preds, nrow
        else:
-            return inner_predict(csr, start_iteration, num_iteration, predict_type)
+            return self.__inner_predict_csr(
+                csr=csr,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type,
+                preds=None
+            )
-    def __pred_for_csc(self, csc, start_iteration, num_iteration, predict_type):
+    def __inner_predict_sparse_csc(
-        """Predict for a CSC data."""
+        self,
-        def inner_predict_sparse(csc, start_iteration, num_iteration, predict_type):
+        csc,
+        start_iteration,
+        num_iteration,
+        predict_type
+    ):
        ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr)
        ptr_data, type_ptr_data, _ = _c_float_array(csc.data)
        csc_indices = csc.indices.astype(np.int32, copy=False)
@@ -1158,16 +1238,31 @@ class _InnerPredictor:
            ctypes.byref(out_ptr_indptr),
            ctypes.byref(out_ptr_indices),
            ctypes.byref(out_ptr_data)))
-            matrices = self.__create_sparse_native(csc, out_shape, out_ptr_indptr, out_ptr_indices, out_ptr_data,
+        matrices = self.__create_sparse_native(
-                                                   type_ptr_indptr, type_ptr_data, is_csr=False)
+            cs=csc,
+            out_shape=out_shape,
+            out_ptr_indptr=out_ptr_indptr,
+            out_ptr_indices=out_ptr_indices,
+            out_ptr_data=out_ptr_data,
+            indptr_type=type_ptr_indptr,
+            data_type=type_ptr_data,
+            is_csr=False
+        )
        nrow = csc.shape[0]
        return matrices, nrow
+    def __pred_for_csc(self, csc, start_iteration, num_iteration, predict_type):
+        """Predict for a CSC data."""
        nrow = csc.shape[0]
        if nrow > _MAX_INT32:
            return self.__pred_for_csr(csc.tocsr(), start_iteration, num_iteration, predict_type)
        if predict_type == _C_API_PREDICT_CONTRIB:
-            return inner_predict_sparse(csc, start_iteration, num_iteration, predict_type)
+            return self.__inner_predict_sparse_csc(
+                csc=csc,
+                start_iteration=start_iteration,
+                num_iteration=num_iteration,
+                predict_type=predict_type
+            )
        n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
        preds = np.empty(n_preds, dtype=np.float64)
        out_num_preds = ctypes.c_int64(0)
@@ -4162,7 +4257,7 @@ class Booster:
                    ret.append((data_name, eval_name, val, is_higher_better))
        return ret
-    def __inner_predict(self, data_idx: int):
+    def __inner_predict(self, data_idx: int) -> np.ndarray:
        """Predict for training and validation dataset."""
        if data_idx >= self.__num_dataset:
            raise ValueError("Data_idx should be smaller than number of dataset")