Commit 76c0077a (unverified), authored by James Lamb, committed by GitHub
Browse files

[python-package] remove some inner function definitions (#5704)

parent 771bad8c
...@@ -75,6 +75,10 @@ _INFO_METHOD_NAME = "info" ...@@ -75,6 +75,10 @@ _INFO_METHOD_NAME = "info"
_WARNING_METHOD_NAME = "warning" _WARNING_METHOD_NAME = "warning"
def _has_method(logger: Any, method_name: str) -> bool:
return callable(getattr(logger, method_name, None))
def register_logger( def register_logger(
logger: Any, info_method_name: str = "info", warning_method_name: str = "warning" logger: Any, info_method_name: str = "info", warning_method_name: str = "warning"
) -> None: ) -> None:
...@@ -89,9 +93,6 @@ def register_logger( ...@@ -89,9 +93,6 @@ def register_logger(
warning_method_name : str, optional (default="warning") warning_method_name : str, optional (default="warning")
Method used to log warning messages. Method used to log warning messages.
""" """
def _has_method(logger: Any, method_name: str) -> bool:
return callable(getattr(logger, method_name, None))
if not _has_method(logger, info_method_name) or not _has_method(logger, warning_method_name): if not _has_method(logger, info_method_name) or not _has_method(logger, warning_method_name):
raise TypeError( raise TypeError(
f"Logger must provide '{info_method_name}' and '{warning_method_name}' method" f"Logger must provide '{info_method_name}' and '{warning_method_name}' method"
...@@ -323,6 +324,14 @@ def _json_default_with_numpy(obj: Any) -> Any: ...@@ -323,6 +324,14 @@ def _json_default_with_numpy(obj: Any) -> Any:
return obj return obj
def _to_string(x: Union[int, float, str, List]) -> str:
if isinstance(x, list):
val_list = ",".join(str(val) for val in x)
return f"[{val_list}]"
else:
return str(x)
def _param_dict_to_str(data: Optional[Dict[str, Any]]) -> str: def _param_dict_to_str(data: Optional[Dict[str, Any]]) -> str:
"""Convert Python dictionary to string, which is passed to C API.""" """Convert Python dictionary to string, which is passed to C API."""
if data is None or not data: if data is None or not data:
...@@ -330,12 +339,7 @@ def _param_dict_to_str(data: Optional[Dict[str, Any]]) -> str: ...@@ -330,12 +339,7 @@ def _param_dict_to_str(data: Optional[Dict[str, Any]]) -> str:
pairs = [] pairs = []
for key, val in data.items(): for key, val in data.items():
if isinstance(val, (list, tuple, set)) or _is_numpy_1d_array(val): if isinstance(val, (list, tuple, set)) or _is_numpy_1d_array(val):
def to_string(x): pairs.append(f"{key}={','.join(map(_to_string, val))}")
if isinstance(x, list):
return f"[{','.join(map(str, x))}]"
else:
return str(x)
pairs.append(f"{key}={','.join(map(to_string, val))}")
elif isinstance(val, (str, Path, _NUMERIC_TYPES)) or _is_numeric(val): elif isinstance(val, (str, Path, _NUMERIC_TYPES)) or _is_numeric(val):
pairs.append(f"{key}={val}") pairs.append(f"{key}={val}")
elif val is not None: elif val is not None:
...@@ -564,19 +568,19 @@ def _c_int_array(data): ...@@ -564,19 +568,19 @@ def _c_int_array(data):
return (ptr_data, type_data, data) # return `data` to avoid the temporary copy is freed return (ptr_data, type_data, data) # return `data` to avoid the temporary copy is freed
def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None: def _is_allowed_numpy_dtype(dtype) -> bool:
float128 = getattr(np, 'float128', type(None)) float128 = getattr(np, 'float128', type(None))
def is_allowed_numpy_dtype(dtype):
return ( return (
issubclass(dtype, (np.integer, np.floating, np.bool_)) issubclass(dtype, (np.integer, np.floating, np.bool_))
and not issubclass(dtype, (np.timedelta64, float128)) and not issubclass(dtype, (np.timedelta64, float128))
) )
def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None:
bad_pandas_dtypes = [ bad_pandas_dtypes = [
f'{column_name}: {pandas_dtype}' f'{column_name}: {pandas_dtype}'
for column_name, pandas_dtype in pandas_dtypes_series.items() for column_name, pandas_dtype in pandas_dtypes_series.items()
if not is_allowed_numpy_dtype(pandas_dtype.type) if not _is_allowed_numpy_dtype(pandas_dtype.type)
] ]
if bad_pandas_dtypes: if bad_pandas_dtypes:
raise ValueError('pandas dtypes must be int, float or bool.\n' raise ValueError('pandas dtypes must be int, float or bool.\n'
...@@ -934,12 +938,14 @@ class _InnerPredictor: ...@@ -934,12 +938,14 @@ class _InnerPredictor:
ctypes.byref(n_preds))) ctypes.byref(n_preds)))
return n_preds.value return n_preds.value
def __pred_for_np2d(self, mat, start_iteration, num_iteration, predict_type): def __inner_predict_np2d(
"""Predict for a 2-D numpy matrix.""" self,
if len(mat.shape) != 2: mat: np.ndarray,
raise ValueError('Input numpy.ndarray or list must be 2 dimensional') start_iteration: int,
num_iteration: int,
def inner_predict(mat, start_iteration, num_iteration, predict_type, preds=None): predict_type: int,
preds: Optional[np.ndarray]
) -> Tuple[np.ndarray, int]:
if mat.dtype == np.float32 or mat.dtype == np.float64: if mat.dtype == np.float32 or mat.dtype == np.float64:
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False) data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
else: # change non-float data to float data, need to copy else: # change non-float data to float data, need to copy
...@@ -968,6 +974,17 @@ class _InnerPredictor: ...@@ -968,6 +974,17 @@ class _InnerPredictor:
raise ValueError("Wrong length for predict results") raise ValueError("Wrong length for predict results")
return preds, mat.shape[0] return preds, mat.shape[0]
def __pred_for_np2d(
self,
mat: np.ndarray,
start_iteration: int,
num_iteration: int,
predict_type: int
) -> Tuple[np.ndarray, int]:
"""Predict for a 2-D numpy matrix."""
if len(mat.shape) != 2:
raise ValueError('Input numpy.ndarray or list must be 2 dimensional')
nrow = mat.shape[0] nrow = mat.shape[0]
if nrow > _MAX_INT32: if nrow > _MAX_INT32:
sections = np.arange(start=_MAX_INT32, stop=nrow, step=_MAX_INT32) sections = np.arange(start=_MAX_INT32, stop=nrow, step=_MAX_INT32)
...@@ -978,13 +995,34 @@ class _InnerPredictor: ...@@ -978,13 +995,34 @@ class _InnerPredictor:
for chunk, (start_idx_pred, end_idx_pred) in zip(np.array_split(mat, sections), for chunk, (start_idx_pred, end_idx_pred) in zip(np.array_split(mat, sections),
zip(n_preds_sections, n_preds_sections[1:])): zip(n_preds_sections, n_preds_sections[1:])):
# avoid memory consumption by arrays concatenation operations # avoid memory consumption by arrays concatenation operations
inner_predict(chunk, start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred]) self.__inner_predict_np2d(
mat=chunk,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type,
preds=preds[start_idx_pred:end_idx_pred]
)
return preds, nrow return preds, nrow
else: else:
return inner_predict(mat, start_iteration, num_iteration, predict_type) return self.__inner_predict_np2d(
mat=mat,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type,
preds=None
)
def __create_sparse_native(self, cs, out_shape, out_ptr_indptr, out_ptr_indices, out_ptr_data, def __create_sparse_native(
indptr_type, data_type, is_csr=True): self,
cs: Union[scipy.sparse.csc_matrix, scipy.sparse.csr_matrix],
out_shape,
out_ptr_indptr,
out_ptr_indices,
out_ptr_data,
indptr_type,
data_type,
is_csr: bool
):
# create numpy array from output arrays # create numpy array from output arrays
data_indices_len = out_shape[0] data_indices_len = out_shape[0]
indptr_len = out_shape[1] indptr_len = out_shape[1]
...@@ -1029,9 +1067,14 @@ class _InnerPredictor: ...@@ -1029,9 +1067,14 @@ class _InnerPredictor:
return cs_output_matrices[0] return cs_output_matrices[0]
return cs_output_matrices return cs_output_matrices
def __pred_for_csr(self, csr, start_iteration, num_iteration, predict_type): def __inner_predict_csr(
"""Predict for a CSR data.""" self,
def inner_predict(csr, start_iteration, num_iteration, predict_type, preds=None): csr: scipy.sparse.csr_matrix,
start_iteration: int,
num_iteration: int,
predict_type: int,
preds: Optional[np.ndarray]
) -> Tuple[np.ndarray, int]:
nrow = len(csr.indptr) - 1 nrow = len(csr.indptr) - 1
n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type) n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
if preds is None: if preds is None:
...@@ -1040,7 +1083,7 @@ class _InnerPredictor: ...@@ -1040,7 +1083,7 @@ class _InnerPredictor:
raise ValueError("Wrong length of pre-allocated predict array") raise ValueError("Wrong length of pre-allocated predict array")
out_num_preds = ctypes.c_int64(0) out_num_preds = ctypes.c_int64(0)
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr) ptr_indptr, type_ptr_indptr, _ = _c_int_array(csr.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csr.data) ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
assert csr.shape[1] <= _MAX_INT32 assert csr.shape[1] <= _MAX_INT32
...@@ -1066,7 +1109,13 @@ class _InnerPredictor: ...@@ -1066,7 +1109,13 @@ class _InnerPredictor:
raise ValueError("Wrong length for predict results") raise ValueError("Wrong length for predict results")
return preds, nrow return preds, nrow
def inner_predict_sparse(csr, start_iteration, num_iteration, predict_type): def __inner_predict_csr_sparse(
self,
csr: scipy.sparse.csr_matrix,
start_iteration: int,
num_iteration: int,
predict_type: int
):
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csr.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csr.data) ptr_data, type_ptr_data, _ = _c_float_array(csr.data)
csr_indices = csr.indices.astype(np.int32, copy=False) csr_indices = csr.indices.astype(np.int32, copy=False)
...@@ -1100,13 +1149,28 @@ class _InnerPredictor: ...@@ -1100,13 +1149,28 @@ class _InnerPredictor:
ctypes.byref(out_ptr_indptr), ctypes.byref(out_ptr_indptr),
ctypes.byref(out_ptr_indices), ctypes.byref(out_ptr_indices),
ctypes.byref(out_ptr_data))) ctypes.byref(out_ptr_data)))
matrices = self.__create_sparse_native(csr, out_shape, out_ptr_indptr, out_ptr_indices, out_ptr_data, matrices = self.__create_sparse_native(
type_ptr_indptr, type_ptr_data, is_csr=True) cs=csr,
out_shape=out_shape,
out_ptr_indptr=out_ptr_indptr,
out_ptr_indices=out_ptr_indices,
out_ptr_data=out_ptr_data,
indptr_type=type_ptr_indptr,
data_type=type_ptr_data,
is_csr=True
)
nrow = len(csr.indptr) - 1 nrow = len(csr.indptr) - 1
return matrices, nrow return matrices, nrow
def __pred_for_csr(self, csr, start_iteration, num_iteration, predict_type):
"""Predict for a CSR data."""
if predict_type == _C_API_PREDICT_CONTRIB: if predict_type == _C_API_PREDICT_CONTRIB:
return inner_predict_sparse(csr, start_iteration, num_iteration, predict_type) return self.__inner_predict_csr_sparse(
csr=csr,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
nrow = len(csr.indptr) - 1 nrow = len(csr.indptr) - 1
if nrow > _MAX_INT32: if nrow > _MAX_INT32:
sections = [0] + list(np.arange(start=_MAX_INT32, stop=nrow, step=_MAX_INT32)) + [nrow] sections = [0] + list(np.arange(start=_MAX_INT32, stop=nrow, step=_MAX_INT32)) + [nrow]
...@@ -1117,14 +1181,30 @@ class _InnerPredictor: ...@@ -1117,14 +1181,30 @@ class _InnerPredictor:
for (start_idx, end_idx), (start_idx_pred, end_idx_pred) in zip(zip(sections, sections[1:]), for (start_idx, end_idx), (start_idx_pred, end_idx_pred) in zip(zip(sections, sections[1:]),
zip(n_preds_sections, n_preds_sections[1:])): zip(n_preds_sections, n_preds_sections[1:])):
# avoid memory consumption by arrays concatenation operations # avoid memory consumption by arrays concatenation operations
inner_predict(csr[start_idx:end_idx], start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred]) self.__inner_predict_csr(
csr=csr[start_idx:end_idx],
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type,
preds=preds[start_idx_pred:end_idx_pred]
)
return preds, nrow return preds, nrow
else: else:
return inner_predict(csr, start_iteration, num_iteration, predict_type) return self.__inner_predict_csr(
csr=csr,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type,
preds=None
)
def __pred_for_csc(self, csc, start_iteration, num_iteration, predict_type): def __inner_predict_sparse_csc(
"""Predict for a CSC data.""" self,
def inner_predict_sparse(csc, start_iteration, num_iteration, predict_type): csc,
start_iteration,
num_iteration,
predict_type
):
ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr) ptr_indptr, type_ptr_indptr, __ = _c_int_array(csc.indptr)
ptr_data, type_ptr_data, _ = _c_float_array(csc.data) ptr_data, type_ptr_data, _ = _c_float_array(csc.data)
csc_indices = csc.indices.astype(np.int32, copy=False) csc_indices = csc.indices.astype(np.int32, copy=False)
...@@ -1158,16 +1238,31 @@ class _InnerPredictor: ...@@ -1158,16 +1238,31 @@ class _InnerPredictor:
ctypes.byref(out_ptr_indptr), ctypes.byref(out_ptr_indptr),
ctypes.byref(out_ptr_indices), ctypes.byref(out_ptr_indices),
ctypes.byref(out_ptr_data))) ctypes.byref(out_ptr_data)))
matrices = self.__create_sparse_native(csc, out_shape, out_ptr_indptr, out_ptr_indices, out_ptr_data, matrices = self.__create_sparse_native(
type_ptr_indptr, type_ptr_data, is_csr=False) cs=csc,
out_shape=out_shape,
out_ptr_indptr=out_ptr_indptr,
out_ptr_indices=out_ptr_indices,
out_ptr_data=out_ptr_data,
indptr_type=type_ptr_indptr,
data_type=type_ptr_data,
is_csr=False
)
nrow = csc.shape[0] nrow = csc.shape[0]
return matrices, nrow return matrices, nrow
def __pred_for_csc(self, csc, start_iteration, num_iteration, predict_type):
"""Predict for a CSC data."""
nrow = csc.shape[0] nrow = csc.shape[0]
if nrow > _MAX_INT32: if nrow > _MAX_INT32:
return self.__pred_for_csr(csc.tocsr(), start_iteration, num_iteration, predict_type) return self.__pred_for_csr(csc.tocsr(), start_iteration, num_iteration, predict_type)
if predict_type == _C_API_PREDICT_CONTRIB: if predict_type == _C_API_PREDICT_CONTRIB:
return inner_predict_sparse(csc, start_iteration, num_iteration, predict_type) return self.__inner_predict_sparse_csc(
csc=csc,
start_iteration=start_iteration,
num_iteration=num_iteration,
predict_type=predict_type
)
n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type) n_preds = self.__get_num_preds(start_iteration, num_iteration, nrow, predict_type)
preds = np.empty(n_preds, dtype=np.float64) preds = np.empty(n_preds, dtype=np.float64)
out_num_preds = ctypes.c_int64(0) out_num_preds = ctypes.c_int64(0)
...@@ -4162,7 +4257,7 @@ class Booster: ...@@ -4162,7 +4257,7 @@ class Booster:
ret.append((data_name, eval_name, val, is_higher_better)) ret.append((data_name, eval_name, val, is_higher_better))
return ret return ret
def __inner_predict(self, data_idx: int): def __inner_predict(self, data_idx: int) -> np.ndarray:
"""Predict for training and validation dataset.""" """Predict for training and validation dataset."""
if data_idx >= self.__num_dataset: if data_idx >= self.__num_dataset:
raise ValueError("Data_idx should be smaller than number of dataset") raise ValueError("Data_idx should be smaller than number of dataset")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment