Unverified Commit fd921d53 authored by James Lamb, committed by GitHub

[python-package] remove default arguments in internal functions (#5834)

parent f065b436
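For context, the defaults being removed here (e.g. `dtype = np.float32`, `name = 'list'`) mean every internal call site must now spell out its arguments; the updated call sites in this diff pass them by keyword. A minimal sketch of the new calling convention (illustrative only, not part of the diff):

import numpy as np
import lightgbm as lgb

y = [0.0, 1.0, 0.0, 1.0]

# Before this change an internal caller could lean on the defaults:
#     lgb.basic._list_to_1d_numpy(y)
# After it, dtype and name must be passed explicitly:
arr = lgb.basic._list_to_1d_numpy(y, dtype=np.float32, name="label")
assert arr.dtype == np.float32 and arr.shape == (4,)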
@@ -281,8 +281,8 @@ def _is_1d_collection(data: Any) -> bool:
 def _list_to_1d_numpy(
     data: Any,
-    dtype: "np.typing.DTypeLike" = np.float32,
-    name: str = 'list'
+    dtype: "np.typing.DTypeLike",
+    name: str
 ) -> np.ndarray:
     """Convert data to numpy 1-D array."""
     if _is_numpy_1d_array(data):
@@ -322,8 +322,8 @@ def _is_2d_collection(data: Any) -> bool:
 def _data_to_2d_numpy(
     data: Any,
-    dtype: "np.typing.DTypeLike" = np.float32,
-    name: str = 'list'
+    dtype: "np.typing.DTypeLike",
+    name: str
 ) -> np.ndarray:
     """Convert data to numpy 2-D array."""
     if _is_numpy_2d_array(data):
@@ -1774,15 +1774,15 @@ class Dataset:
     def _lazy_init(
         self,
         data: Optional[_LGBM_TrainDataType],
-        label: Optional[_LGBM_LabelType] = None,
-        reference: Optional["Dataset"] = None,
-        weight: Optional[_LGBM_WeightType] = None,
-        group: Optional[_LGBM_GroupType] = None,
-        init_score: Optional[_LGBM_InitScoreType] = None,
-        predictor: Optional[_InnerPredictor] = None,
-        feature_name: _LGBM_FeatureNameConfiguration = 'auto',
-        categorical_feature: _LGBM_CategoricalFeatureConfiguration = 'auto',
-        params: Optional[Dict[str, Any]] = None
+        label: Optional[_LGBM_LabelType],
+        reference: Optional["Dataset"],
+        weight: Optional[_LGBM_WeightType],
+        group: Optional[_LGBM_GroupType],
+        init_score: Optional[_LGBM_InitScoreType],
+        predictor: Optional[_InnerPredictor],
+        feature_name: _LGBM_FeatureNameConfiguration,
+        categorical_feature: _LGBM_CategoricalFeatureConfiguration,
+        params: Optional[Dict[str, Any]]
     ) -> "Dataset":
         if data is None:
             self.handle = None
@@ -2166,13 +2166,13 @@ class Dataset:
                     self._update_params(reference_params)
                 if self.used_indices is None:
                     # create valid
-                    self._lazy_init(self.data, label=self.label, reference=self.reference,
+                    self._lazy_init(data=self.data, label=self.label, reference=self.reference,
                                     weight=self.weight, group=self.group,
                                     init_score=self.init_score, predictor=self._predictor,
-                                    feature_name=self.feature_name, params=self.params)
+                                    feature_name=self.feature_name, categorical_feature='auto', params=self.params)
                 else:
                     # construct subset
-                    used_indices = _list_to_1d_numpy(self.used_indices, np.int32, name='used_indices')
+                    used_indices = _list_to_1d_numpy(self.used_indices, dtype=np.int32, name='used_indices')
                     assert used_indices.flags.c_contiguous
                     if self.reference.group is not None:
                         group_info = np.array(self.reference.group).astype(np.int32, copy=False)
@@ -2201,7 +2201,7 @@ class Dataset:
                     )
             else:
                 # create train
-                self._lazy_init(self.data, label=self.label,
+                self._lazy_init(data=self.data, label=self.label, reference=None,
                                 weight=self.weight, group=self.group,
                                 init_score=self.init_score, predictor=self._predictor,
                                 feature_name=self.feature_name, categorical_feature=self.categorical_feature, params=self.params)
@@ -2372,9 +2372,9 @@ class Dataset:
         if field_name == 'init_score':
             dtype = np.float64
             if _is_1d_collection(data):
-                data = _list_to_1d_numpy(data, dtype, name=field_name)
+                data = _list_to_1d_numpy(data, dtype=dtype, name=field_name)
             elif _is_2d_collection(data):
-                data = _data_to_2d_numpy(data, dtype, name=field_name)
+                data = _data_to_2d_numpy(data, dtype=dtype, name=field_name)
                 data = data.ravel(order='F')
             else:
                 raise TypeError(
@@ -2383,7 +2383,7 @@ class Dataset:
                 )
         else:
             dtype = np.int32 if field_name == 'group' else np.float32
-            data = _list_to_1d_numpy(data, dtype, name=field_name)
+            data = _list_to_1d_numpy(data, dtype=dtype, name=field_name)
         ptr_data: Union[_ctypes_float_ptr, _ctypes_int_ptr]
         if data.dtype == np.float32 or data.dtype == np.float64:
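The `order='F'` ravel that follows `_data_to_2d_numpy` is what lets a 2-D init_score (one column per class) be handed over as a single flat array. A numpy-only sketch of that layout, with made-up values:

import numpy as np

# Hypothetical multiclass init_score: 3 samples x 2 classes.
init_score = np.array([[0.1, 0.9],
                       [0.2, 0.8],
                       [0.3, 0.7]], dtype=np.float64)

flat = init_score.ravel(order='F')
# Column-major flattening groups scores class by class.
assert flat.tolist() == [0.1, 0.2, 0.3, 0.9, 0.8, 0.7]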
@@ -2597,7 +2597,7 @@ class Dataset:
                 label = label.to_numpy(dtype=np.float32, na_value=np.nan)
                 label_array = np.ravel(label)
             else:
-                label_array = _list_to_1d_numpy(label, name='label')
+                label_array = _list_to_1d_numpy(label, dtype=np.float32, name='label')
             self.set_field('label', label_array)
             self.label = self.get_field('label')  # original values can be modified at cpp side
         return self
@@ -2622,7 +2622,7 @@ class Dataset:
             weight = None
         self.weight = weight
         if self.handle is not None and weight is not None:
-            weight = _list_to_1d_numpy(weight, name='weight')
+            weight = _list_to_1d_numpy(weight, dtype=np.float32, name='weight')
             self.set_field('weight', weight)
             self.weight = self.get_field('weight')  # original values can be modified at cpp side
         return self
@@ -2671,7 +2671,7 @@ class Dataset:
         """
         self.group = group
         if self.handle is not None and group is not None:
-            group = _list_to_1d_numpy(group, np.int32, name='group')
+            group = _list_to_1d_numpy(group, dtype=np.int32, name='group')
             self.set_field('group', group)
         return self
@@ -3588,8 +3588,8 @@ class Booster:
         if self.__num_class > 1:
             grad = grad.ravel(order='F')
             hess = hess.ravel(order='F')
-        grad = _list_to_1d_numpy(grad, name='gradient')
-        hess = _list_to_1d_numpy(hess, name='hessian')
+        grad = _list_to_1d_numpy(grad, dtype=np.float32, name='gradient')
+        hess = _list_to_1d_numpy(hess, dtype=np.float32, name='hessian')
         assert grad.flags.c_contiguous
         assert hess.flags.c_contiguous
         if len(grad) != len(hess):
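This hunk converts whatever a custom objective returns into contiguous float32 arrays, and the explicit `dtype=np.float32` now states that at the call site. As a rough illustration only (the name and formula below are just an example), an objective like this can return float64 arrays or plain lists and still end up as float32 gradients and hessians here:

import numpy as np

def squared_error_objective(preds, train_data):
    """Toy custom objective: gradient and hessian of 0.5 * (pred - label)^2."""
    labels = train_data.get_label()
    grad = preds - labels          # may be float64 or even a plain list...
    hess = np.ones_like(preds)     # ...both get normalized to float32 above
    return grad, hess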
@@ -4098,7 +4098,7 @@ class Booster:
             Prediction result.
             Can be sparse or a list of sparse objects (each element represents predictions for one class) for feature contributions (when ``pred_contrib=True``).
         """
-        predictor = self._to_predictor(deepcopy(kwargs))
+        predictor = self._to_predictor(pred_parameter=deepcopy(kwargs))
         if num_iteration is None:
             if start_iteration <= 0:
                 num_iteration = self.best_iteration
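`predict()` and `refit()` now build the predictor with an explicit `pred_parameter=deepcopy(kwargs)`. In other words, extra keyword arguments to `Booster.predict()` travel to the internal predictor as prediction parameters, and the deep copy keeps the caller's dict untouched. A small self-contained usage sketch (the extra parameter here is just an example):

import numpy as np
import lightgbm as lgb

X = np.random.rand(100, 5)
y = np.random.rand(100)
booster = lgb.train({"objective": "regression", "verbosity": -1},
                    lgb.Dataset(X, y), num_boost_round=5)

# num_threads is forwarded to the predictor via pred_parameter=deepcopy(kwargs)
preds = booster.predict(X, num_threads=1)
assert preds.shape == (100,)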
@@ -4188,7 +4188,7 @@ class Booster:
             raise LightGBMError('Cannot refit due to null objective function.')
         if dataset_params is None:
             dataset_params = {}
-        predictor = self._to_predictor(deepcopy(kwargs))
+        predictor = self._to_predictor(pred_parameter=deepcopy(kwargs))
         leaf_preds = predictor.predict(
             data=data,
             start_iteration=-1,
@@ -4292,7 +4292,7 @@ class Booster:
     def _to_predictor(
         self,
-        pred_parameter: Optional[Dict[str, Any]] = None
+        pred_parameter: Dict[str, Any]
     ) -> _InnerPredictor:
         """Convert to predictor."""
         predictor = _InnerPredictor(booster_handle=self.handle, pred_parameter=pred_parameter)
@@ -189,7 +189,7 @@ def _train_part(
     local_listen_port: int,
     num_machines: int,
     return_model: bool,
-    time_out: int = 120,
+    time_out: int,
     **kwargs: Any
 ) -> Optional[LGBMModel]:
     network_params = {
@@ -173,7 +173,7 @@ def train(
     if isinstance(init_model, (str, Path)):
         predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
     elif isinstance(init_model, Booster):
-        predictor = init_model._to_predictor(dict(init_model.params, **params))
+        predictor = init_model._to_predictor(pred_parameter=dict(init_model.params, **params))
     init_iteration = predictor.num_total_iteration if predictor is not None else 0
     # check dataset
     if not isinstance(train_set, Dataset):
@@ -678,7 +678,7 @@ def cv(
     if isinstance(init_model, (str, Path)):
         predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
     elif isinstance(init_model, Booster):
-        predictor = init_model._to_predictor(dict(init_model.params, **params))
+        predictor = init_model._to_predictor(pred_parameter=dict(init_model.params, **params))
     else:
         predictor = None
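Both `train()` and `cv()` now pass the merged dict by keyword. The `dict(init_model.params, **params)` idiom starts from the init model's parameters and lets the caller-supplied `params` win on any conflict, as this plain-Python sketch with made-up values shows:

init_model_params = {"learning_rate": 0.1, "num_leaves": 31}
params = {"learning_rate": 0.05}

merged = dict(init_model_params, **params)
assert merged == {"learning_rate": 0.05, "num_leaves": 31}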
@@ -632,17 +632,17 @@ def test_list_to_1d_numpy(collection, dtype):
         y = pd_Series(y)
     if isinstance(y, np.ndarray) and len(y.shape) == 2:
         with pytest.warns(UserWarning, match='column-vector'):
-            lgb.basic._list_to_1d_numpy(y)
+            lgb.basic._list_to_1d_numpy(y, dtype=np.float32, name="list")
             return
     elif isinstance(y, list) and isinstance(y[0], list):
         with pytest.raises(TypeError):
-            lgb.basic._list_to_1d_numpy(y)
+            lgb.basic._list_to_1d_numpy(y, dtype=np.float32, name="list")
             return
     elif isinstance(y, pd_Series) and y.dtype == object:
         with pytest.raises(ValueError):
-            lgb.basic._list_to_1d_numpy(y)
+            lgb.basic._list_to_1d_numpy(y, dtype=np.float32, name="list")
             return
-    result = lgb.basic._list_to_1d_numpy(y, dtype=dtype)
+    result = lgb.basic._list_to_1d_numpy(y, dtype=dtype, name="list")
     assert result.size == 10
     assert result.dtype == dtype
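The updated test keeps exercising the column-vector branch, just with the now-required `dtype` and `name` arguments. A hedged usage sketch of that branch (assuming, as the warning text suggests, that the helper flattens an (n, 1) array after warning):

import numpy as np
import lightgbm as lgb

y_col = np.zeros((10, 1), dtype=np.float32)  # column vector, not 1-D

# Emits the 'column-vector' UserWarning that the test above matches on;
# the result is assumed to be raveled to shape (10,).
arr = lgb.basic._list_to_1d_numpy(y_col, dtype=np.float32, name="label")
print(arr.shape)  # expected: (10,)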