Unverified commit 81922a7e, authored by James Lamb and committed by GitHub
Browse files

[ci] [python-package] update pre-commit hooks to latest versions (#6817)

parent 2db0b25e
...@@ -24,7 +24,7 @@ repos: ...@@ -24,7 +24,7 @@ repos:
args: ["--strict"] args: ["--strict"]
- repo: https://github.com/astral-sh/ruff-pre-commit - repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version. # Ruff version.
rev: v0.8.3 rev: v0.9.5
hooks: hooks:
# Run the linter. # Run the linter.
- id: ruff - id: ruff
...@@ -39,7 +39,7 @@ repos: ...@@ -39,7 +39,7 @@ repos:
hooks: hooks:
- id: shellcheck - id: shellcheck
- repo: https://github.com/crate-ci/typos - repo: https://github.com/crate-ci/typos
rev: v1.28.3 rev: v1.29.5
hooks: hooks:
- id: typos - id: typos
args: ["--force-exclude"] args: ["--force-exclude"]
......
...@@ -11,7 +11,7 @@ boosting_type = gbdt ...@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app # alias: application, app
objective = binary objective = binary
# eval metrics, support multi metric, delimite by ',' , support following metrics # eval metrics, support multi metric, delimited by ',' , support following metrics
# l1 # l1
# l2 , default metric for regression # l2 , default metric for regression
# ndcg , default metric for lambdarank # ndcg , default metric for lambdarank
......
...@@ -11,7 +11,7 @@ boosting_type = gbdt ...@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app # alias: application, app
objective = regression objective = regression
# eval metrics, support multi metric, delimite by ',' , support following metrics # eval metrics, support multi metric, delimited by ',' , support following metrics
# l1 # l1
# l2 , default metric for regression # l2 , default metric for regression
# ndcg , default metric for lambdarank # ndcg , default metric for lambdarank
......
...@@ -11,7 +11,7 @@ boosting_type = gbdt ...@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app # alias: application, app
objective = rank_xendcg objective = rank_xendcg
# eval metrics, support multi metric, delimite by ',' , support following metrics # eval metrics, support multi metric, delimited by ',' , support following metrics
# l1 # l1
# l2 , default metric for regression # l2 , default metric for regression
# ndcg , default metric for lambdarank # ndcg , default metric for lambdarank
......
...@@ -381,7 +381,7 @@ def _list_to_1d_numpy( ...@@ -381,7 +381,7 @@ def _list_to_1d_numpy(
return np.asarray(data, dtype=dtype) # SparseArray should be supported as well return np.asarray(data, dtype=dtype) # SparseArray should be supported as well
else: else:
raise TypeError( raise TypeError(
f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series" f"Wrong type({type(data).__name__}) for {name}.\nIt should be list, numpy 1-D array or pandas Series"
) )
...@@ -803,8 +803,7 @@ def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None: ...@@ -803,8 +803,7 @@ def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None:
] ]
if bad_pandas_dtypes: if bad_pandas_dtypes:
raise ValueError( raise ValueError(
'pandas dtypes must be int, float or bool.\n' f"pandas dtypes must be int, float or bool.\nFields with bad pandas dtypes: {', '.join(bad_pandas_dtypes)}"
f'Fields with bad pandas dtypes: {", ".join(bad_pandas_dtypes)}'
) )
...@@ -3298,7 +3297,7 @@ class Dataset: ...@@ -3298,7 +3297,7 @@ class Dataset:
self.data = np.array(list(self._yield_row_from_seqlist(self.data, self.used_indices))) self.data = np.array(list(self._yield_row_from_seqlist(self.data, self.used_indices)))
else: else:
_log_warning( _log_warning(
f"Cannot subset {type(self.data).__name__} type of raw data.\n" "Returning original raw data" f"Cannot subset {type(self.data).__name__} type of raw data.\nReturning original raw data"
) )
self._need_slice = False self._need_slice = False
if self.data is None: if self.data is None:
...@@ -3718,7 +3717,7 @@ class Booster: ...@@ -3718,7 +3717,7 @@ class Booster:
self.model_from_string(model_str) self.model_from_string(model_str)
else: else:
raise TypeError( raise TypeError(
"Need at least one training dataset or model file or model string " "to create Booster instance" "Need at least one training dataset or model file or model string to create Booster instance"
) )
self.params = params self.params = params
...@@ -4052,7 +4051,7 @@ class Booster: ...@@ -4052,7 +4051,7 @@ class Booster:
if not isinstance(data, Dataset): if not isinstance(data, Dataset):
raise TypeError(f"Validation data should be Dataset instance, met {type(data).__name__}") raise TypeError(f"Validation data should be Dataset instance, met {type(data).__name__}")
if data._predictor is not self.__init_predictor: if data._predictor is not self.__init_predictor:
raise LightGBMError("Add validation data failed, " "you should use same predictor for these data") raise LightGBMError("Add validation data failed, you should use same predictor for these data")
_safe_call( _safe_call(
_LIB.LGBM_BoosterAddValidData( _LIB.LGBM_BoosterAddValidData(
self._handle, self._handle,
...@@ -4138,7 +4137,7 @@ class Booster: ...@@ -4138,7 +4137,7 @@ class Booster:
if not isinstance(train_set, Dataset): if not isinstance(train_set, Dataset):
raise TypeError(f"Training data should be Dataset instance, met {type(train_set).__name__}") raise TypeError(f"Training data should be Dataset instance, met {type(train_set).__name__}")
if train_set._predictor is not self.__init_predictor: if train_set._predictor is not self.__init_predictor:
raise LightGBMError("Replace training data failed, " "you should use same predictor for these data") raise LightGBMError("Replace training data failed, you should use same predictor for these data")
self.train_set = train_set self.train_set = train_set
_safe_call( _safe_call(
_LIB.LGBM_BoosterResetTrainingData( _LIB.LGBM_BoosterResetTrainingData(
......
...@@ -393,7 +393,7 @@ class _EarlyStoppingCallback: ...@@ -393,7 +393,7 @@ class _EarlyStoppingCallback:
if self.verbose: if self.verbose:
best_score_str = "\t".join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]]) best_score_str = "\t".join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
_log_info( _log_info(
"Did not meet early stopping. " f"Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}" f"Did not meet early stopping. Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
) )
if self.first_metric_only: if self.first_metric_only:
_log_info(f"Evaluated only: {metric_name}") _log_info(f"Evaluated only: {metric_name}")
......
...@@ -1166,7 +1166,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel): ...@@ -1166,7 +1166,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f""" __init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None) {_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled. {" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs} {_kwargs}{_after_kwargs}
""" """
...@@ -1221,7 +1221,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel): ...@@ -1221,7 +1221,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
_base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :] _base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]
# DaskLGBMClassifier support for callbacks and init_model is not tested # DaskLGBMClassifier support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMClassifier.fit()``. Other parameters passed through to ``LGBMClassifier.fit()``.
Returns Returns
...@@ -1369,7 +1369,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel): ...@@ -1369,7 +1369,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f""" __init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None) {_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled. {" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs} {_kwargs}{_after_kwargs}
""" """
...@@ -1424,7 +1424,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel): ...@@ -1424,7 +1424,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
_base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :] _base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]
# DaskLGBMRegressor support for callbacks and init_model is not tested # DaskLGBMRegressor support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMRegressor.fit()``. Other parameters passed through to ``LGBMRegressor.fit()``.
Returns Returns
...@@ -1536,7 +1536,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel): ...@@ -1536,7 +1536,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore _before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f""" __init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None) {_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled. {" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs} {_kwargs}{_after_kwargs}
""" """
...@@ -1596,11 +1596,11 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel): ...@@ -1596,11 +1596,11 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
_base_doc[: _base_doc.find("feature_name :")] _base_doc[: _base_doc.find("feature_name :")]
+ "eval_at : list or tuple of int, optional (default=(1, 2, 3, 4, 5))\n" + "eval_at : list or tuple of int, optional (default=(1, 2, 3, 4, 5))\n"
+ f"{' ':8}The evaluation positions of the specified metric.\n" + f"{' ':8}The evaluation positions of the specified metric.\n"
+ f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}" + f"{' ':4}{_base_doc[_base_doc.find('feature_name :') :]}"
) )
# DaskLGBMRanker support for callbacks and init_model is not tested # DaskLGBMRanker support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMRanker.fit()``. Other parameters passed through to ``LGBMRanker.fit()``.
Returns Returns
......
...@@ -247,7 +247,7 @@ def plot_split_value_histogram( ...@@ -247,7 +247,7 @@ def plot_split_value_histogram(
hist, split_bins = booster.get_split_value_histogram(feature=feature, bins=bins, xgboost_style=False) hist, split_bins = booster.get_split_value_histogram(feature=feature, bins=bins, xgboost_style=False)
if np.count_nonzero(hist) == 0: if np.count_nonzero(hist) == 0:
raise ValueError("Cannot plot split value histogram, " f"because feature {feature} was not used in splitting") raise ValueError(f"Cannot plot split value histogram, because feature {feature} was not used in splitting")
width = width_coef * (split_bins[1] - split_bins[0]) width = width_coef * (split_bins[1] - split_bins[0])
centred = (split_bins[:-1] + split_bins[1:]) / 2 centred = (split_bins[:-1] + split_bins[1:]) / 2
......
...@@ -47,7 +47,7 @@ class FeatureParallelTreeLearner: public TREELEARNER_T { ...@@ -47,7 +47,7 @@ class FeatureParallelTreeLearner: public TREELEARNER_T {
/*! /*!
* \brief Data parallel learning algorithm. * \brief Data parallel learning algorithm.
* Workers use local data to construct histograms locally, then sync up global histograms. * Workers use local data to construct histograms locally, then sync up global histograms.
* It is recommonded used when #data is large or #feature is small * It is recommended used when #data is large or #feature is small
*/ */
template <typename TREELEARNER_T> template <typename TREELEARNER_T>
class DataParallelTreeLearner: public TREELEARNER_T { class DataParallelTreeLearner: public TREELEARNER_T {
......
...@@ -20,9 +20,9 @@ if os.getenv("ALLOW_SKIP_ARROW_TESTS") == "1": ...@@ -20,9 +20,9 @@ if os.getenv("ALLOW_SKIP_ARROW_TESTS") == "1":
else: else:
import pyarrow as pa # type: ignore import pyarrow as pa # type: ignore
assert ( assert lgb.compat.PYARROW_INSTALLED is True, (
lgb.compat.PYARROW_INSTALLED is True "'pyarrow' and its dependencies must be installed to run the arrow tests"
), "'pyarrow' and its dependencies must be installed to run the arrow tests" )
# ----------------------------------------------------------------------------------------------- # # ----------------------------------------------------------------------------------------------- #
# UTILITIES # # UTILITIES #
......
...@@ -2168,8 +2168,7 @@ def test_monotone_constraints(test_with_categorical_variable): ...@@ -2168,8 +2168,7 @@ def test_monotone_constraints(test_with_categorical_variable):
trainset = generate_trainset_for_monotone_constraints_tests(test_with_categorical_variable) trainset = generate_trainset_for_monotone_constraints_tests(test_with_categorical_variable)
for test_with_interaction_constraints in [True, False]: for test_with_interaction_constraints in [True, False]:
error_msg = ( error_msg = (
"Model not correctly constrained " f"Model not correctly constrained (test_with_interaction_constraints={test_with_interaction_constraints})"
f"(test_with_interaction_constraints={test_with_interaction_constraints})"
) )
for monotone_constraints_method in ["basic", "intermediate", "advanced"]: for monotone_constraints_method in ["basic", "intermediate", "advanced"]:
params = { params = {
......
...@@ -1425,9 +1425,9 @@ def test_getting_feature_names_in_np_input(estimator_class): ...@@ -1425,9 +1425,9 @@ def test_getting_feature_names_in_np_input(estimator_class):
def test_getting_feature_names_in_pd_input(estimator_class): def test_getting_feature_names_in_pd_input(estimator_class):
X, y = load_digits(n_class=2, return_X_y=True, as_frame=True) X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
col_names = X.columns.to_list() col_names = X.columns.to_list()
assert isinstance(col_names, list) and all( assert isinstance(col_names, list) and all(isinstance(c, str) for c in col_names), (
isinstance(c, str) for c in col_names "input data must have feature names for this test to cover the expected functionality"
), "input data must have feature names for this test to cover the expected functionality" )
params = {"n_estimators": 2, "num_leaves": 7} params = {"n_estimators": 2, "num_leaves": 7}
if estimator_class is lgb.LGBMModel: if estimator_class is lgb.LGBMModel:
model = estimator_class(**{**params, "objective": "binary"}) model = estimator_class(**{**params, "objective": "binary"})
......
...@@ -251,12 +251,12 @@ def assert_subtree_valid(root): ...@@ -251,12 +251,12 @@ def assert_subtree_valid(root):
right_child = root["right_child"] right_child = root["right_child"]
(l_w, l_c) = assert_subtree_valid(left_child) (l_w, l_c) = assert_subtree_valid(left_child)
(r_w, r_c) = assert_subtree_valid(right_child) (r_w, r_c) = assert_subtree_valid(right_child)
assert ( assert abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3, (
abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3 "root node's internal weight should be approximately the sum of its child nodes' internal weights"
), "root node's internal weight should be approximately the sum of its child nodes' internal weights" )
assert ( assert root["internal_count"] == l_c + r_c, (
root["internal_count"] == l_c + r_c "root node's internal count should be exactly the sum of its child nodes' internal counts"
), "root node's internal count should be exactly the sum of its child nodes' internal counts" )
return (root["internal_weight"], root["internal_count"]) return (root["internal_weight"], root["internal_count"])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment