"examples/vscode:/vscode.git/clone" did not exist on "8e5079efa1ecca0fde1824d8020d51e02735a541"
Unverified Commit 81922a7e authored by James Lamb, committed by GitHub
Browse files

[ci] [python-package] update pre-commit hooks to latest versions (#6817)

parent 2db0b25e
......@@ -24,7 +24,7 @@ repos:
args: ["--strict"]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.8.3
rev: v0.9.5
hooks:
# Run the linter.
- id: ruff
......@@ -39,7 +39,7 @@ repos:
hooks:
- id: shellcheck
- repo: https://github.com/crate-ci/typos
rev: v1.28.3
rev: v1.29.5
hooks:
- id: typos
args: ["--force-exclude"]
......
......@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = binary
# eval metrics, support multi metric, delimite by ',' , support following metrics
# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
......
......@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = regression
# eval metrics, support multi metric, delimite by ',' , support following metrics
# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
......
......@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = rank_xendcg
# eval metrics, support multi metric, delimite by ',' , support following metrics
# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
......
......@@ -381,7 +381,7 @@ def _list_to_1d_numpy(
return np.asarray(data, dtype=dtype) # SparseArray should be supported as well
else:
raise TypeError(
f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
f"Wrong type({type(data).__name__}) for {name}.\nIt should be list, numpy 1-D array or pandas Series"
)
......@@ -803,8 +803,7 @@ def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None:
]
if bad_pandas_dtypes:
raise ValueError(
'pandas dtypes must be int, float or bool.\n'
f'Fields with bad pandas dtypes: {", ".join(bad_pandas_dtypes)}'
f"pandas dtypes must be int, float or bool.\nFields with bad pandas dtypes: {', '.join(bad_pandas_dtypes)}"
)
......@@ -3298,7 +3297,7 @@ class Dataset:
self.data = np.array(list(self._yield_row_from_seqlist(self.data, self.used_indices)))
else:
_log_warning(
f"Cannot subset {type(self.data).__name__} type of raw data.\n" "Returning original raw data"
f"Cannot subset {type(self.data).__name__} type of raw data.\nReturning original raw data"
)
self._need_slice = False
if self.data is None:
......@@ -3718,7 +3717,7 @@ class Booster:
self.model_from_string(model_str)
else:
raise TypeError(
"Need at least one training dataset or model file or model string " "to create Booster instance"
"Need at least one training dataset or model file or model string to create Booster instance"
)
self.params = params
......@@ -4052,7 +4051,7 @@ class Booster:
if not isinstance(data, Dataset):
raise TypeError(f"Validation data should be Dataset instance, met {type(data).__name__}")
if data._predictor is not self.__init_predictor:
raise LightGBMError("Add validation data failed, " "you should use same predictor for these data")
raise LightGBMError("Add validation data failed, you should use same predictor for these data")
_safe_call(
_LIB.LGBM_BoosterAddValidData(
self._handle,
......@@ -4138,7 +4137,7 @@ class Booster:
if not isinstance(train_set, Dataset):
raise TypeError(f"Training data should be Dataset instance, met {type(train_set).__name__}")
if train_set._predictor is not self.__init_predictor:
raise LightGBMError("Replace training data failed, " "you should use same predictor for these data")
raise LightGBMError("Replace training data failed, you should use same predictor for these data")
self.train_set = train_set
_safe_call(
_LIB.LGBM_BoosterResetTrainingData(
......
......@@ -393,7 +393,7 @@ class _EarlyStoppingCallback:
if self.verbose:
best_score_str = "\t".join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
_log_info(
"Did not meet early stopping. " f"Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
f"Did not meet early stopping. Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
)
if self.first_metric_only:
_log_info(f"Evaluated only: {metric_name}")
......
......@@ -1166,7 +1166,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""
......@@ -1221,7 +1221,7 @@ class DaskLGBMClassifier(LGBMClassifier, _DaskLGBMModel):
_base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]
# DaskLGBMClassifier support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMClassifier.fit()``.
Returns
......@@ -1369,7 +1369,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""
......@@ -1424,7 +1424,7 @@ class DaskLGBMRegressor(LGBMRegressor, _DaskLGBMModel):
_base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]
# DaskLGBMRegressor support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMRegressor.fit()``.
Returns
......@@ -1536,7 +1536,7 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""
......@@ -1596,11 +1596,11 @@ class DaskLGBMRanker(LGBMRanker, _DaskLGBMModel):
_base_doc[: _base_doc.find("feature_name :")]
+ "eval_at : list or tuple of int, optional (default=(1, 2, 3, 4, 5))\n"
+ f"{' ':8}The evaluation positions of the specified metric.\n"
+ f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}"
+ f"{' ':4}{_base_doc[_base_doc.find('feature_name :') :]}"
)
# DaskLGBMRanker support for callbacks and init_model is not tested
fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMRanker.fit()``.
Returns
......
......@@ -247,7 +247,7 @@ def plot_split_value_histogram(
hist, split_bins = booster.get_split_value_histogram(feature=feature, bins=bins, xgboost_style=False)
if np.count_nonzero(hist) == 0:
raise ValueError("Cannot plot split value histogram, " f"because feature {feature} was not used in splitting")
raise ValueError(f"Cannot plot split value histogram, because feature {feature} was not used in splitting")
width = width_coef * (split_bins[1] - split_bins[0])
centred = (split_bins[:-1] + split_bins[1:]) / 2
......
......@@ -47,7 +47,7 @@ class FeatureParallelTreeLearner: public TREELEARNER_T {
/*!
* \brief Data parallel learning algorithm.
* Workers use local data to construct histograms locally, then sync up global histograms.
* It is recommonded used when #data is large or #feature is small
* It is recommended used when #data is large or #feature is small
*/
template <typename TREELEARNER_T>
class DataParallelTreeLearner: public TREELEARNER_T {
......
......@@ -20,9 +20,9 @@ if os.getenv("ALLOW_SKIP_ARROW_TESTS") == "1":
else:
import pyarrow as pa # type: ignore
assert (
lgb.compat.PYARROW_INSTALLED is True
), "'pyarrow' and its dependencies must be installed to run the arrow tests"
assert lgb.compat.PYARROW_INSTALLED is True, (
"'pyarrow' and its dependencies must be installed to run the arrow tests"
)
# ----------------------------------------------------------------------------------------------- #
# UTILITIES #
......
......@@ -2168,8 +2168,7 @@ def test_monotone_constraints(test_with_categorical_variable):
trainset = generate_trainset_for_monotone_constraints_tests(test_with_categorical_variable)
for test_with_interaction_constraints in [True, False]:
error_msg = (
"Model not correctly constrained "
f"(test_with_interaction_constraints={test_with_interaction_constraints})"
f"Model not correctly constrained (test_with_interaction_constraints={test_with_interaction_constraints})"
)
for monotone_constraints_method in ["basic", "intermediate", "advanced"]:
params = {
......
......@@ -1425,9 +1425,9 @@ def test_getting_feature_names_in_np_input(estimator_class):
def test_getting_feature_names_in_pd_input(estimator_class):
X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
col_names = X.columns.to_list()
assert isinstance(col_names, list) and all(
isinstance(c, str) for c in col_names
), "input data must have feature names for this test to cover the expected functionality"
assert isinstance(col_names, list) and all(isinstance(c, str) for c in col_names), (
"input data must have feature names for this test to cover the expected functionality"
)
params = {"n_estimators": 2, "num_leaves": 7}
if estimator_class is lgb.LGBMModel:
model = estimator_class(**{**params, "objective": "binary"})
......
......@@ -251,12 +251,12 @@ def assert_subtree_valid(root):
right_child = root["right_child"]
(l_w, l_c) = assert_subtree_valid(left_child)
(r_w, r_c) = assert_subtree_valid(right_child)
assert (
abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3
), "root node's internal weight should be approximately the sum of its child nodes' internal weights"
assert (
root["internal_count"] == l_c + r_c
), "root node's internal count should be exactly the sum of its child nodes' internal counts"
assert abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3, (
"root node's internal weight should be approximately the sum of its child nodes' internal weights"
)
assert root["internal_count"] == l_c + r_c, (
"root node's internal count should be exactly the sum of its child nodes' internal counts"
)
return (root["internal_weight"], root["internal_count"])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment