Unverified Commit 4531ff54 authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[python-package] adapt to scikit-learn 1.6 testing changes, pin more packages...

[python-package] adapt to scikit-learn 1.6 testing changes, pin more packages in R 3.6 CI jobs (#6718)
parent 5151fe85
# [description]
#
# Installs a pinned set of packages that worked together
# as of the last R 3.6 release.
#
# Install one batch of source tarballs from the CRAN archive.
#
# packages: character vector of tarball paths relative to
#           https://cran.r-project.org/src/contrib/Archive,
#           e.g. "<package>/<package>_<version>.tar.gz"
#
# Tarballs are fetched by URL (repos = NULL), installed into the library
# named by the R_LIBS environment variable, and their dependencies are
# NOT installed automatically (dependencies = FALSE).
.install_packages <- function(packages) {
    archive_root <- "https://cran.r-project.org/src/contrib/Archive"
    tarball_urls <- paste(archive_root, packages, sep = "/") # nolint: paste
    install.packages( # nolint: undesirable_function
        pkgs = tarball_urls
        , repos = NULL
        , lib = Sys.getenv("R_LIBS")
        , dependencies = FALSE
    )
}
# when confronted with a bunch of URLs like this, install.packages() sometimes
# struggles to determine install order... so install packages in batches here,
# starting from the root of the dependency graph and working up
# {praise} has only ever had a single release, so its tarball is served from
# the top-level src/contrib directory instead of contrib/Archive and has to be
# installed from its own URL
.praise_url <- "https://cran.r-project.org/src/contrib/praise_1.0.0.tar.gz"
install.packages( # nolint: undesirable_function
    pkgs = .praise_url
    , repos = NULL
    , lib = Sys.getenv("R_LIBS")
    , dependencies = FALSE
)
# Pinned tarballs, grouped into batches. The batches are installed one at a
# time, strictly in the order they are listed here.
.package_batches <- list(
    c(
        "brio/brio_1.1.4.tar.gz" # nolint: non_portable_path
        , "cli/cli_3.6.2.tar.gz" # nolint: non_portable_path
        , "crayon/crayon_1.5.2.tar.gz" # nolint: non_portable_path
        , "digest/digest_0.6.36.tar.gz" # nolint: non_portable_path
        , "evaluate/evaluate_0.23.tar.gz" # nolint: non_portable_path
        , "fansi/fansi_1.0.5.tar.gz" # nolint: non_portable_path
        , "fs/fs_1.6.4.tar.gz" # nolint: non_portable_path
        , "glue/glue_1.7.0.tar.gz" # nolint: non_portable_path
        , "jsonlite/jsonlite_1.8.8.tar.gz" # nolint: non_portable_path
        , "lattice/lattice_0.20-41.tar.gz" # nolint: non_portable_path
        , "magrittr/magrittr_2.0.2.tar.gz" # nolint: non_portable_path
        , "pkgconfig/pkgconfig_2.0.2.tar.gz" # nolint: non_portable_path
        , "ps/ps_1.8.0.tar.gz" # nolint: non_portable_path
        , "R6/R6_2.5.0.tar.gz" # nolint: non_portable_path
        , "rlang/rlang_1.1.3.tar.gz" # nolint: non_portable_path
        , "rprojroot/rprojroot_2.0.3.tar.gz" # nolint: non_portable_path
        , "utf8/utf8_1.2.3.tar.gz" # nolint: non_portable_path
        , "withr/withr_3.0.1.tar.gz" # nolint: non_portable_path
    )
    , c(
        "desc/desc_1.4.2.tar.gz" # nolint: non_portable_path
        , "diffobj/diffobj_0.3.4.tar.gz" # nolint: non_portable_path
        , "lifecycle/lifecycle_1.0.3.tar.gz" # nolint: non_portable_path
        , "processx/processx_3.8.3.tar.gz" # nolint: non_portable_path
    )
    , c(
        "callr/callr_3.7.5.tar.gz" # nolint: non_portable_path
        , "vctrs/vctrs_0.6.4.tar.gz" # nolint: non_portable_path
    )
    , c(
        "pillar/pillar_1.8.1.tar.gz" # nolint: non_portable_path
        , "tibble/tibble_3.2.0.tar.gz" # nolint: non_portable_path
    )
    , c(
        "pkgbuild/pkgbuild_1.4.4.tar.gz" # nolint: non_portable_path
        , "rematch2/rematch2_2.1.1.tar.gz" # nolint: non_portable_path
        , "waldo/waldo_0.5.3.tar.gz" # nolint: non_portable_path
    )
    , c(
        "pkgload/pkgload_1.3.4.tar.gz" # nolint: non_portable_path
        , "testthat/testthat_3.2.1.tar.gz" # nolint: non_portable_path
    )
)
for (.batch in .package_batches) {
    .install_packages(.batch)
}
...@@ -108,10 +108,10 @@ if [[ $OS_NAME == "macos" ]]; then ...@@ -108,10 +108,10 @@ if [[ $OS_NAME == "macos" ]]; then
export R_TIDYCMD=/usr/local/bin/tidy export R_TIDYCMD=/usr/local/bin/tidy
fi fi
# fix for issue where CRAN was not returning {lattice} and {evaluate} when using R 3.6 # fix for issue where CRAN was not returning {evaluate}, {lattice}, or {waldo} when using R 3.6
# "Warning: dependency ‘lattice’ is not available" # "Warning: dependency ‘lattice’ is not available"
if [[ "${R_MAJOR_VERSION}" == "3" ]]; then if [[ "${R_MAJOR_VERSION}" == "3" ]]; then
Rscript --vanilla -e "install.packages(c('https://cran.r-project.org/src/contrib/Archive/lattice/lattice_0.20-41.tar.gz', 'https://cran.r-project.org/src/contrib/Archive/evaluate/evaluate_0.23.tar.gz'), repos = NULL, lib = '${R_LIB_PATH}')" Rscript --vanilla ./.ci/install-old-r-packages.R
else else
# {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}. # {Matrix} needs {lattice}, so this needs to run before manually installing {Matrix}.
# This should be unnecessary on R >=4.4.0 # This should be unnecessary on R >=4.4.0
......
...@@ -14,6 +14,14 @@ try: ...@@ -14,6 +14,14 @@ try:
from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import assert_all_finite, check_array, check_X_y from sklearn.utils.validation import assert_all_finite, check_array, check_X_y
# sklearn.utils Tags types can be imported unconditionally once
# lightgbm's minimum scikit-learn version is 1.6 or higher
try:
from sklearn.utils import ClassifierTags as _sklearn_ClassifierTags
from sklearn.utils import RegressorTags as _sklearn_RegressorTags
except ImportError:
_sklearn_ClassifierTags = None
_sklearn_RegressorTags = None
try: try:
from sklearn.exceptions import NotFittedError from sklearn.exceptions import NotFittedError
from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold
...@@ -140,6 +148,8 @@ except ImportError: ...@@ -140,6 +148,8 @@ except ImportError:
_LGBMCheckClassificationTargets = None _LGBMCheckClassificationTargets = None
_LGBMComputeSampleWeight = None _LGBMComputeSampleWeight = None
_LGBMValidateData = None _LGBMValidateData = None
_sklearn_ClassifierTags = None
_sklearn_RegressorTags = None
_sklearn_version = None _sklearn_version = None
# additional scikit-learn imports only for type hints # additional scikit-learn imports only for type hints
......
...@@ -40,6 +40,8 @@ from .compat import ( ...@@ -40,6 +40,8 @@ from .compat import (
_LGBMModelBase, _LGBMModelBase,
_LGBMRegressorBase, _LGBMRegressorBase,
_LGBMValidateData, _LGBMValidateData,
_sklearn_ClassifierTags,
_sklearn_RegressorTags,
_sklearn_version, _sklearn_version,
dt_DataTable, dt_DataTable,
pd_DataFrame, pd_DataFrame,
...@@ -703,7 +705,6 @@ class LGBMModel(_LGBMModelBase): ...@@ -703,7 +705,6 @@ class LGBMModel(_LGBMModelBase):
tags.input_tags.allow_nan = tags_dict["allow_nan"] tags.input_tags.allow_nan = tags_dict["allow_nan"]
tags.input_tags.sparse = "sparse" in tags_dict["X_types"] tags.input_tags.sparse = "sparse" in tags_dict["X_types"]
tags.target_tags.one_d_labels = "1dlabels" in tags_dict["X_types"] tags.target_tags.one_d_labels = "1dlabels" in tags_dict["X_types"]
tags._xfail_checks = tags_dict["_xfail_checks"]
return tags return tags
def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]: def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]:
...@@ -1291,7 +1292,10 @@ class LGBMRegressor(_LGBMRegressorBase, LGBMModel): ...@@ -1291,7 +1292,10 @@ class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
return tags return tags
def __sklearn_tags__(self) -> "_sklearn_Tags": def __sklearn_tags__(self) -> "_sklearn_Tags":
return LGBMModel.__sklearn_tags__(self) tags = LGBMModel.__sklearn_tags__(self)
tags.estimator_type = "regressor"
tags.regressor_tags = _sklearn_RegressorTags(multi_label=False)
return tags
def fit( # type: ignore[override] def fit( # type: ignore[override]
self, self,
...@@ -1350,7 +1354,10 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel): ...@@ -1350,7 +1354,10 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
return tags return tags
def __sklearn_tags__(self) -> "_sklearn_Tags": def __sklearn_tags__(self) -> "_sklearn_Tags":
return LGBMModel.__sklearn_tags__(self) tags = LGBMModel.__sklearn_tags__(self)
tags.estimator_type = "classifier"
tags.classifier_tags = _sklearn_ClassifierTags(multi_class=True, multi_label=False)
return tags
def fit( # type: ignore[override] def fit( # type: ignore[override]
self, self,
......
...@@ -17,11 +17,18 @@ from sklearn.ensemble import StackingClassifier, StackingRegressor ...@@ -17,11 +17,18 @@ from sklearn.ensemble import StackingClassifier, StackingRegressor
from sklearn.metrics import accuracy_score, log_loss, mean_squared_error, r2_score from sklearn.metrics import accuracy_score, log_loss, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.multioutput import ClassifierChain, MultiOutputClassifier, MultiOutputRegressor, RegressorChain from sklearn.multioutput import ClassifierChain, MultiOutputClassifier, MultiOutputRegressor, RegressorChain
from sklearn.utils.estimator_checks import parametrize_with_checks from sklearn.utils.estimator_checks import parametrize_with_checks as sklearn_parametrize_with_checks
from sklearn.utils.validation import check_is_fitted from sklearn.utils.validation import check_is_fitted
import lightgbm as lgb import lightgbm as lgb
from lightgbm.compat import DATATABLE_INSTALLED, PANDAS_INSTALLED, dt_DataTable, pd_DataFrame, pd_Series from lightgbm.compat import (
DATATABLE_INSTALLED,
PANDAS_INSTALLED,
_sklearn_version,
dt_DataTable,
pd_DataFrame,
pd_Series,
)
from .utils import ( from .utils import (
assert_silent, assert_silent,
...@@ -35,6 +42,9 @@ from .utils import ( ...@@ -35,6 +42,9 @@ from .utils import (
softmax, softmax,
) )
# Only the first two dot-separated components of the scikit-learn version are
# needed to decide whether the 1.6+ testing API is available.
SKLEARN_MAJOR, SKLEARN_MINOR = _sklearn_version.split(".")[:2]
SKLEARN_VERSION_GTE_1_6 = (int(SKLEARN_MAJOR), int(SKLEARN_MINOR)) >= (1, 6)
decreasing_generator = itertools.count(0, -1) decreasing_generator = itertools.count(0, -1)
estimator_classes = (lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker) estimator_classes = (lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker)
task_to_model_factory = { task_to_model_factory = {
...@@ -1432,7 +1442,28 @@ def test_getting_feature_names_in_pd_input(estimator_class): ...@@ -1432,7 +1442,28 @@ def test_getting_feature_names_in_pd_input(estimator_class):
np.testing.assert_array_equal(model.feature_names_in_, X.columns) np.testing.assert_array_equal(model.feature_names_in_, X.columns)
@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()]) # Starting with scikit-learn 1.6 (https://github.com/scikit-learn/scikit-learn/pull/30149),
# the only API for marking estimator tests as expected to fail is to pass a keyword argument
# to parametrize_with_checks(). That function didn't accept additional arguments in earlier
# versions.
#
# This block defines a patched version of parametrize_with_checks() so lightgbm's tests
# can be compatible with scikit-learn <1.6 and >=1.6.
#
# This should be removed once minimum supported scikit-learn version is at least 1.6.
if not SKLEARN_VERSION_GTE_1_6:

    def parametrize_with_checks(estimator, *_ignored_args, **_ignored_kwargs):
        """Call scikit-learn's ``parametrize_with_checks()`` with ``estimator`` only.

        Any additional positional or keyword arguments are accepted and discarded,
        so call sites can pass the same arguments regardless of scikit-learn version.
        """
        return sklearn_parametrize_with_checks(estimator)

else:
    # scikit-learn >= 1.6: use the upstream decorator unchanged.
    parametrize_with_checks = sklearn_parametrize_with_checks
def _get_expected_failed_tests(estimator):
return estimator._more_tags()["_xfail_checks"]
@parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()], expected_failed_checks=_get_expected_failed_tests)
def test_sklearn_integration(estimator, check): def test_sklearn_integration(estimator, check):
estimator.set_params(min_child_samples=1, min_data_in_bin=1) estimator.set_params(min_child_samples=1, min_data_in_bin=1)
check(estimator) check(estimator)
...@@ -1457,7 +1488,6 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato ...@@ -1457,7 +1488,6 @@ def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimato
assert sklearn_tags.input_tags.allow_nan is True assert sklearn_tags.input_tags.allow_nan is True
assert sklearn_tags.input_tags.sparse is True assert sklearn_tags.input_tags.sparse is True
assert sklearn_tags.target_tags.one_d_labels is True assert sklearn_tags.target_tags.one_d_labels is True
assert sklearn_tags._xfail_checks == more_tags["_xfail_checks"]
@pytest.mark.parametrize("task", all_tasks) @pytest.mark.parametrize("task", all_tasks)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment