[ci] update pre-commit versions, add some pixi things to config files (#6986)

16ca335b · James Lamb · GitHub · 915c2706 · 16ca335b · 16ca335b
Unverified commit 16ca335b, authored Aug 03, 2025 by James Lamb; committed by GitHub Aug 03, 2025
13 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -466,3 +466,6 @@ dask-worker-space/

 # hipify-perl -inplace leaves behind *.prehip files
 *.prehip
+
+# pixi environments
+.pixi
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,13 +18,13 @@ repos:
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: https://github.com/adrienverge/yamllint
-    rev: v1.35.1
+    rev: v1.37.1
    hooks:
      - id: yamllint
        args: ["--strict"]
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
-    rev: v0.9.10
+    rev: v0.12.5
    hooks:
      # Run the linter.
      - id: ruff
@@ -39,13 +39,13 @@ repos:
    hooks:
      - id: shellcheck
  - repo: https://github.com/crate-ci/typos
-    rev: v1.30.2
+    rev: v1.34.0
    hooks:
      - id: typos
        args: ["--force-exclude"]
        exclude: (\.gitignore$)|(^\.editorconfig$)
  - repo: https://github.com/henryiii/validate-pyproject-schema-store
-    rev: 2025.03.10
+    rev: 2025.07.14
    hooks:
      - id: validate-pyproject
        files: python-package/pyproject.toml$
--- a/biome.json
+++ b/biome.json
@@ -1,6 +1,6 @@
 {
    "files": {
-        "ignore": [".mypy_cache/"]
+        "ignore": [".mypy_cache/", ".pixi/"]
    },
    "formatter": {
        "enabled": true,

--- a/docs/conf.py
+++ b/docs/conf.py
@@ -67,7 +67,7 @@ class IgnoredDirective(Directive):

 # -- General configuration ------------------------------------------------

-os.environ["LIGHTGBM_BUILD_DOC"] = "1"
+os.environ["LIGHTGBM_BUILD_DOC"] = "True"
 C_API = os.environ.get("C_API", "").lower().strip() != "no"
 RTD = bool(os.environ.get("READTHEDOCS", ""))
 RTD_VERSION = os.environ.get("READTHEDOCS_VERSION", "stable")

--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -287,7 +287,7 @@ def _log_callback(msg: bytes) -> None:


 # connect the Python logger to logging in lib_lightgbm
-if not environ.get("LIGHTGBM_BUILD_DOC", False):
+if environ.get("LIGHTGBM_BUILD_DOC", "False") != "True":
    _LIB.LGBM_GetLastError.restype = ctypes.c_char_p
    callback = ctypes.CFUNCTYPE(None, ctypes.c_char_p)
    _LIB.callback = callback(_log_callback)  # type: ignore[attr-defined]
@@ -840,7 +840,7 @@ def _data_from_pandas(
        for col, category in zip(cat_cols, pandas_categorical):
            if list(data[col].cat.categories) != list(category):
                data[col] = data[col].cat.set_categories(category)
-    if len(cat_cols):  # cat_cols is list
+    if cat_cols:  # cat_cols is list
        data[cat_cols] = data[cat_cols].apply(lambda x: x.cat.codes).replace({-1: np.nan})

    # use cat cols from DataFrame

--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -74,7 +74,7 @@ class CallbackEnv:
 def _is_using_cv(env: CallbackEnv) -> bool:
    """Check if model in callback env is a CVBooster."""
    # this import is here to avoid a circular import
-    from .engine import CVBooster
+    from .engine import CVBooster  # noqa: PLC0415

    return isinstance(env.model, CVBooster)


--- a/python-package/lightgbm/compat.py
+++ b/python-package/lightgbm/compat.py
@@ -52,7 +52,7 @@ try:
            #  * lightgbm cannot be used with scikit-learn versions older than that
            #  * this validate_data() re-implementation will not be called in scikit-learn>=1.6
            #
-            from sklearn.utils.validation import _num_features
+            from sklearn.utils.validation import _num_features  # noqa: PLC0415

            # _num_features() raises a TypeError on 1-dimensional input. That's a problem
            # because scikit-learn's 'check_fit1d' estimator check sets that expectation that

--- a/python-package/lightgbm/dask.py
+++ b/python-package/lightgbm/dask.py
@@ -769,7 +769,7 @@ def _train(
                )
                raise LightGBMError(msg)

-            worker_address_to_port = {address: local_listen_port for address in worker_addresses}
+            worker_address_to_port = dict.fromkeys(worker_addresses, local_listen_port)
        else:
            _log_info("Finding random open ports for workers")
            worker_to_socket_future, worker_address_to_port = _assign_open_ports_to_workers(

--- a/python-package/lightgbm/libpath.py
+++ b/python-package/lightgbm/libpath.py
@@ -41,7 +41,7 @@ def _find_lib_path() -> List[str]:

 # we don't need lib_lightgbm while building docs
 _LIB: ctypes.CDLL
-if environ.get("LIGHTGBM_BUILD_DOC", False):
+if environ.get("LIGHTGBM_BUILD_DOC", "False") == "True":
    from unittest.mock import Mock  # isort: skip

    _LIB = Mock(ctypes.CDLL)  # type: ignore

--- a/python-package/lightgbm/plotting.py
+++ b/python-package/lightgbm/plotting.py
@@ -104,7 +104,7 @@ def plot_importance(
        The plot with model's feature importances.
    """
    if MATPLOTLIB_INSTALLED:
-        import matplotlib.pyplot as plt
+        import matplotlib.pyplot as plt  # noqa: PLC0415
    else:
        raise ImportError("You must install matplotlib and restart your session to plot importance.")

@@ -235,8 +235,8 @@ def plot_split_value_histogram(
        The plot with specified model's feature split value histogram.
    """
    if MATPLOTLIB_INSTALLED:
-        import matplotlib.pyplot as plt
-        from matplotlib.ticker import MaxNLocator
+        import matplotlib.pyplot as plt  # noqa: PLC0415
+        from matplotlib.ticker import MaxNLocator  # noqa: PLC0415
    else:
        raise ImportError("You must install matplotlib and restart your session to plot split value histogram.")

@@ -342,7 +342,7 @@ def plot_metric(
        The plot with metric's history over the training.
    """
    if MATPLOTLIB_INSTALLED:
-        import matplotlib.pyplot as plt
+        import matplotlib.pyplot as plt  # noqa: PLC0415
    else:
        raise ImportError("You must install matplotlib and restart your session to plot metric.")

@@ -468,7 +468,7 @@ def _to_graphviz(
      - https://graphviz.readthedocs.io/en/stable/api.html#digraph
    """
    if GRAPHVIZ_INSTALLED:
-        from graphviz import Digraph
+        from graphviz import Digraph  # noqa: PLC0415
    else:
        raise ImportError("You must install graphviz and restart your session to plot tree.")

@@ -812,8 +812,8 @@ def plot_tree(
        The plot with single tree.
    """
    if MATPLOTLIB_INSTALLED:
-        import matplotlib.image
-        import matplotlib.pyplot as plt
+        import matplotlib.image  # noqa: PLC0415
+        import matplotlib.pyplot as plt  # noqa: PLC0415
    else:
        raise ImportError("You must install matplotlib and restart your session to plot tree.")


--- a/src/io/cuda/cuda_column_data.cpp
+++ b/src/io/cuda/cuda_column_data.cpp
@@ -135,7 +135,7 @@ void CUDAColumnData::Init(const int num_columns,
        } else if (bit_type == 32) {
          InitOneColumnData<false, false, uint32_t>(column_data[column_index], nullptr, &data_by_column_[column_index]);
        } else {
-          Log::Fatal("Unknow column bit type %d", bit_type);
+          Log::Fatal("Unknown column bit type %d", bit_type);
        }
      } else {
        // is sparse column
@@ -146,7 +146,7 @@ void CUDAColumnData::Init(const int num_columns,
        } else if (bit_type == 32) {
          InitOneColumnData<true, false, uint32_t>(nullptr, column_bin_iterator[column_index], &data_by_column_[column_index]);
        } else {
-          Log::Fatal("Unknow column bit type %d", bit_type);
+          Log::Fatal("Unknown column bit type %d", bit_type);
        }
      }
      OMP_LOOP_EX_END();

--- a/src/io/cuda/cuda_row_data.cpp
+++ b/src/io/cuda/cuda_row_data.cpp
@@ -105,7 +105,7 @@ void CUDARowData::Init(const Dataset* train_data, TrainingShareStates* train_sha
          &cuda_row_ptr_uint64_t_,
          &cuda_partition_ptr_uint64_t_);
      } else {
-        Log::Fatal("Unknow data ptr bit type %d", row_ptr_bit_type_);
+        Log::Fatal("Unknown data ptr bit type %d", row_ptr_bit_type_);
      }
    }
  } else if (bit_type_ == 16) {
@@ -136,7 +136,7 @@ void CUDARowData::Init(const Dataset* train_data, TrainingShareStates* train_sha
          &cuda_row_ptr_uint64_t_,
          &cuda_partition_ptr_uint64_t_);
      } else {
-        Log::Fatal("Unknow data ptr bit type %d", row_ptr_bit_type_);
+        Log::Fatal("Unknown data ptr bit type %d", row_ptr_bit_type_);
      }
    }
  } else if (bit_type_ == 32) {
@@ -167,11 +167,11 @@ void CUDARowData::Init(const Dataset* train_data, TrainingShareStates* train_sha
          &cuda_row_ptr_uint64_t_,
          &cuda_partition_ptr_uint64_t_);
      } else {
-        Log::Fatal("Unknow data ptr bit type %d", row_ptr_bit_type_);
+        Log::Fatal("Unknown data ptr bit type %d", row_ptr_bit_type_);
      }
    }
  } else {
-    Log::Fatal("Unknow bit type = %d", bit_type_);
+    Log::Fatal("Unknown bit type = %d", bit_type_);
  }
  SynchronizeCUDADevice(__FILE__, __LINE__);
 }

--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1730,12 +1730,13 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
    y_col_array = y.reshape(-1, 1)
    params = {"n_estimators": 1, "num_leaves": 3, "random_state": 0}
    model_factory = task_to_model_factory[task]
-    with pytest.warns(UserWarning, match="column-vector"):
-        if task == "ranking":
-            model_1d = model_factory(**params).fit(X, y, group=g)
+    if task == "ranking":
+        model_1d = model_factory(**params).fit(X, y, group=g)
+        with pytest.warns(UserWarning, match="column-vector"):
            model_2d = model_factory(**params).fit(X, y_col_array, group=g)
-        else:
-            model_1d = model_factory(**params).fit(X, y)
+    else:
+        model_1d = model_factory(**params).fit(X, y)
+        with pytest.warns(UserWarning, match="column-vector"):
            model_2d = model_factory(**params).fit(X, y_col_array)

    preds_1d = model_1d.predict(X)