unpin datasets; update pre-commit (#3316)

* update pre-commit
* unpin datasets

unpin datasets; update pre-commit (#3316)
* update pre-commit
* unpin datasets
705bedd0 · Baber Abbasi · GitHub · a1404f06 · 705bedd0 · 705bedd0
Unverified commit 705bedd0, authored Oct 02, 2025 by Baber Abbasi; committed by GitHub on Oct 03, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 23 additions and 22 deletions

.pre-commit-config.yaml .pre-commit-config.yaml +13 -16

lm_eval/__main__.py lm_eval/__main__.py +4 -0

pyproject.toml pyproject.toml +6 -6

No files found.
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,18 +2,18 @@
 exclude: ^tests/testdata/
 repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
+    rev: v6.0.0
    hooks:
      - id: check-added-large-files
      - id: check-ast
-      - id: check-byte-order-marker
+      - id: fix-byte-order-marker
      - id: check-case-conflict
      - id: check-json
      - id: check-merge-conflict
-        args: [--assume-in-merge]
+        args: [ --assume-in-merge ]
      - id: check-symlinks
      - id: check-yaml
-        args: ["--unsafe"]
+        args: [ "--unsafe" ]
      - id: destroyed-symlinks
      - id: detect-private-key
      - id: end-of-file-fixer
@@ -21,21 +21,18 @@ repos:
        always_run: false
      - id: requirements-txt-fixer
      - id: trailing-whitespace
-        args: [--markdown-linebreak-ext=md]
+        args: [ --markdown-linebreak-ext=md ]
      - id: fix-byte-order-marker
        exclude: docs/CNAME
-      - id: fix-encoding-pragma
-        args: [--remove]
      - id: mixed-line-ending
-        args: [--fix=lf]
+        args: [ --fix=lf ]
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.12.2
+    rev: v0.13.2
    hooks:
      # Run the linter.
-      - id: ruff
+      - id: ruff-check
-        args:
+        args: [ --fix ]
-          - --fix
+      # Run the formatter.
-          # Run the formatter.
      - id: ruff-format
  - repo: https://github.com/codespell-project/codespell
    rev: v2.4.1
@@ -47,10 +44,10 @@ repos:
              .*\.json|ignore.txt|lm_eval/tasks/.*|.*yaml|.*\.ipynb
          )$
-        args: [--check-filenames, --check-hidden, --ignore-words=ignore.txt]
+        args: [ --check-filenames, --check-hidden, --ignore-words=ignore.txt ]
  - repo: https://github.com/jackdewinter/pymarkdown
-    rev: v0.9.30
+    rev: v0.9.32
    hooks:
      - id: pymarkdown
        exclude: ^(lm_eval/tasks/.*|docs/footguns\.md)$
-        args: [fix, -r]
+        args: [ fix, -r ]
--- a/lm_eval/__main__.py
+++ b/lm_eval/__main__.py
@@ -437,6 +437,10 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
        if vparse(datasets.__version__) < vparse("4.0.0"):
            datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
+        else:
+            eval_logger.warning(
+                "trust_remote_code and datasets scripts are no longer supported on datasets>=4.0.0. Skipping. If your task still requires this, please downgrade to datasets==3.6.0 or earlier."
+            )
        if isinstance(args.model_args, dict):
            args.model_args["trust_remote_code"] = True

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "lm_eval"
 version = "0.4.9.1"
 authors = [
-    {name="EleutherAI", email="contact@eleuther.ai"}
+    { name = "EleutherAI", email = "contact@eleuther.ai" }
 ]
 description = "A framework for evaluating language models"
 readme = "README.md"
@@ -21,7 +21,7 @@ license = { "text" = "MIT" }
 dependencies = [
    "accelerate>=0.26.0",
    "evaluate",
-    "datasets>=2.16.0,<4.0",
+    "datasets>=2.16.0",
    "evaluate>=0.4.0",
    "jsonlines",
    "numexpr",
@@ -68,8 +68,8 @@ ibm_watsonx_ai = ["ibm_watsonx_ai>=1.1.22", "python-dotenv"]
 ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
 ipex = ["optimum"]
 japanese_leaderboard = ["emoji==2.14.0", "neologdn==0.5.3", "fugashi[unidic-lite]", "rouge_score>=0.1.2"]
-longbench=["jieba", "fuzzywuzzy", "rouge"]
+longbench = ["jieba", "fuzzywuzzy", "rouge"]
-libra=["pymorphy2"]
+libra = ["pymorphy2"]
 mamba = ["mamba_ssm", "causal-conv1d==1.0.2", "torch"]
 math = ["sympy>=1.12", "antlr4-python3-runtime==4.11", "math_verify[antlr4_11_0]"]
 multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
@@ -115,10 +115,10 @@ lines-after-imports = 2
 known-first-party = ["lm_eval"]
 [tool.ruff.lint.extend-per-file-ignores]
-"__init__.py" = ["F401","F402","F403"]
+"__init__.py" = ["F401", "F402", "F403"]
 "utils.py" = ["F401"]
 [dependency-groups]
 dev = [
-  "api","dev","sentencepiece"
+    "api", "dev", "sentencepiece"
 ]