# Packaging metadata and tool configuration for the lm_eval distribution.

[build-system]
requires = ["setuptools>=40.8.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "lm_eval"
version = "0.4.9"
authors = [
    { name = "EleutherAI", email = "contact@eleuther.ai" },
]
description = "A framework for evaluating language models"
readme = "README.md"
classifiers = [
    "Development Status :: 3 - Alpha",
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
requires-python = ">=3.9"
license = { text = "MIT" }
# Runtime requirements installed with the base package; each distribution is
# listed exactly once.
dependencies = [
    "accelerate>=0.26.0",
    "datasets>=2.16.0",
    "evaluate>=0.4.0",
    "jsonlines",
    "numexpr",
    "peft>=0.2.0",
    "pybind11>=2.6.2",
    "pytablewriter",
    "rouge-score>=0.0.4",
    "sacrebleu>=1.5.0",
    "scikit-learn>=0.24.1",
    "sqlitedict",
    "torch>=1.8",
    "tqdm-multiprocess",
    "transformers>=4.1",
    "zstandard",
    "dill",
    "word2number",
    "more_itertools",
]

# Both spellings of the command resolve to the same entry point.
[project.scripts]
lm-eval = "lm_eval.__main__:cli_evaluate"
lm_eval = "lm_eval.__main__:cli_evaluate"

[project.urls]
Homepage = "https://github.com/EleutherAI/lm-evaluation-harness"
Repository = "https://github.com/EleutherAI/lm-evaluation-harness"

# Extras, selectable as `lm_eval[<name>]`.
[project.optional-dependencies]
acpbench = ["lark>=1.1.9", "tarski[clingo]==0.8.2", "pddl==0.4.2", "kstar-planner==1.4.2"]
api = ["requests", "aiohttp", "tenacity", "tqdm", "tiktoken"]
audiolm_qwen = ["librosa", "soundfile"]
dev = [
    "pytest",
    "pytest-cov",
    "pytest-xdist",
    "pre-commit",
    "requests",
    "aiohttp",
    "tenacity",
    "tqdm",
    "tiktoken",
    "sentencepiece",
]
gptq = ["auto-gptq[triton]>=0.6.0"]
gptqmodel = ["gptqmodel>=1.0.9"]
hf_transfer = ["hf_transfer"]
ibm_watsonx_ai = ["ibm_watsonx_ai>=1.1.22", "python-dotenv"]
ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
ipex = ["optimum"]
japanese_leaderboard = ["emoji==2.14.0", "neologdn==0.5.3", "fugashi[unidic-lite]", "rouge_score>=0.1.2"]
longbench = ["jieba", "fuzzywuzzy", "rouge"]
mamba = ["mamba_ssm", "causal-conv1d==1.0.2", "torch"]
math = ["sympy>=1.12", "antlr4-python3-runtime==4.11", "math_verify[antlr4_11_0]"]
multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
neuronx = ["optimum[neuronx]"]
optimum = ["optimum[openvino]"]
promptsource = ["promptsource>=0.2.3"]
ruler = ["nltk", "wonderwords", "scipy"]
sae_lens = ["sae_lens"]
sentencepiece = ["sentencepiece>=0.1.98"]
sparsify = ["sparsify"]
testing = ["pytest", "pytest-cov", "pytest-xdist"]
unitxt = ["unitxt==1.22.0"]
vllm = ["vllm>=0.4.2"]
wandb = ["wandb>=0.16.3", "pandas", "numpy"]
zeno = ["pandas", "zeno-client"]
# Aggregate extra: each entry is a self-reference to one of the extras above.
tasks = [
    "lm_eval[acpbench]",
    "lm_eval[ifeval]",
    "lm_eval[japanese_leaderboard]",
    "lm_eval[longbench]",
    "lm_eval[math]",
    "lm_eval[multilingual]",
    "lm_eval[ruler]",
]

# Dependency-group entries are requirement strings, not extra names, so the
# contents of the `api`, `dev` and `sentencepiece` extras are spelled out here.
# Keep this list in sync with those extras.
[dependency-groups]
dev = [
    "pytest",
    "pytest-cov",
    "pytest-xdist",
    "pre-commit",
    "requests",
    "aiohttp",
    "tenacity",
    "tqdm",
    "tiktoken",
    "sentencepiece>=0.1.98",
]

[tool.setuptools.packages.find]
include = ["lm_eval*"]

# required to include yaml files in pip installation
[tool.setuptools.package-data]
lm_eval = ["**/*.yaml", "tasks/**/*"]

[tool.pymarkdown]
plugins.md013.enabled = false  # line-length
plugins.md024.allow_different_nesting = true  # no-duplicate-headers
plugins.md025.enabled = false  # single-header
plugins.md028.enabled = false  # no-blanks-blockquote
plugins.md029.allow_extended_start_values = true  # ol-prefix
plugins.md034.enabled = false  # no-bare-urls

[tool.ruff.lint]
extend-select = ["I"]

[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ["lm_eval"]

[tool.ruff.lint.extend-per-file-ignores]
"__init__.py" = ["F401", "F402", "F403"]
"utils.py" = ["F401"]