Commit 50e99bd7 authored by Herbie Bradley

Merge remote-tracking branch 'origin/big-refactor' into calibration

parents 3d4c4cd6 a3252ed7
......@@ -10,7 +10,7 @@ try:
except ModuleNotFoundError:
raise Exception(
"`pycountry` is required for generating translation task prompt templates. \
please install pycountry via pip install lm-eval[multilingua] or pip install -e .[multilingual]",
please install pycountry via pip install lm-eval[multilingual] or pip install -e .[multilingual]",
)
......
......@@ -16,7 +16,6 @@ import gc
import torch
import transformers
from omegaconf import OmegaConf
from jinja2 import BaseLoader, Environment, StrictUndefined
from itertools import islice
......@@ -55,8 +54,8 @@ def simple_parse_args_string(args_string):
args_string = args_string.strip()
if not args_string:
return {}
arg_list = args_string.split(",")
args_dict = OmegaConf.to_object(OmegaConf.from_dotlist(arg_list))
arg_list = [arg for arg in args_string.split(",") if arg]
args_dict = {k: v for k, v in [arg.split("=") for arg in arg_list]}
return args_dict
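For reference, a minimal sketch of how the rewritten parser behaves (the argument names below are purely illustrative; unlike the old OmegaConf dotlist version, every value now stays a plain string):
# Sketch only: mirrors the new simple_parse_args_string logic shown above.
args_string = "pretrained=gpt2,dtype=float32"  # hypothetical example input
arg_list = [arg for arg in args_string.split(",") if arg]
args_dict = {k: v for k, v in [arg.split("=") for arg in arg_list]}
# args_dict == {"pretrained": "gpt2", "dtype": "float32"}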
......@@ -267,9 +266,9 @@ def make_table(result_dict, column: str = "results"):
from pytablewriter import MarkdownTableWriter, LatexTableWriter
if column == "results":
column_name = "Task"
elif column == "aggregate":
column_name = "Benchmark"
column_name = "Tasks"
elif column == "groups":
column_name = "Groups"
md_writer = MarkdownTableWriter()
latex_writer = LatexTableWriter()
......@@ -395,8 +394,10 @@ def import_function(loader, node):
function_name = loader.construct_scalar(node)
yaml_path = os.path.dirname(loader.name)
module_name, function_name = function_name.split(".")
module_path = os.path.join(yaml_path, "{}.py".format(module_name))
*module_name, function_name = function_name.split(".")
if type(module_name) == list:
module_name = ".".join(module_name)
module_path = os.path.normpath(os.path.join(yaml_path, "{}.py".format(module_name)))
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
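The starred unpack above always leaves module_name as a list (so the list check is effectively a formality), which lets the YAML !function tag carry a dotted name. A small sketch with a hypothetical tag value:
# Sketch only: what the new unpack does for a hypothetical `!function utils.process_docs` tag.
*module_name, function_name = "utils.process_docs".split(".")
# module_name == ["utils"] (always a list), function_name == "process_docs"
module_name = ".".join(module_name)  # -> "utils"
# import_function then looks for utils.py next to the YAML file; os.path.normpath
# simply cleans up any "." or ".." segments in that module path.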
......@@ -430,8 +431,7 @@ def load_yaml_config(yaml_path):
# If not found, assume the included yaml
# is in the same dir as the original yaml
if not os.path.isfile(path):
path = os.path.join(yaml_dir, path)
path = os.path.normpath(os.path.join(yaml_dir, path))
try:
included_yaml_config = load_yaml_config(path)
final_yaml_config.update(included_yaml_config)
......
......@@ -11,7 +11,6 @@ from lm_eval import evaluator, utils
from lm_eval.api.registry import ALL_TASKS
from lm_eval.logger import eval_logger, SPACING
from lm_eval.tasks import include_task_folder
from lm_eval.benchmarks import include_benchmarks
os.environ["TOKENIZERS_PARALLELISM"] = "false"
......@@ -209,8 +208,8 @@ def main() -> None:
f"batch_size: {args.batch_size}{f' ({batch_sizes})' if batch_sizes else ''}"
)
print(evaluator.make_table(results))
if "aggregate" in results:
print(evaluator.make_table(results, "aggregate"))
if "groups" in results:
print(evaluator.make_table(results, "groups"))
if __name__ == "__main__":
......
[mypy]
python_version = 3.9
python_version = 3.8
show_traceback = True
check_untyped_defs = True
no_implicit_reexport = True
......
[build-system]
requires = ["setuptools>=40.8.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "lm_eval"
version = "1.0.0"
authors = [
{name="EleutherAI", email="contact@eleuther.ai"}
]
description = "A framework for evaluating language models"
readme = "README.md"
classifiers = [
"Development Status :: 3 - Alpha",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
requires-python = ">=3.8"
license = { "text" = "MIT" }
dependencies = [
"accelerate>=0.21.0",
"evaluate",
"datasets>=2.0.0",
"evaluate>=0.4.0",
"jsonlines",
"numexpr",
"peft>=0.2.0",
"pybind11>=2.6.2",
"pytablewriter",
"rouge-score>=0.0.4",
"sacrebleu>=1.5.0",
"scikit-learn>=0.24.1",
"sqlitedict",
"torch>=1.8",
"tqdm-multiprocess",
"transformers>=4.1",
"zstandard",
]
[tool.setuptools]
packages = ["lm_eval"]
# required to include yaml files in pip installation
[tool.setuptools.package-data]
lm_eval = ["**/*.yaml", "tasks/**/*"]
examples = ["**/*.yaml"]
[project.scripts]
lm-eval = "main:main"
lm_eval = "main:main"
[project.urls]
Homepage = "https://github.com/EleutherAI/lm-evaluation-harness"
Repository = "https://github.com/EleutherAI/lm-evaluation-harness"
[project.optional-dependencies]
dev = ["black", "flake8", "pre-commit", "pytest", "pytest-cov"]
linting = [
"flake8",
"pylint",
"mypy",
"pre-commit",
]
testing = ["pytest", "pytest-cov", "pytest-xdist"]
multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
sentencepiece = ["sentencepiece>=0.1.98", "protobuf>=4.22.1"]
promptsource = [
"promptsource @ git+https://github.com/bigscience-workshop/promptsource.git#egg=promptsource"
]
gptq = ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"]
anthropic = ["anthropic"]
openai = ["openai", "tiktoken"]
all = [
"lm_eval[dev]",
"lm_eval[testing]",
"lm_eval[linting]",
"lm_eval[multilingual]",
"lm_eval[sentencepiece]",
"lm_eval[promptsource]",
"lm_eval[gptq]",
"lm_eval[anthropic]",
"lm_eval[openai]"
]
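The [project.scripts] table above registers two console commands that both resolve to main:main, so either command is roughly equivalent to the sketch below (assuming the repository's top-level main.py is importable); the optional-dependency groups are what the earlier error message points at with pip install -e .[multilingual].
# Rough equivalent of the `lm-eval` / `lm_eval` console scripts (sketch only):
from main import main  # top-level main.py of the harness

if __name__ == "__main__":
    main()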
......@@ -38,17 +38,21 @@ def main():
iters = []
for set in args.sets.split(","):
docs = None
if set == "train" and task.has_training_docs():
docs = task.training_docs()
if set == "val" and task.has_validation_docs():
docs = task.validation_docs()
if set == "test" and task.has_test_docs():
docs = task.test_docs()
iters.append(docs)
if docs is not None:
iters.append(docs)
docs = join_iters(iters)
with open(os.path.join(args.output_base_path, task_name), "w") as f:
with open(
os.path.join(args.output_base_path, task_name), "w", encoding="utf8"
) as f:
for i, doc in (
zip(range(args.num_examples), docs)
if args.num_examples > 0
......
import setuptools
import itertools
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
extras_require = {
"dev": ["black", "flake8", "pre-commit", "pytest", "pytest-cov"],
"linting": [
"flake8",
"pylint",
"mypy",
"pre-commit",
],
"testing": ["pytest", "pytest-cov", "pytest-xdist"],
"multilingual": ["nagisa>=0.2.7", "jieba>=0.42.1"],
"sentencepiece": ["sentencepiece>=0.1.98", "protobuf>=4.22.1", "pycountry"],
"promptsource": [
"promptsource @ git+https://github.com/bigscience-workshop/promptsource.git#egg=promptsource"
],
"gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
"anthropic": ["anthropic"],
"openai": ["openai", "tiktoken"],
}
extras_require["all"] = list(itertools.chain.from_iterable(extras_require.values()))
setuptools.setup(
name="lm_eval",
version="1.0.0",
author="EleutherAI",
author_email="contact@eleuther.ai",
description="A framework for evaluating language models",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/EleutherAI/lm-evaluation-harness",
packages=setuptools.find_packages(),
# required to include yaml files in pip installation
package_data={
"lm_eval": ["**/*.yaml", "tasks/**/*"],
"examples": ["**/*.yaml"],
},
entry_points={
"console_scripts": ["lm-eval = main:main", "lm_eval = main:main"],
},
include_package_data=True,
classifiers=[
"Development Status :: 3 - Alpha",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
python_requires=">=3.9",
install_requires=[
"accelerate>=0.21.0",
"evaluate",
"datasets>=2.0.0",
"evaluate>=0.4.0",
"jsonlines",
"numexpr",
"omegaconf>=2.2",
"peft>=0.2.0",
"pybind11>=2.6.2",
"pytablewriter",
"rouge-score>=0.0.4",
"sacrebleu>=1.5.0",
"scikit-learn>=0.24.1",
"sqlitedict",
"torch>=1.8",
"tqdm-multiprocess",
"transformers>=4.1",
"zstandard",
],
extras_require=extras_require,
)
# This is to make sure that the package supports editable installs
setuptools.setup()
......@@ -7,6 +7,7 @@ import lm_eval.tasks as tasks
# import lm_eval.models as models
import lm_eval.api as api
import lm_eval.evaluator as evaluator
from typing import List
import random
import pytest
......@@ -26,7 +27,7 @@ import pytest
)
],
)
def test_evaluator(task_name: list[str], limit: int, model: str, model_args: str):
def test_evaluator(task_name: List[str], limit: int, model: str, model_args: str):
task_name = task_name
limit = 10
......
......@@ -9,6 +9,7 @@ import os
# This is the path where the output for the changed files for the tasks folder is stored
# FILE_PATH = file_path = ".github/outputs/tasks_all_changed_and_modified_files.txt"
# reads a text file and returns a list of words
# used to read the output of the changed txt from tj-actions/changed-files
def load_changed_files(file_path: str) -> List[str]:
......@@ -32,7 +33,7 @@ def parser(full_path: List[str]) -> List[str]:
return list(_output)
def new_tasks() -> Union[list[str], None]:
def new_tasks() -> Union[List[str], None]:
FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
if os.path.exists(FILENAME):
# If tasks folder has changed then we get the list of files from FILENAME
......
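Both test files switch from the built-in generic list[str] to typing.List[str], consistent with the mypy python_version dropping from 3.9 to 3.8 earlier in this commit: subscripting list in an annotation that is evaluated at runtime only works on Python 3.9+. A minimal sketch:
# Sketch only: why typing.List is needed when targeting Python 3.8
# (assuming annotations are evaluated at definition time, i.e. no
# `from __future__ import annotations`).
from typing import List, Union

def new_tasks() -> Union[List[str], None]:  # fine on 3.8+
    ...

# def new_tasks() -> Union[list[str], None]:  # TypeError on 3.8:
#     'type' object is not subscriptable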