Commit 50e99bd7 authored by Herbie Bradley

Merge remote-tracking branch 'origin/big-refactor' into calibration

parents 3d4c4cd6 a3252ed7
@@ -10,7 +10,7 @@ try:
 except ModuleNotFoundError:
     raise Exception(
         "`pycountry` is required for generating translation task prompt templates. \
-please install pycountry via pip install lm-eval[multilingua] or pip install -e .[multilingual]",
+please install pycountry via pip install lm-eval[multilingual] or pip install -e .[multilingual]",
     )
...
@@ -16,7 +16,6 @@ import gc
 import torch
 import transformers
-from omegaconf import OmegaConf
 from jinja2 import BaseLoader, Environment, StrictUndefined
 from itertools import islice
@@ -55,8 +54,8 @@ def simple_parse_args_string(args_string):
     args_string = args_string.strip()
     if not args_string:
         return {}
-    arg_list = args_string.split(",")
-    args_dict = OmegaConf.to_object(OmegaConf.from_dotlist(arg_list))
+    arg_list = [arg for arg in args_string.split(",") if arg]
+    args_dict = {k: v for k, v in [arg.split("=") for arg in arg_list]}
     return args_dict
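The hand-rolled parser above removes utils.py's OmegaConf dependency. One behavioral difference worth knowing: OmegaConf.from_dotlist applied YAML typing (so "a=1" came back as an int), while the dict comprehension keeps every value a string and leaves conversion to the caller. A self-contained sketch of the new behavior; the model-args string is an illustrative example, not taken from this commit:

def simple_parse_args_string(args_string):
    # "a=1,b=two" -> {"a": "1", "b": "two"}; values stay strings
    args_string = args_string.strip()
    if not args_string:
        return {}
    arg_list = [arg for arg in args_string.split(",") if arg]
    return {k: v for k, v in [arg.split("=") for arg in arg_list]}

print(simple_parse_args_string("pretrained=EleutherAI/pythia-160m,dtype=float32"))
# {'pretrained': 'EleutherAI/pythia-160m', 'dtype': 'float32'}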
@@ -267,9 +266,9 @@ def make_table(result_dict, column: str = "results"):
     from pytablewriter import MarkdownTableWriter, LatexTableWriter

     if column == "results":
-        column_name = "Task"
-    elif column == "aggregate":
-        column_name = "Benchmark"
+        column_name = "Tasks"
+    elif column == "groups":
+        column_name = "Groups"
     md_writer = MarkdownTableWriter()
     latex_writer = LatexTableWriter()
@@ -395,8 +394,10 @@ def import_function(loader, node):
     function_name = loader.construct_scalar(node)
     yaml_path = os.path.dirname(loader.name)
-    module_name, function_name = function_name.split(".")
-    module_path = os.path.join(yaml_path, "{}.py".format(module_name))
+    *module_name, function_name = function_name.split(".")
+    if type(module_name) == list:
+        module_name = ".".join(module_name)
+    module_path = os.path.normpath(os.path.join(yaml_path, "{}.py".format(module_name)))
     spec = importlib.util.spec_from_file_location(module_name, module_path)
     module = importlib.util.module_from_spec(spec)
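The star-unpack above lets a YAML !function tag point into a nested module: everything before the last dot becomes the module, and normpath keeps the computed file path clean. (After star-unpacking, module_name is always a list, so the type check is effectively always true.) A sketch of the resolution, with a hypothetical tag value and YAML location:

import os

yaml_path = "/repo/lm_eval/tasks/arc"  # hypothetical directory of the loading YAML
tag_value = "utils.process_docs"       # hypothetical !function tag value

*module_parts, function_name = tag_value.split(".")
module_name = ".".join(module_parts)   # ["utils"] -> "utils"
module_path = os.path.normpath(os.path.join(yaml_path, "{}.py".format(module_name)))
print(module_path)    # /repo/lm_eval/tasks/arc/utils.py
print(function_name)  # process_docs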
@@ -430,8 +431,7 @@ def load_yaml_config(yaml_path):
         # If not found, assume the included yaml
         # is in the same dir as the original yaml
         if not os.path.isfile(path):
-            path = os.path.join(yaml_dir, path)
+            path = os.path.normpath(os.path.join(yaml_dir, path))
         try:
             included_yaml_config = load_yaml_config(path)
             final_yaml_config.update(included_yaml_config)
...
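The normpath addition matters when an included YAML path climbs out of the including file's directory; without it the joined path keeps literal ".." segments. For example:

import os
p = os.path.join("/repo/tasks/arc", "../common/base.yaml")
print(p)                    # /repo/tasks/arc/../common/base.yaml
print(os.path.normpath(p))  # /repo/tasks/common/base.yaml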
@@ -11,7 +11,6 @@ from lm_eval import evaluator, utils
 from lm_eval.api.registry import ALL_TASKS
 from lm_eval.logger import eval_logger, SPACING
 from lm_eval.tasks import include_task_folder
-from lm_eval.benchmarks import include_benchmarks

 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -209,8 +208,8 @@ def main() -> None:
         f"batch_size: {args.batch_size}{f' ({batch_sizes})' if batch_sizes else ''}"
     )
     print(evaluator.make_table(results))
-    if "aggregate" in results:
-        print(evaluator.make_table(results, "aggregate"))
+    if "groups" in results:
+        print(evaluator.make_table(results, "groups"))

 if __name__ == "__main__":
...
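The rename from "aggregate" to "groups" tracks the make_table change above: per-task rows print under "Tasks", and group-level aggregates, when present, print under "Groups". Inferring the shape from this diff alone, the keys and metrics below are illustrative, not the harness's exact schema:

# `evaluator` as imported at the top of main.py
results = {
    "results": {"arc_easy": {"acc": 0.62}},  # per-task metrics -> "Tasks" table
    "groups": {"ai2_arc": {"acc": 0.58}},    # group aggregates -> "Groups" table
}
print(evaluator.make_table(results))
if "groups" in results:  # only present when group aggregation ran
    print(evaluator.make_table(results, "groups"))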
 [mypy]
-python_version = 3.9
+python_version = 3.8
 show_traceback = True
 check_untyped_defs = True
 no_implicit_reexport = True
...
 [build-system]
 requires = ["setuptools>=40.8.0", "wheel"]
 build-backend = "setuptools.build_meta"
+
+[project]
+name = "lm_eval"
+version = "1.0.0"
+authors = [
+    {name="EleutherAI", email="contact@eleuther.ai"}
+]
+description = "A framework for evaluating language models"
+readme = "README.md"
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+requires-python = ">=3.8"
+license = { "text" = "MIT" }
+dependencies = [
+    "accelerate>=0.21.0",
+    "evaluate",
+    "datasets>=2.0.0",
+    "evaluate>=0.4.0",
+    "jsonlines",
+    "numexpr",
+    "peft>=0.2.0",
+    "pybind11>=2.6.2",
+    "pytablewriter",
+    "rouge-score>=0.0.4",
+    "sacrebleu>=1.5.0",
+    "scikit-learn>=0.24.1",
+    "sqlitedict",
+    "torch>=1.8",
+    "tqdm-multiprocess",
+    "transformers>=4.1",
+    "zstandard",
+]
+
+[tool.setuptools]
+packages = ["lm_eval"]
+
+# required to include yaml files in pip installation
+[tool.setuptools.package-data]
+lm_eval = ["**/*.yaml", "tasks/**/*"]
+examples = ["**/*.yaml"]
+
+[project.scripts]
+lm-eval = "main:main"
+lm_eval = "main:main"
+
+[project.urls]
+Homepage = "https://github.com/EleutherAI/lm-evaluation-harness"
+Repository = "https://github.com/EleutherAI/lm-evaluation-harness"
+
+[project.optional-dependencies]
+dev = ["black", "flake8", "pre-commit", "pytest", "pytest-cov"]
+linting = [
+    "flake8",
+    "pylint",
+    "mypy",
+    "pre-commit",
+]
+testing = ["pytest", "pytest-cov", "pytest-xdist"]
+multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
+sentencepiece = ["sentencepiece>=0.1.98", "protobuf>=4.22.1"]
+promptsource = [
+    "promptsource @ git+https://github.com/bigscience-workshop/promptsource.git#egg=promptsource"
+]
+gptq = ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"]
+anthropic = ["anthropic"]
+openai = ["openai", "tiktoken"]
+all = [
+    "lm_eval[dev]",
+    "lm_eval[testing]",
+    "lm_eval[linting]",
+    "lm_eval[multilingual]",
+    "lm_eval[sentencepiece]",
+    "lm_eval[promptsource]",
+    "lm_eval[gptq]",
+    "lm_eval[anthropic]",
+    "lm_eval[openai]"
+]
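With the metadata now in PEP 621 form, installation is unchanged from the user's side, for example:

pip install -e ".[dev]"            # editable checkout with dev extras
pip install lm-eval[multilingual]  # matches the hint in the pycountry error above

The self-referential lm_eval[...] entries under the all extra pull in every other extra in one command. Note also that "evaluate" is listed twice in dependencies (bare and as evaluate>=0.4.0); pip tolerates the duplicate, and the stricter pin wins.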
@@ -38,17 +38,21 @@ def main():
     iters = []

     for set in args.sets.split(","):
+        docs = None
         if set == "train" and task.has_training_docs():
             docs = task.training_docs()
         if set == "val" and task.has_validation_docs():
             docs = task.validation_docs()
         if set == "test" and task.has_test_docs():
             docs = task.test_docs()
-        iters.append(docs)
+        if docs is not None:
+            iters.append(docs)

     docs = join_iters(iters)

-    with open(os.path.join(args.output_base_path, task_name), "w") as f:
+    with open(
+        os.path.join(args.output_base_path, task_name), "w", encoding="utf8"
+    ) as f:
         for i, doc in (
             zip(range(args.num_examples), docs)
             if args.num_examples > 0
...
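The docs = None reset plus the is not None guard closes two latent bugs in this loop: a NameError when the first requested split doesn't exist for a task, and silently re-appending the previous split's docs when a later one is missing. A stripped-down repro of the stale-value case, with toy data rather than harness objects:

iters = []
for s in ["train", "val"]:
    # without `docs = None` at the top of each iteration:
    if s == "train":
        docs = ["doc-a"]  # pretend only the train split exists
    iters.append(docs)    # the "val" pass re-appends the train docs
print(iters)              # [['doc-a'], ['doc-a']] -- duplicated split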
 import setuptools
-import itertools
-
-with open("README.md", "r", encoding="utf-8") as fh:
-    long_description = fh.read()
-
-extras_require = {
-    "dev": ["black", "flake8", "pre-commit", "pytest", "pytest-cov"],
-    "linting": [
-        "flake8",
-        "pylint",
-        "mypy",
-        "pre-commit",
-    ],
-    "testing": ["pytest", "pytest-cov", "pytest-xdist"],
-    "multilingual": ["nagisa>=0.2.7", "jieba>=0.42.1"],
-    "sentencepiece": ["sentencepiece>=0.1.98", "protobuf>=4.22.1", "pycountry"],
-    "promptsource": [
-        "promptsource @ git+https://github.com/bigscience-workshop/promptsource.git#egg=promptsource"
-    ],
-    "gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
-    "anthropic": ["anthropic"],
-    "openai": ["openai", "tiktoken"],
-}
-extras_require["all"] = list(itertools.chain.from_iterable(extras_require.values()))
-
-setuptools.setup(
-    name="lm_eval",
-    version="1.0.0",
-    author="EleutherAI",
-    author_email="contact@eleuther.ai",
-    description="A framework for evaluating language models",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://github.com/EleutherAI/lm-evaluation-harness",
-    packages=setuptools.find_packages(),
-    # required to include yaml files in pip installation
-    package_data={
-        "lm_eval": ["**/*.yaml", "tasks/**/*"],
-        "examples": ["**/*.yaml"],
-    },
-    entry_points={
-        "console_scripts": ["lm-eval = main:main", "lm_eval = main:main"],
-    },
-    include_package_data=True,
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-    ],
-    python_requires=">=3.9",
-    install_requires=[
-        "accelerate>=0.21.0",
-        "evaluate",
-        "datasets>=2.0.0",
-        "evaluate>=0.4.0",
-        "jsonlines",
-        "numexpr",
-        "omegaconf>=2.2",
-        "peft>=0.2.0",
-        "pybind11>=2.6.2",
-        "pytablewriter",
-        "rouge-score>=0.0.4",
-        "sacrebleu>=1.5.0",
-        "scikit-learn>=0.24.1",
-        "sqlitedict",
-        "torch>=1.8",
-        "tqdm-multiprocess",
-        "transformers>=4.1",
-        "zstandard",
-    ],
-    extras_require=extras_require,
-)
+
+# This is to make sure that the package supports editable installs
+setuptools.setup()
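setup.py is now a shim: with setuptools 61 or newer, a bare setuptools.setup() reads all metadata from pyproject.toml, and the file survives only so that pip install -e . keeps working on toolchains that still route editable installs through setup.py. One dependency is deliberately dropped in the move: omegaconf, which the simple_parse_args_string rewrite above made unnecessary.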
@@ -7,6 +7,7 @@ import lm_eval.tasks as tasks
 # import lm_eval.models as models
 import lm_eval.api as api
 import lm_eval.evaluator as evaluator
+from typing import List
 import random
 import pytest
@@ -26,7 +27,7 @@ import pytest
         )
     ],
 )
-def test_evaluator(task_name: list[str], limit: int, model: str, model_args: str):
+def test_evaluator(task_name: List[str], limit: int, model: str, model_args: str):
     task_name = task_name
     limit = 10
...
@@ -9,6 +9,7 @@ import os
 # This is the path where the output for the changed files for the tasks folder is stored
 # FILE_PATH = file_path = ".github/outputs/tasks_all_changed_and_modified_files.txt"
+
 # reads a text file and returns a list of words
 # used to read the output of the changed txt from tj-actions/changed-files
 def load_changed_files(file_path: str) -> List[str]:
@@ -32,7 +33,7 @@ def parser(full_path: List[str]) -> List[str]:
     return list(_output)

-def new_tasks() -> Union[list[str], None]:
+def new_tasks() -> Union[List[str], None]:
     FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
     if os.path.exists(FILENAME):
         # If tasks folder has changed then we get the list of files from FILENAME
...
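Both test-side changes swap PEP 585 built-in generics (list[str]) for typing.List; together with the mypy python_version = 3.8 change above, this restores Python 3.8 compatibility, since subscripting bare list only works on 3.9+. A short illustration, with hypothetical function names:

from typing import List, Union

def ok_on_38() -> Union[List[str], None]:  # valid on Python 3.8+
    return None

# def breaks_on_38() -> Union[list[str], None]: ...
# On Python 3.8 this def raises: TypeError: 'type' object is not subscriptable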