Commit 16bc6bc0 authored by haileyschoelkopf

merge conflicts

parents 3d7f777d 465c695b
@@ -28,7 +28,7 @@ def parse_args():
     parser.add_argument(
         "--num_fewshot",
         type=int,
-        default=0,
+        default=None,
         help="Number of examples in few-shot context",
     )
     parser.add_argument("--batch_size", type=int, default=1)  # TODO: only integers
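Moving the `--num_fewshot` default from `0` to `None` lets downstream code tell "flag not passed" apart from an explicitly requested zero-shot run. A minimal sketch of that distinction, assuming a hypothetical `resolve_num_fewshot` helper and a per-task default; neither name is the harness's actual API:

def resolve_num_fewshot(cli_value, task_default=0):
    # cli_value is args.num_fewshot; task_default stands in for an assumed
    # per-task setting. None means the user never passed --num_fewshot.
    if cli_value is None:
        return task_default  # fall back to the task's own default
    return cli_value  # honor an explicit value, including 0 (zero-shot)

assert resolve_num_fewshot(None, task_default=5) == 5
assert resolve_num_fewshot(0, task_default=5) == 0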
@@ -18,6 +18,9 @@ setuptools.setup(
         "lm_eval": ["**/*.yaml"],
         "examples": ["**/*.yaml"],
     },
+    entry_points={
+        "console_scripts": ["lm-eval = main:main", "lm_eval = main:main"],
+    },
     include_package_data=True,
     classifiers=[
         "Development Status :: 3 - Alpha",
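The new `entry_points` block makes setuptools generate two console commands, `lm-eval` and `lm_eval`, each of which imports the top-level `main` module and calls its `main` attribute. A minimal sketch of what `main:main` must resolve to (the real `main()` runs the full evaluation; this stub is illustrative only):

# main.py (stub): "main:main" means "import module main, call attribute main".
def main():
    print("running lm-eval")  # the real main() parses args and runs the evaluation

if __name__ == "__main__":
    main()

After `pip install -e .`, both spellings invoke the same function, so scripts written against either name keep working.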
def pytest_addoption(parser):
    parser.addoption(
        "--new_task",
        action="store_true",
        help="new_tasks_found",
    )
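With this hook registered in `conftest.py`, any fixture or test can read the flag through pytest's config object; the pre-refactor test file further down in this commit consumes it exactly this way:

import pytest

@pytest.fixture()
def any_new_tasks(request) -> bool:
    # True when the suite is invoked as `pytest --new_task`
    return request.config.getoption("--new_task")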
import pytest
from itertools import islice
import lm_eval.tasks as tasks
from .utilities_testing import load_changed_files, parser
from typing import List
from lm_eval.api.task import ConfigurableTask
import os


# GitHub CI
def new_tasks() -> List[str]:
    FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
    if os.path.exists(FILENAME):
        # If the tasks folder has changed, we get the list of files from FILENAME
        # and parse the yaml files to get the task names.
        return parser(load_changed_files(FILENAME))
    elif os.getenv("API") is not None:
        # If the API has changed, CI sets the env variable API and we run this
        # fixed set of tasks.
        return ["arc_easy", "hellaswag", "piqa", "wikitext"]
    else:
        # If neither is true, just run arc_easy.
        return ["arc_easy"]
def get_task_class() -> List[ConfigurableTask]:
    task_names = new_tasks()
    return [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_names]


@pytest.fixture()
def limit() -> int:
    return 10
# Tests
@pytest.mark.parametrize("task_class", get_task_class())
class TestNewTasks:
    def test_download(self, task_class: ConfigurableTask):
        task_class().download()
        assert task_class().dataset is not None

    def test_has_training_docs(self, task_class: ConfigurableTask):
        assert task_class().has_training_docs() in [True, False]

    def test_check_training_docs(self, task_class: ConfigurableTask):
        task = task_class()
        if task.has_training_docs():
            assert task._config["training_split"] is not None

    def test_has_validation_docs(self, task_class):
        assert task_class().has_validation_docs() in [True, False]

    def test_check_validation_docs(self, task_class):
        task = task_class()
        if task.has_validation_docs():
            assert task._config["validation_split"] is not None

    def test_has_test_docs(self, task_class):
        assert task_class().has_test_docs() in [True, False]

    def test_check_test_docs(self, task_class):
        task = task_class()
        if task.has_test_docs():
            assert task._config["test_split"] is not None

    def test_should_decontaminate(self, task_class):
        task = task_class()
        assert task.should_decontaminate() in [True, False]
        if task.should_decontaminate():
            assert task._config["doc_to_decontamination_query"] is not None

    def test_doc_to_text(self, task_class, limit):
        task = task_class()
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        _array = [task.doc_to_text(doc) for doc in arr]
        # space convention; allow txt to have length 0 for perplexity-like
        # tasks since the model tacks an <|endoftext|> on
        assert all(
            isinstance(x, str) and (x[-1] != " " if len(x) != 0 else True)
            for x in _array
        )

    def test_create_choices(self, task_class, limit):
        task = task_class()
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        if "multiple_choice" in task._config.group:
            _array = [task.doc_to_choice(doc) for doc in arr]
            # assert all(len(x) == 4 for x in _array)
            assert all(isinstance(x, list) for x in _array)
            assert all(isinstance(x[0], str) for x in _array)

    def test_doc_to_target(self, task_class, limit):
        task = task_class()
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        _array_target = [task.doc_to_target(doc) for doc in arr]
        assert all(isinstance(label, int) for label in _array_target)
        assert len(_array_target) == limit if limit else True
        # _array_text = [task.doc_to_text(doc) for doc in arr]
        # Not working:
        # assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))
    def test_build_all_requests(self, task_class, limit):
        # build on a single instance; asserting on the class object would not
        # see the attribute set by build_all_requests
        task = task_class()
        task.build_all_requests(rank=1, limit=limit, world_size=1)
        assert task.instances is not None
    def test_construct_requests(self, task_class, limit):
        task = task_class()
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        requests = [task.construct_requests(doc, task.doc_to_text(doc)) for doc in arr]
        assert all(isinstance(doc, list) for doc in requests)
        assert len(requests) == limit if limit else True
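The class-level `@pytest.mark.parametrize` in the file above runs every test method once per task class returned by `get_task_class()`. A self-contained illustration of that pattern (the names here are invented for the example):

import pytest

@pytest.mark.parametrize("value", [1, 2, 3])
class TestParametrizedClass:
    # each method in the class runs three times, once per parameter value
    def test_positive(self, value):
        assert value > 0

    def test_is_int(self, value):
        assert isinstance(value, int)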
import json
from typing import List
from lm_eval.utils import load_yaml_config
from pathlib import Path
import sys

# This is the path where the output for the changed files of the tasks folder is stored
# FILE_PATH = ".github/outputs/tasks_all_changed_and_modified_files.txt"


# Reads a text file and returns a list of words.
# Used to read the changed-files output written by tj-actions/changed-files.
def load_changed_files(file_path: str) -> List[str]:
    with open(file_path, "r") as f:
        content = f.read()
        words_list = content.split()
        sys.stdout.write(f"list of files: {words_list}")
        return words_list
# Checks the txt file for the list of changed files:
# if a file ends with .yaml, read the yaml for the task name;
# if a file ends with .py, parse its folder for all yaml files.
def parser(full_path: List[str]) -> List[str]:
    _output = set()
    for x in full_path:
        if x.endswith(".yaml"):
            _output.add(load_yaml_config(x)["task"])
        elif x.endswith(".py"):
            # collect every yaml config that lives next to the changed .py file
            yaml_paths = [str(p) for p in Path(x).parent.glob("*.yaml")]
            _output |= {load_yaml_config(p)["task"] for p in yaml_paths}
    return list(_output)
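Hypothetical usage of the two helpers, assuming CI wrote a space-separated file list and that each yaml carries a top-level "task" key (as the `load_yaml_config` calls above imply); the paths are illustrative:

changed = load_changed_files(
    ".github/outputs/tasks_all_changed_and_modified_files.txt"
)
# e.g. changed == ["lm_eval/tasks/arc/arc_easy.yaml", "lm_eval/tasks/arc/utils.py"]
print(parser(changed))  # -> ["arc_easy", ...]; the set removes duplicates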
-import pytest
 from itertools import islice
+import pytest
+from typing import List
 import lm_eval.tasks as tasks
-from tests.extra.test_utils import load_changed_files, parser
-from typing import List, ClassVar
-import os


+# Using fixtures to get the task class and limit
 @pytest.fixture()
-def any_new_tasks(request) -> bool:
-    return request.config.getoption("--new_task")
+def task_class() -> ConfigurableTask:
+    task_name = ["arc_easy"]
+    x = [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_name]
+    return x[0]


-# ["arc_easy"] else get the list of new tasks
-def new_tasks(any_new_tasks: bool) -> List[str]:
-    FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
-    if any_new_tasks and os.path.exists(FILENAME):
-        return [parser(load_changed_files(FILENAME))]
-    elif os.getenv("API") is not None:
-        return ["arc_easy", "hellaswag", "piqa", "wikitext"]
-    else:
-        return ["arc_easy"]
-
-
-@pytest.fixture(params=new_tasks(any_new_tasks))
-def task_class(request):
-    task_name = request.param
-    return [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_name][0]
+from lm_eval.api.task import ConfigurableTask


 @pytest.fixture()
-def limit(any_new_tasks: bool) -> int:
-    return 100 if any_new_tasks else 10
+def limit() -> int:
+    return 10
 # Tests
-def test_download(task_class):
+def test_download(task_class: ConfigurableTask):
     task_class().download()
     assert task_class().dataset is not None


-def test_has_training_docs(task_class):
+def test_has_training_docs(task_class: ConfigurableTask):
     assert task_class().has_training_docs() in [True, False]


-def test_check_training_docs(task_class):
+def test_check_training_docs(task_class: ConfigurableTask):
     task = task_class()
-    assert task.has_training_docs() if task._config["training_split"] else True
+    if task.has_training_docs():
+        assert task._config["training_split"] is not None


 def test_has_validation_docs(task_class):
-    assert task_class().has_training_docs() in [True, False]
+    assert task_class().has_validation_docs() in [True, False]


 def test_check_validation_docs(task_class):
     task = task_class()
-    assert (
-        task_class().has_training_docs() if task._config["validation_split"] else True
-    )
+    if task.has_validation_docs():
+        assert task._config["validation_split"] is not None


 def test_has_test_docs(task_class):
-    assert task_class().has_training_docs() in [True, False]
+    assert task_class().has_test_docs() in [True, False]


 def test_check_test_docs(task_class):
     task = task_class()
-    assert task_class().has_training_docs() if task._config["test_split"] else True
+    if task.has_test_docs():
+        assert task._config["test_split"] is not None


 def test_should_decontaminate(task_class):
-    task_class = task_class()
-    assert task_class.should_decontaminate() in [True, False]
-    if task_class.should_decontaminate():
-        assert task_class._config["doc_to_decontamination_query"] is not None
+    task = task_class()
+    assert task.should_decontaminate() in [True, False]
+    if task.should_decontaminate():
+        assert task._config["doc_to_decontamination_query"] is not None
 def test_doc_to_text(task_class, limit):
+    task = task_class()
     arr = (
-        list(islice(task_class().test_docs(), limit))
-        if limit
-        else list(task_class().test_docs())
+        list(islice(task.test_docs(), limit))
+        if task.has_test_docs()
+        else list(islice(task.validation_docs(), limit))
     )
-    _array = [task_class().doc_to_text(doc) for doc in arr]
+    _array = [task.doc_to_text(doc) for doc in arr]
     # space convention; allow txt to have length 0 for perplexity-like tasks since the model tacks an <|endoftext|> on
     assert all(
         isinstance(x, str) and (x[-1] != " " if len(x) != 0 else True) for x in _array
@@ -91,24 +77,27 @@ def test_doc_to_text(task_class, limit):
 def test_create_choices(task_class, limit):
+    task = task_class()
     arr = (
-        list(islice(task_class().test_docs(), limit))
-        if limit
-        else list(task_class().test_docs())
+        list(islice(task.test_docs(), limit))
+        if task.has_test_docs()
+        else list(islice(task.validation_docs(), limit))
     )
-    _array = [task_class().doc_to_choice(doc) for doc in arr]
-    # assert all(len(x) == 4 for x in _array)
-    assert all(isinstance(x, list) for x in _array)
-    assert all(isinstance(x[0], str) for x in _array)
+    if "multiple_choice" in task._config.group:
+        _array = [task.doc_to_choice(doc) for doc in arr]
+        # assert all(len(x) == 4 for x in _array)
+        assert all(isinstance(x, list) for x in _array)
+        assert all(isinstance(x[0], str) for x in _array)


 def test_doc_to_target(task_class, limit):
+    task = task_class()
     arr = (
-        list(islice(task_class().test_docs(), limit))
-        if limit
-        else list(task_class().test_target())
+        list(islice(task.test_docs(), limit))
+        if task.has_test_docs()
+        else list(islice(task.validation_docs(), limit))
     )
-    _array_target = [task_class().doc_to_target(doc) for doc in arr]
+    _array_target = [task.doc_to_target(doc) for doc in arr]
     assert all(isinstance(label, int) for label in _array_target)
     assert len(_array_target) == limit if limit else True
     # _array_text = [task.doc_to_text(doc) for doc in arr]
@@ -122,15 +111,13 @@ def test_build_all_requests(task_class, limit):
 def test_construct_requests(task_class, limit):
+    task = task_class()
     arr = (
-        list(islice(task_class().test_docs(), limit))
-        if limit
-        else list(task_class().test_docs())
+        list(islice(task.test_docs(), limit))
+        if task.has_test_docs()
+        else list(islice(task.validation_docs(), limit))
     )
-    requests = [
-        task_class().construct_requests(doc, task_class().doc_to_text(doc))
-        for doc in arr
-    ]
+    requests = [task.construct_requests(doc, task.doc_to_text(doc)) for doc in arr]
     assert all(isinstance(doc, list) for doc in requests)
     assert len(requests) == limit if limit else True
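One detail worth noting in the removed code: `@pytest.fixture(params=new_tasks(any_new_tasks))` evaluates at import time, when `any_new_tasks` still names the fixture function object rather than the boolean it would yield, so the truthiness check inside `new_tasks` always passed regardless of the `--new_task` flag. A minimal demonstration of that pitfall, with names shortened for the example:

def any_new_tasks():  # stand-in for the fixture function object
    return False

def new_tasks(flag):
    return ["changed_tasks"] if flag else ["arc_easy"]

# The function object itself is truthy, no matter what it would return:
print(new_tasks(any_new_tasks))  # -> ["changed_tasks"]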