Commit 930b4253 authored by Baber
Browse files

Merge branch 'smolrefact' into lazy_reg

# Conflicts:
#	lm_eval/__init__.py
#	lm_eval/api/metrics.py
#	lm_eval/api/registry.py
#	lm_eval/api/task.py
#	lm_eval/filters/__init__.py
#	pyproject.toml
parents d547b663 73202a2e
......@@ -46,7 +46,12 @@ def limit() -> int:
return 10
class BaseTasks:
@pytest.mark.parametrize(
"task_class",
task_class(get_new_tasks_else_default()),
ids=lambda x: f"{x.config.task}",
)
class TestBaseTasks:
"""
Base class for testing tasks
"""
......@@ -160,8 +165,50 @@ class BaseTasks:
task_class(get_new_tasks_else_default()),
ids=lambda x: f"{x.config.task}",
)
class TestNewTasksElseDefault(BaseTasks):
class TestNewTasksElseDefault(TestBaseTasks):
"""
Test class parameterized with a list of new/modified tasks
(or a set of default tasks if none have been modified)
"""
@pytest.mark.parametrize(
    "task_class",
    task_class(
        ["arc_easy_unitxt"], tasks.TaskManager(include_path="./tests/testconfigs")
    ),
    ids=lambda x: f"{x.config.task}",
)
class TestUnitxtTasks(TestBaseTasks):
    """
    Task checks for a Unitxt-backed task.

    Parameterized with a small custom task, as described here:
    https://www.unitxt.ai/en/latest/docs/lm_eval.html
    """

    def test_check_training_docs(self, task_class: ConfigurableTask):
        """If the task reports training docs, its "train" split must not be None."""
        if not task_class.has_training_docs():
            return
        assert task_class.dataset["train"] is not None

    def test_check_validation_docs(self, task_class):
        """If the task reports validation docs, its "validation" split must not be None."""
        if not task_class.has_validation_docs():
            return
        assert task_class.dataset["validation"] is not None

    def test_check_test_docs(self, task_class):
        """If the task reports test docs, its "test" split must not be None."""
        task = task_class
        if not task.has_test_docs():
            return
        assert task.dataset["test"] is not None

    def test_doc_to_text(self, task_class, limit: int):
        """Rendered prompts must be ``str`` unless the task is multiple-input."""
        task = task_class
        # Prefer the test split; fall back to the validation split otherwise.
        if task.has_test_docs():
            docs = task.test_docs()
        else:
            docs = task.validation_docs()
        # Materialize at most `limit` docs before rendering any of them.
        sample = list(islice(docs, limit))
        prompts = [task.doc_to_text(doc) for doc in sample]
        if task.multiple_input:
            # Nothing is asserted about multiple-input prompts.
            return
        for prompt in prompts:
            assert isinstance(prompt, str)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment