Commit a199314a authored by baberabb

separated tests for new tasks into their own file.

parent 2820042d
......@@ -2,11 +2,7 @@ name: Tasks Modified
on:
push:
branches:
- big-refactor
pull_request:
branches:
- big-refactor
workflow_dispatch:
jobs:
......@@ -19,11 +15,15 @@ jobs:
with:
fetch-depth: 0 # OR "2" -> To retrieve the preceding commit.
# Example 1
# Uses the tj-actions/changed-files@v37 action to check for changes.
# Outputs provided here: https://github.com/tj-actions/changed-files#outputs
# The `files_yaml` input optionally takes a YAML string specifying which files to check,
# and each key name is prepended to the standard output names.
- name: Check task folders
id: changed-tasks
uses: tj-actions/changed-files@v37.1.2
with:
# tasks checks the tasks folder and api checks the api folder for changes
files_yaml: |
tasks:
- lm_eval/tasks/**
......@@ -31,6 +31,8 @@ jobs:
- lm_eval/api/**
write_output_files: true
# This is a workaround to get the list of all modified files and save it to an env variable.
# The next two echo statements are just for logging.
- name: Run Tests
if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
run: |
......@@ -42,20 +44,21 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip'
- name: Install dependencies
if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
run: |
python -m pip install --upgrade pip
pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
# Install optional git dependencies
# pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# Install optional git dependencies
# pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest
# if new tasks are added, run tests on them; pytest file=test_new_tasks
if: steps.changed-tasks.outputs.tasks_any_modified == 'true'
run: python -m pytest tests/test_tasks.py -s -vv -n=auto --new_task
run: python -m pytest tests/extra/test_new_tasks.py -s -vv -n=auto
# if api is modified, run tests on it
- name: Test more tasks with pytest
env:
API: true
if: steps.changed-tasks.outputs.api_any_modified == 'true'
run: python -m pytest tests/test_api.py -s -vv -n=auto --new_task
run: python -m pytest tests/extra/test_new_tasks.py -s -vv -n=auto
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
# just comment out unwanted steps to turn off the test.
name: Unit Tests
on:
push:
branches:
- big-refactor
pull_request:
branches:
- big-refactor
workflow_dispatch:
# Jobs run concurrently and steps run sequentially within a job.
# jobs: linter and testcpu. Add more jobs/steps as required.
jobs:
linter:
name: Linters
......@@ -35,9 +32,10 @@ jobs:
flake8 . --count --select=F,E9,E71,E72,E501,E112,E113,W6 --extend-ignore=F541 --show-source --statistics --exit-zero
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Lint with mypy
run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable
# mypy turned off for now
# - name: Lint with mypy
# run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable
# Job 2
testcpu:
name: CPU Tests
runs-on: ubuntu-latest
......
......@@ -3,6 +3,15 @@ env
data/
lm_cache
.idea
*.egg-info/
build
dist
*.egg-info
venv
.vscode/
temp
__pycache__
.ipynb_checkpoints
temp
# IPython
profile_default/
ipython_config.py
def pytest_addoption(parser):
    parser.addoption(
        "--new_task",
        action="store_true",
        help="run tests for newly added tasks",
    )
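For context, the flag registered above is consumed inside a fixture via request.config.getoption, mirroring the fixture in the pre-existing test file shown further down; a minimal sketch:

import pytest


@pytest.fixture()
def any_new_tasks(request) -> bool:
    # True when pytest is invoked with --new_task
    return request.config.getoption("--new_task")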
import pytest
from itertools import islice
import lm_eval.tasks as tasks
from .utilities_testing import load_changed_files, parser
from typing import List, ClassVar
from lm_eval.api.task import ConfigurableTask
import os
# GitHub CI:
# If the tasks folder has changed, read the list of changed files from FILENAME
# and parse the YAML configs to get the task names.
# If the API has changed, the API env variable is set
# and a fixed set of representative tasks is run instead.
def new_tasks() -> List[str]:
FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
if os.path.exists(FILENAME):
return parser(load_changed_files(FILENAME))
elif os.getenv("API") is not None:
return ["arc_easy", "hellaswag", "piqa", "wikitext"]
# if neither has changed, just test arc_easy
else:
return ["arc_easy"]
@pytest.fixture(params=new_tasks())
def task_class(request) -> ConfigurableTask:
task_name = request.param
if task_name is None:
task_name = "arc_easy"
x = [cls for name, cls in tasks.TASK_REGISTRY.items() if name == task_name]
return x[0]
@pytest.fixture(params=new_tasks())
def limit(request) -> int:
# request.param is not used; parametrized only to mirror the task_class fixture
return 100
# Tests
def test_download(task_class: ConfigurableTask):
task_class().download()
assert task_class().dataset is not None
def test_has_training_docs(task_class: ConfigurableTask):
assert task_class().has_training_docs() in [True, False]
def test_check_training_docs(task_class: ConfigurableTask):
task = task_class()
assert task.has_training_docs() if task._config["training_split"] else True
def test_has_validation_docs(task_class):
    assert task_class().has_validation_docs() in [True, False]
def test_check_validation_docs(task_class):
    task = task_class()
    assert task.has_validation_docs() if task._config["validation_split"] else True
def test_has_test_docs(task_class):
    assert task_class().has_test_docs() in [True, False]
def test_check_test_docs(task_class):
    task = task_class()
    assert task.has_test_docs() if task._config["test_split"] else True
def test_should_decontaminate(task_class):
    task = task_class()
    assert task.should_decontaminate() in [True, False]
    if task.should_decontaminate():
        assert task._config["doc_to_decontamination_query"] is not None
def test_doc_to_text(task_class, limit):
arr = (
list(islice(task_class().test_docs(), limit))
if limit
else list(task_class().test_docs())
)
_array = [task_class().doc_to_text(doc) for doc in arr]
# whitespace convention: text must not end with a space; empty text is allowed
# for perplexity-like tasks since the model supplies the <|endoftext|> token itself
assert all(
isinstance(x, str) and (x[-1] != " " if len(x) != 0 else True) for x in _array
)
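To make the whitespace convention above concrete, a purely illustrative pair of strings (hypothetical, not taken from any real task):

# Hypothetical doc_to_text / doc_to_target outputs illustrating the convention:
text = "Question: What is 2 + 2?\nAnswer:"  # no trailing space on the text
target = " 4"                               # the target carries the leading space instead
assert text == "" or not text.endswith(" ")  # empty text is allowed for perplexity-style tasks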
def test_create_choices(task_class, limit):
arr = (
list(islice(task_class().test_docs(), limit))
if limit
else list(task_class().test_docs())
)
_array = [task_class().doc_to_choice(doc) for doc in arr]
# assert all(len(x) == 4 for x in _array)
assert all(isinstance(x, list) for x in _array)
assert all(isinstance(x[0], str) for x in _array)
def test_doc_to_target(task_class, limit):
    arr = (
        list(islice(task_class().test_docs(), limit))
        if limit
        else list(task_class().test_docs())
    )
    _array_target = [task_class().doc_to_target(doc) for doc in arr]
    assert all(isinstance(label, int) for label in _array_target)
    assert len(_array_target) == limit if limit else True
# _array_text = [task.doc_to_text(doc) for doc in arr]
# Not working
# assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))
def test_build_all_requests(task_class, limit):
    task = task_class()
    task.build_all_requests(rank=1, limit=limit, world_size=1)
    assert task.instances is not None
def test_construct_requests(task_class, limit):
arr = (
list(islice(task_class().test_docs(), limit))
if limit
else list(task_class().test_docs())
)
requests = [
task_class().construct_requests(doc, task_class().doc_to_text(doc))
for doc in arr
]
assert all(isinstance(doc, list) for doc in requests)
assert len(requests) == limit if limit else True
......@@ -3,7 +3,7 @@ from typing import List
from lm_eval.utils import load_yaml_config
from pathlib import Path
# Path to the file where the workflow stores the list of changed files under the tasks folder
FILE_PATH = ".github/outputs/tasks_all_changed_and_modified_files.txt"
......@@ -12,6 +12,9 @@ def load_changed_files(file_path: str = FILE_PATH) -> List[str]:
return [line.strip() for line in f.readlines()]
# Checks the txt file for the list of changed files:
# if a file ends with .yaml, read the task name from that YAML config;
# if a file ends with .py, scan its folder for all YAML configs.
def parser(full_path: List[str]) -> List[str]:
_output = set()
for x in full_path:
......
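The body of parser is elided above; the following is a minimal sketch of the logic described in its comment, assuming load_yaml_config(path) returns the parsed config as a dict and that each task config carries a "task" key (both assumptions here, not confirmed by the diff):

from pathlib import Path
from typing import List

from lm_eval.utils import load_yaml_config


def parser(full_path: List[str]) -> List[str]:
    # Hypothetical reconstruction of the elided body, following the comment above.
    _output = set()
    for x in full_path:
        if x.endswith(".yaml"):
            # changed YAML config: read the task name directly
            _output.add(load_yaml_config(x)["task"])
        elif x.endswith(".py"):
            # changed Python file: scan its folder for every YAML config
            for yaml_file in Path(x).parent.glob("*.yaml"):
                _output.add(load_yaml_config(str(yaml_file))["task"])
    return list(_output)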
import pytest
from itertools import islice
import pytest
from typing import List
import lm_eval.tasks as tasks
from tests.extra.test_utils import load_changed_files, parser
from typing import List, ClassVar
import os
from lm_eval.api.task import ConfigurableTask
@pytest.fixture()
def any_new_tasks(request) -> bool:
return request.config.getoption("--new_task")
# ["arc_easy] else get list of new tasks
def new_tasks(any_new_tasks: bool) -> List[str]:
FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
if any_new_tasks and os.path.exists(FILENAME):
return [parser(load_changed_files(FILENAME))]
elif os.getenv("API") is not None:
return ["arc_easy", "hellaswag", "piqa", "wikitext"]
else:
return ["arc_easy"]
@pytest.fixture(params=new_tasks(any_new_tasks))
def task_class(request):
task_name = request.param
return [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_name][0]
def task_class(task_name: List[str] = None) -> ConfigurableTask:
if task_name is None:
task_name = ["arc_easy"]
x = [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_name]
return x[0]
@pytest.fixture()
......@@ -36,16 +21,16 @@ def limit(any_new_tasks: bool) -> int:
# Tests
def test_download(task_class):
def test_download(task_class: ConfigurableTask):
task_class().download()
assert task_class().dataset is not None
def test_has_training_docs(task_class):
def test_has_training_docs(task_class: ConfigurableTask):
assert task_class().has_training_docs() in [True, False]
def test_check_training_docs(task_class):
def test_check_training_docs(task_class: ConfigurableTask):
task = task_class()
assert task.has_training_docs() if task._config["training_split"] else True
......