Commit a199314a authored by baberabb

separated tests for new tasks into their own file.

parent 2820042d
......@@ -2,11 +2,7 @@ name: Tasks Modified
on:
push:
branches:
- big-refactor
pull_request:
branches:
- big-refactor
workflow_dispatch:
jobs:
......@@ -19,11 +15,15 @@ jobs:
with:
fetch-depth: 0 # OR "2" -> To retrieve the preceding commit.
# Example 1
# Uses the tj-actions/changed-files@v37 action to check for changes.
# Outputs provided here: https://github.com/tj-actions/changed-files#outputs
# The `files_yaml` input optionally takes a YAML string specifying which files to check,
# and each key name is prepended to the standard output names.
- name: Check task folders
id: changed-tasks
uses: tj-actions/changed-files@v37.1.2
with:
# tasks checks the tasks folder and api checks the api folder for changes
files_yaml: |
tasks:
- lm_eval/tasks/**
......@@ -31,6 +31,8 @@ jobs:
- lm_eval/api/**
write_output_files: true
# This is a workaround to get the list of all modified files and save it to an env variable.
# The next two echo statements are just for logging.
- name: Run Tests
if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
run: |
......@@ -42,20 +44,21 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: 3.9
cache: 'pip'
- name: Install dependencies
if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
run: |
python -m pip install --upgrade pip
pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
# Install optional git dependencies
# pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# Install optional git dependencies
# pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest
# if new tasks are added, run tests on them; pytest file=test_new_tasks
if: steps.changed-tasks.outputs.tasks_any_modified == 'true'
run: python -m pytest tests/test_tasks.py -s -vv -n=auto --new_task
run: python -m pytest tests/extra/test_new_tasks.py -s -vv -n=auto
# if api is modified, run tests on it
- name: Test more tasks with pytest
env:
API: true
if: steps.changed-tasks.outputs.api_any_modified == 'true'
run: python -m pytest tests/test_api.py -s -vv -n=auto --new_task
run: python -m pytest tests/extra/test_new_tasks.py -s -vv -n=auto
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
# just comment out unwanted steps to turn off the test.
name: Unit Tests
on:
push:
branches:
- big-refactor
pull_request:
branches:
- big-refactor
workflow_dispatch:
# Jobs run concurrently and steps run sequentially within a job.
# jobs: linter and testcpu. Add more jobs/steps as required.
jobs:
linter:
name: Linters
......@@ -35,9 +32,10 @@ jobs:
flake8 . --count --select=F,E9,E71,E72,E501,E112,E113,W6 --extend-ignore=F541 --show-source --statistics --exit-zero
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Lint with mypy
run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable
# mypy turned off for now
# - name: Lint with mypy
# run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable
# Job 2
testcpu:
name: CPU Tests
runs-on: ubuntu-latest
......
......@@ -3,6 +3,15 @@ env
data/
lm_cache
.idea
*.egg-info/
build
dist
*.egg-info
venv
.vscode/
temp
__pycache__
.ipynb_checkpoints
temp
# IPython
profile_default/
ipython_config.py
def pytest_addoption(parser):
    parser.addoption(
        "--new_task",
        action="store_true",
        help="run tests for newly added tasks",
    )
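For context, the flag registered above is consumed inside a fixture via request.config.getoption, mirroring the fixture in the pre-existing test file shown further down; a minimal sketch:

import pytest


@pytest.fixture()
def any_new_tasks(request) -> bool:
    # True when pytest is invoked with --new_task
    return request.config.getoption("--new_task")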
import pytest
from itertools import islice
import lm_eval.tasks as tasks
from .utilities_testing import load_changed_files, parser
from typing import List, ClassVar
from lm_eval.api.task import ConfigurableTask
import os
# GitHub CI:
# If the tasks folder has changed, read the list of changed files from FILENAME
# and parse the YAML configs to get the task names.
# If the API has changed, the API env variable is set
# and a fixed set of representative tasks is run instead.
def new_tasks() -> List[str]:
FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
if os.path.exists(FILENAME):
return parser(load_changed_files(FILENAME))
elif os.getenv("API") is not None:
return ["arc_easy", "hellaswag", "piqa", "wikitext"]
# if neither has changed, just test arc_easy
else:
return ["arc_easy"]
@pytest.fixture(params=new_tasks())
def task_class(request) -> ConfigurableTask:
task_name = request.param
if task_name is None:
task_name = "arc_easy"
x = [cls for name, cls in tasks.TASK_REGISTRY.items() if name == task_name]
return x[0]
@pytest.fixture(params=new_tasks())
def limit(request) -> int:
# request.param is not used; parametrized only to mirror the task_class fixture
return 100
# Tests
def test_download(task_class: ConfigurableTask):
task_class().download()
assert task_class().dataset is not None
def test_has_training_docs(task_class: ConfigurableTask):
assert task_class().has_training_docs() in [True, False]
def test_check_training_docs(task_class: ConfigurableTask):
task = task_class()
assert task.has_training_docs() if task._config["training_split"] else True
def test_has_validation_docs(task_class):
    assert task_class().has_validation_docs() in [True, False]
def test_check_validation_docs(task_class):
    task = task_class()
    assert task.has_validation_docs() if task._config["validation_split"] else True
def test_has_test_docs(task_class):
    assert task_class().has_test_docs() in [True, False]
def test_check_test_docs(task_class):
    task = task_class()
    assert task.has_test_docs() if task._config["test_split"] else True
def test_should_decontaminate(task_class):
    task = task_class()
    assert task.should_decontaminate() in [True, False]
    if task.should_decontaminate():
        assert task._config["doc_to_decontamination_query"] is not None
def test_doc_to_text(task_class, limit):
arr = (
list(islice(task_class().test_docs(), limit))
if limit
else list(task_class().test_docs())
)
_array = [task_class().doc_to_text(doc) for doc in arr]
# whitespace convention: text must not end with a space; empty text is allowed
# for perplexity-like tasks since the model supplies the <|endoftext|> token itself
assert all(
isinstance(x, str) and (x[-1] != " " if len(x) != 0 else True) for x in _array
)
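To make the whitespace convention above concrete, a purely illustrative pair of strings (hypothetical, not taken from any real task):

# Hypothetical doc_to_text / doc_to_target outputs illustrating the convention:
text = "Question: What is 2 + 2?\nAnswer:"  # no trailing space on the text
target = " 4"                               # the target carries the leading space instead
assert text == "" or not text.endswith(" ")  # empty text is allowed for perplexity-style tasks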
def test_create_choices(task_class, limit):
arr = (
list(islice(task_class().test_docs(), limit))
if limit
else list(task_class().test_docs())
)
_array = [task_class().doc_to_choice(doc) for doc in arr]
# assert all(len(x) == 4 for x in _array)
assert all(isinstance(x, list) for x in _array)
assert all(isinstance(x[0], str) for x in _array)
def test_doc_to_target(task_class, limit):
    arr = (
        list(islice(task_class().test_docs(), limit))
        if limit
        else list(task_class().test_docs())
    )
    _array_target = [task_class().doc_to_target(doc) for doc in arr]
    assert all(isinstance(label, int) for label in _array_target)
    assert len(_array_target) == limit if limit else True
# _array_text = [task.doc_to_text(doc) for doc in arr]
# Not working
# assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))
def test_build_all_requests(task_class, limit):
    task = task_class()
    task.build_all_requests(rank=1, limit=limit, world_size=1)
    assert task.instances is not None
def test_construct_requests(task_class, limit):
arr = (
list(islice(task_class().test_docs(), limit))
if limit
else list(task_class().test_docs())
)
requests = [
task_class().construct_requests(doc, task_class().doc_to_text(doc))
for doc in arr
]
assert all(isinstance(doc, list) for doc in requests)
assert len(requests) == limit if limit else True
......@@ -3,7 +3,7 @@ from typing import List
from lm_eval.utils import load_yaml_config
from pathlib import Path
# Path to the file where the workflow stores the list of changed files under the tasks folder
FILE_PATH = ".github/outputs/tasks_all_changed_and_modified_files.txt"
......@@ -12,6 +12,9 @@ def load_changed_files(file_path: str = FILE_PATH) -> List[str]:
return [line.strip() for line in f.readlines()]
# Checks the txt file for the list of changed files:
# if a file ends with .yaml, read the task name from that YAML config;
# if a file ends with .py, scan its folder for all YAML configs.
def parser(full_path: List[str]) -> List[str]:
_output = set()
for x in full_path:
......
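The body of parser is elided above; the following is a minimal sketch of the logic described in its comment, assuming load_yaml_config(path) returns the parsed config as a dict and that each task config carries a "task" key (both assumptions here, not confirmed by the diff):

from pathlib import Path
from typing import List

from lm_eval.utils import load_yaml_config


def parser(full_path: List[str]) -> List[str]:
    # Hypothetical reconstruction of the elided body, following the comment above.
    _output = set()
    for x in full_path:
        if x.endswith(".yaml"):
            # changed YAML config: read the task name directly
            _output.add(load_yaml_config(x)["task"])
        elif x.endswith(".py"):
            # changed Python file: scan its folder for every YAML config
            for yaml_file in Path(x).parent.glob("*.yaml"):
                _output.add(load_yaml_config(str(yaml_file))["task"])
    return list(_output)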
import pytest
from itertools import islice
import pytest
from typing import List
import lm_eval.tasks as tasks
from tests.extra.test_utils import load_changed_files, parser
from typing import List, ClassVar
import os
from lm_eval.api.task import ConfigurableTask
@pytest.fixture()
def any_new_tasks(request) -> bool:
return request.config.getoption("--new_task")
# ["arc_easy] else get list of new tasks
def new_tasks(any_new_tasks: bool) -> List[str]:
FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
if any_new_tasks and os.path.exists(FILENAME):
return [parser(load_changed_files(FILENAME))]
elif os.getenv("API") is not None:
return ["arc_easy", "hellaswag", "piqa", "wikitext"]
else:
return ["arc_easy"]
@pytest.fixture(params=new_tasks(any_new_tasks))
def task_class(request):
task_name = request.param
return [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_name][0]
def task_class(task_name: List[str] = None) -> ConfigurableTask:
if task_name is None:
task_name = ["arc_easy"]
x = [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_name]
return x[0]
@pytest.fixture()
......@@ -36,16 +21,16 @@ def limit(any_new_tasks: bool) -> int:
# Tests
def test_download(task_class):
def test_download(task_class: ConfigurableTask):
task_class().download()
assert task_class().dataset is not None
def test_has_training_docs(task_class):
def test_has_training_docs(task_class: ConfigurableTask):
assert task_class().has_training_docs() in [True, False]
def test_check_training_docs(task_class):
def test_check_training_docs(task_class: ConfigurableTask):
task = task_class()
assert task.has_training_docs() if task._config["training_split"] else True
......