Unverified Commit 0594fe2b authored by Ethan Smith's avatar Ethan Smith Committed by GitHub

Merge branch 'big-refactor' into pyproject_toml_v2

parents 40c9f9cb b65b9ca3
upper_pronouns = [
    "A",
    "An",
    "The",
    "She",
    "He",
    "It",
    "They",
    "My",
    "His",
    "Her",
    "Their",
]
def process_doc(dataset):
    def process_fn(doc):
        # The HF implementation of `wsc273` is not `partial evaluation` friendly.
        doc["text"] = doc["text"].replace("  ", " ")  # collapse double spaces
        doc["options"][0] = __normalize_option(doc, doc["options"][0])
        doc["options"][1] = __normalize_option(doc, doc["options"][1])
        return doc

    return dataset.map(process_fn)
def __normalize_option(doc, option):
    # Append `'s` to options based on possessive determiners.
    if doc["pronoun"].lower() in ["my", "his", "her", "our", "their"]:
        option += "'s"
    # Appropriately lowercase the pronoun in the option.
    pronoun = option.split()[0]
    start_of_sentence = doc["text"][doc["pronoun_loc"] - 2] == "."
    if not start_of_sentence and pronoun in upper_pronouns:
        return option.replace(pronoun, pronoun.lower())
    return option
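
# For illustration: a hypothetical wsc273-style doc (field names follow the
# HF `winograd_wsc` schema used above; the values, including `pronoun_loc`,
# are made up for this example).
_example_doc = {
    "text": "The trophy doesn't fit in the suitcase because it is too big.",
    "pronoun": "it",
    "pronoun_loc": 47,  # assumed character offset of "it" in `text`
    "options": ["The trophy", "The suitcase"],
}
# "it" sits mid-sentence, so the option's leading "The" gets lowercased:
assert __normalize_option(_example_doc, "The trophy") == "the trophy"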
@@ -10,7 +10,7 @@ import collections
import importlib.util
import fnmatch
from typing import List, Literal, Union
from typing import Iterator, List, Literal, Union
import gc
import torch
@@ -65,7 +65,7 @@ def join_iters(iters):
        yield from iter
def chunks(iter, n=0, fn=None):
def chunks(iter, n: int = 0, fn=None):
    arr = []
    for i, x in enumerate(iter):
        arr.append(x)
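
# The hunk cuts off before this generator's yield. A minimal sketch of how
# the elided remainder presumably behaves (assumption: emit `n`-sized
# batches, let `fn(index, iterable)` override the size, flush the remainder):
def _chunks_sketch(iterable, n: int = 0, fn=None):
    arr = []
    for i, x in enumerate(iterable):
        arr.append(x)
        if len(arr) == (fn(i, iterable) if fn else n):
            yield arr
            arr = []
    if arr:
        yield arr

# e.g. list(_chunks_sketch(range(5), n=2)) -> [[0, 1], [2, 3], [4]]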
@@ -87,11 +87,11 @@ def group(arr, fn):
class MultiChoice:
    def __init__(self, choices):
    def __init__(self, choices) -> None:
        self.choices = choices

    # Simple wildcard support (Linux filename patterns)
    def __contains__(self, values):
    def __contains__(self, values) -> bool:
        for value in values.split(","):
            if len(fnmatch.filter(self.choices, value)) == 0:
                eval_logger.info("Available tasks to choose:")
@@ -100,7 +100,7 @@ class MultiChoice:
                raise ValueError("'{}' is not in task list".format(value))
        return True

    def __iter__(self):
    def __iter__(self) -> Iterator:
        for choice in self.choices:
            yield choice
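
# For illustration (hypothetical task names; the real choices come from
# ALL_TASKS): values are comma-separated and matched fnmatch-style.
_choices = MultiChoice(["arc_easy", "arc_challenge", "hellaswag"])
assert "arc_*" in _choices
assert "arc_easy,hellaswag" in _choices
# "mmlu" in _choices would log the available tasks and raise ValueError.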
@@ -108,7 +108,6 @@ class MultiChoice:
# Returns a list containing all values of the source_list that
# match at least one of the patterns
def pattern_match(patterns, source_list):
    if isinstance(patterns, str):
        patterns = [patterns]
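
# The rest of `pattern_match` is elided by the hunk. A sketch of the likely
# remainder, assuming it collects fnmatch hits against `source_list`:
def _pattern_match_sketch(patterns, source_list):
    if isinstance(patterns, str):
        patterns = [patterns]
    task_names = set()
    for pattern in patterns:
        task_names.update(fnmatch.filter(source_list, pattern))
    return sorted(task_names)

# e.g. _pattern_match_sketch("arc_*", ["arc_easy", "boolq"]) -> ["arc_easy"]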
@@ -177,7 +176,7 @@ def make_disjoint_window(pair):
class Reorderer:
    def __init__(self, arr, fn):
    def __init__(self, arr, fn) -> None:
        self.size = len(arr)
        arr = list(enumerate(arr))
        arr = group(arr, lambda x: fn(x[1]))
@@ -212,7 +211,7 @@ class Grouper:
    objects in `arr` satisfying `key == fn(ob)`.
    """

    def __init__(self, arr, fn):
    def __init__(self, arr, fn) -> None:
        # self.orig_arr = arr
        self.size = len(arr)
        arr = list(enumerate(arr))
@@ -263,7 +262,7 @@ class Grouper:
        return res


def make_table(result_dict, column="results"):
def make_table(result_dict, column: str = "results"):
    """Generate table of results."""
    from pytablewriter import MarkdownTableWriter, LatexTableWriter
@@ -393,7 +392,6 @@ def get_git_commit_hash():
def import_function(loader, node):
    function_name = loader.construct_scalar(node)
    yaml_path = os.path.dirname(loader.name)
@@ -428,7 +426,6 @@ def load_yaml_config(yaml_path):
    include_path.reverse()
    final_yaml_config = {}
    for path in include_path:
        # Assumes that path is a full path.
        # If not found, assume the included yaml
        # is in the same dir as the original yaml
@@ -447,7 +444,7 @@ def load_yaml_config(yaml_path):
    return yaml_config


def regex_replace(string, pattern, repl, count=0):
def regex_replace(string, pattern, repl, count: int = 0):
    """Implements the `re.sub` function as a custom Jinja filter."""
    return re.sub(pattern, repl, string, count=count)
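
# A sketch of how a filter like this is registered with jinja2; this
# Environment setup is illustrative, not the harness's actual wiring.
from jinja2 import Environment

_env = Environment()
_env.filters["regex_replace"] = regex_replace
assert (
    _env.from_string("{{ 'foo123bar' | regex_replace('[0-9]+', '-') }}").render()
    == "foo-bar"
)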
@@ -521,7 +518,7 @@ def pad_and_concat(
    return torch.cat(tensors, dim=0)


def clear_torch_cache():
def clear_torch_cache() -> None:
    gc.collect()
    torch.cuda.empty_cache()
@@ -546,7 +543,7 @@ class MultiTokenEOSCriteria(transformers.StoppingCriteria):
        tokenizer: transformers.PreTrainedTokenizer,
        initial_decoder_input_length: int,
        batch_size: int,
    ):
    ) -> None:
        self.initial_decoder_input_length = initial_decoder_input_length
        self.done_tracker = [False] * batch_size
        self.sequence = sequence
......
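
# The diff truncates the class here. For context, a sketch of how such a
# multi-token stop criterion typically decides to halt; `sequence_ids` (the
# encoded stop sequence) is an assumed attribute not shown in the hunk above.
def _stops_now(criteria, input_ids) -> bool:
    # Look only at tokens generated past the prompt, and only as far back
    # as the stop sequence could plausibly span.
    lookback = input_ids[:, criteria.initial_decoder_input_length:]
    lookback = lookback[:, -(len(criteria.sequence_ids) + 2):]
    decoded = criteria.tokenizer.batch_decode(lookback)
    for i, done in enumerate(criteria.done_tracker):
        if not done:
            criteria.done_tracker[i] = criteria.sequence in decoded[i]
    # Halt once every row in the batch has produced the stop sequence.
    return False not in criteria.done_tracker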
@@ -9,23 +9,26 @@ from pathlib import Path
from lm_eval import evaluator, utils
from lm_eval.api.registry import ALL_TASKS
from lm_eval.logger import eval_logger
from lm_eval.logger import eval_logger, SPACING
from lm_eval.tasks import include_task_folder
from lm_eval.benchmarks import include_benchmarks
os.environ["TOKENIZERS_PARALLELISM"] = "false"
def parse_args():
    parser = argparse.ArgumentParser()
def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--model", required=True, help="Name of model e.g. `hf`")
    parser.add_argument(
        "--tasks",
        default=None,
        help="Available Tasks:\n - {}".format("\n - ".join(sorted(ALL_TASKS))),
    )
    parser.add_argument(
        "--model_args",
        default="",
        help="String arguments for model, e.g. `pretrained=EleutherAI/pythia-160m,dtype=float32`",
    )
    parser.add_argument(
        "--tasks", default=None  # , choices=utils.MultiChoice(sorted(ALL_TASKS))
    )
    parser.add_argument(
        "--num_fewshot",
        type=int,
@@ -98,7 +101,7 @@ def parse_args():
    return parser.parse_args()


def main():
def main() -> None:
    args = parse_args()

    if args.limit:
@@ -125,10 +128,21 @@ def main():
    else:
        tasks_list = args.tasks.split(",")
        task_names = utils.pattern_match(tasks_list, ALL_TASKS)
        task_missing = []
        for task in [task for task in tasks_list if task not in task_names]:
            if os.path.isfile(task):
                config = utils.load_yaml_config(task)
                task_names.append(config)
            else:
                task_missing.append(task)

        if task_missing:
            missing = ", ".join(task_missing)
            eval_logger.error(
                f"Tasks were not found: {missing}\n"
                f"{SPACING}Try `lm-eval -h` for list of available tasks",
            )
            raise ValueError(f"Tasks {missing} were not found.")

    if args.output_path:
        path = Path(args.output_path)
......
[mypy]
python_version = 3.9
show_traceback = True
check_untyped_defs = True
no_implicit_reexport = True
warn_unreachable = True
warn_unused_configs = True
warn_unused_ignores = True
warn_redundant_casts = True

# We ignore errors everywhere to gradually add type annotations
[mypy-lm_eval.*]
ignore_errors = True

[mypy-lm_eval.api.*]
ignore_errors = True

[mypy-lm_eval.prompts.*]
ignore_errors = True

[mypy-lm_eval.models.*]
ignore_errors = True

[mypy-scripts.*]
ignore_errors = True

[mypy-main]
ignore_errors = True
import pytest
from itertools import islice
import lm_eval.tasks as tasks
from .utilities_testing import load_changed_files, parser
from typing import List
from lm_eval.api.task import ConfigurableTask
import os
# GitHub CI
def new_tasks() -> List[str]:
    FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
    if os.path.exists(FILENAME):
        # If the tasks folder has changed, read the list of changed files
        # from FILENAME and parse the YAML files to get the task names.
        return parser(load_changed_files(FILENAME))
    elif os.getenv("API") is not None:
        # Otherwise, if the API has changed, the CI workflow sets the env
        # variable API and we run a fixed set of representative tasks.
        return ["arc_easy", "hellaswag", "piqa", "wikitext"]
    else:
        # If neither condition holds, just run arc_easy.
        return ["arc_easy"]
def get_task_class() -> List[ConfigurableTask]:
    task_name = new_tasks()
    x = [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_name]
    return x
@pytest.fixture()
def limit() -> int:
    return 10
# Tests
@pytest.mark.parametrize("task_class", get_task_class())
class TestNewTasks:
    def test_download(self, task_class: ConfigurableTask):
        task_class().download()
        assert task_class().dataset is not None

    def test_has_training_docs(self, task_class: ConfigurableTask):
        assert task_class().has_training_docs() in [True, False]

    def test_check_training_docs(self, task_class: ConfigurableTask):
        task = task_class()
        if task.has_training_docs():
            assert task._config["training_split"] is not None

    def test_has_validation_docs(self, task_class):
        assert task_class().has_validation_docs() in [True, False]

    def test_check_validation_docs(self, task_class):
        task = task_class()
        if task.has_validation_docs():
            assert task._config["validation_split"] is not None

    def test_has_test_docs(self, task_class):
        assert task_class().has_test_docs() in [True, False]

    def test_check_test_docs(self, task_class):
        task = task_class()
        if task.has_test_docs():
            assert task._config["test_split"] is not None

    def test_should_decontaminate(self, task_class):
        task = task_class()
        assert task.should_decontaminate() in [True, False]
        if task.should_decontaminate():
            assert task._config["doc_to_decontamination_query"] is not None

    def test_doc_to_text(self, task_class, limit):
        task = task_class()
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        _array = [task.doc_to_text(doc) for doc in arr]
        # Space convention: text must not end in a space; allow empty text for
        # perplexity-like tasks, since the model tacks an <|endoftext|> on.
        assert all(
            isinstance(x, str) and (x[-1] != " " if len(x) != 0 else True)
            for x in _array
        )

    def test_create_choices(self, task_class, limit):
        task = task_class()
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        if "multiple_choice" in task._config.output_type:
            _array = [task.doc_to_choice(doc) for doc in arr]
            # assert all(len(x) == 4 for x in _array)
            assert all(isinstance(x, list) for x in _array)
            assert all(isinstance(x[0], str) for x in _array)

    def test_doc_to_target(self, task_class, limit):
        task = task_class()
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        _array_target = [task.doc_to_target(doc) for doc in arr]
        if task._config.output_type == "multiple_choice":
            assert all(isinstance(label, int) for label in _array_target)
        # _array_text = [task.doc_to_text(doc) for doc in arr]
        # Not working
        # assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))

    def test_build_all_requests(self, task_class, limit):
        task_class().build_all_requests(rank=1, limit=limit, world_size=1)
        assert task_class.instances is not None

    # TODO: Add proper testing
    def test_construct_requests(self, task_class, limit):
        task = task_class()
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        requests = [task.construct_requests(doc, task.doc_to_text(doc)) for doc in arr]
        # assert all(isinstance(doc, list) for doc in requests)
        assert len(requests) == limit if limit else True
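
# For reference, the `islice` pattern used throughout these tests just caps
# how many docs are drawn from a possibly lazy iterator, e.g.:
_docs = ({"id": i} for i in range(10**9))  # lazy generator; nothing materialized
_first_ten = list(islice(_docs, 10))  # consumes only the first ten items
assert len(_first_ten) == 10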
import json
from typing import List
from lm_eval.utils import load_yaml_config
from pathlib import Path
FILE_PATH = ".github/outputs/tasks_all_changed_and_modified_files.txt"
def load_changed_files(file_path: str = FILE_PATH) -> List[str]:
    with open(file_path, "r") as f:
        return [word for line in f for word in line.strip().split(" ")]
def parser(full_path: List[str]) -> List[str]:
    _output = set()
    for x in full_path:
        if x.endswith(".yaml"):
            _output.add(load_yaml_config(x)["task"])
        elif x.endswith(".py"):
            path = [str(y) for y in Path(x).parent.glob("*.yaml")]
            _output |= {load_yaml_config(p)["task"] for p in path}
    return list(_output)
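
# For illustration, with hypothetical paths (assumes each YAML carries a
# top-level `task:` key, as `load_yaml_config` implies):
#
#   parser(["lm_eval/tasks/arc/arc_easy.yaml", "lm_eval/tasks/hellaswag/utils.py"])
#   # -> e.g. ["arc_easy", "hellaswag"], deduplicated via the set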
from __future__ import annotations

import pytest
from pathlib import Path
import numpy as np
from lm_eval.models.huggingface import HFLM
from lm_eval.api.instance import Instance
import lm_eval.tasks as tasks
import sys
import torch


class Test_HFLM:
    torch.use_deterministic_algorithms(True)
    version_minor = sys.version_info.minor
    multiple_choice_task = tasks.TASK_REGISTRY.get("arc_easy")()  # type: ignore
    multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
    MULTIPLE_CH: list[Instance] = multiple_choice_task.instances
@@ -90,8 +94,15 @@ class Test_HFLM:
    def test_logliklihood(self) -> None:
        res = self.LM.loglikelihood(self.MULTIPLE_CH)
        _RES, _res = self.MULTIPLE_CH_RES, [r[0] for r in res]
        # change atol in case of consistent failure
        assert np.allclose(_res, _RES, atol=1e-4)
        # log samples to CI
        dir_path = Path("test_logs")
        dir_path.mkdir(parents=True, exist_ok=True)

        file_path = dir_path / f"outputs_log_{self.version_minor}.txt"
        file_path = file_path.resolve()
        with open(file_path, "w") as f:
            f.write("\n".join(str(x) for x in _res))
        assert np.allclose(_res, _RES, atol=1e-2)
        # check indices for Multiple Choice
        argmax_RES, argmax_res = np.argmax(
            np.array(_RES).reshape(-1, 4), axis=1
......
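
# The hunk cuts off mid-call. The idea: arc_easy has four answer choices per
# question, so reshaping the flat loglikelihood list to (-1, 4) and taking a
# per-row argmax recovers each question's predicted choice. With made-up
# numbers:
_scores = [-2.1, -0.3, -4.0, -3.2,  # question 1: choice 1 wins
           -1.0, -5.5, -0.9, -2.2]  # question 2: choice 2 wins
assert np.argmax(np.array(_scores).reshape(-1, 4), axis=1).tolist() == [1, 2]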
from itertools import islice
import pytest
from typing import List
from .utils import new_tasks
import lm_eval.tasks as tasks
from lm_eval.api.task import ConfigurableTask
# Using fixtures to get the task class and limit
@pytest.fixture()
def task_class() -> ConfigurableTask:
    task_name = ["arc_easy"]
    x = [cls for name, cls in tasks.TASK_REGISTRY.items() if name in task_name]
    return x[0]
# Default Task
TASKS = ["arc_easy"]


def task_class():
    global TASKS
    # CI: new_tasks checks if any modifications have been made
    task_classes = new_tasks()
    # Fall back to the default TASKS if new_tasks() found nothing
    if task_classes:
        return [tasks.TASK_REGISTRY.get(x)() for x in task_classes]
    else:
        return [tasks.TASK_REGISTRY.get(x)() for x in TASKS]
@pytest.fixture()
@@ -18,109 +26,96 @@ def limit() -> int:
# Tests
def test_download(task_class: ConfigurableTask):
    task_class().download()
    assert task_class().dataset is not None


def test_has_training_docs(task_class: ConfigurableTask):
    assert task_class().has_training_docs() in [True, False]


def test_check_training_docs(task_class: ConfigurableTask):
    task = task_class()
    if task.has_training_docs():
        assert task._config["training_split"] is not None


def test_has_validation_docs(task_class):
    assert task_class().has_validation_docs() in [True, False]


def test_check_validation_docs(task_class):
    task = task_class()
    if task.has_validation_docs():
        assert task._config["validation_split"] is not None


def test_has_test_docs(task_class):
    assert task_class().has_test_docs() in [True, False]


def test_check_test_docs(task_class):
    task = task_class()
    if task.has_test_docs():
        assert task._config["test_split"] is not None


def test_should_decontaminate(task_class):
    task = task_class()
    assert task.should_decontaminate() in [True, False]
    if task.should_decontaminate():
        assert task._config["doc_to_decontamination_query"] is not None


def test_doc_to_text(task_class, limit):
    task = task_class()
    arr = (
        list(islice(task.test_docs(), limit))
        if task.has_test_docs()
        else list(islice(task.validation_docs(), limit))
    )
    _array = [task.doc_to_text(doc) for doc in arr]
    # Space convention: text must not end in a space; allow empty text for
    # perplexity-like tasks, since the model tacks an <|endoftext|> on.
    assert all(
        isinstance(x, str) and (x[-1] != " " if len(x) != 0 else True) for x in _array
    )


def test_create_choices(task_class, limit):
    task = task_class()
    arr = (
        list(islice(task.test_docs(), limit))
        if task.has_test_docs()
        else list(islice(task.validation_docs(), limit))
    )
    if "multiple_choice" in task._config.output_type:
        _array = [task.doc_to_choice(doc) for doc in arr]
        # assert all(len(x) == 4 for x in _array)
        assert all(isinstance(x, list) for x in _array)
        assert all(isinstance(x[0], str) for x in _array)


def test_doc_to_target(task_class, limit):
    task = task_class()
    arr = (
        list(islice(task.test_docs(), limit))
        if task.has_test_docs()
        else list(islice(task.validation_docs(), limit))
    )
    _array_target = [task.doc_to_target(doc) for doc in arr]
    if task._config.output_type == "multiple_choice":
        assert all(isinstance(label, int) for label in _array_target)
    # _array_text = [task.doc_to_text(doc) for doc in arr]
    # Not working
    # assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))


def test_build_all_requests(task_class, limit):
    task_class().build_all_requests(rank=1, limit=limit, world_size=1)
    assert task_class.instances is not None


# TODO: Add proper testing
def test_construct_requests(task_class, limit):
    task = task_class()
    arr = (
        list(islice(task.test_docs(), limit))
        if task.has_test_docs()
        else list(islice(task.validation_docs(), limit))
    )
    requests = [task.construct_requests(doc, task.doc_to_text(doc)) for doc in arr]
    # assert all(isinstance(doc, list) for doc in requests)
    assert len(requests) == limit if limit else True
@pytest.mark.parametrize("task_class", task_class())
class TestNewTasks:
    def test_download(self, task_class: ConfigurableTask):
        task_class.download()
        assert task_class.dataset is not None

    def test_has_training_docs(self, task_class: ConfigurableTask):
        assert task_class.has_training_docs() in [True, False]

    def test_check_training_docs(self, task_class: ConfigurableTask):
        if task_class.has_training_docs():
            assert task_class._config["training_split"] is not None

    def test_has_validation_docs(self, task_class):
        assert task_class.has_validation_docs() in [True, False]

    def test_check_validation_docs(self, task_class):
        if task_class.has_validation_docs():
            assert task_class._config["validation_split"] is not None

    def test_has_test_docs(self, task_class):
        assert task_class.has_test_docs() in [True, False]

    def test_check_test_docs(self, task_class):
        task = task_class
        if task.has_test_docs():
            assert task._config["test_split"] is not None

    def test_should_decontaminate(self, task_class):
        task = task_class
        assert task.should_decontaminate() in [True, False]
        if task.should_decontaminate():
            assert task._config["doc_to_decontamination_query"] is not None

    def test_doc_to_text(self, task_class, limit):
        task = task_class
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        _array = [task.doc_to_text(doc) for doc in arr]
        # Space convention: text must not end in a space; allow empty text for
        # perplexity-like tasks, since the model tacks an <|endoftext|> on.
        assert all(
            isinstance(x, str) and (x[-1] != " " if len(x) != 0 else True)
            for x in _array
        )

    def test_create_choices(self, task_class, limit):
        task = task_class
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        if "multiple_choice" in task._config.output_type:
            _array = [task.doc_to_choice(doc) for doc in arr]
            # assert all(len(x) == 4 for x in _array)
            assert all(isinstance(x, list) for x in _array)
            assert all(isinstance(x[0], str) for x in _array)

    def test_doc_to_target(self, task_class, limit):
        task = task_class
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        _array_target = [task.doc_to_target(doc) for doc in arr]
        if task._config.output_type == "multiple_choice":
            assert all(isinstance(label, int) for label in _array_target)
        # _array_text = [task.doc_to_text(doc) for doc in arr]
        # Not working
        # assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))

    def test_build_all_requests(self, task_class, limit):
        task_class.build_all_requests(rank=1, limit=limit, world_size=1)
        assert task_class.instances is not None

    # TODO: Add proper testing
    def test_construct_requests(self, task_class, limit):
        task = task_class
        arr = (
            list(islice(task.test_docs(), limit))
            if task.has_test_docs()
            else list(islice(task.validation_docs(), limit))
        )
        requests = [task.construct_requests(doc, task.doc_to_text(doc)) for doc in arr]
        # assert all(isinstance(doc, list) for doc in requests)
        assert len(requests) == limit if limit else True
# def test_create_choices(task_class):
......
import json
from typing import List
from lm_eval.utils import load_yaml_config
from pathlib import Path
import sys
from typing import Union
import os
# {{{CI}}}
# This is the path where the changed-files output for the tasks folder is stored
# FILE_PATH = file_path = ".github/outputs/tasks_all_changed_and_modified_files.txt"
# Reads a text file and returns a list of words.
# Used to read the changed-files output from tj-actions/changed-files.
def load_changed_files(file_path: str) -> List[str]:
    with open(file_path, "r") as f:
        content = f.read()
    words_list = content.split()
    sys.stdout.write(f"list of files: {words_list}")
    return words_list
@@ -30,3 +30,18 @@ def parser(full_path: List[str]) -> List[str]:
            path = [str(y) for y in Path(x).parent.glob("*.yaml")]
            _output |= {load_yaml_config(p)["task"] for p in path}
    return list(_output)
def new_tasks() -> Union[List[str], None]:
    FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
    if os.path.exists(FILENAME):
        # If the tasks folder has changed, read the list of changed files
        # from FILENAME and parse the YAML files to get the task names.
        return parser(load_changed_files(FILENAME))
    elif os.getenv("API") is not None:
        # Otherwise, if the API has changed, the CI workflow sets the env
        # variable API and we run a fixed set of representative tasks.
        return ["arc_easy", "hellaswag", "piqa", "wikitext"]
    else:
        # Neither condition holds: return None and let the caller fall back
        # to its default (arc_easy).
        return