gaoqiong / lm-evaluation-harness · Commits

Commit 9f1cb1e7
authored May 23, 2023 by lintangsutawika

    merged with conflict resolved

Parents: 8f859cd2, 0375b792
Changes: 65

Showing 5 changed files with 128 additions and 41 deletions (+128 / -41)
Files changed:
  lm_eval/tasks/super_glue/wsc/t5-prompt.yaml   +16   -0
  lm_eval/tasks/wikitext.py                      +4   -1
  lm_eval/utils.py                              +81   -9
  main.py                                       +26  -30
  setup.py                                       +1   -1
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml (new file, 0 → 100644) · View file @ 9f1cb1e7

group:
  - super-glue-t5-prompt
task: t5-prompt
reference: "From Raffel et. al. 2019"
dataset_path: super_glue
dataset_name: wsc
training_split: train
validation_split: validation
doc_to_text: !function "preprocess_wsc.doc_to_text"
doc_to_target: "{% set answer_choices = ['False', 'True'] %}{{answer_choices[label]}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
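Editor's note: the doc_to_target field above is a Jinja2 template. A minimal sketch (not part of this commit) of how such a template renders against a document dict; the sample doc below is a made-up WSC-style example with a label field.

    # Illustrative sketch only: rendering the doc_to_target template above with
    # Jinja2. The sample doc is hypothetical; the harness supplies real WSC docs.
    from jinja2 import BaseLoader, Environment, StrictUndefined

    env = Environment(loader=BaseLoader, undefined=StrictUndefined)
    template = "{% set answer_choices = ['False', 'True'] %}{{answer_choices[label]}}"

    doc = {"label": 1}  # made-up example; label indexes into answer_choices
    print(env.from_string(template).render(**doc))  # prints: True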
lm_eval/tasks/wikitext.py · View file @ 9f1cb1e7

...
@@ -10,8 +10,10 @@ NOTE: This `Task` is based on WikiText-2.
Homepage: https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/
"""
import re

from lm_eval.api.task import PerplexityTask, register_task
from lm_eval.api.task import PerplexityTask
from lm_eval.api.register import register_task, register_group


_CITATION = """
@misc{merity2016pointer,
...
@@ -58,6 +60,7 @@ def wikitext_detokenizer(string):
    return string


@register_task("wikitext")
class WikiText(PerplexityTask):
    VERSION = "2.0"
...
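Editor's note: the change above moves registration out of lm_eval.api.task into lm_eval.api.register, and WikiText now registers itself with a decorator. A hedged sketch of the same pattern for a hypothetical task; the class and task name below are invented and assume this branch's modules are importable.

    # Hypothetical example following the decorator pattern shown above; the
    # task name and class are illustrative only, not part of the commit.
    from lm_eval.api.task import PerplexityTask
    from lm_eval.api.register import register_task


    @register_task("my_perplexity_task")  # invented task name
    class MyPerplexityTask(PerplexityTask):
        VERSION = "0.0"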
lm_eval/utils.py · View file @ 9f1cb1e7

import os
import pathlib
import re
import collections
import functools
import inspect
import sys
import yaml
import inspect
import pathlib
import functools
import subprocess
import collections
import importlib.util

from typing import List

from omegaconf import OmegaConf

from jinja2 import BaseLoader, Environment, StrictUndefined

from itertools import islice


class ExitCodeError(Exception):
    pass
...
@@ -146,7 +151,6 @@ class Reorderer:
        return res


def make_table(result_dict):
    """Generate table of results."""
    from pytablewriter import MarkdownTableWriter, LatexTableWriter
...
@@ -240,6 +244,74 @@ def run_task_tests(task_list: List[str]):
    )


def get_git_commit_hash():
    """
    Gets the git commit hash of your current repo (if it exists).
    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42
    """
    try:
        git_hash = subprocess.check_output(["git", "describe", "--always"]).strip()
        git_hash = git_hash.decode()
    except subprocess.CalledProcessError:
        git_hash = None
    return git_hash


def import_function(loader, node):
    function_name = loader.construct_scalar(node)
    yaml_path = os.path.dirname(loader.name)

    module_name, function_name = function_name.split(".")
    module_path = os.path.join(yaml_path, "{}.py".format(module_name))

    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    function = getattr(module, function_name)
    return function


# Add the import_function constructor to the YAML loader
yaml.add_constructor("!function", import_function)


def load_yaml_config(yaml_path):
    with open(yaml_path, "rb") as file:
        yaml_config = yaml.full_load(file)
        yaml_dir = os.path.dirname(yaml_path)

        if "include" in yaml_config:
            include_path = yaml_config["include"]
            del yaml_config["include"]

            if type(include_path) == str:
                include_path = [include_path]

            # Load from the last one first
            include_path.reverse()
            final_yaml_config = {}
            for path in include_path:
                # Assumes that path is a full path.
                # If not found, assume the included yaml
                # is in the same dir as the original yaml
                if not os.path.isfile(path):
                    path = os.path.join(yaml_dir, path)

                try:
                    included_yaml_config = load_yaml_config(path)
                    final_yaml_config.update(included_yaml_config)
                except Exception as ex:
                    # If failed to load, ignore
                    raise ex

            final_yaml_config.update(yaml_config)
            return final_yaml_config

        return yaml_config


env = Environment(loader=BaseLoader, undefined=StrictUndefined)
...
@@ -248,10 +320,10 @@ def apply_template(template, doc):
    return rtemplate.render(**doc)


def create_iterator(raw_iterator, rank, world_size, limit=None):
    """
    Method for creating a (potentially) sliced and limited
    iterator from a raw document iterator. Used for splitting data
def create_iterator(raw_iterator, rank, world_size, limit=None):
    """
    Method for creating a (potentially) sliced and limited
    iterator from a raw document iterator. Used for splitting data
    among ranks in multigpu setting or only pulling a sample of documents
    """
    return islice(raw_iterator, rank, limit, world_size)
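Editor's note: a self-contained sketch (not from the commit) of how the "include" handling in load_yaml_config above merges configs: the included file is loaded first and then overridden by the including file. File names and keys are made up.

    # Hedged usage sketch for load_yaml_config's "include" merging shown above.
    # File names and keys are hypothetical.
    import pathlib
    import tempfile

    from lm_eval import utils  # assumes this branch's lm_eval/utils.py is installed

    tmp = pathlib.Path(tempfile.mkdtemp())
    (tmp / "base.yaml").write_text("num_fewshot: 0\nbatch_size: 8\n")
    (tmp / "task.yaml").write_text("include: base.yaml\nnum_fewshot: 5\n")

    config = utils.load_yaml_config(str(tmp / "task.yaml"))
    print(config)  # expected: {'num_fewshot': 5, 'batch_size': 8}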
main.py · View file @ 9f1cb1e7

import argparse
import os
import json
import logging
import fnmatch
import yaml
import argparse

from lm_eval import tasks, evaluator
# import lm_eval.api.task
from lm_eval.api.task import ConfigurableTask, TASK_REGISTRY
from lm_eval import evaluator, utils
from lm_eval.tasks import ALL_TASKS
from lm_eval.logger import eval_logger

logging.getLogger("openai").setLevel(logging.WARNING)

os.environ["TOKENIZERS_PARALLELISM"] = "false"

ALL_TASKS = sorted(list(TASK_REGISTRY))


class MultiChoice:
    def __init__(self, choices):
        self.choices = choices
        print(f"{ALL_TASKS} is this")

    # Simple wildcard support (linux filename patterns)
    def __contains__(self, values):
        for value in values.split(","):
            if len(fnmatch.filter(self.choices, value)) == 0:
                return False
                eval_logger.warning("{} is not in task list.".format(value))
        # eval_logger.info(f"{choices} is this")
        return True
...
@@ -46,7 +44,6 @@ def parse_args():
    parser.add_argument("--decontamination_ngrams_path", default=None)
    parser.add_argument("--description_dict_path", default=None)
    parser.add_argument("--check_integrity", action="store_true")
    return parser.parse_args()
...
@@ -64,30 +61,29 @@ def main():
    args = parse_args()

    if args.limit:
        print(
            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
        eval_logger.warning(
            " --limit SHOULD ONLY BE USED FOR TESTING."
            "REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
        )

    if args.tasks is None:
        if args.config:
            task_names = []
            for config_files in args.config.split(","):
                with open(config_files, "r") as f:
                    config = yaml.load(f, yaml.Loader)
                    if args.num_fewshot != 0:
                        config["num_fewshot"] = args.num_fewshot
                    if args.batch_size != None:
                        config["batch_size"] = args.batch_size
    if args.tasks is not None:
        if os.path.isdir(args.tasks):
            import glob

            task_names = []
            yaml_path = os.path.join(args.tasks, "*.yaml")
            for yaml_file in glob.glob(yaml_path):
                config = utils.load_yaml_config(yaml_file)
                task_names.append(config)
        else:
            task_names = ALL_TASKS
    else:
        task_names = pattern_match(args.tasks.split(","), ALL_TASKS)

    print(f"Selected Tasks: {task_names}")
            tasks_list = args.tasks.split(",")
            task_names = pattern_match(tasks_list, ALL_TASKS)
            for task in [task for task in tasks_list if task not in task_names]:
                if os.path.isfile(task):
                    config = utils.load_yaml_config(task)
                    task_names.append(config)

    eval_logger.info(f"Selected Tasks: {task_names}")

    results = evaluator.simple_evaluate(
        model=args.model,
...
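Editor's note: main() above resolves --tasks through pattern_match, whose definition lies outside this diff. Below is a stand-in with the same fnmatch-based wildcard behaviour, for illustration only; it is not the project's exact code and the task names are invented.

    # Stand-in sketch of the wildcard task matching used by main() above.
    import fnmatch

    ALL_TASKS = ["wikitext", "wsc", "wsc273", "winogrande"]  # invented names


    def pattern_match(patterns, source_list):
        matched = set()
        for pattern in patterns:
            for name in fnmatch.filter(source_list, pattern):
                matched.add(name)
        return sorted(matched)


    print(pattern_match(["wsc*", "wikitext"], ALL_TASKS))
    # prints: ['wikitext', 'wsc', 'wsc273']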
setup.py · View file @ 9f1cb1e7

...
@@ -43,6 +43,6 @@ setuptools.setup(
    extras_require={
        "dev": ["black", "flake8", "pre-commit", "pytest", "pytest-cov"],
        "multilingual": ["nagisa>=0.2.7", "jieba>=0.42.1"],
        "sentencepiece": ["sentencepiece>=0.1.98", "protobuf>=4.22.1"]
        "sentencepiece": ["sentencepiece>=0.1.98", "protobuf>=4.22.1"],
    },
)