Commit 51f27158 authored by lintangsutawika's avatar lintangsutawika
Browse files

update with merge

parents 924c9790 f5408b6b
from importlib.util import find_spec
from pathlib import Path
from lm_eval.api.registry import register_model
from lm_eval.models.huggingface import HFLM
@register_model("openvino")
class OptimumLM(HFLM):
    """
    Optimum Intel provides a simple interface to optimize Transformer models and convert them to \
    OpenVINO™ Intermediate Representation (IR) format to accelerate end-to-end pipelines on \
    Intel® architectures using OpenVINO™ runtime.
    """

    def __init__(
        self,
        device="cpu",
        **kwargs,
    ) -> None:
        """Create an OpenVINO-backed LM.

        Args:
            device: OpenVINO device string (e.g. "cpu"); forwarded to HFLM.
            **kwargs: remaining HFLM keyword arguments. Only the "causal"
                backend is supported.
        """
        if "backend" in kwargs:
            # optimum currently only supports causal models
            assert (
                kwargs["backend"] == "causal"
            ), "Currently, only OVModelForCausalLM is supported."
        self.openvino_device = device
        super().__init__(
            device=self.openvino_device,
            # pop (not get): with get(), a caller-supplied "backend" stayed in
            # **kwargs and was passed twice, raising
            # "got multiple values for keyword argument 'backend'".
            backend=kwargs.pop("backend", "causal"),
            **kwargs,
        )

    def _create_model(
        self,
        pretrained: str,
        revision="main",
        dtype="auto",  # NOTE(review): currently unused; kept for HFLM signature compatibility
        trust_remote_code=False,
        **kwargs,
    ) -> None:
        """Load (exporting to IR first if needed) an OVModelForCausalLM into ``self._model``.

        Raises:
            Exception: if the `optimum` package is not installed.
        """
        if not find_spec("optimum"):
            raise Exception(
                "package `optimum` is not installed. Please install it via `pip install optimum[openvino]`"
            )
        else:
            from optimum.intel.openvino import OVModelForCausalLM

        model_kwargs = kwargs if kwargs else {}
        # A pre-existing IR file means the model is already converted;
        # otherwise ask optimum to export it on the fly.
        model_file = Path(pretrained) / "openvino_model.xml"
        export = not model_file.exists()
        # Assign on model_kwargs, not kwargs: when kwargs is empty the two are
        # distinct dicts, and writing to kwargs silently dropped ov_config
        # from the from_pretrained() call below.
        model_kwargs["ov_config"] = {
            "PERFORMANCE_HINT": "LATENCY",
            "NUM_STREAMS": "1",
            "CACHE_DIR": "",
        }
        self._model = OVModelForCausalLM.from_pretrained(
            pretrained,
            revision=revision,
            trust_remote_code=trust_remote_code,
            export=export,
            device=self.openvino_device.upper(),
            **model_kwargs,
        )
......@@ -170,18 +170,12 @@ class VLLM(LM):
stop: Optional[List[str]] = None,
**kwargs,
):
if "do_sample" in kwargs.keys():
kwargs.pop("do_sample")
if generate:
# hf defaults
kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
kwargs["spaces_between_special_tokens"] = kwargs.get(
"spaces_between_special_tokens", False
)
kwargs = self.modify_gen_kwargs(kwargs)
sampling_params = SamplingParams(max_tokens=max_tokens, stop=stop, **kwargs)
else:
sampling_params = SamplingParams(
temperature=0, prompt_logprobs=2, max_tokens=1
temperature=0, prompt_logprobs=1, max_tokens=1
)
if self.data_parallel_size > 1:
requests = [list(x) for x in divide(requests, self.data_parallel_size)]
......@@ -438,3 +432,16 @@ class VLLM(LM):
break
return continuation_logprobs, is_greedy
@staticmethod
def modify_gen_kwargs(kwargs: dict) -> dict:
    """Normalize HF-style generation kwargs for vLLM SamplingParams.

    Mutates *kwargs* in place and returns it.
    """
    # sampling_params: greedy decoding when sampling is explicitly disabled
    # or when no temperature was provided.
    sample_flag = kwargs.pop("do_sample", None)
    if sample_flag is False or "temperature" not in kwargs:
        kwargs["temperature"] = 0.0
    # hf defaults for special-token post-processing
    kwargs.setdefault("skip_special_tokens", False)
    kwargs.setdefault("spaces_between_special_tokens", False)
    return kwargs
......@@ -117,7 +117,7 @@ class PromptString:
# TODO need a way to process doc_to_choice
if "doc_to_choice" in self.prompt_string:
raise "Not yet implemented to accept doc_to_choice"
raise Exception("Not yet implemented to accept doc_to_choice")
text_string = utils.apply_template(doc_to_text, doc)
target_string = utils.apply_template(doc_to_target, doc)
......
......@@ -43,7 +43,7 @@ def register_configurable_task(config: Dict[str, str]) -> int:
if "group" in config:
if config["group"] == config["task"]:
raise ValueError("task and group name cannot be the same")
elif type(config["group"]) == str:
elif isinstance(config["group"], str):
group_name = [config["group"]]
else:
group_name = config["group"]
......@@ -57,8 +57,8 @@ def register_configurable_task(config: Dict[str, str]) -> int:
def register_configurable_group(config: Dict[str, str], yaml_path: str = None) -> int:
group = config["group"]
all_task_list = config["task"]
config_list = [task for task in all_task_list if type(task) != str]
task_list = [task for task in all_task_list if type(task) == str]
config_list = [task for task in all_task_list if not isinstance(task, str)]
task_list = [task for task in all_task_list if isinstance(task, str)]
for task_config in config_list:
......@@ -67,12 +67,12 @@ def register_configurable_group(config: Dict[str, str], yaml_path: str = None) -
if "task" in task_config:
task_name = task_config["task"]
if task_name in ALL_TASKS:
task_obj = get_task_dict(task_name)[task_name]
if type(task_obj) == tuple:
task_obj = TASK_REGISTRY[task_name]
if isinstance(task_obj, tuple):
_, task_obj = task_obj
if task_obj is not None:
base_config = task_obj._config.to_dict(keep_callable=True)
base_config = task_obj.CONFIG.to_dict(keep_callable=True)
task_name_config["task"] = f"{group}_{task_name}"
task_config = utils.load_yaml_config(yaml_path, task_config)
......@@ -166,10 +166,10 @@ def include_task_folder(task_dir: str, register_task: bool = True) -> None:
)
for config in all_configs:
if register_task:
if type(config["task"]) == str:
if isinstance(config["task"], str):
register_configurable_task(config)
else:
if type(config["task"]) == list:
if isinstance(config["task"], list):
register_configurable_group(config, yaml_path)
# Log this silently and show it only when
......@@ -243,7 +243,7 @@ def get_task_dict(task_name_list: List[Union[str, Dict, Task]], **kwargs):
task_name_from_config_dict = {}
task_name_from_object_dict = {}
if type(task_name_list) != list:
if not isinstance(task_name_list, list):
task_name_list = [task_name_list]
for task_element in task_name_list:
......
......@@ -28,7 +28,7 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f:
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
base_doc_to_text = "Q: {{input}}\nA:"
......@@ -70,7 +70,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"/{task}.yaml"
utils.eval_logger.info(f"Saving yaml for subset {task} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
yaml_dict,
yaml_file,
......
......@@ -27,13 +27,13 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f:
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
if args.cot_prompt_path is not None:
import json
with open(args.cot_prompt_path) as f:
with open(args.cot_prompt_path, encoding="utf-8") as f:
cot_file = json.load(f)
def query():
......@@ -54,7 +54,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"_{lang}.yaml"
logging.info(f"Saving yaml for subset {lang} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
yaml_dict,
yaml_file,
......
......@@ -181,7 +181,7 @@ def main() -> None:
for task in all_subtasks:
file_name = f"{task}.yaml"
try:
with open(f"{path}/{file_name}", "w") as f:
with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
......
......@@ -75,7 +75,7 @@ def main() -> None:
for task in all_subtasks:
file_name = f"{task}.yaml"
try:
with open(f"{file_name}", "w") as f:
with open(f"{file_name}", "w", encoding="utf-8") as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
......
......@@ -79,13 +79,13 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f:
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
if args.cot_prompt_path is not None:
import json
with open(args.cot_prompt_path) as f:
with open(args.cot_prompt_path, encoding="utf-8") as f:
cot_file = json.load(f)
for subject_eng, subject_zh in tqdm(SUBJECTS.items()):
......@@ -107,7 +107,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
yaml_dict,
yaml_file,
......
......@@ -94,13 +94,13 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f:
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
if args.cot_prompt_path is not None:
import json
with open(args.cot_prompt_path) as f:
with open(args.cot_prompt_path, encoding="utf-8") as f:
cot_file = json.load(f)
for subject_eng, subject_zh in tqdm(SUBJECTS.items()):
......@@ -122,7 +122,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
yaml_dict,
yaml_file,
......
......@@ -184,7 +184,7 @@ def splitPuncts(line):
def computeMaps(predictions, goldfile):
predictionMap: Dict[str, list] = {}
goldMap: Dict[str, list] = {}
gf = open(goldfile, "r")
gf = open(goldfile, "r", encoding="utf-8")
for row in predictions:
cols = row.strip().split("\t")
......
......@@ -25,7 +25,7 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f:
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
for name in tqdm(SUBSETS):
......@@ -39,7 +39,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"_{name.lower()}.yaml"
eval_logger.info(f"Saving yaml for subset {name} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
yaml_dict,
yaml_file,
......
......@@ -24,6 +24,7 @@ generation_kwargs:
- "\n\n"
- "Question:"
do_sample: false
temperature: 0.0
repeats: 1
num_fewshot: 5
filter_list:
......
......@@ -85,13 +85,13 @@ if __name__ == "__main__":
# get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
base_yaml_name = os.path.split(args.base_yaml_path)[-1]
with open(args.base_yaml_path) as f:
with open(args.base_yaml_path, encoding="utf-8") as f:
base_yaml = yaml.full_load(f)
if args.cot_prompt_path is not None:
import json
with open(args.cot_prompt_path) as f:
with open(args.cot_prompt_path, encoding="utf-8") as f:
cot_file = json.load(f)
ALL_CATEGORIES = []
......@@ -120,7 +120,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"_{subject}.yaml"
eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
yaml_dict,
yaml_file,
......@@ -142,7 +142,7 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + ".yaml"
eval_logger.info(f"Saving benchmark config to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
with open(file_save_path, "w", encoding="utf-8") as yaml_file:
yaml.dump(
{
"group": f"mmlu_{args.task_prefix}"
......
......@@ -9,7 +9,7 @@ def main() -> None:
for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
file_name = f"{task}.yaml"
try:
with open(f"{file_name}", "w") as f:
with open(f"{file_name}", "w", encoding="utf-8") as f:
f.write("# Generated by _generate_configs.py\n")
yaml.dump(
{
......
......@@ -9,7 +9,7 @@ def main() -> None:
for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
file_name = f"{task}.yaml"
try:
with open(f"{file_name}", "w") as f:
with open(f"{file_name}", "w", encoding="utf-8") as f:
f.write("# Generated by _generate_configs.py\n")
yaml.dump(
{
......
......@@ -50,7 +50,7 @@ def process_docs(dataset, set_answer_type="bool"):
obs_list["abstract"].append(abstract)
obs_list["question"].append(question)
obs_list["answer_type"].append(answer_type)
if type(answer) == list:
if isinstance(answer, list):
answer = ", ".join(answer)
obs_list["answer"].append(answer)
......
......@@ -7,6 +7,7 @@ training_split: train
validation_split: validation
output_type: generate_until
doc_to_text: !function "t5_utils.doc_to_text"
process_results: !function "t5_utils.process_results"
doc_to_target: label
generation_kwargs:
until:
......@@ -15,9 +16,5 @@ metric_list:
- metric: accuracy
aggregation: mean
higher_is_better: true
filter_list:
- name: "wsc_postprocessor"
filter:
- function: !function t5_utils.WSCPostprocess
metadata:
version: 0.0
version: 1.0
import re
from lm_eval.api.filter import Filter
from typing import List
def doc_to_text(x):
text = re.sub(r" X ", " *" + x["span2_text"] + "* ", _wsc_inputs(x))
......@@ -24,14 +23,14 @@ def _wsc_inputs(x):
[
" ".join(words[:pronoun_index]),
"X",
" ".join(words[pronoun_index + 1 :]),
" ".join(words[pronoun_index + 1:]),
]
)
# Handle some special cases.
if (
x["text"]
== 'The boy continued to whip the pony , and eventually the pony threw him over. John laughed out quite loud. "Good for him," he said. '
x["text"]
== 'The boy continued to whip the pony , and eventually the pony threw him over. John laughed out quite loud. "Good for him," he said. '
):
return (
"The boy continued to whip the pony , and eventually the pony threw "
......@@ -40,8 +39,8 @@ def _wsc_inputs(x):
# Using the span2_index, we get 'use' instead of 'it'.
if (
x["text"]
== "When they had eventually calmed down a bit , and had gotten home, Mr. Farley put the magic pebble in an iron safe . Some day they might want to use it , but really for now, what more could they wish for?"
x["text"]
== "When they had eventually calmed down a bit , and had gotten home, Mr. Farley put the magic pebble in an iron safe . Some day they might want to use it , but really for now, what more could they wish for?"
):
return (
"When they had eventually calmed down a bit , and had gotten home, "
......@@ -52,56 +51,53 @@ def _wsc_inputs(x):
return create_input()
class WSCPostprocess(Filter):
    """Filter deciding whether each model response names the gold referent (span1)."""

    def __init__(self, **kwargs):
        # Words ignored when comparing a predicted referent to the gold span.
        self.determiners = {
            "a", "an", "few", "her", "his", "each", "every", "many", "much",
            "my", "our", "some", "that", "the", "their", "these", "this",
            "those", "which", "whose", "your",
        }

    def clean(self, s):
        """Ignore capitalization and determiners."""
        tokens = s.strip().lower().split(" ")
        return " ".join(tok for tok in tokens if tok not in self.determiners)

    def apply(self, resps, docs):
        filtered_resps = []
        for resp, gold in zip(resps, docs["span1_text"]):
            prediction = self.clean(resp[0])
            reference = self.clean(gold)
            if ("'" in prediction) != ("'" in reference):
                # Possessive mismatch, e.g. predicting "Bob's hat" for referent "Bob".
                predicted_referent = False
            else:
                prediction_words = set(prediction.split(" "))
                referent_words = set(reference.split(" "))
                # Accept containment either way, so "fuzzy bunny" matches "bunny".
                predicted_referent = (
                    prediction_words <= referent_words
                    or referent_words <= prediction_words
                )
            filtered_resps.append(predicted_referent)
        return filtered_resps
# Words ignored when comparing a predicted referent against the gold span.
DETERMINERS = set(
    "a an few her his each every many much my our some that the "
    "their these this those which whose your".split()
)


def clean(s: str) -> str:
    """Ignore capitalization and determiners."""
    tokens = s.strip().lower().split(" ")
    return " ".join(tok for tok in tokens if tok not in DETERMINERS)


def process_results(docs: dict, resps: List):
    """Score one WSC doc: accuracy 1.0 iff the predicted-referent verdict matches the label."""
    prediction = clean(resps[0])
    reference = clean(docs["span1_text"])
    if ("'" in prediction) != ("'" in reference):
        # Possessive mismatch, e.g. predicting "Bob's hat" for referent "Bob".
        predicted_referent = False
    else:
        prediction_words = set(prediction.split(" "))
        referent_words = set(reference.split(" "))
        # Accept containment either way, so "fuzzy bunny" matches "bunny".
        predicted_referent = (
            prediction_words <= referent_words
            or referent_words <= prediction_words
        )
    acc = 1.0 if predicted_referent == docs["label"] else 0.0
    return {"accuracy": acc}
......@@ -51,7 +51,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
for lang in LANGUAGES:
file_name = f"xwinograd_{lang}.yaml"
try:
with open(f"{output_dir}/{file_name}", "w" if overwrite else "x") as f:
with open(f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf-8") as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment