Merge branch 'big-refactor' into mypy

4721379e · Hailey Schoelkopf · GitHub · a551c789 · cc7828dd · 4721379e
Unverified Commit 4721379e authored Sep 05, 2023 by Hailey Schoelkopf Committed by GitHub Sep 05, 2023
Showing with 53 additions and 1 deletion

lm_eval/tasks/wsc273/default.yaml +15 -0

lm_eval/tasks/wsc273/utils.py +36 -0

main.py +1 -0

setup.py +1 -1

No files found.
--- a/lm_eval/tasks/wsc273/default.yaml
+++ b/lm_eval/tasks/wsc273/default.yaml
+# Task configuration for `wsc273`, read from the `winograd_wsc` dataset.
+task: wsc273
+dataset_path: winograd_wsc
+dataset_name: wsc273
+output_type: multiple_choice
+test_split: test
+doc_to_text: label
+# Documents are pre-processed by `process_doc` from the `utils` module.
+process_docs: !function utils.process_doc
+# Target: the part of `text` that follows the pronoun. The Jinja filter binds
+# tighter than `+`, so the index is `pronoun_loc + (pronoun | length)`.
+doc_to_target: "{% set index = pronoun_loc + pronoun | length %}{{text[index:]}}"
+# Choices: the text before the pronoun, followed by each of the two options.
+doc_to_choice: "{% set template = text[:pronoun_loc] %}{{[template+options[0], template+options[1]]}}"
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
--- a/lm_eval/tasks/wsc273/utils.py
+++ b/lm_eval/tasks/wsc273/utils.py
+# Capitalized words that may open an answer option. `__normalize_option`
+# lowercases an option's first word when it is listed here and the pronoun
+# being replaced does not start a sentence. Despite the name, the list also
+# contains the articles "A", "An" and "The".
+upper_pronouns = [
+    "A",
+    "An",
+    "The",
+    "She",
+    "He",
+    "It",
+    "They",
+    "My",
+    "His",
+    "Her",
+    "Their",
+]
+def process_doc(dataset):
+    """Return `dataset.map(...)` run with a per-document clean-up function.
+
+    For each document, every two-space run in `text` is replaced by a single
+    space and the first two entries of `options` are passed through
+    `__normalize_option`. The document is modified in place and returned.
+    """
+    def _clean(example):
+        # The HF implementation of `wsc273` is not `partial evaluation` friendly.
+        # NOTE(review): `pronoun_loc` is not adjusted after this replacement;
+        # confirm no double space can occur before the pronoun.
+        example["text"] = example["text"].replace("  ", " ")
+        for slot in (0, 1):
+            example["options"][slot] = __normalize_option(
+                example, example["options"][slot]
+            )
+        return example
+    return dataset.map(_clean)
+def __normalize_option(doc, option):
+    """Adapt an answer option so it can stand in for the pronoun in `doc`.
+
+    Args:
+        doc: Mapping providing the keys `pronoun`, `text` and `pronoun_loc`.
+        option: Candidate replacement text for the pronoun.
+
+    Returns:
+        The option with `'s` appended when the pronoun is a possessive
+        determiner, and with its first word lowercased when that word is
+        listed in `upper_pronouns` and the pronoun does not open a sentence.
+    """
+    # Append `'s` to possessive determiner based options.
+    if doc["pronoun"].lower() in ("my", "his", "her", "our", "their"):
+        option += "'s"
+    words = option.split()
+    if not words:
+        # Nothing to lowercase; avoids an IndexError on an empty option.
+        return option
+    first_word = words[0]
+    # A pronoun within the first two characters has no preceding sentence, so
+    # it opens one. Checking this explicitly keeps `pronoun_loc - 2` from
+    # going negative and wrapping around to the end of the text.
+    pronoun_loc = doc["pronoun_loc"]
+    start_of_sentence = pronoun_loc < 2 or doc["text"][pronoun_loc - 2] == "."
+    if not start_of_sentence and first_word in upper_pronouns:
+        # Lowercase only the leading word: an unbounded `str.replace` would
+        # also rewrite every later occurrence (e.g. the "A" in a name).
+        return option.replace(first_word, first_word.lower(), 1)
+    return option
--- a/main.py
+++ b/main.py
@@ -11,6 +11,7 @@ from lm_eval import evaluator, utils
 from lm_eval.api.registry import ALL_TASKS
 from lm_eval.logger import eval_logger
 from lm_eval.tasks import include_task_folder
+from lm_eval.benchmarks import include_benchmarks
 os.environ["TOKENIZERS_PARALLELISM"] = "false"

--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@ setuptools.setup(
    ],
    python_requires=">=3.9",
    install_requires=[
-        "accelerate>=0.18.0",
+        "accelerate>=0.21.0",
        "evaluate",
        "datasets>=2.0.0",
        "evaluate>=0.4.0",