format

11be7c93 · lintangsutawika · d1c189ea · 11be7c93 · 11be7c93 · 11be7c93
Commit 11be7c93 authored Sep 18, 2023 by lintangsutawika
4 changed files
--- a/lm_eval/tasks/mutual/utils.py
+++ b/lm_eval/tasks/mutual/utils.py
 import numpy as np
-def process_docs(dataset):
+def process_docs(dataset):
    def _detokenize(text):
        text = text.replace(" '", "'")
        text = text.replace(" \n", "\n")

--- a/lm_eval/tasks/qasper/README.md
+++ b/lm_eval/tasks/qasper/README.md
@@ -46,7 +46,7 @@ Homepage: https://allenai.org/data/qasper
 #### Tasks
-* `qasper_bool`: Multiple choice task that evaluates the task with `answer_type="bool"` 
+* `qasper_bool`: Multiple choice task that evaluates the task with `answer_type="bool"`
 * `qasper_freeform`: Greedy generation task that evaluates the samples from the task with `answer_type="free form answer"`
 ### Checklist

--- a/lm_eval/tasks/qasper/metrics.py
+++ b/lm_eval/tasks/qasper/metrics.py
+import re
 import string
+from collections import Counter
 def normalize_answer(s):
    """
    Taken from the official evaluation script for v1.1 of the SQuAD dataset.
@@ -21,6 +25,7 @@ def normalize_answer(s):
    return white_space_fix(remove_articles(remove_punc(lower(s))))
 def f1_abstractive(predictions, references):
    """
    Taken from the official evaluation script for v1.1 of the SQuAD dataset.

--- a/lm_eval/tasks/qasper/utils.py
+++ b/lm_eval/tasks/qasper/utils.py
 from datasets import Dataset
 from functools import partial
 def process_docs(dataset, set_answer_type="bool"):
-    FEATURES = [
+    FEATURES = ["title", "abstract", "question", "answer", "answer_type"]
-        "title",
-        "abstract",
-        "question",
-        "answer",
-        "answer_type"
-        ]
    def _categorise_answer(answer_blob):
        if answer_blob["unanswerable"]:
@@ -62,12 +57,16 @@ def process_docs(dataset, set_answer_type="bool"):
        return obs_list
-    dataset = dataset.map(_flatten, remove_columns=[key for key in dataset.features.keys() if key not in FEATURES])
+    dataset = dataset.map(
+        _flatten,
+        remove_columns=[key for key in dataset.features.keys() if key not in FEATURES],
+    )
    new_dataset = {}
    for key in dataset.features.keys():
        new_dataset[key] = [x for row in dataset[key] for x in row]
    return Dataset.from_dict(new_dataset)
 process_docs_bool = partial(process_docs, set_answer_type="bool")
 process_docs_freeform = partial(process_docs, set_answer_type="free form answer")