Fix triviaqa task

8c419c83 · seopbo · 8cff2bea · 8c419c83
Commit 8c419c83 authored May 26, 2023 by seopbo
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 9 deletions

lm_eval/tasks/triviaqa.py lm_eval/tasks/triviaqa.py +17 -9

No files found.
--- a/lm_eval/tasks/triviaqa.py
+++ b/lm_eval/tasks/triviaqa.py
@@ -11,10 +11,10 @@ Homepage: https://nlp.cs.washington.edu/triviaqa/
 """
 import inspect
 import lm_eval.datasets.triviaqa.triviaqa
+import string
 from lm_eval.base import Task, rf
 from lm_eval.metrics import mean
 _CITATION = """
 @InProceedings{JoshiTriviaQA2017,
    author = {Joshi, Mandar and Choi, Eunsol and Weld, Daniel S. and Zettlemoyer, Luke},
@@ -74,19 +74,27 @@ class TriviaQA(Task):
        return ret
    def construct_requests(self, doc, ctx):
-        ret = []
-        for alias in self._remove_prefixes(doc["answer"]["aliases"]):
-            _, is_prediction = rf.loglikelihood(ctx, " " + alias)
-            ret.append(is_prediction)
-        return ret
+        """Uses RequestFactory to construct Requests and returns an iterable of
+        Requests which will be sent to the LM.
+        :param doc:
+                The document as returned from training_docs, validation_docs, or test_docs.
+        :param ctx: str
+                The context string, generated by fewshot_context. This includes the natural
+                language description, as well as the few shot examples, and the question
+                part of the document for `doc`.
+        """
+        continuation = rf.greedy_until(ctx, {"until": ["\n", ".", ","]})
+        return continuation
    def process_results(self, doc, results):
-        return {"acc": float(any(results))}
+        continuation = results[0].strip().lower().translate(str.maketrans('', '', string.punctuation))
+        list_of_candidates = [alias.lower().translate(str.maketrans('', '', string.punctuation)) for alias in self._remove_prefixes(doc["answer"]["aliases"])]
+        return {"em": float(continuation in list_of_candidates)}
    def aggregation(self):
        return {
-            "acc": mean,
+            "em": mean,
        }
    def higher_is_better(self):
-        return {"acc": True}
+        return {"em": True}