Unverified Commit 8809c5f1 authored by Leo Gao, committed by GitHub

Merge pull request #159 from jon-tow/logiqa-evaluation

Implement `LogiQA` data download and evaluation
Parents: 36485d7a, a187cd44
@@ -41,6 +41,7 @@ The goal of this project is to build a set of tools for evaluating LMs on typica
|qa4mre_2013 | | |✓ |acc |
|arc_easy |✓ |✓ |✓ |acc |
|arc_challenge |✓ |✓ |✓ |acc |
|logiqa |✓ |✓ |✓ |acc |
|hellaswag |✓ |✓ | |acc |
|openbookqa |✓ |✓ |✓ |acc |
|race |✓ |✓ |✓ |acc |
...
@@ -32,6 +32,7 @@ from . import mathqa
from . import ethics
from . import drop
from . import unscramble
from . import logiqa
########################################
# Translation tasks
@@ -102,6 +103,7 @@ TASK_REGISTRY = {
    "arc_easy": arc.ARCEasy,
    "arc_challenge": arc.ARCChallenge,
    # "quac": quac.QuAC, # not implemented yet
    "logiqa": logiqa.LogiQA,
    "hellaswag": hellaswag.HellaSwag, # not implemented yet
    "openbookqa": openbookqa.OpenBookQA,
    # "sat": sat.SATAnalogies, # not implemented yet
...
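With the import and registry entry in place, the harness can resolve the task by the name `logiqa`. Below is a minimal sketch of exercising it directly from Python; it assumes the registry module shown above is importable as `lm_eval.tasks` and that the task class takes no constructor arguments.

    from lm_eval.tasks import TASK_REGISTRY

    # Run from the repository root so the relative data/logiqa path resolves.
    task = TASK_REGISTRY["logiqa"]()
    task.download()  # returns immediately once data/logiqa exists

    # Render the zero-shot prompt for one validation document.
    doc = next(task.validation_docs())
    print(task.doc_to_text(doc))        # "Passage: ...\nQuestion: ...\nAnswer:"
    print(doc["choices"][doc["gold"]])  # the correct option string

The new `logiqa` task module added by this pull request follows.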
from lm_eval.base import MultipleChoiceTask
from best_download import download_file
from pathlib import Path


class LogiQA(MultipleChoiceTask):
    DATASET_PATH = Path("data/logiqa")

    def download(self):
        if self.DATASET_PATH.exists():
            return
        self.DATASET_PATH.mkdir(parents=True)
        base_url = "https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master"
        splits = [
            {"name": "Train", "checksum": "7d5bb1f58278e33b395744cd2ad8d7600faa0b3c4d615c659a44ec1181d759fa"},
            {"name": "Eval", "checksum": "4c49e6753b7262c001506b9151135abf722247035ab075dad93acdea5789c01f"},
            {"name": "Test", "checksum": "359acb78c37802208f7fde9e2f6574b8526527c63d6a336f90a53f1932cb4701"},
        ]
        for split in splits:
            file = self.DATASET_PATH / f"{split['name']}.txt"
            download_file(f"{base_url}/{split['name']}.txt", str(file), split["checksum"])

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def _convert_standard(self, doc):
        # Map a parsed record onto the MultipleChoiceTask format: a prompt
        # string, the list of option strings, and the index of the gold option.
        return {
            "query": "Passage: " + doc["passage"] + "\nQuestion: " + doc["question"] + "\nAnswer:",
            "choices": doc["options"],
            "gold": ["a", "b", "c", "d"].index(doc["answerKey"]),
        }

    def _load_docs(self, filename):
        def normalize(text):
            # Insert a space after each period and trim surrounding whitespace.
            return text.replace(".", ". ").strip()

        with open(filename, "r", encoding="utf-8") as f:
            docs = f.read().strip().split("\n\n")
        # Each blank-line-separated record is: answer key, passage, question,
        # then one option per line behind a two-character label (e.g. "A.").
        for rawdoc in docs:
            rawdoc = rawdoc.split("\n")
            doc = {
                "answerKey": rawdoc[0].strip(),
                "passage": normalize(rawdoc[1]),
                "question": normalize(rawdoc[2]),
                "options": [normalize(option[2:]) for option in rawdoc[3:]],
            }
            yield self._convert_standard(doc)

    def training_docs(self):
        return self._load_docs(self.DATASET_PATH / "Train.txt")

    def validation_docs(self):
        return self._load_docs(self.DATASET_PATH / "Eval.txt")

    def test_docs(self):
        return self._load_docs(self.DATASET_PATH / "Test.txt")

    def fewshot_description(self):
        # TODO: figure out actual description
        return ""

    def doc_to_text(self, doc):
        return doc["query"]
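The split files are plain text, parsed by `_load_docs` as blank-line-separated records: an answer key, a passage, a question, and one option per line behind a two-character label. The sketch below traces an invented record through the parser to show the document shape the task yields; the record wording is illustrative only, and the `lm_eval.tasks.logiqa` import path is assumed from the registry hunk above.

    from pathlib import Path
    import tempfile

    from lm_eval.tasks.logiqa import LogiQA

    # Hypothetical record in the layout _load_docs expects.
    raw_record = (
        "b\n"
        "All squares are rectangles.Some rectangles are not rhombi.\n"
        "Which of the following must be true?\n"
        "A.All rectangles are squares.\n"
        "B.Some rectangles are squares.\n"
        "C.No rhombus is a square.\n"
        "D.All rhombi are squares.\n"
    )

    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "Sample.txt"
        path.write_text(raw_record, encoding="utf-8")
        # Build an instance without running __init__; _load_docs only needs
        # _convert_standard, so the real data/ directory is never touched.
        task = LogiQA.__new__(LogiQA)
        doc = next(task._load_docs(path))

    print(doc["query"])    # "Passage: ...\nQuestion: ...\nAnswer:"
    print(doc["choices"])  # the four options with their "A." labels stripped
    print(doc["gold"])     # 1, i.e. answer key "b"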