Unverified Commit 8809c5f1 authored by Leo Gao, committed by GitHub

Merge pull request #159 from jon-tow/logiqa-evaluation

Implement `LogiQA` data download and evaluation
Parents: 36485d7a, a187cd44
@@ -41,6 +41,7 @@ The goal of this project is to build a set of tools for evaluating LMs on typica
|qa4mre_2013 | | |✓ |acc |
|arc_easy |✓ |✓ |✓ |acc |
|arc_challenge |✓ |✓ |✓ |acc |
|logiqa |✓ |✓ |✓ |acc |
|hellaswag |✓ |✓ | |acc |
|openbookqa |✓ |✓ |✓ |acc |
|race |✓ |✓ |✓ |acc |
...
@@ -32,6 +32,7 @@ from . import mathqa
from . import ethics
from . import drop
from . import unscramble
from . import logiqa
########################################
# Translation tasks
@@ -102,6 +103,7 @@ TASK_REGISTRY = {
    "arc_easy": arc.ARCEasy,
    "arc_challenge": arc.ARCChallenge,
    # "quac": quac.QuAC, # not implemented yet
    "logiqa": logiqa.LogiQA,
    "hellaswag": hellaswag.HellaSwag, # not implemented yet
    "openbookqa": openbookqa.OpenBookQA,
    # "sat": sat.SATAnalogies, # not implemented yet
...
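With the import and registry entry in place, the harness can resolve the task by the name `logiqa`. Below is a minimal sketch of exercising it directly from Python; it assumes the registry module shown above is importable as `lm_eval.tasks` and that the task class takes no constructor arguments.

    from lm_eval.tasks import TASK_REGISTRY

    # Run from the repository root so the relative data/logiqa path resolves.
    task = TASK_REGISTRY["logiqa"]()
    task.download()  # returns immediately once data/logiqa exists

    # Render the zero-shot prompt for one validation document.
    doc = next(task.validation_docs())
    print(task.doc_to_text(doc))        # "Passage: ...\nQuestion: ...\nAnswer:"
    print(doc["choices"][doc["gold"]])  # the correct option string

The new `logiqa` task module added by this pull request follows.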
from lm_eval.base import MultipleChoiceTask
from best_download import download_file
from pathlib import Path


class LogiQA(MultipleChoiceTask):
    DATASET_PATH = Path("data/logiqa")

    def download(self):
        if self.DATASET_PATH.exists():
            return
        self.DATASET_PATH.mkdir(parents=True)
        base_url = "https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master"
        splits = [
            {"name": "Train", "checksum": "7d5bb1f58278e33b395744cd2ad8d7600faa0b3c4d615c659a44ec1181d759fa"},
            {"name": "Eval", "checksum": "4c49e6753b7262c001506b9151135abf722247035ab075dad93acdea5789c01f"},
            {"name": "Test", "checksum": "359acb78c37802208f7fde9e2f6574b8526527c63d6a336f90a53f1932cb4701"},
        ]
        for split in splits:
            file = self.DATASET_PATH / f"{split['name']}.txt"
            download_file(f"{base_url}/{split['name']}.txt", str(file), split["checksum"])

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def _convert_standard(self, doc):
        # Map a parsed record onto the MultipleChoiceTask format: a prompt
        # string, the list of option strings, and the index of the gold option.
        return {
            "query": "Passage: " + doc["passage"] + "\nQuestion: " + doc["question"] + "\nAnswer:",
            "choices": doc["options"],
            "gold": ["a", "b", "c", "d"].index(doc["answerKey"]),
        }

    def _load_docs(self, filename):
        def normalize(text):
            # Insert a space after each period and trim surrounding whitespace.
            return text.replace(".", ". ").strip()

        with open(filename, "r", encoding="utf-8") as f:
            docs = f.read().strip().split("\n\n")
        # Each blank-line-separated record is: answer key, passage, question,
        # then one option per line behind a two-character label (e.g. "A.").
        for rawdoc in docs:
            rawdoc = rawdoc.split("\n")
            doc = {
                "answerKey": rawdoc[0].strip(),
                "passage": normalize(rawdoc[1]),
                "question": normalize(rawdoc[2]),
                "options": [normalize(option[2:]) for option in rawdoc[3:]],
            }
            yield self._convert_standard(doc)

    def training_docs(self):
        return self._load_docs(self.DATASET_PATH / "Train.txt")

    def validation_docs(self):
        return self._load_docs(self.DATASET_PATH / "Eval.txt")

    def test_docs(self):
        return self._load_docs(self.DATASET_PATH / "Test.txt")

    def fewshot_description(self):
        # TODO: figure out actual description
        return ""

    def doc_to_text(self, doc):
        return doc["query"]
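The split files are plain text, parsed by `_load_docs` as blank-line-separated records: an answer key, a passage, a question, and one option per line behind a two-character label. The sketch below traces an invented record through the parser to show the document shape the task yields; the record wording is illustrative only, and the `lm_eval.tasks.logiqa` import path is assumed from the registry hunk above.

    from pathlib import Path
    import tempfile

    from lm_eval.tasks.logiqa import LogiQA

    # Hypothetical record in the layout _load_docs expects.
    raw_record = (
        "b\n"
        "All squares are rectangles.Some rectangles are not rhombi.\n"
        "Which of the following must be true?\n"
        "A.All rectangles are squares.\n"
        "B.Some rectangles are squares.\n"
        "C.No rhombus is a square.\n"
        "D.All rhombi are squares.\n"
    )

    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "Sample.txt"
        path.write_text(raw_record, encoding="utf-8")
        # Build an instance without running __init__; _load_docs only needs
        # _convert_standard, so the real data/ directory is never touched.
        task = LogiQA.__new__(LogiQA)
        doc = next(task._load_docs(path))

    print(doc["query"])    # "Passage: ...\nQuestion: ...\nAnswer:"
    print(doc["choices"])  # the four options with their "A." labels stripped
    print(doc["gold"])     # 1, i.e. answer key "b"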