add csatqa

11e650db · guijinSON · 42f82060 · 11e650db · 11e650db · 11e650db
Commit 11e650db authored Jul 16, 2023 by guijinSON
Showing with 80 additions and 0 deletions

.DS_Store .DS_Store +0 -0

lm_eval/.DS_Store lm_eval/.DS_Store +0 -0

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +8 -0

lm_eval/tasks/csatqa.py lm_eval/tasks/csatqa.py +72 -0

No files found.
--- a/.DS_Store
+++ b/.DS_Store
--- a/lm_eval/.DS_Store
+++ b/lm_eval/.DS_Store
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -63,6 +63,7 @@ from . import xnli
 from . import mgsm
 from . import scrolls
 from . import ceval
+from . import csatqa
 ########################################
 # Translation tasks
@@ -318,6 +319,13 @@ TASK_REGISTRY = {
    "crows_pairs_french_nationality": crowspairs.CrowsPairsFrenchNationality,
    "crows_pairs_french_physical_appearance": crowspairs.CrowsPairsFrenchPhysicalAppearance,
    "crows_pairs_french_autre": crowspairs.CrowsPairsFrenchAutre,
+    "csatqa_wr": csatqa.WR,
+    "csatqa_gr": csatqa.GR,
+    "csatqa_rcs": csatqa.RCS,
+    "csatqa_rcss": csatqa.RCSS,
+    "csatqa_rch": csatqa.RCH,
+    "csatqa_li": csatqa.LI,
+    # Requires manual download
    # Requires manual download of data.
    # "storycloze_2016": storycloze.StoryCloze2016,
    # "storycloze_2018": storycloze.StoryCloze2018,

--- a/lm_eval/tasks/csatqa.py
+++ b/lm_eval/tasks/csatqa.py
+import os
+import datasets
+import json
+# Citation text handed to DatasetInfo; currently an empty string.
+_CITATION = """\
+"""
+# Short description handed to DatasetInfo.
+_DESCRIPTION = """\
+    CSAT-QA
+"""
+# Homepage and license strings handed to DatasetInfo.
+_HOMEPAGE = "https://huggingface.co/HAERAE-HUB"
+_LICENSE = "Proprietary"
+# One builder config is created per name listed here, and each name is also
+# compared against the "split" field of every JSONL row to pick that
+# config's examples.
+# NOTE(review): lm_eval/tasks/__init__.py in this same change registers
+# csatqa.WR, csatqa.GR, csatqa.RCS, csatqa.RCSS, csatqa.RCH and csatqa.LI;
+# no attributes with those names are defined in the visible part of this
+# module -- confirm they exist, otherwise importing the registry will fail.
+split_names = ["WR", "GR", "RCS", "RCSS", "RCH", "LI"]
+class CSATQAConfig(datasets.BuilderConfig):
+    """Builder config for CSAT-QA with the version pinned to 1.0.0."""
+
+    def __init__(self, **kwargs):
+        """Forward ``kwargs`` to ``BuilderConfig`` with a fixed version.
+
+        Passing ``version`` in ``kwargs`` is not supported: it would collide
+        with the version supplied here.
+        """
+        pinned_version = datasets.Version("1.0.0")
+        super().__init__(version=pinned_version, **kwargs)
+class CSATQA(datasets.GeneratorBasedBuilder):
+    """Dataset builder that exposes CSAT-QA as a single test split per config.
+
+    One config exists for every entry of ``split_names``; a config yields the
+    JSONL rows whose ``"split"`` field equals the config name.
+    """
+
+    BUILDER_CONFIGS = [CSATQAConfig(name=subset) for subset in split_names]
+
+    def _info(self):
+        """Return the ``DatasetInfo`` carrying the example schema."""
+        # Every textual column shares the same string type; only the label
+        # column differs.
+        text_columns = (
+            "question",
+            "option#1",
+            "option#2",
+            "option#3",
+            "option#4",
+            "option#5",
+        )
+        schema = {column: datasets.Value("string") for column in text_columns}
+        schema["gold"] = datasets.Value("int8")
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features(schema),
+            homepage=_HOMEPAGE,
+            license=_LICENSE,
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        """Return the one TEST split generator.
+
+        ``dl_manager`` is accepted but not used: the JSONL file is addressed
+        by a relative path, so it has to be present under
+        ``HAERAE-HUB/CSAT-QA/data/`` relative to the working directory.
+        """
+        jsonl_path = os.path.join("HAERAE-HUB/CSAT-QA", "data", "data.jsonl")
+        test_split = datasets.SplitGenerator(
+            name=datasets.Split.TEST,
+            gen_kwargs={"filepath": jsonl_path},
+        )
+        return [test_split]
+
+    def _generate_examples(self, filepath):
+        """Yield ``(key, example)`` pairs for the rows of the active config.
+
+        The key is the row's line index within the whole file, so the keys of
+        a single config are unique but not necessarily contiguous. ``"gold"``
+        is converted to ``int`` and decremented by one (presumably turning a
+        1-based answer number into a 0-based index -- confirm against the
+        data), and the ``"split"`` field is removed from the yielded example.
+        """
+        with open(filepath, encoding="utf-8") as handle:
+            for line_index, line in enumerate(handle):
+                record = json.loads(line)
+                # Rows that belong to another config are skipped.
+                if record["split"] != self.config.name:
+                    continue
+                record["gold"] = int(record["gold"]) - 1
+                del record["split"]
+                yield line_index, record
\ No newline at end of file