Commit 11e650db authored by guijinSON
Browse files

add csatqa

parent 42f82060
File added
...@@ -63,6 +63,7 @@ from . import xnli ...@@ -63,6 +63,7 @@ from . import xnli
from . import mgsm from . import mgsm
from . import scrolls from . import scrolls
from . import ceval from . import ceval
from . import csatqa
######################################## ########################################
# Translation tasks # Translation tasks
...@@ -318,6 +319,13 @@ TASK_REGISTRY = { ...@@ -318,6 +319,13 @@ TASK_REGISTRY = {
"crows_pairs_french_nationality": crowspairs.CrowsPairsFrenchNationality, "crows_pairs_french_nationality": crowspairs.CrowsPairsFrenchNationality,
"crows_pairs_french_physical_appearance": crowspairs.CrowsPairsFrenchPhysicalAppearance, "crows_pairs_french_physical_appearance": crowspairs.CrowsPairsFrenchPhysicalAppearance,
"crows_pairs_french_autre": crowspairs.CrowsPairsFrenchAutre, "crows_pairs_french_autre": crowspairs.CrowsPairsFrenchAutre,
"csatqa_wr": csatqa.WR,
"csatqa_gr": csatqa.GR,
"csatqa_rcs": csatqa.RCS,
"csatqa_rcss": csatqa.RCSS,
"csatqa_rch": csatqa.RCH,
"csatqa_li": csatqa.LI,
# Requires manual download
# Requires manual download of data. # Requires manual download of data.
# "storycloze_2016": storycloze.StoryCloze2016, # "storycloze_2016": storycloze.StoryCloze2016,
# "storycloze_2018": storycloze.StoryCloze2018, # "storycloze_2018": storycloze.StoryCloze2018,
......
import os
import datasets
import json
# Citation text handed to DatasetInfo; currently an empty string.
_CITATION = """\
"""
# Human-readable description handed to DatasetInfo.
_DESCRIPTION = """\
CSAT-QA
"""
# Homepage and license strings handed to DatasetInfo.
_HOMEPAGE = "https://huggingface.co/HAERAE-HUB"
_LICENSE = "Proprietary"
# One builder config is created per name below; the same names are matched
# against each row's "split" field when examples are generated.
split_names = ["WR", "GR", "RCS", "RCSS", "RCH", "LI"]
class CSATQAConfig(datasets.BuilderConfig):
    """Builder config for CSAT-QA that pins the dataset version to 1.0.0."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``datasets.BuilderConfig`` with a fixed version.

        Args:
            **kwargs: Keyword arguments passed through unchanged to the base
                config (for example ``name``).
        """
        pinned_version = datasets.Version("1.0.0")
        super().__init__(version=pinned_version, **kwargs)
class CSATQA(datasets.GeneratorBasedBuilder):
    """Dataset builder for CSAT-QA with one config per entry in ``split_names``.

    Every config reads the same JSON Lines file and keeps only the rows whose
    ``"split"`` field equals the config name. The surviving rows are exposed
    as the TEST split.
    """

    BUILDER_CONFIGS = [CSATQAConfig(name=name) for name in split_names]

    def _info(self):
        """Return the dataset metadata together with the per-example schema."""
        features = datasets.Features(
            {
                "question": datasets.Value("string"),
                "option#1": datasets.Value("string"),
                "option#2": datasets.Value("string"),
                "option#3": datasets.Value("string"),
                "option#4": datasets.Value("string"),
                "option#5": datasets.Value("string"),
                "gold": datasets.Value("int8"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Return the single TEST split generator.

        Args:
            dl_manager: Download manager supplied by ``datasets``; not used
                here.

        Returns:
            A one-element list with the TEST ``SplitGenerator`` pointing at
            ``data/data.jsonl`` under the data directory.
        """
        # NOTE(review): the path is a hard-coded relative path and dl_manager
        # is never consulted, so the file must already exist relative to the
        # working directory -- confirm this is intended.
        data_dir = "HAERAE-HUB/CSAT-QA"
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "data", "data.jsonl"),
                },
            ),
        ]

    def _generate_examples(self, filepath):
        """Yield ``(key, example)`` pairs for the active config.

        Args:
            filepath: Path to a UTF-8 JSON Lines file; each non-blank line is
                parsed as one JSON object.

        Yields:
            Tuples of the line index in the file and the parsed row, for rows
            whose ``"split"`` equals ``self.config.name``. The ``"split"`` key
            is removed and ``"gold"`` is converted to ``int`` and decreased by
            one (presumably 1-based answer number -> 0-based index; confirm).
        """
        with open(filepath, encoding="utf-8") as f:
            for key, row in enumerate(f):
                # Blank lines (e.g. a trailing empty line at end of file) are
                # not JSON documents; skip them instead of letting json.loads
                # raise JSONDecodeError.
                if not row.strip():
                    continue
                data = json.loads(row)
                if data["split"] == self.config.name:
                    data["gold"] = int(data["gold"]) - 1
                    data.pop("split")
                    yield key, data
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment