"benchmarks/benchmark_latency.py" did not exist on "4858f3bb45ec62fab1fc32dc26eb1e2a8e1df14b"
Commit a1a5a805 authored by ingyuseong's avatar ingyuseong
Browse files

Add KLUE-NLI task

parent f7072b3b
......@@ -310,6 +310,7 @@ TASK_REGISTRY = {
"klue_sts": klue.STS,
"klue_ynat": klue.YNAT,
"klue_nli": klue.NLI,
"nsmc": nsmc.NSMC,
"korquad": korquad.Korquad,
"kobest_boolq": kobest.BoolQ,
......
......@@ -138,3 +138,56 @@ class YNAT(MultipleChoiceTask):
return {
"f1": macro_f1_score
}
class NLI(Task):
    """KLUE-NLI: Korean natural language inference.

    Each doc pairs a premise with a hypothesis; the model must classify the
    relation as entailment (참), contradiction (거짓), or neutral (무관) by
    comparing the log-likelihood of the three answer continuations.
    """

    VERSION = 0
    DATASET_PATH = "klue"
    DATASET_NAME = "nli"

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return False

    def training_docs(self):
        # Materialize and cache the train split so repeated calls are cheap.
        if self._training_docs is None:
            self._training_docs = list(self.dataset["train"])
        return self._training_docs

    def validation_docs(self):
        return self.dataset["validation"]

    def doc_to_text(self, doc):
        """Render the prompt: premise, newline, question, newline, answer cue.

        BUG FIX: the original template was "{}\\질문: ..." — "\\질" is an
        invalid escape sequence that Python preserves literally, so the
        premise and question were glued together by a stray backslash
        instead of separated by a newline. Corrected to "\\n질문".
        Mirrors the harness's ANLI template
        ("{}\\nQuestion: {} True, False, or Neither?\\nAnswer:").
        """
        hypothesis = doc["hypothesis"].strip()
        # Ensure the hypothesis reads as a full sentence before the question.
        if not hypothesis.endswith("."):
            hypothesis += "."
        return "{}\n질문: {} 참, 거짓, 혹은 무관?\n정답:".format(
            doc["premise"],
            hypothesis,
        )

    def doc_to_target(self, doc):
        # KLUE-NLI label ids: 0 = entailment (참), 1 = neutral (무관),
        # 2 = contradiction (거짓).
        return " {}".format({0: "참", 1: "무관", 2: "거짓"}[doc["label"]])

    def construct_requests(self, doc, ctx):
        """Request log-likelihoods for the three answer continuations.

        Tuple order (참, 무관, 거짓) deliberately matches the label ids
        (0, 1, 2) so argmax over results maps directly onto the gold label.
        """
        ll_true, _ = rf.loglikelihood(ctx, " 참")
        ll_neither, _ = rf.loglikelihood(ctx, " 무관")
        ll_false, _ = rf.loglikelihood(ctx, " 거짓")
        return ll_true, ll_neither, ll_false

    def process_results(self, doc, results):
        # Index of the highest-likelihood continuation is the predicted label,
        # since construct_requests orders candidates by label id.
        gold = doc["label"]
        pred = np.argmax(results)
        return {"acc": pred == gold}

    def higher_is_better(self):
        return {"acc": True}

    def aggregation(self):
        return {"acc": mean}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment