Merge pull request #405 from ingyuseong/feature/klue-nli

Add `KLUE-NLI` task

Merge pull request #405 from ingyuseong/feature/klue-nli
Add `KLUE-NLI` task
cc7650d8 · Kevin Ko · GitHub · f7072b3b · 9d8f0cc1 · cc7650d8
Unverified Commit cc7650d8 authored Mar 27, 2023 by Kevin Ko Committed by GitHub Mar 27, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 55 additions and 1 deletion

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +1 -0

lm_eval/tasks/klue.py lm_eval/tasks/klue.py +54 -1

No files found.
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -310,6 +310,7 @@ TASK_REGISTRY = {
    "klue_sts": klue.STS,
    "klue_ynat": klue.YNAT,
+    "klue_nli": klue.NLI,
    "nsmc": nsmc.NSMC,    
    "korquad": korquad.Korquad,
    "kobest_boolq": kobest.BoolQ,

--- a/lm_eval/tasks/klue.py
+++ b/lm_eval/tasks/klue.py
@@ -52,7 +52,7 @@ class STS(Task):
        return self.dataset["validation"]
    def doc_to_text(self, doc):
-        return "질문: 문장 1과 문장 2는 서로 유사한 의미를 가지나요?\n문장 1:{}\n문장 2:{}\n정답:".format(
+        return "질문: 문장 1과 문장 2는 서로 유사한 의미를 가지나요?\n문장 1: {}\n문장 2: {}\n정답:".format(
            general_detokenize(doc["sentence1"]),
            general_detokenize(doc["sentence2"]) 
        )
@@ -138,3 +138,56 @@ class YNAT(MultipleChoiceTask):
        return {
            "f1": macro_f1_score
        }
+class NLI(Task):
+    VERSION = 0
+    DATASET_PATH = "klue"
+    DATASET_NAME = "nli"
+    def has_training_docs(self):
+        return True
+    def has_validation_docs(self):
+        return True
+    def has_test_docs(self):
+        return False
+    def training_docs(self):
+        if self._training_docs is None:
+            self._training_docs = list(self.dataset["train"])
+        return self._training_docs
+    def validation_docs(self):
+        return self.dataset["validation"]
+    def doc_to_text(self, doc):
+        return "{}\n질문: {} 참, 거짓, 중립 중 무엇인가요?\n정답:".format(
+            doc["premise"],
+            doc["hypothesis"].strip()
+            + ("" if doc["hypothesis"].strip().endswith(".") else "."),
+        )
+    def doc_to_target(self, doc):
+        # 참 = entailment
+        # 거짓 = contradiction
+        # 무관 = neutral
+        return " {}".format({0: "참", 1: "중립", 2: "거짓"}[doc["label"]])
+    def construct_requests(self, doc, ctx):
+        ll_true, _ = rf.loglikelihood(ctx, " 참")
+        ll_neither, _ = rf.loglikelihood(ctx, " 중립")
+        ll_false, _ = rf.loglikelihood(ctx, " 거짓")
+        return ll_true, ll_neither, ll_false
+    def process_results(self, doc, results):
+        gold = doc["label"]
+        pred = np.argmax(results)
+        return {"acc": pred == gold}
+    def higher_is_better(self):
+        return {"acc": True}
+    def aggregation(self):
+        return {"acc": mean}