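Fix minor bugs: correct the WiC prompt argument order, add the missing comma in TASK_REGISTRY, and add length-normalized metrics to KoBEST HellaSwag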

2e724896 · bzantium · 786d612e · 2e724896 · 2e724896 · 2e724896
Commit 2e724896 authored May 23, 2023 by bzantium
Hide whitespace changes
Inline Side-by-side

Showing with 23 additions and 7 deletions

.gitignore .gitignore +4 -0

lm_eval/tasks/__init__.py lm_eval/tasks/__init__.py +1 -1

lm_eval/tasks/kobest.py lm_eval/tasks/kobest.py +18 -6

No files found.
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,7 @@ env
 data/
 lm_cache
 .idea
+build/
+logs/
+output/
+lm_eval.egg-info/
\ No newline at end of file
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -339,7 +339,7 @@ TASK_REGISTRY = {
    "korunsmile": korunsmile.KorUnSmile,
    "kohatespeech":kohatespeech.HateSpeech,
    "kohatespeech_gen_bias":kohatespeech.GenderBias,
-    "kohatespeech_apeach":kohatespeech.Apeach
+    "kohatespeech_apeach":kohatespeech.Apeach,
    **xcopa.construct_tasks(),
    **bigbench.create_all_tasks(),
    **xstorycloze.create_all_tasks(),

--- a/lm_eval/tasks/kobest.py
+++ b/lm_eval/tasks/kobest.py
@@ -177,7 +177,7 @@ class WiC(Task):
        return self.dataset["test"]

    def doc_to_text(self, doc):
-        return "다음 두 문장에서 단어 '{}'가 다른 의미로 쓰였으면 아니, 같은 의미로 쓰였으면 예로 답하시오.\n\n문장1: {}\n문장2: {}\n정답:".format(doc["context_1"], doc["context_2"], doc["word"])
+        return "다음 두 문장에서 단어 '{}'가 다른 의미로 쓰였으면 아니, 같은 의미로 쓰였으면 예로 답하시오.\n\n문장1: {}\n문장2: {}\n정답:".format(doc["word"], doc["context_1"], doc["context_2"])

    def doc_to_target(self, doc):
        return " {}".format({0: " 아니", 1: " 예"}[doc["label"]])
@@ -246,23 +246,35 @@ class HellaSwag(MultipleChoiceTask):
        return doc["query"]

    def process_results(self, doc, results):
-        pred = np.argmax(results)
        gold = doc["gold"]
+
+        completion_len = np.array([float(len(i)) for i in doc["choices"]])
+        pred = np.argmax(results)
+        pred_norm = np.argmax(results / completion_len)
+        acc = 1.0 if pred == gold else 0.0
+        acc_norm = 1.0 if pred_norm == gold else 0.0
+        
        return {
-            "acc": pred == gold,
-            "macro_f1": (gold, pred)
+            "acc": acc,
+            "acc_norm": acc_norm,
+            "macro_f1": (gold, pred),
+            "macro_f1_norm": (gold, pred_norm),
        }

    def higher_is_better(self):
        return {
            "acc": True,
-            "macro_f1": True
+            "acc_norm": True,
+            "macro_f1": True,
+            "macro_f1_norm": True,
        }

    def aggregation(self):
        return {
            "acc": mean,
-            "macro_f1": macro_f1_score
+            "acc_norm": mean,
+            "macro_f1": macro_f1_score,
+            "macro_f1_norm": macro_f1_score,
        }