Commit 19f6d1bd authored by baberabb

add logieval

parent 66777b63
...@@ -31,6 +31,8 @@ Homepage: https://github.com/csitfun/LogiQA2.0
`logiqa2_NLI`: The NLI version of the dataset converted from the MRC version.
`logieval`: Prompt-based; https://github.com/csitfun/LogiEval
The subtasks have not been verified yet.
### Checklist
...@@ -38,7 +40,7 @@ The subtasks have not been verified yet.
* [x] Is the task an existing benchmark in the literature?
* [x] Have you referenced the original paper that introduced the task?
* [x] If yes, does the original paper provide a reference implementation?
* [x] The original paper does not. There is another implementation of this task, but it is designed for instruction-tuned models: https://github.com/csitfun/LogiEval
If other tasks on this dataset are already supported:
* [x] Is the "Main" variant of this task clearly denoted?
......
#group:
# - greedy_until
task: logieval
dataset_path: lm_eval/tasks/logiqav2/logiqa2.py
dataset_name: logieval
output_type: greedy_until
training_split: train
#validation_split: validation
test_split: test
# Instructions + {content}
doc_to_text: "Instructions: You will be presented with a passage and a question about that passage. There are four options to be chosen from, you need to choose the only correct option to answer that question. If the first option is right, you generate the answer 'A', if the second option is right, you generate the answer 'B', if the third option is right, you generate the answer 'C', if the fourth option is right, you generate the answer 'D'. Read the question and options thoroughly and select the correct answer from the four answer labels. Read the passage thoroughly to ensure you know what the passage entails.\n{{content}}"
doc_to_target: "{{ideal}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
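As a quick sanity check outside the harness, the two Jinja templates above can be rendered directly. This is a sketch only; the `doc` dict below is a hypothetical example that just follows the `content`/`ideal` features the loader emits for this config.

```python
# Sketch (not part of the harness): render the task's Jinja templates against a
# hypothetical logieval document to preview the prompt/target pair that
# exact_match will compare.
from jinja2 import Template

DOC_TO_TEXT = (
    "Instructions: You will be presented with a passage and a question about that "
    "passage. There are four options to be chosen from, you need to choose the only "
    "correct option to answer that question. ...\n{{content}}"  # abbreviated here
)
DOC_TO_TARGET = "{{ideal}}"

doc = {  # hypothetical row with the loader's two features
    "content": "Passage: ...\nQuestion: ...\nA. ...\nB. ...\nC. ...\nD. ...",
    "ideal": "A",
}

prompt = Template(DOC_TO_TEXT).render(**doc)
target = Template(DOC_TO_TARGET).render(**doc)
print(prompt)
print("target:", target)  # the model's greedy generation is scored against this string
```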
...@@ -55,6 +55,10 @@ _URLS = {
        "validation": "https://raw.githubusercontent.com/csitfun/LogiQA2.0/main/logiqa2nli/DATA/QA2NLI/dev.txt",
        "test": "https://raw.githubusercontent.com/csitfun/LogiQA2.0/main/logiqa2nli/DATA/QA2NLI/test.txt",
    },
    "logieval": {
        "train": "https://raw.githubusercontent.com/csitfun/LogiEval/main/Data/logiqa_ood.jsonl",
        "test": "https://raw.githubusercontent.com/csitfun/LogiEval/main/Data/logiqa.jsonl",
    },
}
...@@ -90,6 +94,11 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
            version=VERSION,
            description="The NLI part of LogiQA2.0 dataset",
        ),
        datasets.BuilderConfig(
            name="logieval",
            version=VERSION,
            description="Instruction based MRC task",
        ),
    ]
    DEFAULT_CONFIG_NAME = "logiqa2"
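With the new builder config registered, the dataset can be inspected directly through the `datasets` library. This is a sketch only, assuming it is run from the repository root so that the relative script path (the same `dataset_path` used in the YAML above) resolves.

```python
# Sketch: load the new "logieval" config straight from the builder script
# to eyeball a few rows. Assumes the working directory is the repo root.
import datasets

ds = datasets.load_dataset("lm_eval/tasks/logiqav2/logiqa2.py", name="logieval")
print(ds)                         # expect train and test splits only (no validation URL)
print(ds["test"][0]["content"][:200])
print(ds["test"][0]["ideal"])     # a single answer letter such as "A"
```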
...@@ -122,6 +131,10 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
                    "conclusion": datasets.Value("string"),
                }
            )
        elif self.config.name in ("logiqa2_nli", "logieval"):
            features = datasets.Features(
                {"content": datasets.Value("string"), "ideal": datasets.Value("string")}
            )
        else:
            features = datasets.Features(
                {
...@@ -147,10 +160,11 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
        urls = {
            "train": _urls["train"],
            "test": _urls["test"],
        }
        if "validation" in _urls:
            urls["validation"] = _urls["validation"]
        data_dir = dl_manager.download_and_extract(urls)
        splits = [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                # These kwargs will be passed to _generate_examples
...@@ -164,6 +178,9 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
                # These kwargs will be passed to _generate_examples
                gen_kwargs={"filepath": data_dir["test"], "split": "test"},
            ),
        ]
        if "validation" in _urls:
            splits.append(
                datasets.SplitGenerator(
                    name=datasets.Split.VALIDATION,
                    # These kwargs will be passed to _generate_examples
...@@ -171,8 +188,9 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
                        "filepath": data_dir["validation"],
                        "split": "validation",
                    },
                )
            )
        return splits

    def _generate_examples(self, filepath, split):
        with open(filepath, encoding="utf-8") as f:
...@@ -196,7 +214,11 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
                        "minor_premise": data["minor_premise"],
                        "conclusion": data["conclusion"],
                    }
                elif self.config.name == "logieval":
                    yield key, {
                        "content": data["input"][1]["content"],
                        "ideal": data["ideal"],
                    }
                else:
                    yield key, {
                        "id": data["id"],
......
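For context, the `logieval` branch in `_generate_examples` only reads `data["input"][1]["content"]` and `data["ideal"]`. The record below is a hypothetical reconstruction of one LogiEval jsonl line based solely on that indexing, not a verified copy of the upstream file format.

```python
# Hypothetical shape of one LogiEval jsonl line, inferred only from the fields
# the generator indexes; the real files may carry extra keys.
import json

line = json.dumps({
    "input": [
        {"role": "system", "content": "You are a careful logical reasoner."},
        {"role": "user", "content": "Passage: ...\nQuestion: ...\nA. ...\nB. ...\nC. ...\nD. ..."},
    ],
    "ideal": "B",
})

data = json.loads(line)
content = data["input"][1]["content"]  # second message, as in the loader
ideal = data["ideal"]                  # gold answer letter compared via exact_match
print(content, ideal)
```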