Commit 19f6d1bd authored by baberabb

add logieval

parent 66777b63
...@@ -31,6 +31,8 @@ Homepage: https://github.com/csitfun/LogiQA2.0
`logiqa2_NLI`: The NLI version of the dataset converted from the MRC version.
`logieval`: Prompt-based; https://github.com/csitfun/LogiEval
The subtasks have not been verified yet.
### Checklist
...@@ -38,7 +40,7 @@ The subtasks have not been verified yet.
* [x] Is the task an existing benchmark in the literature?
* [x] Have you referenced the original paper that introduced the task?
* [x] If yes, does the original paper provide a reference implementation?
* [x] The original paper does not. There is another implementation of this task, but it is designed for instruction-tuned models: https://github.com/csitfun/LogiEval
If other tasks on this dataset are already supported:
* [x] Is the "Main" variant of this task clearly denoted?
......
#group:
# - greedy_until
task: logieval
dataset_path: lm_eval/tasks/logiqav2/logiqa2.py
dataset_name: logieval
output_type: greedy_until
training_split: train
#validation_split: validation
test_split: test
# Instructions + {content}
doc_to_text: "Instructions: You will be presented with a passage and a question about that passage. There are four options to be chosen from, you need to choose the only correct option to answer that question. If the first option is right, you generate the answer 'A', if the second option is right, you generate the answer 'B', if the third option is right, you generate the answer 'C', if the fourth option is right, you generate the answer 'D'. Read the question and options thoroughly and select the correct answer from the four answer labels. Read the passage thoroughly to ensure you know what the passage entails.\n{{content}}"
doc_to_target: "{{ideal}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
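As a quick sanity check outside the harness, the two Jinja templates above can be rendered directly. This is a sketch only; the `doc` dict below is a hypothetical example that just follows the `content`/`ideal` features the loader emits for this config.

```python
# Sketch (not part of the harness): render the task's Jinja templates against a
# hypothetical logieval document to preview the prompt/target pair that
# exact_match will compare.
from jinja2 import Template

DOC_TO_TEXT = (
    "Instructions: You will be presented with a passage and a question about that "
    "passage. There are four options to be chosen from, you need to choose the only "
    "correct option to answer that question. ...\n{{content}}"  # abbreviated here
)
DOC_TO_TARGET = "{{ideal}}"

doc = {  # hypothetical row with the loader's two features
    "content": "Passage: ...\nQuestion: ...\nA. ...\nB. ...\nC. ...\nD. ...",
    "ideal": "A",
}

prompt = Template(DOC_TO_TEXT).render(**doc)
target = Template(DOC_TO_TARGET).render(**doc)
print(prompt)
print("target:", target)  # the model's greedy generation is scored against this string
```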
...@@ -55,6 +55,10 @@ _URLS = {
        "validation": "https://raw.githubusercontent.com/csitfun/LogiQA2.0/main/logiqa2nli/DATA/QA2NLI/dev.txt",
        "test": "https://raw.githubusercontent.com/csitfun/LogiQA2.0/main/logiqa2nli/DATA/QA2NLI/test.txt",
    },
    "logieval": {
        "train": "https://raw.githubusercontent.com/csitfun/LogiEval/main/Data/logiqa_ood.jsonl",
        "test": "https://raw.githubusercontent.com/csitfun/LogiEval/main/Data/logiqa.jsonl",
    },
}
...@@ -90,6 +94,11 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
            version=VERSION,
            description="The NLI part of LogiQA2.0 dataset",
        ),
        datasets.BuilderConfig(
            name="logieval",
            version=VERSION,
            description="Instruction based MRC task",
        ),
    ]
    DEFAULT_CONFIG_NAME = "logiqa2"
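With the new builder config registered, the dataset can be inspected directly through the `datasets` library. This is a sketch only, assuming it is run from the repository root so that the relative script path (the same `dataset_path` used in the YAML above) resolves.

```python
# Sketch: load the new "logieval" config straight from the builder script
# to eyeball a few rows. Assumes the working directory is the repo root.
import datasets

ds = datasets.load_dataset("lm_eval/tasks/logiqav2/logiqa2.py", name="logieval")
print(ds)                         # expect train and test splits only (no validation URL)
print(ds["test"][0]["content"][:200])
print(ds["test"][0]["ideal"])     # a single answer letter such as "A"
```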
...@@ -122,6 +131,10 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
                    "conclusion": datasets.Value("string"),
                }
            )
        elif self.config.name in ("logiqa2_nli", "logieval"):
            features = datasets.Features(
                {"content": datasets.Value("string"), "ideal": datasets.Value("string")}
            )
        else:
            features = datasets.Features(
                {
...@@ -147,10 +160,11 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
        urls = {
            "train": _urls["train"],
            "test": _urls["test"],
        }
        if "validation" in _urls:
            urls["validation"] = _urls["validation"]
        data_dir = dl_manager.download_and_extract(urls)
        splits = [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                # These kwargs will be passed to _generate_examples
...@@ -164,6 +178,9 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
                # These kwargs will be passed to _generate_examples
                gen_kwargs={"filepath": data_dir["test"], "split": "test"},
            ),
        ]
        if "validation" in _urls:
            splits.append(
                datasets.SplitGenerator(
                    name=datasets.Split.VALIDATION,
                    # These kwargs will be passed to _generate_examples
...@@ -171,8 +188,9 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
                        "filepath": data_dir["validation"],
                        "split": "validation",
                    },
                )
            )
        return splits

    def _generate_examples(self, filepath, split):
        with open(filepath, encoding="utf-8") as f:
...@@ -196,7 +214,11 @@ class LogiQA2(datasets.GeneratorBasedBuilder):
                        "minor_premise": data["minor_premise"],
                        "conclusion": data["conclusion"],
                    }
                elif self.config.name == "logieval":
                    yield key, {
                        "content": data["input"][1]["content"],
                        "ideal": data["ideal"],
                    }
                else:
                    yield key, {
                        "id": data["id"],
......
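For context, the `logieval` branch in `_generate_examples` only reads `data["input"][1]["content"]` and `data["ideal"]`. The record below is a hypothetical reconstruction of one LogiEval jsonl line based solely on that indexing, not a verified copy of the upstream file format.

```python
# Hypothetical shape of one LogiEval jsonl line, inferred only from the fields
# the generator indexes; the real files may carry extra keys.
import json

line = json.dumps({
    "input": [
        {"role": "system", "content": "You are a careful logical reasoner."},
        {"role": "user", "content": "Passage: ...\nQuestion: ...\nA. ...\nB. ...\nC. ...\nD. ..."},
    ],
    "ideal": "B",
})

data = json.loads(line)
content = data["input"][1]["content"]  # second message, as in the loader
ideal = data["ideal"]                  # gold answer letter compared via exact_match
print(content, ideal)
```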