Commit 11be7c93 authored by lintangsutawika
Browse files

format

parent d1c189ea
import numpy as np import numpy as np
def process_docs(dataset):
def process_docs(dataset):
def _detokenize(text): def _detokenize(text):
text = text.replace(" '", "'") text = text.replace(" '", "'")
text = text.replace(" \n", "\n") text = text.replace(" \n", "\n")
......
...@@ -46,7 +46,7 @@ Homepage: https://allenai.org/data/qasper ...@@ -46,7 +46,7 @@ Homepage: https://allenai.org/data/qasper
#### Tasks #### Tasks
* `qasper_bool`: Multiple choice task that evaluates the task with `answer_type="bool"` * `qasper_bool`: Multiple choice task that evaluates the task with `answer_type="bool"`
* `qasper_freeform`: Greedy generation task that evaluates the samples from the task with `answer_type="free form answer"` * `qasper_freeform`: Greedy generation task that evaluates the samples from the task with `answer_type="free form answer"`
### Checklist ### Checklist
......
import re
import string import string
from collections import Counter
def normalize_answer(s): def normalize_answer(s):
""" """
Taken from the official evaluation script for v1.1 of the SQuAD dataset. Taken from the official evaluation script for v1.1 of the SQuAD dataset.
...@@ -21,6 +25,7 @@ def normalize_answer(s): ...@@ -21,6 +25,7 @@ def normalize_answer(s):
return white_space_fix(remove_articles(remove_punc(lower(s)))) return white_space_fix(remove_articles(remove_punc(lower(s))))
def f1_abstractive(predictions, references): def f1_abstractive(predictions, references):
""" """
Taken from the official evaluation script for v1.1 of the SQuAD dataset. Taken from the official evaluation script for v1.1 of the SQuAD dataset.
......
from datasets import Dataset from datasets import Dataset
from functools import partial from functools import partial
def process_docs(dataset, set_answer_type="bool"): def process_docs(dataset, set_answer_type="bool"):
FEATURES = [ FEATURES = ["title", "abstract", "question", "answer", "answer_type"]
"title",
"abstract",
"question",
"answer",
"answer_type"
]
def _categorise_answer(answer_blob): def _categorise_answer(answer_blob):
if answer_blob["unanswerable"]: if answer_blob["unanswerable"]:
...@@ -62,12 +57,16 @@ def process_docs(dataset, set_answer_type="bool"): ...@@ -62,12 +57,16 @@ def process_docs(dataset, set_answer_type="bool"):
return obs_list return obs_list
dataset = dataset.map(_flatten, remove_columns=[key for key in dataset.features.keys() if key not in FEATURES]) dataset = dataset.map(
_flatten,
remove_columns=[key for key in dataset.features.keys() if key not in FEATURES],
)
new_dataset = {} new_dataset = {}
for key in dataset.features.keys(): for key in dataset.features.keys():
new_dataset[key] = [x for row in dataset[key] for x in row] new_dataset[key] = [x for row in dataset[key] for x in row]
return Dataset.from_dict(new_dataset) return Dataset.from_dict(new_dataset)
process_docs_bool = partial(process_docs, set_answer_type="bool") process_docs_bool = partial(process_docs, set_answer_type="bool")
process_docs_freeform = partial(process_docs, set_answer_type="free form answer") process_docs_freeform = partial(process_docs, set_answer_type="free form answer")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment