"examples/trials/mnist-pytorch/config_aml.yml" did not exist on "48f8b0526fca88ae9d0148b02ad918a0ba9bce9a"
utils.py 818 Bytes
Newer Older
lintangsutawika's avatar
lintangsutawika committed
1
2
3
import re
from functools import partial

lintangsutawika's avatar
update  
lintangsutawika committed
4
5
6
import sys
sys.path.append('..')
from preprocessors import process_docs_prepended_question
lintangsutawika's avatar
lintangsutawika committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

def process_docs(dataset):
    """Separate each record's answer options from the text that follows them.

    The dataset is first run through ``process_docs_prepended_question``.
    Each resulting record is then updated in place:

    * ``text``    -- what remains after the options block, stripped.
    * ``choices`` -- the option strings, with runs of whitespace collapsed.
    * ``gold``    -- index in ``choices`` of the normalised ``outputs[0]``.

    Args:
        dataset: Object accepted by ``process_docs_prepended_question``
            whose result exposes a ``map`` method.

    Returns:
        The result of ``dataset.map`` applied with the per-record processor.

    Raises:
        ValueError: If the normalised ``outputs[0]`` of a record is not among
            its parsed choices (raised by ``list.index``).
    """
    dataset = process_docs_prepended_question(dataset)

    option_marker = re.compile(r" *\([A-D]\) *")

    def _collapse_whitespace(value):
        # str.split() with no arguments already discards leading and trailing
        # whitespace, so the joined result needs no further stripping.
        return " ".join(value.split())

    def _split_options(record):
        body = record["text"]

        # The options block ends at the first blank line found at or after
        # the "(D)" marker.
        # NOTE: str.find returns -1 on a miss and nothing here checks for it,
        # so a record lacking either marker is sliced at index -1.
        last_marker_at = body.find("(D)")
        boundary = body.find("\n\n", last_marker_at)
        options_block = body[:boundary]
        remainder = body[boundary:]

        record["text"] = remainder.strip()

        # Element 0 of the split is whatever precedes the first marker, so it
        # is not an option and is dropped.
        pieces = option_marker.split(options_block)
        record["choices"] = [_collapse_whitespace(piece) for piece in pieces[1:]]

        answer = _collapse_whitespace(record["outputs"][0])
        record["gold"] = record["choices"].index(answer)

        return record

    return dataset.map(_split_options)