preprocess.py 492 Bytes
Newer Older
lintangsutawika's avatar
lintangsutawika committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import re
from lm_eval.utils import general_detokenize


def partial_context(doc, option):
    """Build the context string that ends with ``option``.

    The text of ``doc["sentence"]`` up to (but not including) its first
    underscore placeholder is kept, ``option`` is appended in place of the
    placeholder, and everything after the placeholder is discarded.

    Args:
        doc: Mapping with a ``"sentence"`` string containing at least one
            ``"_"`` placeholder.
        option: String to substitute for the placeholder.

    Returns:
        The sentence prefix followed directly by ``option``.

    Raises:
        ValueError: If the sentence contains no ``"_"``.
    """
    sentence = doc["sentence"]
    # Only the first underscore is treated as the placeholder.
    blank_at = sentence.index("_")
    prefix = sentence[:blank_at]
    return prefix + option


def partial_target(doc):
    """Return the continuation that follows the underscore placeholder.

    Everything after the first ``"_"`` in ``doc["sentence"]`` is taken,
    surrounding whitespace is stripped from it, and a single leading space
    is prepended.

    Args:
        doc: Mapping with a ``"sentence"`` string containing at least one
            ``"_"`` placeholder.

    Returns:
        ``" "`` followed by the stripped text after the placeholder.

    Raises:
        ValueError: If the sentence contains no ``"_"``.
    """
    sentence = doc["sentence"]
    # Skip past the first underscore itself; the remainder is the target.
    remainder = sentence[sentence.index("_") + 1 :]
    return " " + remainder.strip()