Commit 5288813a authored by lintangsutawika
Browse files

adjust wsc

parent 82d57f65
import re
from lm_eval.utils import general_detokenize
def doc_to_text(x):
def t5_prompt_doc_to_text(x):
def _mark_span(text, span_str, span_idx, mark):
pattern_tmpl = r"^((?:\S+\s){N})(W)"
pattern = re.sub("N", str(span_idx), pattern_tmpl)
......@@ -15,3 +16,19 @@ def doc_to_text(x):
text = _mark_span(text, x["span2_text"], span2_index, "#")
return text
def default_doc_to_text(doc):
    """Render a WSC document as a passage / question / answer prompt.

    The pronoun (``span2_text``) is wrapped in asterisks inside the passage,
    the passage is run through ``general_detokenize``, and the result is
    followed by a question asking whether that pronoun refers to the noun
    (``span1_text``).

    Args:
        doc: Mapping that provides the keys ``"text"``, ``"span1_text"``,
            ``"span2_text"`` and ``"span2_index"``.

    Returns:
        The prompt string, ending in ``"Answer:"``.
    """
    source = doc["text"]

    # NOTE: HuggingFace span indices are word-based not character-based.
    leading_words = source.split()[: doc["span2_index"]]
    prefix = " ".join(leading_words)

    target = doc["span2_text"]
    # Resume after the prefix, one separating space, and the pronoun itself.
    # NOTE(review): the "+ 1" assumes a space precedes the pronoun; with
    # span2_index == 0 the prefix is empty and one extra character is
    # skipped -- confirm whether such documents occur.
    resume_at = len(prefix) + len(target) + 1
    suffix = source[resume_at:]

    passage = general_detokenize(f"{prefix} *{target}*{suffix}")

    noun = doc["span1_text"]
    pronoun = doc["span2_text"]
    prompt_lines = [
        f"Passage: {passage}",
        f'Question: In the passage above, does the pronoun "*{pronoun}*" refer to "*{noun}*"?',
        "Answer:",
    ]
    return "\n".join(prompt_lines)
......@@ -5,7 +5,7 @@ dataset_path: super_glue
dataset_name: wsc
training_split: train
validation_split: validation
doc_to_text: !function "preprocess_wsc.doc_to_text"
doc_to_text: !function "preprocess_wsc.t5_prompt_doc_to_text"
doc_to_target: label
doc_to_choice: ['False', 'True']
metric_list:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment