Unverified Commit 21e1ed17 authored by Lintang Sutawika's avatar Lintang Sutawika Committed by GitHub
Browse files

Merge pull request #769 from EleutherAI/superglue

[Refactor] Superglue T5 Parity
parents 4cda3a1c b7082722
......@@ -2,16 +2,17 @@ group:
- super-glue-t5-prompt
task: super_glue-wsc-t5-prompt
dataset_path: super_glue
dataset_name: wsc
dataset_name: wsc.fixed
training_split: train
validation_split: validation
output_type: greedy_until
doc_to_text: !function "preprocess_wsc.t5_prompt_doc_to_text"
doc_to_text: !function "t5_utils.doc_to_text"
doc_to_target: label
doc_to_choice: ['False', 'True']
metric_list:
- metric: exact_match
- metric: accuracy
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
filter_list:
- name: "wsc_postprocessor"
filter:
- function: !function t5_utils.WSCPostprocess
import re
from lm_eval.api.filter import Filter
def doc_to_text(x):
text = re.sub(r" X ", " *" + x["span2_text"] + "* ", _wsc_inputs(x))
return "wsc: " + text
def _wsc_inputs(x):
words = x["text"].split(" ")
# We would need some special logic to handle the case where the pronoun is the
# first or last word in the text. None of the examples in WSC seem to have
# this, so we are ignoring these cases.
assert x["span2_index"] > 0
assert x["span2_index"] < len(words)
pronoun_index = x["span2_index"]
def create_input():
assert words[pronoun_index] == x["span2_text"]
return " ".join(
[
" ".join(words[:pronoun_index]),
"X",
" ".join(words[pronoun_index + 1 :]),
]
)
# Handle some special cases.
if (
x["text"]
== 'The boy continued to whip the pony , and eventually the pony threw him over. John laughed out quite loud. "Good for him," he said. '
):
return (
"The boy continued to whip the pony , and eventually the pony threw "
'him over. John laughed out quite loud. "Good for X ," he said.'
)
# Using the span2_index, we get 'use' instead of 'it'.
if (
x["text"]
== "When they had eventually calmed down a bit , and had gotten home, Mr. Farley put the magic pebble in an iron safe . Some day they might want to use it , but really for now, what more could they wish for?"
):
return (
"When they had eventually calmed down a bit , and had gotten home, "
"Mr. Farley put the magic pebble in an iron safe . Some day they might "
"want to use X , but really for now, what more could they wish for?"
)
return create_input()
class WSCPostprocess(Filter):
def __init__(self, **kwargs):
self.determiners = {
"a",
"an",
"few",
"her",
"his",
"each",
"every",
"many",
"much",
"my",
"our",
"some",
"that",
"the",
"their",
"these",
"this",
"those",
"which",
"whose",
"your",
}
def clean(self, s):
"""Ignore capitalization and determiners."""
s = s.strip().lower()
return " ".join([w for w in s.split(" ") if w not in self.determiners])
def apply(self, resps, docs):
filtered_resps = []
for prediction, reference in zip(*(resps, docs["span1_text"])):
prediction = self.clean(prediction[0])
reference = self.clean(reference)
if ("'" in prediction) != ("'" in reference):
# referent is "Bob's hat" as predicting the referent.
predicted_referent = False
else:
prediction_words = set(prediction.split(" "))
referent_words = set(reference.split(" "))
# Handle cases where the prediction is "fuzzy bunny" and the referent is
# "bunny".
predicted_referent = prediction_words.issubset(
referent_words
) or referent_words.issubset(prediction_words)
filtered_resps.append(predicted_referent)
return filtered_resps
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment