# t5_utils.py

import re
from lm_eval.api.filter import Filter


def doc_to_text(x):
    """Build the ``"wsc: ..."`` prompt for a single WSC document.

    ``_wsc_inputs`` returns the passage with the pronoun replaced by a
    standalone ``X`` placeholder. Every `` X `` occurrence in that passage is
    swapped for the pronoun wrapped in asterisks, and the result is prefixed
    with ``"wsc: "``.

    Args:
        x: Mapping providing ``"span2_text"`` (the pronoun) plus the keys
            read by ``_wsc_inputs``.

    Returns:
        The prompt string, e.g. ``"wsc: ... because *they* feared ..."``.
    """
    highlighted = " *" + x["span2_text"] + "* "
    # Plain substring replacement on purpose: as a regex replacement template,
    # a backslash or group reference inside the pronoun text would be
    # interpreted (or rejected) instead of being inserted literally.
    return "wsc: " + _wsc_inputs(x).replace(" X ", highlighted)


def _wsc_inputs(x):
    words = x["text"].split(" ")

    # We would need some special logic to handle the case where the pronoun is the
    # first or last word in the text. None of the examples in WSC seem to have
    # this, so we are ignoring these cases.
    assert x["span2_index"] > 0
    assert x["span2_index"] < len(words)
    pronoun_index = x["span2_index"]

    def create_input():
        assert words[pronoun_index] == x["span2_text"]

        return " ".join(
            [
                " ".join(words[:pronoun_index]),
                "X",
                " ".join(words[pronoun_index + 1 :]),
            ]
        )

    # Handle some special cases.
    if (
        x["text"]
        == 'The boy continued to whip the pony , and eventually the pony threw him over. John laughed out quite loud. "Good for him," he said. '
    ):
        return (
            "The boy continued to whip the pony , and eventually the pony threw "
            'him over. John laughed out quite loud. "Good for X ," he said.'
        )

    # Using the span2_index, we get 'use' instead of 'it'.
    if (
        x["text"]
        == "When they had eventually calmed down a bit , and had gotten home, Mr. Farley put the magic pebble in an iron safe . Some day they might want to use it , but really for now, what more could they wish for?"
    ):
        return (
            "When they had eventually calmed down a bit , and had gotten home, "
            "Mr. Farley put the magic pebble in an iron safe . Some day they might "
            "want to use X , but really for now, what more could they wish for?"
        )

    return create_input()


class WSCPostprocess(Filter):
    """Filter that turns free-text WSC predictions into booleans.

    Each prediction is compared with the document's ``span1_text`` after both
    are lowercased and stripped of determiners; the output says whether the
    prediction names the referent.
    """

    def __init__(self, **kwargs):
        """Set up the determiner list; keyword arguments are accepted and ignored."""
        self.determiners = {
            # articles
            "a", "an", "the",
            # possessives
            "her", "his", "my", "our", "their", "your",
            # quantifiers
            "few", "each", "every", "many", "much", "some",
            # demonstratives
            "that", "these", "this", "those",
            # interrogatives
            "which", "whose",
        }

    def clean(self, s):
        """Ignore capitalization and determiners."""
        normalized = s.strip().lower()
        kept = (word for word in normalized.split(" ") if word not in self.determiners)
        return " ".join(kept)

    def apply(self, resps, docs):
        """Score each response against the matching ``span1_text`` reference.

        Args:
            resps: Sequence of per-document response sequences; only the first
                response of each document is used.
            docs: Object indexable by ``"span1_text"``, yielding one reference
                string per document.

        Returns:
            A list of booleans, one per (response, reference) pair, that is
            True when the prediction is judged to name the referent.
        """
        verdicts = []
        for candidates, gold in zip(resps, docs["span1_text"]):
            guess = self.clean(candidates[0])
            target = self.clean(gold)

            if ("'" in guess) == ("'" in target):
                guess_words = set(guess.split(" "))
                target_words = set(target.split(" "))

                # Accept a containment in either direction, e.g. the
                # prediction is "fuzzy bunny" and the referent is "bunny".
                matched = guess_words <= target_words or target_words <= guess_words
            else:
                # Make sure we don't mark cases where the prediction is "Bob"
                # and the referent is "Bob's hat" as predicting the referent.
                matched = False

            verdicts.append(matched)

        return verdicts