# glue.py

import nlp
import numpy as np
import random
from sklearn.metrics import f1_score, matthews_corrcoef
from . common import NLP_TASK, simple_accuracy_metric
from . import TASK_REGISTRY


@TASK_REGISTRY.register("cola")
class CoLA(NLP_TASK):
    """GLUE CoLA task, posed as a True/False "does this make sense?" question.

    Each document is expected to carry a ``"sentence"`` string and an integer
    ``"label"`` (1 is rendered as "True", 0 as "False"). Predictions are
    scored with the Matthews correlation coefficient.
    """

    # Dataset coordinates, declared the same way the RTE task in this module
    # declares them. Presumably read by the NLP_TASK loader -- confirm
    # against common.py.
    NLP_PATH = "glue"
    NLP_NAME = "cola"

    # Single source of truth for label <-> answer word, so the prompt
    # rendering and the scoring can never drift apart.
    _LABEL_TO_ANSWER = {1: "True", 0: "False"}

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def doc_to_text(self, doc, include_target=True):
        """Render ``doc`` as a prompt, optionally followed by its answer.

        :param doc: mapping with ``"sentence"`` and (when ``include_target``
            is true) ``"label"`` keys.
        :param include_target: append the gold answer word after the prompt.
        :return: the prompt string.
        :raises KeyError: if ``include_target`` is true and the label is
            neither 0 nor 1.
        """
        # The prompt ends right after the colon: the target brings its own
        # leading space, so a trailing space here would produce a double
        # space ("Answer:  True") and leave a dangling space on prompts
        # rendered without a target.
        text = "Does this sentence make sense?:\tTrue or False?" \
               "\nsentence:{}\nAnswer:".format(doc["sentence"])
        if include_target:
            text += " {}".format(self._LABEL_TO_ANSWER[doc["label"]])
        return text

    def evaluate(self, docs, lm, k=0):
        """Score ``lm`` on ``docs`` using ``k`` few-shot examples per prompt.

        :param docs: iterable of documents, each with a ``"label"`` key.
        :param lm: object exposing ``generate(context=..., max_gen_length=...)``
            that returns a string.
        :param k: number of few-shot examples passed to ``fewshot_context``.
        :return: dict with the Matthews correlation under ``"major"`` and
            ``"minor"["mcc"]``, plus ``"higher_is_better"``.
        """
        answer_to_label = {
            answer: label for label, answer in self._LABEL_TO_ANSWER.items()
        }
        golds = []
        preds = []
        for doc in docs:
            golds.append(doc["label"])
            word = lm.generate(
                context=self.fewshot_context(doc=doc, k=k),
                max_gen_length=1,
            )
            # Any output that is not exactly one of the answer words is
            # recorded as -1, a class that never matches a gold label.
            preds.append(answer_to_label.get(word.strip(), -1))
        mcc = float(matthews_corrcoef(
            y_true=np.array(golds),
            y_pred=np.array(preds),
        ))
        return {
            "major": mcc,
            "minor": {"mcc": mcc},
            "higher_is_better": True,
        }


@TASK_REGISTRY.register("mnli")
class MNLI(NLP_TASK):
    """GLUE MNLI task, posed as a True / False / Neither question.

    Each document is expected to carry ``"sentence1"``, ``"sentence2"`` and
    an integer ``"label"``. Validation and test documents come from the
    "matched" splits. Predictions are scored with simple accuracy.
    """

    # Dataset coordinates, declared the same way the RTE task in this module
    # declares them. Presumably read by the NLP_TASK loader -- confirm
    # against common.py.
    NLP_PATH = "glue"
    NLP_NAME = "mnli"

    # Single source of truth for label <-> answer word:
    #   True    = entailment
    #   Neither = neutral
    #   False   = contradiction
    _LABEL_TO_ANSWER = {0: "True", 1: "Neither", 2: "False"}

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def validation_docs(self):
        """Return the ``"validation_matched"`` split (None if unavailable)."""
        if self.has_validation_docs():
            return self._load_nlp_dataset()["validation_matched"]

    def test_docs(self):
        """Return the ``"test_matched"`` split (None if unavailable)."""
        if self.has_test_docs():
            return self._load_nlp_dataset()["test_matched"]

    def doc_to_text(self, doc, include_target=True):
        """Render ``doc`` as a prompt, optionally followed by its answer.

        :param doc: mapping with ``"sentence1"``, ``"sentence2"`` and (when
            ``include_target`` is true) ``"label"`` keys.
        :param include_target: append the gold answer word after the prompt.
        :return: the prompt string.
        :raises KeyError: if ``include_target`` is true and the label is not
            0, 1 or 2.
        """
        text = "{}\nquestion:\t{}\tTrue, False or Neither?\nanswer:".format(
            doc["sentence1"],
            doc["sentence2"],
        )
        if include_target:
            text += " {}".format(self._LABEL_TO_ANSWER[doc["label"]])
        return text

    def evaluate(self, docs, lm, k=0):
        """Score ``lm`` on ``docs`` using ``k`` few-shot examples per prompt.

        :param docs: iterable of documents, each with a ``"label"`` key.
        :param lm: object exposing ``generate(context=..., max_gen_length=...)``
            that returns a string.
        :param k: number of few-shot examples passed to ``fewshot_context``.
        :return: whatever ``simple_accuracy_metric`` returns for the
            collected predictions and gold labels.
        """
        # Invert the prompt mapping so a generated word is decoded to the
        # same integer label that doc_to_text encoded it from. Scoring
        # "True" as 1 / "False" as 0 would contradict the 0/1/2 scheme above
        # and could never credit a "Neither" answer.
        answer_to_label = {
            answer: label for label, answer in self._LABEL_TO_ANSWER.items()
        }
        golds = []
        preds = []
        for doc in docs:
            golds.append(doc["label"])
            word = lm.generate(
                context=self.fewshot_context(doc=doc, k=k),
                max_gen_length=1,
            )
            # Unrecognised output becomes -1, which never equals a gold label.
            preds.append(answer_to_label.get(word.strip(), -1))
        return simple_accuracy_metric(preds=preds, golds=golds)


@TASK_REGISTRY.register("rte")
class RTE(NLP_TASK):
    """GLUE RTE task, posed as a True/False entailment question.

    Each document is expected to carry ``"sentence1"``, ``"sentence2"`` and
    an integer ``"label"``. Predictions are scored with simple accuracy.
    """

    NLP_PATH = "glue"
    NLP_NAME = "rte"

    # Single source of truth for label <-> answer word. Per the published
    # GLUE RTE label schema:
    #   0 = entailment      -> "True"
    #   1 = not_entailment  -> "False"
    _LABEL_TO_ANSWER = {0: "True", 1: "False"}

    def has_training_docs(self):
        return True

    def has_validation_docs(self):
        return True

    def has_test_docs(self):
        return True

    def doc_to_text(self, doc, include_target=True):
        """Render ``doc`` as a prompt, optionally followed by its answer.

        :param doc: mapping with ``"sentence1"``, ``"sentence2"`` and (when
            ``include_target`` is true) ``"label"`` keys.
        :param include_target: append the gold answer word after the prompt.
        :return: the prompt string.
        :raises KeyError: if ``include_target`` is true and the label is
            neither 0 nor 1.
        """
        text = "{}\nquestion:\t{}\tTrue or False?\nanswer:".format(
            doc["sentence1"],
            doc["sentence2"],
        )
        if include_target:
            text += " {}".format(self._LABEL_TO_ANSWER[doc["label"]])
        return text

    def evaluate(self, docs, lm, k=0):
        """Score ``lm`` on ``docs`` using ``k`` few-shot examples per prompt.

        :param docs: iterable of documents, each with a ``"label"`` key.
        :param lm: object exposing ``generate(context=..., max_gen_length=...)``
            that returns a string.
        :param k: number of few-shot examples passed to ``fewshot_context``.
        :return: whatever ``simple_accuracy_metric`` returns for the
            collected predictions and gold labels.
        """
        # Decode with the inverse of the prompt mapping so that scoring and
        # prompt rendering always agree on what "True" means.
        answer_to_label = {
            answer: label for label, answer in self._LABEL_TO_ANSWER.items()
        }
        golds = []
        preds = []
        for doc in docs:
            golds.append(doc["label"])
            word = lm.generate(
                context=self.fewshot_context(doc=doc, k=k),
                max_gen_length=1,
            )
            # Unrecognised output becomes -1, which never equals a gold label.
            preds.append(answer_to_label.get(word.strip(), -1))
        return simple_accuracy_metric(preds=preds, golds=golds)