title = "{PAWS}-{X}: A Cross-lingual Adversarial Dataset for Paraphrase Identification",
author = "Yang, Yinfei and
Zhang, Yuan and
Tar, Chris and
Baldridge, Jason",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1382",
doi = "10.18653/v1/D19-1382",
pages = "3687--3692",
}"""
classPAWSXBase(Task):
VERSION=0
DATASET_PATH="paws-x"
DATASET_NAME=None# 'en'
YES=None# 'Yes'
NO=None# 'No'
QUESTION_WORD=None# 'right'
def has_training_docs(self):
    """Report whether a training split is available for this task.

    :return: always ``True``.
    """
    return True
def has_validation_docs(self):
    """Report whether a validation split is available for this task.

    :return: always ``True``.
    """
    return True
def has_test_docs(self):
    """Report whether a test split is available for this task.

    :return: always ``True``.
    """
    return True
def training_docs(self):
    """Return the training documents.

    :return: whatever ``self.dataset`` stores under the ``"train"`` key.
    """
    split_name = "train"
    return self.dataset[split_name]
def validation_docs(self):
    """Return the validation documents.

    :return: whatever ``self.dataset`` stores under the ``"validation"`` key.
    """
    split_name = "validation"
    return self.dataset[split_name]
deftest_docs(self):
returnself.dataset["test"]
def doc_to_text(self, doc):
    """Build the cloze-style prompt for one sentence pair.

    The prompt has the shape
    ``<sentence1>, <QUESTION_WORD>? [MASK], <sentence2>``; the literal
    ``[MASK]`` placeholder is left in the returned text.

    :param doc:
        Mapping that provides the ``"sentence1"`` and ``"sentence2"`` strings.
    :return: str
        The assembled prompt.
    """
    # Prompt layout is the same as in the mGPT paper.
    pieces = (
        doc["sentence1"],
        ", ",
        self.QUESTION_WORD,
        "? [MASK], ",
        doc["sentence2"],
    )
    return "".join(pieces)
def doc_to_target(self, doc):
    """Return the gold answer word for a document, prefixed with a space.

    PAWS-X encodes ``label == 1`` as "paraphrase" and ``label == 0`` as
    "not a paraphrase" (per the dataset card), so label 1 must map to
    ``self.YES`` and label 0 to ``self.NO``. The previous lookup order
    ``[YES, NO]`` returned the opposite answer for every document.

    :param doc:
        Mapping with an integer ``"label"`` field (0 or 1).
    :return: str
        ``" " + self.YES`` for label 1, ``" " + self.NO`` for label 0.
    """
    # Index position must equal the label value: 0 -> NO, 1 -> YES.
    # NOTE(review): confirm the scoring code also treats YES as label 1.
    answers_by_label = (self.NO, self.YES)
    return " " + answers_by_label[doc["label"]]
defconstruct_requests(self,doc,ctx):
"""Uses RequestFactory to construct Requests and returns an iterable of
Requests which will be sent to the LM.
:param doc:
The document as returned from training_docs, validation_docs, or
test_docs.
:param ctx: str
The context string, generated by fewshot_context. This includes the natural
language description, as well as the few shot examples, and the question