Commit ebcb28f2 authored by Jonathan Tow
Browse files

Add detokenizer

parent d235355a
......@@ -72,17 +72,33 @@ class MuTualBase(Task):
return ""
def doc_to_text(self, doc):
    """Return the detokenized ``"article"`` field of ``doc``.

    The earlier raw ``return doc["article"]`` preceded this return and made
    the detokenizing path unreachable; only the detokenized form is kept.

    Args:
        doc: Mapping with an ``"article"`` string entry.

    Returns:
        The article text after ``self.detokenize`` has been applied.
    """
    return self.detokenize(doc["article"])
def doc_to_target(self, doc):
    """Return the gold option of ``doc``, detokenized, with a leading space.

    The gold option is the entry of ``doc["options"]`` at the position of
    ``doc["answers"]`` within ``self.CHOICES``. The earlier raw return
    preceded this one and made the detokenizing path unreachable; only the
    detokenized form is kept.

    Args:
        doc: Mapping with ``"options"`` (sequence of strings) and
            ``"answers"`` (a value contained in ``self.CHOICES``).

    Returns:
        A single space followed by the detokenized gold option.

    Raises:
        ValueError: If ``doc["answers"]`` is not in ``self.CHOICES``
            (raised by ``index``).
    """
    gold_index = self.CHOICES.index(doc["answers"])
    return " " + self.detokenize(doc["options"][gold_index])
def construct_requests(self, doc, ctx):
    """Build one loglikelihood request per option in ``doc["options"]``.

    Each option is detokenized and prefixed with a single space before being
    scored against ``ctx``. Exactly one request is produced per option, in
    option order, so that the position of a result matches the position of
    its option (the superseded raw-option request that was also being
    appended doubled the list and broke that alignment).

    Args:
        doc: Mapping with an ``"options"`` sequence of strings.
        ctx: Context passed unchanged to ``rf.loglikelihood``.

    Returns:
        A list of the values returned by ``rf.loglikelihood``, one per option.
    """
    return [
        rf.loglikelihood(ctx, f" {self.detokenize(option)}")
        for option in doc["options"]
    ]
def detokenize(self, text):
    """Remove tokenizer-style spacing from ``text`` and return the result.

    The substitutions are plain substring replacements applied one after
    another in the order listed; later rules see the output of earlier ones,
    so the order is significant.

    Args:
        text: The string to clean up.

    Returns:
        A new string with every rule applied.
    """
    rewrites = (
        # apostrophes, line breaks, contractions and quote marks
        (" '", "'"),
        (" \n", "\n"),
        ("\n ", "\n"),
        (" n't", "n't"),
        ("`` ", '"'),
        ("''", '"'),
        # punctuation
        (" :", ":"),
        (" ;", ";"),
        (" !", "!"),
        (" ?", "?"),
        (" ,", ","),
        (" .", "."),
    )
    cleaned = text
    for spaced, joined in rewrites:
        cleaned = cleaned.replace(spaced, joined)
    return cleaned
def process_results(self, doc, results):
gold = self.CHOICES.index(doc["answers"])
r4_1 = np.argmax(results) == gold # r4_1 = accuracy
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment