Commit 715dc759 authored by Jonathan Tow
Browse files

Fix `doc_to_text` passage bug

parent c0fbf9e8
...@@ -435,11 +435,10 @@ class SGWinogradSchemaChallenge(HFTask): ...@@ -435,11 +435,10 @@ class SGWinogradSchemaChallenge(HFTask):
def doc_to_text(self, doc): def doc_to_text(self, doc):
raw_passage = doc["text"] raw_passage = doc["text"]
passage = ( # NOTE: HuggingFace span indices are word-based not character-based.
raw_passage[:doc["span2_index"]] pre = " ".join(raw_passage.split()[:doc["span2_index"]])
+ "*{}*".format(doc["span2_text"]) post = raw_passage[len(pre) + len(doc["span2_text"]) + 1:]
+ raw_passage[doc["span2_index"] + len(doc["span2_text"]):] passage = pre + " *{}*".format(doc['span2_text']) + post
)
noun = doc["span1_text"] noun = doc["span1_text"]
pronoun = doc["span2_text"] pronoun = doc["span2_text"]
text = ( text = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment