Commit 88bf8b56, authored by James King, committed by Facebook Github Bot
Browse files

Fixed the issue where a string converted from a tensor had no spaces between its tokens

Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/548

Differential Revision: D14286021

Pulled By: myleott

fbshipit-source-id: 7c725304185e63787220371a812ec860e178872c
parent 66262a38
......@@ -176,7 +176,7 @@ def batch_by_size(
def process_bpe_symbol(sentence: str, bpe_symbol: str) -> str:
    """Remove subword-segmentation markers from a space-separated token string.

    Args:
        sentence: The token string to clean up.
        bpe_symbol: Selects the clean-up mode. ``'sentencepiece'`` removes
            every space and then turns each U+2581 character into a space;
            any other non-``None`` string is deleted wherever it occurs;
            ``None`` leaves ``sentence`` untouched.

    Returns:
        The processed sentence, or ``sentence`` unchanged when
        ``bpe_symbol`` is ``None``.
    """
    if bpe_symbol == 'sentencepiece':
        # Order matters: the plain spaces must be dropped BEFORE the U+2581
        # markers are turned into spaces. Converting the markers first and
        # then deleting spaces would erase every word boundary.
        sentence = sentence.replace(' ', '').replace('\u2581', ' ').strip()
    elif bpe_symbol is not None:
        # A space is appended before the marker is removed and trailing
        # whitespace is stripped afterwards -- presumably so that a marker
        # ending in a space still matches after the last token (confirm
        # against callers).
        sentence = (sentence + ' ').replace(bpe_symbol, '').rstrip()
    return sentence
......@@ -62,7 +62,7 @@ class Dictionary(object):
else:
return self[i]
sent = ''.join(token_string(i) for i in tensor if i != self.eos())
sent = ' '.join(token_string(i) for i in tensor if i != self.eos())
return data_utils.process_bpe_symbol(sent, bpe_symbol)
def unk_string(self, escape=False):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment