Commit 88bf8b56, authored by James King and committed by Facebook Github Bot
Browse files

Fixed the issue where strings converted from tensors had no spaces between tokens

Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/548

Differential Revision: D14286021

Pulled By: myleott

fbshipit-source-id: 7c725304185e63787220371a812ec860e178872c
parent 66262a38
...@@ -176,7 +176,7 @@ def batch_by_size( ...@@ -176,7 +176,7 @@ def batch_by_size(
def process_bpe_symbol(sentence: str, bpe_symbol: str): def process_bpe_symbol(sentence: str, bpe_symbol: str):
if bpe_symbol == 'sentencepiece': if bpe_symbol == 'sentencepiece':
sentence = sentence.replace('\u2581', ' ').strip() sentence = sentence.replace(' ','').replace('\u2581', ' ').strip()
elif bpe_symbol is not None: elif bpe_symbol is not None:
sentence = (sentence + ' ').replace(bpe_symbol, '').rstrip() sentence = (sentence + ' ').replace(bpe_symbol, '').rstrip()
return sentence return sentence
...@@ -62,7 +62,7 @@ class Dictionary(object): ...@@ -62,7 +62,7 @@ class Dictionary(object):
else: else:
return self[i] return self[i]
sent = ''.join(token_string(i) for i in tensor if i != self.eos()) sent = ' '.join(token_string(i) for i in tensor if i != self.eos())
return data_utils.process_bpe_symbol(sent, bpe_symbol) return data_utils.process_bpe_symbol(sent, bpe_symbol)
def unk_string(self, escape=False): def unk_string(self, escape=False):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment