fix #1920

8b388827 · thomwolf · d425a4d6 · 8b388827
Commit 8b388827 authored Dec 05, 2019 by thomwolf
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

transformers/tokenization_ctrl.py +2 -2

No files found.
--- a/transformers/tokenization_ctrl.py
+++ b/transformers/tokenization_ctrl.py
@@ -192,9 +192,9 @@ class CTRLTokenizer(PreTrainedTokenizer):
        """
        split_tokens = []

-        text = text.split(' ')
+        words = re.findall(r'\S+\n?', text)

-        for token in text:
+        for token in words:
            split_tokens.extend([t for t in self.bpe(token).split(' ')])
        return split_tokens