Unverified commit 388e3251, authored by Guillem García Subies, committed by GitHub
Browse files

Update tokenization_xlm.py

parent f5e2ed0f
...@@ -126,7 +126,7 @@ class XLMTokenizer(PreTrainedTokenizer): ...@@ -126,7 +126,7 @@ class XLMTokenizer(PreTrainedTokenizer):
import ftfy import ftfy
from spacy.lang.en import English from spacy.lang.en import English
_nlp = English() _nlp = English()
self.nlp = nlp.Defaults.create_tokenizer(_nlp) self.nlp = _nlp.Defaults.create_tokenizer(_nlp)
self.fix_text = ftfy.fix_text self.fix_text = ftfy.fix_text
except ImportError: except ImportError:
logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.") logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment