Unverified commit bb044462, authored by Guillem García Subies, committed by GitHub
Browse files

Update tokenization_openai.py

parent bfd75056
......@@ -89,9 +89,9 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
try:
import ftfy
import spacy
self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
self.fix_text = ftfy.fix_text
from spacy.lang.en import English
_nlp = English()
self.nlp = nlp.Defaults.create_tokenizer(_nlp)
except ImportError:
logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
self.nlp = BasicTokenizer(do_lower_case=True)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment