Unverified commit cd65c41a, authored by Thomas Wolf and committed by GitHub
Browse files

Merge branch 'master' into xlm-tokenization

parents 69da972a b66e9b44
@@ -71,6 +71,10 @@ class XLNetTokenizer(PreTrainedTokenizer):
pad_token=pad_token, cls_token=cls_token,
mask_token=mask_token, additional_special_tokens=
additional_special_tokens, **kwargs)
self.max_len_single_sentence = self.max_len - 2 # take into account special tokens
self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens
try:
import sentencepiece as spm
except ImportError:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment