Commit 128cfdee authored by Stefan Schweter
Browse files

tokenization: add XLM-RoBERTa base model

parent e778dd85
...@@ -30,11 +30,13 @@ VOCAB_FILES_NAMES = {'vocab_file': 'sentencepiece.bpe.model'} ...@@ -30,11 +30,13 @@ VOCAB_FILES_NAMES = {'vocab_file': 'sentencepiece.bpe.model'}
PRETRAINED_VOCAB_FILES_MAP = { PRETRAINED_VOCAB_FILES_MAP = {
'vocab_file': 'vocab_file':
{ {
'xlm-roberta-base': "https://schweter.eu/cloud/transformers/xlm-roberta-base-sentencepiece.bpe.model",
'xlm-roberta-large': "https://schweter.eu/cloud/transformers/xlm-roberta-large-sentencepiece.bpe.model", 'xlm-roberta-large': "https://schweter.eu/cloud/transformers/xlm-roberta-large-sentencepiece.bpe.model",
} }
} }
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
'xlm-roberta-base': None,
'xlm-roberta-large': None, 'xlm-roberta-large': None,
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment