Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
55e8d0ce
Commit
55e8d0ce
authored
Nov 10, 2020
by
Julien Chaumond
Browse files
Update links from s3 to huggingface.co
parent
850afb42
Changes
81
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
141 additions
and
141 deletions
+141
-141
src/transformers/tokenization_deberta.py
src/transformers/tokenization_deberta.py
+2
-2
src/transformers/tokenization_distilbert.py
src/transformers/tokenization_distilbert.py
+6
-6
src/transformers/tokenization_distilbert_fast.py
src/transformers/tokenization_distilbert_fast.py
+12
-12
src/transformers/tokenization_dpr.py
src/transformers/tokenization_dpr.py
+12
-12
src/transformers/tokenization_dpr_fast.py
src/transformers/tokenization_dpr_fast.py
+12
-12
src/transformers/tokenization_electra.py
src/transformers/tokenization_electra.py
+6
-6
src/transformers/tokenization_electra_fast.py
src/transformers/tokenization_electra_fast.py
+12
-12
src/transformers/tokenization_flaubert.py
src/transformers/tokenization_flaubert.py
+8
-8
src/transformers/tokenization_funnel.py
src/transformers/tokenization_funnel.py
+10
-10
src/transformers/tokenization_funnel_fast.py
src/transformers/tokenization_funnel_fast.py
+20
-20
src/transformers/tokenization_gpt2.py
src/transformers/tokenization_gpt2.py
+10
-10
src/transformers/tokenization_gpt2_fast.py
src/transformers/tokenization_gpt2_fast.py
+15
-15
src/transformers/tokenization_layoutlm.py
src/transformers/tokenization_layoutlm.py
+2
-2
src/transformers/tokenization_layoutlm_fast.py
src/transformers/tokenization_layoutlm_fast.py
+4
-4
src/transformers/tokenization_longformer.py
src/transformers/tokenization_longformer.py
+2
-2
src/transformers/tokenization_longformer_fast.py
src/transformers/tokenization_longformer_fast.py
+3
-3
src/transformers/tokenization_lxmert.py
src/transformers/tokenization_lxmert.py
+1
-1
src/transformers/tokenization_lxmert_fast.py
src/transformers/tokenization_lxmert_fast.py
+2
-2
src/transformers/tokenization_marian.py
src/transformers/tokenization_marian.py
+1
-1
src/transformers/tokenization_mbart.py
src/transformers/tokenization_mbart.py
+1
-1
No files found.
src/transformers/tokenization_deberta.py
View file @
55e8d0ce
...
...
@@ -42,8 +42,8 @@ VOCAB_FILES_NAMES = {"vocab_file": "bpe_encoder.bin"}
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"microsoft/deberta-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
microsoft/deberta-base/bpe_encoder.bin"
,
"microsoft/deberta-large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
microsoft/deberta-large/bpe_encoder.bin"
,
"microsoft/deberta-base"
:
"https://huggingface.co/microsoft/deberta-base/
resolve/main/
bpe_encoder.bin"
,
"microsoft/deberta-large"
:
"https://huggingface.co/microsoft/deberta-large/
resolve/main/
bpe_encoder.bin"
,
}
}
...
...
src/transformers/tokenization_distilbert.py
View file @
55e8d0ce
...
...
@@ -24,12 +24,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"distilbert-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"distilbert-base-uncased-distilled-squad"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-uncased
-
vocab.txt"
,
"distilbert-base-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-cased
-
vocab.txt"
,
"distilbert-base-cased-distilled-squad"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-cased
-
vocab.txt"
,
"distilbert-base-german-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
distilbert-base-german-cased
-
vocab.txt"
,
"distilbert-base-multilingual-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-multilingual-cased
-
vocab.txt"
,
"distilbert-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"distilbert-base-uncased-distilled-squad"
:
"https://huggingface.co/bert-large-uncased
/resolve/main/
vocab.txt"
,
"distilbert-base-cased"
:
"https://huggingface.co/bert-base-cased
/resolve/main/
vocab.txt"
,
"distilbert-base-cased-distilled-squad"
:
"https://huggingface.co/bert-large-cased
/resolve/main/
vocab.txt"
,
"distilbert-base-german-cased"
:
"https://huggingface.co/distilbert-base-german-cased
/resolve/main/
vocab.txt"
,
"distilbert-base-multilingual-cased"
:
"https://huggingface.co/bert-base-multilingual-cased
/resolve/main/
vocab.txt"
,
}
}
...
...
src/transformers/tokenization_distilbert_fast.py
View file @
55e8d0ce
...
...
@@ -25,20 +25,20 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"distilbert-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"distilbert-base-uncased-distilled-squad"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-uncased
-
vocab.txt"
,
"distilbert-base-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-cased
-
vocab.txt"
,
"distilbert-base-cased-distilled-squad"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-cased
-
vocab.txt"
,
"distilbert-base-german-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
distilbert-base-german-cased
-
vocab.txt"
,
"distilbert-base-multilingual-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-multilingual-cased
-
vocab.txt"
,
"distilbert-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"distilbert-base-uncased-distilled-squad"
:
"https://huggingface.co/bert-large-uncased
/resolve/main/
vocab.txt"
,
"distilbert-base-cased"
:
"https://huggingface.co/bert-base-cased
/resolve/main/
vocab.txt"
,
"distilbert-base-cased-distilled-squad"
:
"https://huggingface.co/bert-large-cased
/resolve/main/
vocab.txt"
,
"distilbert-base-german-cased"
:
"https://huggingface.co/distilbert-base-german-cased
/resolve/main/
vocab.txt"
,
"distilbert-base-multilingual-cased"
:
"https://huggingface.co/bert-base-multilingual-cased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"distilbert-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"distilbert-base-uncased-distilled-squad"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-uncased
-
tokenizer.json"
,
"distilbert-base-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-cased
-
tokenizer.json"
,
"distilbert-base-cased-distilled-squad"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-cased
-
tokenizer.json"
,
"distilbert-base-german-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
distilbert-base-german-cased
-
tokenizer.json"
,
"distilbert-base-multilingual-cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-multilingual-cased
-
tokenizer.json"
,
"distilbert-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
"distilbert-base-uncased-distilled-squad"
:
"https://huggingface.co/bert-large-uncased
/resolve/main/
tokenizer.json"
,
"distilbert-base-cased"
:
"https://huggingface.co/bert-base-cased
/resolve/main/
tokenizer.json"
,
"distilbert-base-cased-distilled-squad"
:
"https://huggingface.co/bert-large-cased
/resolve/main/
tokenizer.json"
,
"distilbert-base-german-cased"
:
"https://huggingface.co/distilbert-base-german-cased
/resolve/main/
tokenizer.json"
,
"distilbert-base-multilingual-cased"
:
"https://huggingface.co/bert-base-multilingual-cased
/resolve/main/
tokenizer.json"
,
},
}
...
...
src/transformers/tokenization_dpr.py
View file @
55e8d0ce
...
...
@@ -30,32 +30,32 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"facebook/dpr-ctx_encoder-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-ctx_encoder-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-ctx_encoder-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"facebook/dpr-ctx_encoder-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"facebook/dpr-ctx_encoder-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-ctx_encoder-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-ctx_encoder-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
"facebook/dpr-ctx_encoder-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
},
}
QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"facebook/dpr-question_encoder-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-question_encoder-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-question_encoder-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"facebook/dpr-question_encoder-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"facebook/dpr-question_encoder-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-question_encoder-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-question_encoder-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
"facebook/dpr-question_encoder-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
},
}
READER_PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"facebook/dpr-reader-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-reader-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-reader-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"facebook/dpr-reader-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"facebook/dpr-reader-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-reader-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-reader-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
"facebook/dpr-reader-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
},
}
...
...
src/transformers/tokenization_dpr_fast.py
View file @
55e8d0ce
...
...
@@ -31,32 +31,32 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"facebook/dpr-ctx_encoder-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-ctx_encoder-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-ctx_encoder-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"facebook/dpr-ctx_encoder-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"facebook/dpr-ctx_encoder-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-ctx_encoder-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-ctx_encoder-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
"facebook/dpr-ctx_encoder-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
},
}
QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"facebook/dpr-question_encoder-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-question_encoder-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-question_encoder-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"facebook/dpr-question_encoder-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"facebook/dpr-question_encoder-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-question_encoder-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-question_encoder-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
"facebook/dpr-question_encoder-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
},
}
READER_PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"facebook/dpr-reader-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-reader-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"facebook/dpr-reader-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"facebook/dpr-reader-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"facebook/dpr-reader-single-nq-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-reader-multiset-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"facebook/dpr-reader-single-nq-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
"facebook/dpr-reader-multiset-base"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
},
}
...
...
src/transformers/tokenization_electra.py
View file @
55e8d0ce
...
...
@@ -20,12 +20,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"google/electra-small-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-small-generator/vocab.txt"
,
"google/electra-base-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-base-generator/vocab.txt"
,
"google/electra-large-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-large-generator/vocab.txt"
,
"google/electra-small-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-small-discriminator/vocab.txt"
,
"google/electra-base-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-base-discriminator/vocab.txt"
,
"google/electra-large-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-large-discriminator/vocab.txt"
,
"google/electra-small-generator"
:
"https://huggingface.co/google/electra-small-generator/
resolve/main/
vocab.txt"
,
"google/electra-base-generator"
:
"https://huggingface.co/google/electra-base-generator/
resolve/main/
vocab.txt"
,
"google/electra-large-generator"
:
"https://huggingface.co/google/electra-large-generator/
resolve/main/
vocab.txt"
,
"google/electra-small-discriminator"
:
"https://huggingface.co/google/electra-small-discriminator/
resolve/main/
vocab.txt"
,
"google/electra-base-discriminator"
:
"https://huggingface.co/google/electra-base-discriminator/
resolve/main/
vocab.txt"
,
"google/electra-large-discriminator"
:
"https://huggingface.co/google/electra-large-discriminator/
resolve/main/
vocab.txt"
,
}
}
...
...
src/transformers/tokenization_electra_fast.py
View file @
55e8d0ce
...
...
@@ -21,20 +21,20 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"google/electra-small-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-small-generator/vocab.txt"
,
"google/electra-base-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-base-generator/vocab.txt"
,
"google/electra-large-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-large-generator/vocab.txt"
,
"google/electra-small-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-small-discriminator/vocab.txt"
,
"google/electra-base-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-base-discriminator/vocab.txt"
,
"google/electra-large-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-large-discriminator/vocab.txt"
,
"google/electra-small-generator"
:
"https://huggingface.co/google/electra-small-generator/
resolve/main/
vocab.txt"
,
"google/electra-base-generator"
:
"https://huggingface.co/google/electra-base-generator/
resolve/main/
vocab.txt"
,
"google/electra-large-generator"
:
"https://huggingface.co/google/electra-large-generator/
resolve/main/
vocab.txt"
,
"google/electra-small-discriminator"
:
"https://huggingface.co/google/electra-small-discriminator/
resolve/main/
vocab.txt"
,
"google/electra-base-discriminator"
:
"https://huggingface.co/google/electra-base-discriminator/
resolve/main/
vocab.txt"
,
"google/electra-large-discriminator"
:
"https://huggingface.co/google/electra-large-discriminator/
resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"google/electra-small-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-small-generator/tokenizer.json"
,
"google/electra-base-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-base-generator/tokenizer.json"
,
"google/electra-large-generator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-large-generator/tokenizer.json"
,
"google/electra-small-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-small-discriminator/tokenizer.json"
,
"google/electra-base-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-base-discriminator/tokenizer.json"
,
"google/electra-large-discriminator"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
google/electra-large-discriminator/tokenizer.json"
,
"google/electra-small-generator"
:
"https://huggingface.co/google/electra-small-generator/
resolve/main/
tokenizer.json"
,
"google/electra-base-generator"
:
"https://huggingface.co/google/electra-base-generator/
resolve/main/
tokenizer.json"
,
"google/electra-large-generator"
:
"https://huggingface.co/google/electra-large-generator/
resolve/main/
tokenizer.json"
,
"google/electra-small-discriminator"
:
"https://huggingface.co/google/electra-small-discriminator/
resolve/main/
tokenizer.json"
,
"google/electra-base-discriminator"
:
"https://huggingface.co/google/electra-base-discriminator/
resolve/main/
tokenizer.json"
,
"google/electra-large-discriminator"
:
"https://huggingface.co/google/electra-large-discriminator/
resolve/main/
tokenizer.json"
,
},
}
...
...
src/transformers/tokenization_flaubert.py
View file @
55e8d0ce
...
...
@@ -32,16 +32,16 @@ VOCAB_FILES_NAMES = {
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"flaubert/flaubert_small_cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
flaubert/flaubert_small_cased/vocab.json"
,
"flaubert/flaubert_base_uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
flaubert/flaubert_base_uncased/vocab.json"
,
"flaubert/flaubert_base_cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
flaubert/flaubert_base_cased/vocab.json"
,
"flaubert/flaubert_large_cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
flaubert/flaubert_large_cased/vocab.json"
,
"flaubert/flaubert_small_cased"
:
"https://huggingface.co/flaubert/flaubert_small_cased/
resolve/main/
vocab.json"
,
"flaubert/flaubert_base_uncased"
:
"https://huggingface.co/flaubert/flaubert_base_uncased/
resolve/main/
vocab.json"
,
"flaubert/flaubert_base_cased"
:
"https://huggingface.co/flaubert/flaubert_base_cased/
resolve/main/
vocab.json"
,
"flaubert/flaubert_large_cased"
:
"https://huggingface.co/flaubert/flaubert_large_cased/
resolve/main/
vocab.json"
,
},
"merges_file"
:
{
"flaubert/flaubert_small_cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
flaubert/flaubert_small_cased/merges.txt"
,
"flaubert/flaubert_base_uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
flaubert/flaubert_base_uncased/merges.txt"
,
"flaubert/flaubert_base_cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
flaubert/flaubert_base_cased/merges.txt"
,
"flaubert/flaubert_large_cased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
flaubert/flaubert_large_cased/merges.txt"
,
"flaubert/flaubert_small_cased"
:
"https://huggingface.co/flaubert/flaubert_small_cased/
resolve/main/
merges.txt"
,
"flaubert/flaubert_base_uncased"
:
"https://huggingface.co/flaubert/flaubert_base_uncased/
resolve/main/
merges.txt"
,
"flaubert/flaubert_base_cased"
:
"https://huggingface.co/flaubert/flaubert_base_cased/
resolve/main/
merges.txt"
,
"flaubert/flaubert_large_cased"
:
"https://huggingface.co/flaubert/flaubert_large_cased/
resolve/main/
merges.txt"
,
},
}
...
...
src/transformers/tokenization_funnel.py
View file @
55e8d0ce
...
...
@@ -39,16 +39,16 @@ _model_names = [
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"funnel-transformer/small"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/small/vocab.txt"
,
"funnel-transformer/small-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/small-base/vocab.txt"
,
"funnel-transformer/medium"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/medium/vocab.txt"
,
"funnel-transformer/medium-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/medium-base/vocab.txt"
,
"funnel-transformer/intermediate"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/intermediate/vocab.txt"
,
"funnel-transformer/intermediate-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/intermediate-base/vocab.txt"
,
"funnel-transformer/large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/large/vocab.txt"
,
"funnel-transformer/large-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/large-base/vocab.txt"
,
"funnel-transformer/xlarge"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/xlarge/vocab.txt"
,
"funnel-transformer/xlarge-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/xlarge-base/vocab.txt"
,
"funnel-transformer/small"
:
"https://huggingface.co/funnel-transformer/small/
resolve/main/
vocab.txt"
,
"funnel-transformer/small-base"
:
"https://huggingface.co/funnel-transformer/small-base/
resolve/main/
vocab.txt"
,
"funnel-transformer/medium"
:
"https://huggingface.co/funnel-transformer/medium/
resolve/main/
vocab.txt"
,
"funnel-transformer/medium-base"
:
"https://huggingface.co/funnel-transformer/medium-base/
resolve/main/
vocab.txt"
,
"funnel-transformer/intermediate"
:
"https://huggingface.co/funnel-transformer/intermediate/
resolve/main/
vocab.txt"
,
"funnel-transformer/intermediate-base"
:
"https://huggingface.co/funnel-transformer/intermediate-base/
resolve/main/
vocab.txt"
,
"funnel-transformer/large"
:
"https://huggingface.co/funnel-transformer/large/
resolve/main/
vocab.txt"
,
"funnel-transformer/large-base"
:
"https://huggingface.co/funnel-transformer/large-base/
resolve/main/
vocab.txt"
,
"funnel-transformer/xlarge"
:
"https://huggingface.co/funnel-transformer/xlarge/
resolve/main/
vocab.txt"
,
"funnel-transformer/xlarge-base"
:
"https://huggingface.co/funnel-transformer/xlarge-base/
resolve/main/
vocab.txt"
,
}
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
=
{
f
"funnel-transformer/
{
name
}
"
:
512
for
name
in
_model_names
}
...
...
src/transformers/tokenization_funnel_fast.py
View file @
55e8d0ce
...
...
@@ -40,28 +40,28 @@ _model_names = [
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"funnel-transformer/small"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/small/vocab.txt"
,
"funnel-transformer/small-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/small-base/vocab.txt"
,
"funnel-transformer/medium"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/medium/vocab.txt"
,
"funnel-transformer/medium-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/medium-base/vocab.txt"
,
"funnel-transformer/intermediate"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/intermediate/vocab.txt"
,
"funnel-transformer/intermediate-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/intermediate-base/vocab.txt"
,
"funnel-transformer/large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/large/vocab.txt"
,
"funnel-transformer/large-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/large-base/vocab.txt"
,
"funnel-transformer/xlarge"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/xlarge/vocab.txt"
,
"funnel-transformer/xlarge-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/xlarge-base/vocab.txt"
,
"funnel-transformer/small"
:
"https://huggingface.co/funnel-transformer/small/
resolve/main/
vocab.txt"
,
"funnel-transformer/small-base"
:
"https://huggingface.co/funnel-transformer/small-base/
resolve/main/
vocab.txt"
,
"funnel-transformer/medium"
:
"https://huggingface.co/funnel-transformer/medium/
resolve/main/
vocab.txt"
,
"funnel-transformer/medium-base"
:
"https://huggingface.co/funnel-transformer/medium-base/
resolve/main/
vocab.txt"
,
"funnel-transformer/intermediate"
:
"https://huggingface.co/funnel-transformer/intermediate/
resolve/main/
vocab.txt"
,
"funnel-transformer/intermediate-base"
:
"https://huggingface.co/funnel-transformer/intermediate-base/
resolve/main/
vocab.txt"
,
"funnel-transformer/large"
:
"https://huggingface.co/funnel-transformer/large/
resolve/main/
vocab.txt"
,
"funnel-transformer/large-base"
:
"https://huggingface.co/funnel-transformer/large-base/
resolve/main/
vocab.txt"
,
"funnel-transformer/xlarge"
:
"https://huggingface.co/funnel-transformer/xlarge/
resolve/main/
vocab.txt"
,
"funnel-transformer/xlarge-base"
:
"https://huggingface.co/funnel-transformer/xlarge-base/
resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"funnel-transformer/small"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/small/tokenizer.json"
,
"funnel-transformer/small-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/small-base/tokenizer.json"
,
"funnel-transformer/medium"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/medium/tokenizer.json"
,
"funnel-transformer/medium-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/medium-base/tokenizer.json"
,
"funnel-transformer/intermediate"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/intermediate/tokenizer.json"
,
"funnel-transformer/intermediate-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/intermediate-base/tokenizer.json"
,
"funnel-transformer/large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/large/tokenizer.json"
,
"funnel-transformer/large-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/large-base/tokenizer.json"
,
"funnel-transformer/xlarge"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/xlarge/tokenizer.json"
,
"funnel-transformer/xlarge-base"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
funnel-transformer/xlarge-base/tokenizer.json"
,
"funnel-transformer/small"
:
"https://huggingface.co/funnel-transformer/small/
resolve/main/
tokenizer.json"
,
"funnel-transformer/small-base"
:
"https://huggingface.co/funnel-transformer/small-base/
resolve/main/
tokenizer.json"
,
"funnel-transformer/medium"
:
"https://huggingface.co/funnel-transformer/medium/
resolve/main/
tokenizer.json"
,
"funnel-transformer/medium-base"
:
"https://huggingface.co/funnel-transformer/medium-base/
resolve/main/
tokenizer.json"
,
"funnel-transformer/intermediate"
:
"https://huggingface.co/funnel-transformer/intermediate/
resolve/main/
tokenizer.json"
,
"funnel-transformer/intermediate-base"
:
"https://huggingface.co/funnel-transformer/intermediate-base/
resolve/main/
tokenizer.json"
,
"funnel-transformer/large"
:
"https://huggingface.co/funnel-transformer/large/
resolve/main/
tokenizer.json"
,
"funnel-transformer/large-base"
:
"https://huggingface.co/funnel-transformer/large-base/
resolve/main/
tokenizer.json"
,
"funnel-transformer/xlarge"
:
"https://huggingface.co/funnel-transformer/xlarge/
resolve/main/
tokenizer.json"
,
"funnel-transformer/xlarge-base"
:
"https://huggingface.co/funnel-transformer/xlarge-base/
resolve/main/
tokenizer.json"
,
},
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
=
{
f
"funnel-transformer/
{
name
}
"
:
512
for
name
in
_model_names
}
...
...
src/transformers/tokenization_gpt2.py
View file @
55e8d0ce
...
...
@@ -36,18 +36,18 @@ VOCAB_FILES_NAMES = {
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"gpt2"
:
"https://
s3.amazonaws.com/models.huggingface.co/bert/gpt2-
vocab.json"
,
"gpt2-medium"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-medium
-
vocab.json"
,
"gpt2-large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-large
-
vocab.json"
,
"gpt2-xl"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-xl
-
vocab.json"
,
"distilgpt2"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
distilgpt2
-
vocab.json"
,
"gpt2"
:
"https://
huggingface.co/gpt2/resolve/main/
vocab.json"
,
"gpt2-medium"
:
"https://huggingface.co/gpt2-medium
/resolve/main/
vocab.json"
,
"gpt2-large"
:
"https://huggingface.co/gpt2-large
/resolve/main/
vocab.json"
,
"gpt2-xl"
:
"https://huggingface.co/gpt2-xl
/resolve/main/
vocab.json"
,
"distilgpt2"
:
"https://huggingface.co/distilgpt2
/resolve/main/
vocab.json"
,
},
"merges_file"
:
{
"gpt2"
:
"https://
s3.amazonaws.com/models.huggingface.co/bert/gpt2-
merges.txt"
,
"gpt2-medium"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-medium
-
merges.txt"
,
"gpt2-large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-large
-
merges.txt"
,
"gpt2-xl"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-xl
-
merges.txt"
,
"distilgpt2"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
distilgpt2
-
merges.txt"
,
"gpt2"
:
"https://
huggingface.co/gpt2/resolve/main/
merges.txt"
,
"gpt2-medium"
:
"https://huggingface.co/gpt2-medium
/resolve/main/
merges.txt"
,
"gpt2-large"
:
"https://huggingface.co/gpt2-large
/resolve/main/
merges.txt"
,
"gpt2-xl"
:
"https://huggingface.co/gpt2-xl
/resolve/main/
merges.txt"
,
"distilgpt2"
:
"https://huggingface.co/distilgpt2
/resolve/main/
merges.txt"
,
},
}
...
...
src/transformers/tokenization_gpt2_fast.py
View file @
55e8d0ce
...
...
@@ -33,25 +33,25 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "t
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"gpt2"
:
"https://
s3.amazonaws.com/models.huggingface.co/bert/gpt2-
vocab.json"
,
"gpt2-medium"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-medium
-
vocab.json"
,
"gpt2-large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-large
-
vocab.json"
,
"gpt2-xl"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-xl
-
vocab.json"
,
"distilgpt2"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
distilgpt2
-
vocab.json"
,
"gpt2"
:
"https://
huggingface.co/gpt2/resolve/main/
vocab.json"
,
"gpt2-medium"
:
"https://huggingface.co/gpt2-medium
/resolve/main/
vocab.json"
,
"gpt2-large"
:
"https://huggingface.co/gpt2-large
/resolve/main/
vocab.json"
,
"gpt2-xl"
:
"https://huggingface.co/gpt2-xl
/resolve/main/
vocab.json"
,
"distilgpt2"
:
"https://huggingface.co/distilgpt2
/resolve/main/
vocab.json"
,
},
"merges_file"
:
{
"gpt2"
:
"https://
s3.amazonaws.com/models.huggingface.co/bert/gpt2-
merges.txt"
,
"gpt2-medium"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-medium
-
merges.txt"
,
"gpt2-large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-large
-
merges.txt"
,
"gpt2-xl"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-xl
-
merges.txt"
,
"distilgpt2"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
distilgpt2
-
merges.txt"
,
"gpt2"
:
"https://
huggingface.co/gpt2/resolve/main/
merges.txt"
,
"gpt2-medium"
:
"https://huggingface.co/gpt2-medium
/resolve/main/
merges.txt"
,
"gpt2-large"
:
"https://huggingface.co/gpt2-large
/resolve/main/
merges.txt"
,
"gpt2-xl"
:
"https://huggingface.co/gpt2-xl
/resolve/main/
merges.txt"
,
"distilgpt2"
:
"https://huggingface.co/distilgpt2
/resolve/main/
merges.txt"
,
},
"tokenizer_file"
:
{
"gpt2"
:
"https://
s3.amazonaws.com/models.huggingface.co/bert/gpt2-
tokenizer.json"
,
"gpt2-medium"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-medium
-
tokenizer.json"
,
"gpt2-large"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-large
-
tokenizer.json"
,
"gpt2-xl"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
gpt2-xl
-
tokenizer.json"
,
"distilgpt2"
:
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
distilgpt2
-
tokenizer.json"
,
"gpt2"
:
"https://
huggingface.co/gpt2/resolve/main/
tokenizer.json"
,
"gpt2-medium"
:
"https://huggingface.co/gpt2-medium
/resolve/main/
tokenizer.json"
,
"gpt2-large"
:
"https://huggingface.co/gpt2-large
/resolve/main/
tokenizer.json"
,
"gpt2-xl"
:
"https://huggingface.co/gpt2-xl
/resolve/main/
tokenizer.json"
,
"distilgpt2"
:
"https://huggingface.co/distilgpt2
/resolve/main/
tokenizer.json"
,
},
}
...
...
src/transformers/tokenization_layoutlm.py
View file @
55e8d0ce
...
...
@@ -25,8 +25,8 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"microsoft/layoutlm-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"microsoft/layoutlm-large-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-uncased
-
vocab.txt"
,
"microsoft/layoutlm-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"microsoft/layoutlm-large-uncased"
:
"https://huggingface.co/bert-large-uncased
/resolve/main/
vocab.txt"
,
}
}
...
...
src/transformers/tokenization_layoutlm_fast.py
View file @
55e8d0ce
...
...
@@ -26,12 +26,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"microsoft/layoutlm-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"microsoft/layoutlm-large-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-uncased
-
vocab.txt"
,
"microsoft/layoutlm-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
"microsoft/layoutlm-large-uncased"
:
"https://huggingface.co/bert-large-uncased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"microsoft/layoutlm-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"microsoft/layoutlm-large-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-large-uncased
-
tokenizer.json"
,
"microsoft/layoutlm-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
"microsoft/layoutlm-large-uncased"
:
"https://huggingface.co/bert-large-uncased
/resolve/main/
tokenizer.json"
,
},
}
...
...
src/transformers/tokenization_longformer.py
View file @
55e8d0ce
...
...
@@ -21,8 +21,8 @@ logger = logging.get_logger(__name__)
# vocab and merges same as roberta
vocab_url
=
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
roberta-large
-
vocab.json"
merges_url
=
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
roberta-large
-
merges.txt"
vocab_url
=
"https://huggingface.co/roberta-large
/resolve/main/
vocab.json"
merges_url
=
"https://huggingface.co/roberta-large
/resolve/main/
merges.txt"
_all_longformer_models
=
[
"allenai/longformer-base-4096"
,
"allenai/longformer-large-4096"
,
...
...
src/transformers/tokenization_longformer_fast.py
View file @
55e8d0ce
...
...
@@ -22,9 +22,9 @@ logger = logging.get_logger(__name__)
# vocab and merges same as roberta
vocab_url
=
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
roberta-large
-
vocab.json"
merges_url
=
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
roberta-large
-
merges.txt"
tokenizer_url
=
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
roberta-large
-
tokenizer.json"
vocab_url
=
"https://huggingface.co/roberta-large
/resolve/main/
vocab.json"
merges_url
=
"https://huggingface.co/roberta-large
/resolve/main/
merges.txt"
tokenizer_url
=
"https://huggingface.co/roberta-large
/resolve/main/
tokenizer.json"
_all_longformer_models
=
[
"allenai/longformer-base-4096"
,
"allenai/longformer-large-4096"
,
...
...
src/transformers/tokenization_lxmert.py
View file @
55e8d0ce
...
...
@@ -28,7 +28,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
####################################################
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"unc-nlp/lxmert-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"unc-nlp/lxmert-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
}
}
...
...
src/transformers/tokenization_lxmert_fast.py
View file @
55e8d0ce
...
...
@@ -29,10 +29,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
####################################################
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{
"unc-nlp/lxmert-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
vocab.txt"
,
"unc-nlp/lxmert-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
vocab.txt"
,
},
"tokenizer_file"
:
{
"unc-nlp/lxmert-base-uncased"
:
"https://
s3.amazonaws.com/models.
huggingface.co/bert
/bert
-base-uncased
-
tokenizer.json"
,
"unc-nlp/lxmert-base-uncased"
:
"https://huggingface.co/bert-base-uncased
/resolve/main/
tokenizer.json"
,
},
}
...
...
src/transformers/tokenization_marian.py
View file @
55e8d0ce
...
...
@@ -31,7 +31,7 @@ PRETRAINED_VOCAB_FILES_MAP = {
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
=
{
"Helsinki-NLP/opus-mt-en-de"
:
512
}
PRETRAINED_INIT_CONFIGURATION
=
{}
# Example URL https://
s3.amazonaws.com/models.
huggingface.co/
bert/
Helsinki-NLP/opus-mt-en-de/vocab.json
# Example URL https://huggingface.co/Helsinki-NLP/opus-mt-en-de/
resolve/main/
vocab.json
class
MarianTokenizer
(
PreTrainedTokenizer
):
...
...
src/transformers/tokenization_mbart.py
View file @
55e8d0ce
...
...
@@ -25,7 +25,7 @@ from .utils import logging
logger
=
logging
.
get_logger
(
__name__
)
_all_mbart_models
=
[
"facebook/mbart-large-en-ro"
,
"facebook/mbart-large-cc25"
]
SPM_URL
=
"https://
s3.amazonaws.com/models.
huggingface.co/
bert/
facebook/mbart-large-en-ro/sentence.bpe.model"
SPM_URL
=
"https://huggingface.co/facebook/mbart-large-en-ro/
resolve/main/
sentence.bpe.model"
FAIRSEQ_LANGUAGE_CODES
=
[
"ar_AR"
,
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment