chenpangpang / transformers / Commits

Commit 55e8d0ce, authored Nov 10, 2020 by Julien Chaumond

Update links from s3 to huggingface.co

Parent: 850afb42
Changes: 81 files. Showing 20 changed files with 125 additions and 125 deletions (+125, -125).
src/transformers/configuration_retribert.py          +1   -1
src/transformers/configuration_roberta.py            +6   -6
src/transformers/configuration_squeezebert.py        +3   -3
src/transformers/configuration_t5.py                 +5   -5
src/transformers/configuration_transfo_xl.py         +1   -1
src/transformers/configuration_xlm.py                +10  -10
src/transformers/configuration_xlm_prophetnet.py     +1   -1
src/transformers/configuration_xlm_roberta.py        +6   -6
src/transformers/configuration_xlnet.py              +2   -2
src/transformers/tokenization_albert.py              +8   -8
src/transformers/tokenization_albert_fast.py         +16  -16
src/transformers/tokenization_bart.py                +2   -2
src/transformers/tokenization_bart_fast.py           +3   -3
src/transformers/tokenization_bert.py                +17  -17
src/transformers/tokenization_bert_fast.py           +34  -34
src/transformers/tokenization_bert_generation.py     +1   -1
src/transformers/tokenization_bert_japanese.py       +4   -4
src/transformers/tokenization_bertweet.py            +2   -2
src/transformers/tokenization_camembert.py           +1   -1
src/transformers/tokenization_camembert_fast.py      +2   -2
src/transformers/configuration_retribert.py
@@ -22,7 +22,7 @@ logger = logging.get_logger(__name__)
 # TODO: upload to AWS
 RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "retribert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json",
+    "retribert-base-uncased": "https://huggingface.co/distilbert-base-uncased/resolve/main/config.json",
 }
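The same rewrite is applied mechanically across every file below: flat model names move from https://s3.amazonaws.com/models.huggingface.co/bert/<model>-<file> to https://huggingface.co/<model>/resolve/main/<file>, and namespaced repos (<org>/<model>) move from .../bert/<org>/<model>/<file> to https://huggingface.co/<org>/<model>/resolve/main/<file>. A minimal Python sketch of that rule, purely illustrative (the migrate_url helper is not part of this commit):

import re

def migrate_url(old_url: str) -> str:
    # Hypothetical helper mirroring the rewrite pattern in this commit.
    # Namespaced repos ("org/model") keep their path layout:
    #   .../bert/<org>/<model>/<file> -> https://huggingface.co/<org>/<model>/resolve/main/<file>
    m = re.match(r"https://s3\.amazonaws\.com/models\.huggingface\.co/bert/([^/]+/[^/]+)/(.+)", old_url)
    if m:
        return f"https://huggingface.co/{m.group(1)}/resolve/main/{m.group(2)}"
    # Flat model names use "<model>-<file>", where <file> is e.g. config.json or vocab.txt:
    #   .../bert/<model>-<file> -> https://huggingface.co/<model>/resolve/main/<file>
    m = re.match(r"https://s3\.amazonaws\.com/models\.huggingface\.co/bert/(.+)-([^-]+\.\w+)$", old_url)
    if m:
        return f"https://huggingface.co/{m.group(1)}/resolve/main/{m.group(2)}"
    return old_url  # leave anything unrecognized (e.g. third-party S3 buckets) untouched

old = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json"
print(migrate_url(old))  # https://huggingface.co/roberta-base/resolve/main/config.json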
src/transformers/configuration_roberta.py
@@ -22,12 +22,12 @@ from .utils import logging
 logger = logging.get_logger(__name__)
 ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json",
+    "roberta-base": "https://huggingface.co/roberta-base/resolve/main/config.json",
-    "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-config.json",
+    "roberta-large": "https://huggingface.co/roberta-large/resolve/main/config.json",
-    "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-config.json",
+    "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/config.json",
-    "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-config.json",
+    "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/config.json",
-    "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-openai-detector-config.json",
+    "roberta-base-openai-detector": "https://huggingface.co/roberta-base-openai-detector/resolve/main/config.json",
-    "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-openai-detector-config.json",
+    "roberta-large-openai-detector": "https://huggingface.co/roberta-large-openai-detector/resolve/main/config.json",
 }
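After the change, each entry in a map such as ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP is a plain HTTPS URL on the hub's resolve endpoint, so a config can be fetched directly. A hedged sketch using requests (which is not part of this diff):

import requests

ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "roberta-base": "https://huggingface.co/roberta-base/resolve/main/config.json",
}

# Fetch the config for one checkpoint from its new resolve/main URL.
resp = requests.get(ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["roberta-base"], timeout=10)
resp.raise_for_status()
config = resp.json()
print(config.get("model_type"), config.get("hidden_size"))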
src/transformers/configuration_squeezebert.py
@@ -21,9 +21,9 @@ from .utils import logging
 logger = logging.get_logger(__name__)
 SQUEEZEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "squeezebert/squeezebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-uncased/config.json",
+    "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/config.json",
-    "squeezebert/squeezebert-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli/config.json",
+    "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/config.json",
-    "squeezebert/squeezebert-mnli-headless": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli-headless/config.json",
+    "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/config.json",
 }
src/transformers/configuration_t5.py
@@ -21,11 +21,11 @@ from .utils import logging
 logger = logging.get_logger(__name__)
 T5_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-config.json",
+    "t5-small": "https://huggingface.co/t5-small/resolve/main/config.json",
-    "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json",
+    "t5-base": "https://huggingface.co/t5-base/resolve/main/config.json",
-    "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-config.json",
+    "t5-large": "https://huggingface.co/t5-large/resolve/main/config.json",
-    "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-3b-config.json",
+    "t5-3b": "https://huggingface.co/t5-3b/resolve/main/config.json",
-    "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-11b-config.json",
+    "t5-11b": "https://huggingface.co/t5-11b/resolve/main/config.json",
 }
src/transformers/configuration_transfo_xl.py
@@ -25,7 +25,7 @@ from .utils import logging
 logger = logging.get_logger(__name__)
 TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "transfo-xl-wt103": "https://s3.amazonaws.com/models.huggingface.co/bert/transfo-xl-wt103-config.json",
+    "transfo-xl-wt103": "https://huggingface.co/transfo-xl-wt103/resolve/main/config.json",
 }
src/transformers/configuration_xlm.py
@@ -21,16 +21,16 @@ from .utils import logging
 logger = logging.get_logger(__name__)
 XLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "xlm-mlm-en-2048": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-config.json",
+    "xlm-mlm-en-2048": "https://huggingface.co/xlm-mlm-en-2048/resolve/main/config.json",
-    "xlm-mlm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-ende-1024-config.json",
+    "xlm-mlm-ende-1024": "https://huggingface.co/xlm-mlm-ende-1024/resolve/main/config.json",
-    "xlm-mlm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enfr-1024-config.json",
+    "xlm-mlm-enfr-1024": "https://huggingface.co/xlm-mlm-enfr-1024/resolve/main/config.json",
-    "xlm-mlm-enro-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enro-1024-config.json",
+    "xlm-mlm-enro-1024": "https://huggingface.co/xlm-mlm-enro-1024/resolve/main/config.json",
-    "xlm-mlm-tlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-tlm-xnli15-1024-config.json",
+    "xlm-mlm-tlm-xnli15-1024": "https://huggingface.co/xlm-mlm-tlm-xnli15-1024/resolve/main/config.json",
-    "xlm-mlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-xnli15-1024-config.json",
+    "xlm-mlm-xnli15-1024": "https://huggingface.co/xlm-mlm-xnli15-1024/resolve/main/config.json",
-    "xlm-clm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-enfr-1024-config.json",
+    "xlm-clm-enfr-1024": "https://huggingface.co/xlm-clm-enfr-1024/resolve/main/config.json",
-    "xlm-clm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-ende-1024-config.json",
+    "xlm-clm-ende-1024": "https://huggingface.co/xlm-clm-ende-1024/resolve/main/config.json",
-    "xlm-mlm-17-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-config.json",
+    "xlm-mlm-17-1280": "https://huggingface.co/xlm-mlm-17-1280/resolve/main/config.json",
-    "xlm-mlm-100-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-100-1280-config.json",
+    "xlm-mlm-100-1280": "https://huggingface.co/xlm-mlm-100-1280/resolve/main/config.json",
 }
src/transformers/configuration_xlm_prophetnet.py
@@ -22,7 +22,7 @@ from .utils import logging
 logger = logging.get_logger(__name__)
 XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "microsoft/xprophetnet-large-wiki100-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/xprophetnet-large-wiki100-cased/config.json",
+    "microsoft/xprophetnet-large-wiki100-cased": "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/config.json",
 }
src/transformers/configuration_xlm_roberta.py
@@ -22,12 +22,12 @@ from .utils import logging
 logger = logging.get_logger(__name__)
 XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "xlm-roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-base-config.json",
+    "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/config.json",
-    "xlm-roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-config.json",
+    "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/config.json",
-    "xlm-roberta-large-finetuned-conll02-dutch": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-dutch-config.json",
+    "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/config.json",
-    "xlm-roberta-large-finetuned-conll02-spanish": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-spanish-config.json",
+    "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/config.json",
-    "xlm-roberta-large-finetuned-conll03-english": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-english-config.json",
+    "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/config.json",
-    "xlm-roberta-large-finetuned-conll03-german": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-german-config.json",
+    "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/config.json",
 }
src/transformers/configuration_xlnet.py
@@ -22,8 +22,8 @@ from .utils import logging
 logger = logging.get_logger(__name__)
 XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "xlnet-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json",
+    "xlnet-base-cased": "https://huggingface.co/xlnet-base-cased/resolve/main/config.json",
-    "xlnet-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-config.json",
+    "xlnet-large-cased": "https://huggingface.co/xlnet-large-cased/resolve/main/config.json",
 }
src/transformers/tokenization_albert.py
@@ -31,14 +31,14 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"}
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "albert-base-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v1-spiece.model",
+        "albert-base-v1": "https://huggingface.co/albert-base-v1/resolve/main/spiece.model",
-        "albert-large-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v1-spiece.model",
+        "albert-large-v1": "https://huggingface.co/albert-large-v1/resolve/main/spiece.model",
-        "albert-xlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v1-spiece.model",
+        "albert-xlarge-v1": "https://huggingface.co/albert-xlarge-v1/resolve/main/spiece.model",
-        "albert-xxlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v1-spiece.model",
+        "albert-xxlarge-v1": "https://huggingface.co/albert-xxlarge-v1/resolve/main/spiece.model",
-        "albert-base-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-spiece.model",
+        "albert-base-v2": "https://huggingface.co/albert-base-v2/resolve/main/spiece.model",
-        "albert-large-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-spiece.model",
+        "albert-large-v2": "https://huggingface.co/albert-large-v2/resolve/main/spiece.model",
-        "albert-xlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-spiece.model",
+        "albert-xlarge-v2": "https://huggingface.co/albert-xlarge-v2/resolve/main/spiece.model",
-        "albert-xxlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-spiece.model",
+        "albert-xxlarge-v2": "https://huggingface.co/albert-xxlarge-v2/resolve/main/spiece.model",
     }
 }
src/transformers/tokenization_albert_fast.py
@@ -34,24 +34,24 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer.
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "albert-base-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v1-spiece.model",
+        "albert-base-v1": "https://huggingface.co/albert-base-v1/resolve/main/spiece.model",
-        "albert-large-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v1-spiece.model",
+        "albert-large-v1": "https://huggingface.co/albert-large-v1/resolve/main/spiece.model",
-        "albert-xlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v1-spiece.model",
+        "albert-xlarge-v1": "https://huggingface.co/albert-xlarge-v1/resolve/main/spiece.model",
-        "albert-xxlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v1-spiece.model",
+        "albert-xxlarge-v1": "https://huggingface.co/albert-xxlarge-v1/resolve/main/spiece.model",
-        "albert-base-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-spiece.model",
+        "albert-base-v2": "https://huggingface.co/albert-base-v2/resolve/main/spiece.model",
-        "albert-large-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-spiece.model",
+        "albert-large-v2": "https://huggingface.co/albert-large-v2/resolve/main/spiece.model",
-        "albert-xlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-spiece.model",
+        "albert-xlarge-v2": "https://huggingface.co/albert-xlarge-v2/resolve/main/spiece.model",
-        "albert-xxlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-spiece.model",
+        "albert-xxlarge-v2": "https://huggingface.co/albert-xxlarge-v2/resolve/main/spiece.model",
     },
     "tokenizer_file": {
-        "albert-base-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v1-tokenizer.json",
+        "albert-base-v1": "https://huggingface.co/albert-base-v1/resolve/main/tokenizer.json",
-        "albert-large-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v1-tokenizer.json",
+        "albert-large-v1": "https://huggingface.co/albert-large-v1/resolve/main/tokenizer.json",
-        "albert-xlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v1-tokenizer.json",
+        "albert-xlarge-v1": "https://huggingface.co/albert-xlarge-v1/resolve/main/tokenizer.json",
-        "albert-xxlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v1-tokenizer.json",
+        "albert-xxlarge-v1": "https://huggingface.co/albert-xxlarge-v1/resolve/main/tokenizer.json",
-        "albert-base-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-tokenizer.json",
+        "albert-base-v2": "https://huggingface.co/albert-base-v2/resolve/main/tokenizer.json",
-        "albert-large-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-tokenizer.json",
+        "albert-large-v2": "https://huggingface.co/albert-large-v2/resolve/main/tokenizer.json",
-        "albert-xlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-tokenizer.json",
+        "albert-xlarge-v2": "https://huggingface.co/albert-xlarge-v2/resolve/main/tokenizer.json",
-        "albert-xxlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-tokenizer.json",
+        "albert-xxlarge-v2": "https://huggingface.co/albert-xxlarge-v2/resolve/main/tokenizer.json",
     },
 }
src/transformers/tokenization_bart.py
@@ -24,8 +24,8 @@ logger = logging.get_logger(__name__)
 # vocab and merges same as roberta
-vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json"
+vocab_url = "https://huggingface.co/roberta-large/resolve/main/vocab.json"
-merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt"
+merges_url = "https://huggingface.co/roberta-large/resolve/main/merges.txt"
 _all_bart_models = [
     "facebook/bart-base",
     "facebook/bart-large",
src/transformers/tokenization_bart_fast.py
@@ -25,9 +25,9 @@ logger = logging.get_logger(__name__)
 # vocab and merges same as roberta
-vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json"
+vocab_url = "https://huggingface.co/roberta-large/resolve/main/vocab.json"
-merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt"
+merges_url = "https://huggingface.co/roberta-large/resolve/main/merges.txt"
-tokenizer_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-tokenizer.json"
+tokenizer_url = "https://huggingface.co/roberta-large/resolve/main/tokenizer.json"
 _all_bart_models = [
     "facebook/bart-base",
     "facebook/bart-large",
src/transformers/tokenization_bert.py
@@ -30,24 +30,24 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
+        "bert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt",
-        "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",
+        "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt",
-        "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt",
+        "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/vocab.txt",
-        "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
+        "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/vocab.txt",
-        "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt",
+        "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt",
-        "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt",
+        "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt",
-        "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt",
+        "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/vocab.txt",
         "bert-base-german-cased": "https://int-deepset-models-bert.s3.eu-central-1.amazonaws.com/pytorch/bert-base-german-cased-vocab.txt",
-        "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-vocab.txt",
+        "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/vocab.txt",
-        "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-vocab.txt",
+        "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/vocab.txt",
-        "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-vocab.txt",
+        "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt",
-        "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-vocab.txt",
+        "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt",
-        "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-vocab.txt",
+        "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/vocab.txt",
-        "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-vocab.txt",
+        "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/vocab.txt",
-        "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-vocab.txt",
+        "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/vocab.txt",
-        "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/vocab.txt",
+        "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/vocab.txt",
-        "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/vocab.txt",
+        "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/vocab.txt",
-        "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/vocab.txt",
+        "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/vocab.txt",
     }
 }
src/transformers/tokenization_bert_fast.py
@@ -30,44 +30,44 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
+        "bert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt",
-        "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",
+        "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt",
-        "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt",
+        "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/vocab.txt",
-        "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
+        "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/vocab.txt",
-        "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt",
+        "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt",
-        "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt",
+        "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt",
-        "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt",
+        "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/vocab.txt",
         "bert-base-german-cased": "https://int-deepset-models-bert.s3.eu-central-1.amazonaws.com/pytorch/bert-base-german-cased-vocab.txt",
-        "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-vocab.txt",
+        "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/vocab.txt",
-        "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-vocab.txt",
+        "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/vocab.txt",
-        "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-vocab.txt",
+        "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt",
-        "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-vocab.txt",
+        "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt",
-        "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-vocab.txt",
+        "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/vocab.txt",
-        "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-vocab.txt",
+        "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/vocab.txt",
-        "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-vocab.txt",
+        "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/vocab.txt",
-        "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/vocab.txt",
+        "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/vocab.txt",
-        "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/vocab.txt",
+        "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/vocab.txt",
-        "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/vocab.txt",
+        "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/vocab.txt",
     },
     "tokenizer_file": {
-        "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json",
+        "bert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json",
-        "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-tokenizer.json",
+        "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/tokenizer.json",
-        "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-tokenizer.json",
+        "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/tokenizer.json",
-        "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-tokenizer.json",
+        "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/tokenizer.json",
-        "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-tokenizer.json",
+        "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer.json",
-        "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-tokenizer.json",
+        "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json",
-        "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-tokenizer.json",
+        "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/tokenizer.json",
         "bert-base-german-cased": "https://int-deepset-models-bert.s3.eu-central-1.amazonaws.com/pytorch/bert-base-german-cased-tokenizer.json",
-        "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-tokenizer.json",
+        "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/tokenizer.json",
-        "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-tokenizer.json",
+        "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/tokenizer.json",
-        "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-tokenizer.json",
+        "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/tokenizer.json",
-        "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-tokenizer.json",
+        "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/tokenizer.json",
-        "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-tokenizer.json",
+        "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/tokenizer.json",
-        "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-tokenizer.json",
+        "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/tokenizer.json",
-        "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-tokenizer.json",
+        "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/tokenizer.json",
-        "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/tokenizer.json",
+        "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/tokenizer.json",
-        "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/tokenizer.json",
+        "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/tokenizer.json",
-        "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/tokenizer.json",
+        "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/tokenizer.json",
     },
 }
src/transformers/tokenization_bert_generation.py
@@ -30,7 +30,7 @@ logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"}
 tokenizer_url = (
-    "https://s3.amazonaws.com/models.huggingface.co/bert/google/bert_for_seq_generation_L-24_bbc_encoder/spiece.model"
+    "https://huggingface.co/google/bert_for_seq_generation_L-24_bbc_encoder/resolve/main/spiece.model"
 )
src/transformers/tokenization_bert_japanese.py
@@ -31,10 +31,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "cl-tohoku/bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/vocab.txt",
+        "cl-tohoku/bert-base-japanese": "https://huggingface.co/cl-tohoku/bert-base-japanese/resolve/main/vocab.txt",
-        "cl-tohoku/bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/vocab.txt",
+        "cl-tohoku/bert-base-japanese-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/resolve/main/vocab.txt",
-        "cl-tohoku/bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/vocab.txt",
+        "cl-tohoku/bert-base-japanese-char": "https://huggingface.co/cl-tohoku/bert-base-japanese-char/resolve/main/vocab.txt",
-        "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/vocab.txt",
+        "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/resolve/main/vocab.txt",
     }
 }
src/transformers/tokenization_bertweet.py
@@ -37,10 +37,10 @@ VOCAB_FILES_NAMES = {
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "vinai/bertweet-base": "https://s3.amazonaws.com/models.huggingface.co/bert/vinai/bertweet-base/vocab.txt",
+        "vinai/bertweet-base": "https://huggingface.co/vinai/bertweet-base/resolve/main/vocab.txt",
     },
     "merges_file": {
-        "vinai/bertweet-base": "https://s3.amazonaws.com/models.huggingface.co/bert/vinai/bertweet-base/bpe.codes",
+        "vinai/bertweet-base": "https://huggingface.co/vinai/bertweet-base/resolve/main/bpe.codes",
     },
 }
src/transformers/tokenization_camembert.py
@@ -31,7 +31,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"}
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-sentencepiece.bpe.model",
+        "camembert-base": "https://huggingface.co/camembert-base/resolve/main/sentencepiece.bpe.model",
     }
 }
src/transformers/tokenization_camembert_fast.py
@@ -36,10 +36,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model", "tokenizer_file":
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-sentencepiece.bpe.model",
+        "camembert-base": "https://huggingface.co/camembert-base/resolve/main/sentencepiece.bpe.model",
     },
     "tokenizer_file": {
-        "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-tokenizer.json",
+        "camembert-base": "https://huggingface.co/camembert-base/resolve/main/tokenizer.json",
     },
 }
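One way to sanity-check a migrated map is to issue HEAD requests against each rewritten URL; the check_urls helper below is illustrative only and not part of this commit.

import requests

def check_urls(archive_map: dict) -> None:
    # HEAD each rewritten URL and report anything that does not resolve.
    for name, url in archive_map.items():
        resp = requests.head(url, allow_redirects=True, timeout=10)
        status = "OK" if resp.ok else f"HTTP {resp.status_code}"
        print(f"{name}: {status}")

check_urls({
    "xlnet-base-cased": "https://huggingface.co/xlnet-base-cased/resolve/main/config.json",
})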