Unverified Commit 455c6390 authored by Julien Chaumond, committed by GitHub

CDN urls (#4030)

* [file_utils] use_cdn + documentation

* Move to CDN urls for weights

* [urls] Hotfix for bert-base-japanese
parent 8ba4c588
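
The diff below swaps the hard-coded S3 archive URLs for cdn.huggingface.co ones and threads a new `use_cdn` flag through `hf_bucket_url`. The helper itself is not part of this excerpt; the following is a minimal sketch of what it plausibly looks like, reconstructed from the call sites and URL patterns in the hunks (the constant names are assumptions, not the verbatim `file_utils.py` code):

```python
# Sketch only: inferred from the call sites and URLs in this diff,
# not copied from file_utils.py. Constant names are assumed.
CLOUDFRONT_DISTRIB_PREFIX = "https://cdn.huggingface.co"
S3_BUCKET_PREFIX = "https://s3.amazonaws.com/models.huggingface.co/bert"


def hf_bucket_url(model_id: str, filename: str, use_cdn: bool = True) -> str:
    endpoint = CLOUDFRONT_DISTRIB_PREFIX if use_cdn else S3_BUCKET_PREFIX
    if "/" in model_id:
        # Namespaced models (e.g. "google/electra-small-generator") nest the
        # filename under the model path: .../google/electra-small-generator/tf_model.h5
        return f"{endpoint}/{model_id}/{filename}"
    # Legacy flat names (e.g. "gpt2") join with a dash: .../gpt2-tf_model.h5
    return f"{endpoint}/{model_id}-{filename}"
```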
@@ -14,12 +14,12 @@ logger = logging.getLogger(__name__)
 TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "google/electra-small-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-generator/tf_model.h5",
-    "google/electra-base-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-generator/tf_model.h5",
-    "google/electra-large-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-generator/tf_model.h5",
-    "google/electra-small-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-discriminator/tf_model.h5",
-    "google/electra-base-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-discriminator/tf_model.h5",
-    "google/electra-large-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-discriminator/tf_model.h5",
+    "google/electra-small-generator": "https://cdn.huggingface.co/google/electra-small-generator/tf_model.h5",
+    "google/electra-base-generator": "https://cdn.huggingface.co/google/electra-base-generator/tf_model.h5",
+    "google/electra-large-generator": "https://cdn.huggingface.co/google/electra-large-generator/tf_model.h5",
+    "google/electra-small-discriminator": "https://cdn.huggingface.co/google/electra-small-discriminator/tf_model.h5",
+    "google/electra-base-discriminator": "https://cdn.huggingface.co/google/electra-base-discriminator/tf_model.h5",
+    "google/electra-large-discriminator": "https://cdn.huggingface.co/google/electra-large-discriminator/tf_model.h5",
 }
...
@@ -38,11 +38,11 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-tf_model.h5",
-    "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-tf_model.h5",
-    "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-tf_model.h5",
-    "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-tf_model.h5",
-    "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-tf_model.h5",
+    "gpt2": "https://cdn.huggingface.co/gpt2-tf_model.h5",
+    "gpt2-medium": "https://cdn.huggingface.co/gpt2-medium-tf_model.h5",
+    "gpt2-large": "https://cdn.huggingface.co/gpt2-large-tf_model.h5",
+    "gpt2-xl": "https://cdn.huggingface.co/gpt2-xl-tf_model.h5",
+    "distilgpt2": "https://cdn.huggingface.co/distilgpt2-tf_model.h5",
 }
...
@@ -36,9 +36,7 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
-TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-tf_model.h5"
-}
+TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP = {"openai-gpt": "https://cdn.huggingface.co/openai-gpt-tf_model.h5"}
 def gelu(x):
...
@@ -29,10 +29,10 @@ from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
 logger = logging.getLogger(__name__)
 TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-tf_model.h5",
-    "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-tf_model.h5",
-    "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-tf_model.h5",
-    "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-tf_model.h5",
+    "roberta-base": "https://cdn.huggingface.co/roberta-base-tf_model.h5",
+    "roberta-large": "https://cdn.huggingface.co/roberta-large-tf_model.h5",
+    "roberta-large-mnli": "https://cdn.huggingface.co/roberta-large-mnli-tf_model.h5",
+    "distilroberta-base": "https://cdn.huggingface.co/distilroberta-base-tf_model.h5",
 }
...
@@ -31,11 +31,11 @@ from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, shape_list
 logger = logging.getLogger(__name__)
 TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-tf_model.h5",
-    "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-tf_model.h5",
-    "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-tf_model.h5",
-    "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-3b-tf_model.h5",
-    "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-11b-tf_model.h5",
+    "t5-small": "https://cdn.huggingface.co/t5-small-tf_model.h5",
+    "t5-base": "https://cdn.huggingface.co/t5-base-tf_model.h5",
+    "t5-large": "https://cdn.huggingface.co/t5-large-tf_model.h5",
+    "t5-3b": "https://cdn.huggingface.co/t5-3b-tf_model.h5",
+    "t5-11b": "https://cdn.huggingface.co/t5-11b-tf_model.h5",
 }
 ####################################################
...
@@ -31,7 +31,7 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "transfo-xl-wt103": "https://s3.amazonaws.com/models.huggingface.co/bert/transfo-xl-wt103-tf_model.h5",
+    "transfo-xl-wt103": "https://cdn.huggingface.co/transfo-xl-wt103-tf_model.h5",
 }
...
@@ -319,6 +319,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
         resume_download = kwargs.pop("resume_download", False)
         proxies = kwargs.pop("proxies", None)
         output_loading_info = kwargs.pop("output_loading_info", False)
+        use_cdn = kwargs.pop("use_cdn", True)
         # Load config if we don't provide a configuration
         if not isinstance(config, PretrainedConfig):
@@ -358,7 +359,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
                 archive_file = pretrained_model_name_or_path + ".index"
             else:
                 archive_file = hf_bucket_url(
-                    pretrained_model_name_or_path, postfix=(WEIGHTS_NAME if from_pt else TF2_WEIGHTS_NAME)
+                    pretrained_model_name_or_path,
+                    filename=(WEIGHTS_NAME if from_pt else TF2_WEIGHTS_NAME),
+                    use_cdn=use_cdn,
                 )
         # redirect to the cache, if necessary
...
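
With the new keyword threaded through `from_pretrained` (here, and in the PyTorch `PreTrainedModel` further down), callers can opt out of the CDN per call. A hedged usage sketch; the model class is just an example, any pretrained model class should behave the same way:

```python
# Illustrative usage of the new kwarg (defaults to True after this change).
from transformers import TFGPT2Model

model = TFGPT2Model.from_pretrained("gpt2")                 # weights via the CDN
model = TFGPT2Model.from_pretrained("gpt2", use_cdn=False)  # original S3 bucket
```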
@@ -32,16 +32,16 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "xlm-mlm-en-2048": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-tf_model.h5",
-    "xlm-mlm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-ende-1024-tf_model.h5",
-    "xlm-mlm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enfr-1024-tf_model.h5",
-    "xlm-mlm-enro-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enro-1024-tf_model.h5",
-    "xlm-mlm-tlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-tlm-xnli15-1024-tf_model.h5",
-    "xlm-mlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-xnli15-1024-tf_model.h5",
-    "xlm-clm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-enfr-1024-tf_model.h5",
-    "xlm-clm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-ende-1024-tf_model.h5",
-    "xlm-mlm-17-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-tf_model.h5",
-    "xlm-mlm-100-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-100-1280-tf_model.h5",
+    "xlm-mlm-en-2048": "https://cdn.huggingface.co/xlm-mlm-en-2048-tf_model.h5",
+    "xlm-mlm-ende-1024": "https://cdn.huggingface.co/xlm-mlm-ende-1024-tf_model.h5",
+    "xlm-mlm-enfr-1024": "https://cdn.huggingface.co/xlm-mlm-enfr-1024-tf_model.h5",
+    "xlm-mlm-enro-1024": "https://cdn.huggingface.co/xlm-mlm-enro-1024-tf_model.h5",
+    "xlm-mlm-tlm-xnli15-1024": "https://cdn.huggingface.co/xlm-mlm-tlm-xnli15-1024-tf_model.h5",
+    "xlm-mlm-xnli15-1024": "https://cdn.huggingface.co/xlm-mlm-xnli15-1024-tf_model.h5",
+    "xlm-clm-enfr-1024": "https://cdn.huggingface.co/xlm-clm-enfr-1024-tf_model.h5",
+    "xlm-clm-ende-1024": "https://cdn.huggingface.co/xlm-clm-ende-1024-tf_model.h5",
+    "xlm-mlm-17-1280": "https://cdn.huggingface.co/xlm-mlm-17-1280-tf_model.h5",
+    "xlm-mlm-100-1280": "https://cdn.huggingface.co/xlm-mlm-100-1280-tf_model.h5",
 }
...
@@ -38,8 +38,8 @@ from .tokenization_utils import BatchEncoding
 logger = logging.getLogger(__name__)
 TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "xlnet-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-tf_model.h5",
-    "xlnet-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-tf_model.h5",
+    "xlnet-base-cased": "https://cdn.huggingface.co/xlnet-base-cased-tf_model.h5",
+    "xlnet-large-cased": "https://cdn.huggingface.co/xlnet-large-cased-tf_model.h5",
 }
...
@@ -34,7 +34,7 @@ from .modeling_utils import PreTrainedModel
 logger = logging.getLogger(__name__)
 TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "transfo-xl-wt103": "https://s3.amazonaws.com/models.huggingface.co/bert/transfo-xl-wt103-pytorch_model.bin",
+    "transfo-xl-wt103": "https://cdn.huggingface.co/transfo-xl-wt103-pytorch_model.bin",
 }
...
@@ -527,6 +527,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         proxies = kwargs.pop("proxies", None)
         output_loading_info = kwargs.pop("output_loading_info", False)
         local_files_only = kwargs.pop("local_files_only", False)
+        use_cdn = kwargs.pop("use_cdn", True)
         # Load config if we don't provide a configuration
         if not isinstance(config, PretrainedConfig):
@@ -577,7 +578,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
                 archive_file = pretrained_model_name_or_path + ".index"
             else:
                 archive_file = hf_bucket_url(
-                    pretrained_model_name_or_path, postfix=(TF2_WEIGHTS_NAME if from_tf else WEIGHTS_NAME),
+                    pretrained_model_name_or_path,
+                    filename=(TF2_WEIGHTS_NAME if from_tf else WEIGHTS_NAME),
+                    use_cdn=use_cdn,
                 )
         # redirect to the cache, if necessary
...
@@ -35,16 +35,16 @@ from .modeling_utils import PreTrainedModel, SequenceSummary, SQuADHead, prune_l
 logger = logging.getLogger(__name__)
 XLM_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "xlm-mlm-en-2048": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-pytorch_model.bin",
-    "xlm-mlm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-ende-1024-pytorch_model.bin",
-    "xlm-mlm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enfr-1024-pytorch_model.bin",
-    "xlm-mlm-enro-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enro-1024-pytorch_model.bin",
-    "xlm-mlm-tlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-tlm-xnli15-1024-pytorch_model.bin",
-    "xlm-mlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-xnli15-1024-pytorch_model.bin",
-    "xlm-clm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-enfr-1024-pytorch_model.bin",
-    "xlm-clm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-ende-1024-pytorch_model.bin",
-    "xlm-mlm-17-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-pytorch_model.bin",
-    "xlm-mlm-100-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-100-1280-pytorch_model.bin",
+    "xlm-mlm-en-2048": "https://cdn.huggingface.co/xlm-mlm-en-2048-pytorch_model.bin",
+    "xlm-mlm-ende-1024": "https://cdn.huggingface.co/xlm-mlm-ende-1024-pytorch_model.bin",
+    "xlm-mlm-enfr-1024": "https://cdn.huggingface.co/xlm-mlm-enfr-1024-pytorch_model.bin",
+    "xlm-mlm-enro-1024": "https://cdn.huggingface.co/xlm-mlm-enro-1024-pytorch_model.bin",
+    "xlm-mlm-tlm-xnli15-1024": "https://cdn.huggingface.co/xlm-mlm-tlm-xnli15-1024-pytorch_model.bin",
+    "xlm-mlm-xnli15-1024": "https://cdn.huggingface.co/xlm-mlm-xnli15-1024-pytorch_model.bin",
+    "xlm-clm-enfr-1024": "https://cdn.huggingface.co/xlm-clm-enfr-1024-pytorch_model.bin",
+    "xlm-clm-ende-1024": "https://cdn.huggingface.co/xlm-clm-ende-1024-pytorch_model.bin",
+    "xlm-mlm-17-1280": "https://cdn.huggingface.co/xlm-mlm-17-1280-pytorch_model.bin",
+    "xlm-mlm-100-1280": "https://cdn.huggingface.co/xlm-mlm-100-1280-pytorch_model.bin",
 }
...
@@ -32,12 +32,12 @@ from .modeling_roberta import (
 logger = logging.getLogger(__name__)
 XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "xlm-roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-base-pytorch_model.bin",
-    "xlm-roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-pytorch_model.bin",
-    "xlm-roberta-large-finetuned-conll02-dutch": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-dutch-pytorch_model.bin",
-    "xlm-roberta-large-finetuned-conll02-spanish": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-spanish-pytorch_model.bin",
-    "xlm-roberta-large-finetuned-conll03-english": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-english-pytorch_model.bin",
-    "xlm-roberta-large-finetuned-conll03-german": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-german-pytorch_model.bin",
+    "xlm-roberta-base": "https://cdn.huggingface.co/xlm-roberta-base-pytorch_model.bin",
+    "xlm-roberta-large": "https://cdn.huggingface.co/xlm-roberta-large-pytorch_model.bin",
+    "xlm-roberta-large-finetuned-conll02-dutch": "https://cdn.huggingface.co/xlm-roberta-large-finetuned-conll02-dutch-pytorch_model.bin",
+    "xlm-roberta-large-finetuned-conll02-spanish": "https://cdn.huggingface.co/xlm-roberta-large-finetuned-conll02-spanish-pytorch_model.bin",
+    "xlm-roberta-large-finetuned-conll03-english": "https://cdn.huggingface.co/xlm-roberta-large-finetuned-conll03-english-pytorch_model.bin",
+    "xlm-roberta-large-finetuned-conll03-german": "https://cdn.huggingface.co/xlm-roberta-large-finetuned-conll03-german-pytorch_model.bin",
 }
...
@@ -33,8 +33,8 @@ from .modeling_utils import PoolerAnswerClass, PoolerEndLogits, PoolerStartLogit
 logger = logging.getLogger(__name__)
 XLNET_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "xlnet-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-pytorch_model.bin",
-    "xlnet-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-pytorch_model.bin",
+    "xlnet-base-cased": "https://cdn.huggingface.co/xlnet-base-cased-pytorch_model.bin",
+    "xlnet-large-cased": "https://cdn.huggingface.co/xlnet-large-cased-pytorch_model.bin",
 }
...
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" Auto Model class. """
+""" Auto Tokenizer class. """
 import logging
...
@@ -30,10 +30,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
 PRETRAINED_VOCAB_FILES_MAP = {
     "vocab_file": {
-        "bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-vocab.txt",
-        "bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking-vocab.txt",
-        "bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-vocab.txt",
-        "bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-vocab.txt",
+        "bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/vocab.txt",
+        "bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/vocab.txt",
+        "bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/vocab.txt",
+        "bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/vocab.txt",
     }
 }
...
@@ -954,7 +954,9 @@ class PreTrainedTokenizer(SpecialTokensMixin):
                     logger.info("Didn't find file {}. We won't load it.".format(full_file_name))
                     full_file_name = None
             else:
-                full_file_name = hf_bucket_url(pretrained_model_name_or_path, postfix=file_name)
+                full_file_name = hf_bucket_url(
+                    pretrained_model_name_or_path, filename=file_name, use_cdn=False
+                )
             vocab_files[file_id] = full_file_name
...
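
Note that tokenizer files keep `use_cdn=False`: only model weights move to the CDN, while vocab files stay on the S3 bucket. Under the `hf_bucket_url` sketch above, the two kinds of call sites would resolve as follows (illustrative values, not output copied from the library):

```python
# Assuming the hf_bucket_url sketch given earlier in this commit description:
hf_bucket_url("gpt2", filename="vocab.json", use_cdn=False)
# -> "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json"
hf_bucket_url("gpt2", filename="tf_model.h5", use_cdn=True)
# -> "https://cdn.huggingface.co/gpt2-tf_model.h5"
```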
@@ -36,8 +36,8 @@ logger = logging.getLogger(__name__)
 # for the pretrained weights provided with the models
 ####################################################
 TF_XXX_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "xxx-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-tf_model.h5",
-    "xxx-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-tf_model.h5",
+    "xxx-base-uncased": "https://cdn.huggingface.co/xxx-base-uncased-tf_model.h5",
+    "xxx-large-uncased": "https://cdn.huggingface.co/xxx-large-uncased-tf_model.h5",
 }
...
@@ -38,8 +38,8 @@ logger = logging.getLogger(__name__)
 # for the pretrained weights provided with the models
 ####################################################
 XXX_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    "xxx-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-pytorch_model.bin",
-    "xxx-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-pytorch_model.bin",
+    "xxx-base-uncased": "https://cdn.huggingface.co/xxx-base-uncased-pytorch_model.bin",
+    "xxx-large-uncased": "https://cdn.huggingface.co/xxx-large-uncased-pytorch_model.bin",
 }