"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "02f48b9bfc9d7a1f04cce20b2df4c4b478971e6b"
Unverified Commit 042a6aa7 authored by Julien Chaumond, committed by GitHub

Tokenizers: ability to load from model subfolder (#8586)



* tiny typo

* Tokenizers: ability to load from model subfolder

* use subfolder for local files as well

* Uniformize model shortcut name => model id

* from s3 => from huggingface.co
Co-authored-by: Quentin Lhoest <lhoest.q@gmail.com>
parent 48395d6b
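As context for the diff below, here is a minimal sketch of what this change enables. The repo id, the `tokenizer/` layout, and the local path are placeholders; the `subfolder` keyword is the argument this PR threads through tokenizer loading:

```python
from transformers import AutoTokenizer

# Hypothetical model repo on huggingface.co whose tokenizer files live in a
# subfolder, e.g. <model_id>/tokenizer/vocab.txt instead of the repo root.
tokenizer = AutoTokenizer.from_pretrained(
    "some-user/some-model",  # placeholder model id
    subfolder="tokenizer",   # resolve tokenizer files under this subfolder
)

# Per the commit bullets, the same subfolder logic applies to local paths:
tokenizer = AutoTokenizer.from_pretrained("./my_model_directory", subfolder="tokenizer")
```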
@@ -7,7 +7,7 @@ export PYTHONPATH="../":"${PYTHONPATH}"
 python examples/rag/finetune.py \
     --data_dir $DATA_DIR \
     --output_dir $OUTPUT_DIR \
-    --model_name_or_path $MODLE_NAME_OR_PATH \
+    --model_name_or_path $MODEL_NAME_OR_PATH \
     --model_type rag_sequence \
     --fp16 \
     --gpus 8 \
...
@@ -43,7 +43,8 @@ class ModelArguments:
         default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
     )
     cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
+        default=None,
+        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
     )
     freeze_encoder: bool = field(default=False, metadata={"help": "Whether to freeze the encoder."})
     freeze_embeds: bool = field(default=False, metadata={"help": "Whether to freeze the embeddings."})
...
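For readers unfamiliar with the example scripts, here is a minimal, self-contained sketch of how such a `ModelArguments` dataclass is consumed, assuming transformers' `HfArgumentParser`; the field set is trimmed to the ones shown above:

```python
from dataclasses import dataclass, field
from typing import Optional

from transformers import AutoModel, HfArgumentParser


@dataclass
class ModelArguments:
    # Trimmed-down version of the dataclass in the diff above.
    model_name_or_path: str = field(metadata={"help": "Model id on huggingface.co or local path"})
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )


# HfArgumentParser turns the dataclass fields into argparse options
# (--model_name_or_path, --cache_dir) and returns populated instances.
(model_args,) = HfArgumentParser(ModelArguments).parse_args_into_dataclasses()
model = AutoModel.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir)
```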
@@ -124,7 +124,8 @@ class ModelArguments:
         default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
     )
     cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
+        default=None,
+        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
     )
     use_fast_tokenizer: bool = field(
         default=True,
...
@@ -117,7 +117,8 @@ class ModelArguments:
     # If you want to tweak more attributes on your tokenizer, you should do it in a distinct script,
     # or just modify its tokenizer_config.json.
     cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
+        default=None,
+        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
     )
...
@@ -182,7 +182,8 @@ class ModelArguments:
     # If you want to tweak more attributes on your tokenizer, you should do it in a distinct script,
     # or just modify its tokenizer_config.json.
     cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
+        default=None,
+        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
     )
...
@@ -406,7 +406,7 @@ def main():
         "--cache_dir",
         default=None,
         type=str,
-        help="Where do you want to store the pre-trained models downloaded from s3",
+        help="Where do you want to store the pre-trained models downloaded from huggingface.co",
     )
     parser.add_argument(
         "--max_seq_length",
...
@@ -60,7 +60,8 @@ class ModelArguments:
         default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
     )
     cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
+        default=None,
+        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
     )
...
@@ -65,7 +65,8 @@ class ModelArguments:
     # If you want to tweak more attributes on your tokenizer, you should do it in a distinct script,
     # or just modify its tokenizer_config.json.
     cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
+        default=None,
+        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )
...
@@ -67,7 +67,8 @@ class ModelArguments:
     # If you want to tweak more attributes on your tokenizer, you should do it in a distinct script,
     # or just modify its tokenizer_config.json.
     cache_dir: Optional[str] = field(
-        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
+        default=None,
+        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
     )
...
@@ -25,7 +25,7 @@ def config(*args, **kwargs):
         # Using torch.hub !
         import torch
-        config = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased')  # Download configuration from S3 and cache.
+        config = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased')  # Download configuration from huggingface.co and cache.
         config = torch.hub.load('huggingface/transformers', 'config', './test/bert_saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
         config = torch.hub.load('huggingface/transformers', 'config', './test/bert_saved_model/my_configuration.json')
         config = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased', output_attentions=True, foo=False)
@@ -45,7 +45,7 @@ def tokenizer(*args, **kwargs):
         # Using torch.hub !
         import torch
-        tokenizer = torch.hub.load('huggingface/transformers', 'tokenizer', 'bert-base-uncased')  # Download vocabulary from S3 and cache.
+        tokenizer = torch.hub.load('huggingface/transformers', 'tokenizer', 'bert-base-uncased')  # Download vocabulary from huggingface.co and cache.
         tokenizer = torch.hub.load('huggingface/transformers', 'tokenizer', './test/bert_saved_model/')  # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')`
     """
@@ -59,7 +59,7 @@ def model(*args, **kwargs):
         # Using torch.hub !
         import torch
-        model = torch.hub.load('huggingface/transformers', 'model', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
+        model = torch.hub.load('huggingface/transformers', 'model', 'bert-base-uncased')  # Download model and configuration from huggingface.co and cache.
         model = torch.hub.load('huggingface/transformers', 'model', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
         model = torch.hub.load('huggingface/transformers', 'model', 'bert-base-uncased', output_attentions=True)  # Update configuration during loading
         assert model.config.output_attentions == True
@@ -78,7 +78,7 @@ def modelWithLMHead(*args, **kwargs):
         # Using torch.hub !
         import torch
-        model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
+        model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', 'bert-base-uncased')  # Download model and configuration from huggingface.co and cache.
         model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
         model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', 'bert-base-uncased', output_attentions=True)  # Update configuration during loading
         assert model.config.output_attentions == True
@@ -96,7 +96,7 @@ def modelForSequenceClassification(*args, **kwargs):
         # Using torch.hub !
         import torch
-        model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
+        model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', 'bert-base-uncased')  # Download model and configuration from huggingface.co and cache.
         model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
         model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', 'bert-base-uncased', output_attentions=True)  # Update configuration during loading
         assert model.config.output_attentions == True
@@ -115,7 +115,7 @@ def modelForQuestionAnswering(*args, **kwargs):
         # Using torch.hub !
         import torch
-        model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
+        model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', 'bert-base-uncased')  # Download model and configuration from huggingface.co and cache.
         model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
         model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', 'bert-base-uncased', output_attentions=True)  # Update configuration during loading
         assert model.config.output_attentions == True
...
@@ -31,7 +31,7 @@ class UserCommands(BaseTransformersCLICommand):
         ls_parser.add_argument("--organization", type=str, help="Optional: organization namespace.")
         ls_parser.set_defaults(func=lambda args: ListObjsCommand(args))
         rm_parser = s3_subparsers.add_parser("rm")
-        rm_parser.add_argument("filename", type=str, help="individual object filename to delete from S3.")
+        rm_parser.add_argument("filename", type=str, help="individual object filename to delete from huggingface.co.")
         rm_parser.add_argument("--organization", type=str, help="Optional: organization namespace.")
         rm_parser.set_defaults(func=lambda args: DeleteObjCommand(args))
         upload_parser = s3_subparsers.add_parser("upload", help="Upload a file to S3.")
...
@@ -291,10 +291,9 @@ class PretrainedConfig(object):
             pretrained_model_name_or_path (:obj:`str`):
                 This can be either:
-                - the `shortcut name` of a pretrained model configuration to load from cache or download, e.g.,
-                  ``bert-base-uncased``.
-                - the `identifier name` of a pretrained model configuration that was uploaded to our S3 by any user,
-                  e.g., ``dbmdz/bert-base-german-cased``.
+                - a string, the `model id` of a pretrained model configuration hosted inside a model repo on
+                  huggingface.co. Valid model ids can be located at the root-level, like ``bert-base-uncased``, or
+                  namespaced under a user or organization name, like ``dbmdz/bert-base-german-cased``.
                 - a path to a `directory` containing a configuration file saved using the
                   :func:`~transformers.PretrainedConfig.save_pretrained` method, e.g., ``./my_model_directory/``.
                 - a path or url to a saved configuration JSON `file`, e.g.,
@@ -333,7 +332,7 @@ class PretrainedConfig(object):
             # We can't instantiate directly the base class `PretrainedConfig` so let's show the examples on a
             # derived class: BertConfig
-            config = BertConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
+            config = BertConfig.from_pretrained('bert-base-uncased')  # Download configuration from huggingface.co and cache.
             config = BertConfig.from_pretrained('./test/saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
             config = BertConfig.from_pretrained('./test/saved_model/my_configuration.json')
             config = BertConfig.from_pretrained('bert-base-uncased', output_attentions=True, foo=False)
...
@@ -855,7 +855,9 @@ def is_remote_url(url_or_filename):
     return parsed.scheme in ("http", "https")


-def hf_bucket_url(model_id: str, filename: str, revision: Optional[str] = None, mirror=None) -> str:
+def hf_bucket_url(
+    model_id: str, filename: str, subfolder: Optional[str] = None, revision: Optional[str] = None, mirror=None
+) -> str:
     """
     Resolve a model identifier, a file name, and an optional revision id, to a huggingface.co-hosted url, redirecting
     to Cloudfront (a Content Delivery Network, or CDN) for large files.
@@ -872,6 +874,9 @@ def hf_bucket_url(
     its sha1 if stored in git, or its sha256 if stored in git-lfs. Files cached locally from transformers before v3.5.0
     are not shared with those new files, because the cached file's name contains a hash of the url (which changed).
     """
+    if subfolder is not None:
+        filename = f"{subfolder}/{filename}"
+
     if mirror:
         endpoint = PRESET_MIRROR_DICT.get(mirror, mirror)
         legacy_format = "/" not in model_id
...
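To make the `subfolder` plumbing concrete, here is a standalone sketch (not the library function itself) of how the new parameter composes into a hub URL, assuming the huggingface.co `resolve` URL scheme that `hf_bucket_url` targets as of v3.5.0; the mirror and legacy-format branches are omitted:

```python
from typing import Optional


def hf_bucket_url_sketch(
    model_id: str, filename: str, subfolder: Optional[str] = None, revision: Optional[str] = None
) -> str:
    # Exactly as in the diff above: the subfolder is simply prepended to the
    # filename, so "tokenizer" + "vocab.txt" resolves to "tokenizer/vocab.txt".
    if subfolder is not None:
        filename = f"{subfolder}/{filename}"
    if revision is None:
        revision = "main"
    return f"https://huggingface.co/{model_id}/resolve/{revision}/{filename}"


print(hf_bucket_url_sketch("some-user/some-model", "vocab.txt", subfolder="tokenizer"))
# https://huggingface.co/some-user/some-model/resolve/main/tokenizer/vocab.txt
```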
@@ -148,12 +148,12 @@ class TFGenerationMixin:
         Examples::
             tokenizer = AutoTokenizer.from_pretrained('distilgpt2')  # Initialize tokenizer
-            model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')  # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')  # Download model and configuration from huggingface.co and cache.
             outputs = model.generate(max_length=40)  # do greedy decoding
             print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))
             tokenizer = AutoTokenizer.from_pretrained('openai-gpt')  # Initialize tokenizer
-            model = TFAutoModelWithLMHead.from_pretrained('openai-gpt')  # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('openai-gpt')  # Download model and configuration from huggingface.co and cache.
             input_context = 'The dog'
             input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
             outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3, temperature=1.5)  # generate 3 independent sequences using beam search decoding (5 beams) with sampling from initial context 'The dog'
@@ -161,7 +161,7 @@ class TFGenerationMixin:
             print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))
             tokenizer = AutoTokenizer.from_pretrained('distilgpt2')  # Initialize tokenizer
-            model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')  # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('distilgpt2')  # Download model and configuration from huggingface.co and cache.
             input_context = 'The dog'
             input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
             outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, num_return_sequences=3, do_sample=True)  # generate 3 candidates using sampling
@@ -169,14 +169,14 @@ class TFGenerationMixin:
             print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True)))
             tokenizer = AutoTokenizer.from_pretrained('ctrl')  # Initialize tokenizer
-            model = TFAutoModelWithLMHead.from_pretrained('ctrl')  # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('ctrl')  # Download model and configuration from huggingface.co and cache.
             input_context = 'Legal My neighbor is'  # "Legal" is one of the control codes for ctrl
             input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
             outputs = model.generate(input_ids=input_ids, max_length=50, temperature=0.7, repetition_penalty=1.2)  # generate sequences
             print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True)))
             tokenizer = AutoTokenizer.from_pretrained('gpt2')  # Initialize tokenizer
-            model = TFAutoModelWithLMHead.from_pretrained('gpt2')  # Download model and configuration from S3 and cache.
+            model = TFAutoModelWithLMHead.from_pretrained('gpt2')  # Download model and configuration from huggingface.co and cache.
             input_context = 'My cute dog'
             bad_words_ids = [tokenizer.encode(bad_word, add_prefix_space=True) for bad_word in ['idiot', 'stupid', 'shut up']]
             input_ids = tokenizer.encode(input_context, return_tensors='tf')  # encode input context
...
@@ -87,10 +87,9 @@ class ModelCard:
         Parameters:
             pretrained_model_name_or_path: either:
-                - a string with the `shortcut name` of a pre-trained model card to load from cache or download, e.g.:
-                  ``bert-base-uncased``.
-                - a string with the `identifier name` of a pre-trained model card that was user-uploaded to our S3,
-                  e.g.: ``dbmdz/bert-base-german-cased``.
+                - a string, the `model id` of a pretrained model card hosted inside a model repo on huggingface.co.
+                  Valid model ids can be located at the root-level, like ``bert-base-uncased``, or namespaced under a
+                  user or organization name, like ``dbmdz/bert-base-german-cased``.
                 - a path to a `directory` containing a model card file saved using the
                   :func:`~transformers.ModelCard.save_pretrained` method, e.g.: ``./my_model_directory/``.
                 - a path or url to a saved model card JSON `file`, e.g.: ``./my_model_directory/modelcard.json``.
@@ -124,7 +123,7 @@ class ModelCard:
         Examples::
-            modelcard = ModelCard.from_pretrained('bert-base-uncased')  # Download model card from S3 and cache.
+            modelcard = ModelCard.from_pretrained('bert-base-uncased')  # Download model card from huggingface.co and cache.
             modelcard = ModelCard.from_pretrained('./test/saved_model/')  # E.g. model card was saved using `save_pretrained('./test/saved_model/')`
             modelcard = ModelCard.from_pretrained('./test/saved_model/modelcard.json')
             modelcard = ModelCard.from_pretrained('bert-base-uncased', output_attentions=True, foo=False)
...
@@ -544,10 +544,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
             pretrained_model_name_or_path (:obj:`str`, `optional`):
                 Can be either:
-                    - A string with the `shortcut name` of a pretrained model to load from cache or download, e.g.,
-                      ``bert-base-uncased``.
-                    - A string with the `identifier name` of a pretrained model that was user-uploaded to our S3, e.g.,
-                      ``dbmdz/bert-base-german-cased``.
+                    - A string, the `model id` of a pretrained model hosted inside a model repo on huggingface.co.
+                      Valid model ids can be located at the root-level, like ``bert-base-uncased``, or namespaced under
+                      a user or organization name, like ``dbmdz/bert-base-german-cased``.
                     - A path to a `directory` containing model weights saved using
                       :func:`~transformersTF.PreTrainedModel.save_pretrained`, e.g., ``./my_model_directory/``.
                     - A path or url to a `PyTorch state_dict save file` (e.g, ``./pt_model/pytorch_model.bin``). In
@@ -568,8 +567,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
                 Configuration for the model to use instead of an automatically loaded configuration. Configuration can
                 be automatically loaded when:
-                    - The model is a model provided by the library (loaded with the `shortcut name` string of a
-                      pretrained model).
+                    - The model is a model provided by the library (loaded with the `model id` string of a pretrained
+                      model).
                     - The model was saved using :func:`~transformers.TFPreTrainedModel.save_pretrained` and is reloaded
                       by supplying the save directory.
                     - The model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a
@@ -618,7 +617,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
         Examples::
             >>> from transformers import BertConfig, TFBertModel
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
             >>> model = TFBertModel.from_pretrained('bert-base-uncased')
             >>> # Model was saved using `save_pretrained('./test/saved_model/')` (for example purposes, not runnable).
             >>> model = TFBertModel.from_pretrained('./test/saved_model/')
...
@@ -758,10 +758,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
             pretrained_model_name_or_path (:obj:`str`, `optional`):
                 Can be either:
-                    - A string with the `shortcut name` of a pretrained model to load from cache or download, e.g.,
-                      ``bert-base-uncased``.
-                    - A string with the `identifier name` of a pretrained model that was user-uploaded to our S3, e.g.,
-                      ``dbmdz/bert-base-german-cased``.
+                    - A string, the `model id` of a pretrained model hosted inside a model repo on huggingface.co.
+                      Valid model ids can be located at the root-level, like ``bert-base-uncased``, or namespaced under
+                      a user or organization name, like ``dbmdz/bert-base-german-cased``.
                     - A path to a `directory` containing model weights saved using
                       :func:`~transformers.PreTrainedModel.save_pretrained`, e.g., ``./my_model_directory/``.
                     - A path or url to a `tensorflow index checkpoint file` (e.g, ``./tf_model/model.ckpt.index``). In
@@ -781,8 +780,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
                 Configuration for the model to use instead of an automatically loaded configuration. Configuration can
                 be automatically loaded when:
-                    - The model is a model provided by the library (loaded with the `shortcut name` string of a
-                      pretrained model).
+                    - The model is a model provided by the library (loaded with the `model id` string of a pretrained
+                      model).
                     - The model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded
                       by supplying the save directory.
                     - The model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a
@@ -838,7 +837,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
         Examples::
             >>> from transformers import BertConfig, BertModel
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
             >>> model = BertModel.from_pretrained('bert-base-uncased')
             >>> # Model was saved using `save_pretrained('./test/saved_model/')` (for example purposes, not runnable).
             >>> model = BertModel.from_pretrained('./test/saved_model/')
...
@@ -277,10 +277,9 @@ class AutoConfig:
             pretrained_model_name_or_path (:obj:`str`):
                 Can be either:
-                    - A string with the `shortcut name` of a pretrained model configuration to load from cache or
-                      download, e.g., ``bert-base-uncased``.
-                    - A string with the `identifier name` of a pretrained model configuration that was user-uploaded to
-                      our S3, e.g., ``dbmdz/bert-base-german-cased``.
+                    - A string, the `model id` of a pretrained model configuration hosted inside a model repo on
+                      huggingface.co. Valid model ids can be located at the root-level, like ``bert-base-uncased``, or
+                      namespaced under a user or organization name, like ``dbmdz/bert-base-german-cased``.
                     - A path to a `directory` containing a configuration file saved using the
                       :meth:`~transformers.PretrainedConfig.save_pretrained` method, or the
                       :meth:`~transformers.PreTrainedModel.save_pretrained` method, e.g., ``./my_model_directory/``.
@@ -317,10 +316,10 @@ class AutoConfig:
             >>> from transformers import AutoConfig
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
             >>> config = AutoConfig.from_pretrained('bert-base-uncased')
-            >>> # Download configuration from S3 (user-uploaded) and cache.
+            >>> # Download configuration from huggingface.co (user-uploaded) and cache.
             >>> config = AutoConfig.from_pretrained('dbmdz/bert-base-german-cased')
             >>> # If configuration file is in a directory (e.g., was saved using `save_pretrained('./test/saved_model/')`).
...
@@ -505,10 +505,9 @@ AUTO_MODEL_PRETRAINED_DOCSTRING = r"""
         pretrained_model_name_or_path:
             Can be either:
-                - A string with the `shortcut name` of a pretrained model to load from cache or download, e.g.,
-                  ``bert-base-uncased``.
-                - A string with the `identifier name` of a pretrained model that was user-uploaded to our S3, e.g.,
-                  ``dbmdz/bert-base-german-cased``.
+                - A string, the `model id` of a pretrained model hosted inside a model repo on huggingface.co.
+                  Valid model ids can be located at the root-level, like ``bert-base-uncased``, or namespaced under
+                  a user or organization name, like ``dbmdz/bert-base-german-cased``.
                 - A path to a `directory` containing model weights saved using
                   :func:`~transformers.PreTrainedModel.save_pretrained`, e.g., ``./my_model_directory/``.
                 - A path or url to a `tensorflow index checkpoint file` (e.g, ``./tf_model/model.ckpt.index``). In
@@ -521,8 +520,8 @@ AUTO_MODEL_PRETRAINED_DOCSTRING = r"""
             Configuration for the model to use instead of an automatically loaded configuration. Configuration can
             be automatically loaded when:
-                - The model is a model provided by the library (loaded with the `shortcut name` string of a
-                  pretrained model).
+                - The model is a model provided by the library (loaded with the `model id` string of a pretrained
+                  model).
                 - The model was saved using :meth:`~transformers.PreTrainedModel.save_pretrained` and is reloaded
                   by supplying the save directory.
                 - The model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a
@@ -608,7 +607,7 @@ class AutoModel:
        Examples::
            >>> from transformers import AutoConfig, AutoModel
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModel.from_config(config)
        """
@@ -634,7 +633,7 @@ class AutoModel:
            >>> from transformers import AutoConfig, AutoModel
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModel.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
@@ -702,7 +701,7 @@ class AutoModelForPreTraining:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForPreTraining
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelForPreTraining.from_config(config)
        """
@@ -728,7 +727,7 @@ class AutoModelForPreTraining:
            >>> from transformers import AutoConfig, AutoModelForPreTraining
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForPreTraining.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
@@ -802,7 +801,7 @@ class AutoModelWithLMHead:
        Examples::
            >>> from transformers import AutoConfig, AutoModelWithLMHead
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelWithLMHead.from_config(config)
        """
@@ -834,7 +833,7 @@ class AutoModelWithLMHead:
            >>> from transformers import AutoConfig, AutoModelWithLMHead
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelWithLMHead.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
@@ -908,7 +907,7 @@ class AutoModelForCausalLM:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForCausalLM
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('gpt2')
            >>> model = AutoModelForCausalLM.from_config(config)
        """
@@ -934,7 +933,7 @@ class AutoModelForCausalLM:
            >>> from transformers import AutoConfig, AutoModelForCausalLM
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForCausalLM.from_pretrained('gpt2')
            >>> # Update configuration during loading
@@ -1002,7 +1001,7 @@ class AutoModelForMaskedLM:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForMaskedLM
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelForMaskedLM.from_config(config)
        """
@@ -1028,7 +1027,7 @@ class AutoModelForMaskedLM:
            >>> from transformers import AutoConfig, AutoModelForMaskedLM
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForMaskedLM.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
@@ -1096,7 +1095,7 @@ class AutoModelForSeq2SeqLM:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForSeq2SeqLM
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('t5')
            >>> model = AutoModelForSeq2SeqLM.from_config(config)
        """
@@ -1124,7 +1123,7 @@ class AutoModelForSeq2SeqLM:
            >>> from transformers import AutoConfig, AutoModelForSeq2SeqLM
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')
            >>> # Update configuration during loading
@@ -1194,7 +1193,7 @@ class AutoModelForSequenceClassification:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForSequenceClassification
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelForSequenceClassification.from_config(config)
        """
@@ -1222,7 +1221,7 @@ class AutoModelForSequenceClassification:
            >>> from transformers import AutoConfig, AutoModelForSequenceClassification
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
@@ -1291,7 +1290,7 @@ class AutoModelForQuestionAnswering:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForQuestionAnswering
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelForQuestionAnswering.from_config(config)
        """
@@ -1320,7 +1319,7 @@ class AutoModelForQuestionAnswering:
            >>> from transformers import AutoConfig, AutoModelForQuestionAnswering
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForQuestionAnswering.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
@@ -1390,7 +1389,7 @@ class AutoModelForTokenClassification:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForTokenClassification
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelForTokenClassification.from_config(config)
        """
@@ -1419,7 +1418,7 @@ class AutoModelForTokenClassification:
            >>> from transformers import AutoConfig, AutoModelForTokenClassification
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForTokenClassification.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
@@ -1490,7 +1489,7 @@ class AutoModelForMultipleChoice:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForMultipleChoice
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelForMultipleChoice.from_config(config)
        """
@@ -1519,7 +1518,7 @@ class AutoModelForMultipleChoice:
            >>> from transformers import AutoConfig, AutoModelForMultipleChoice
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForMultipleChoice.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
@@ -1590,7 +1589,7 @@ class AutoModelForNextSentencePrediction:
        Examples::
            >>> from transformers import AutoConfig, AutoModelForNextSentencePrediction
-            >>> # Download configuration from S3 and cache.
+            >>> # Download configuration from huggingface.co and cache.
            >>> config = AutoConfig.from_pretrained('bert-base-uncased')
            >>> model = AutoModelForNextSentencePrediction.from_config(config)
        """
@@ -1619,7 +1618,7 @@ class AutoModelForNextSentencePrediction:
            >>> from transformers import AutoConfig, AutoModelForNextSentencePrediction
-            >>> # Download model and configuration from S3 and cache.
+            >>> # Download model and configuration from huggingface.co and cache.
            >>> model = AutoModelForNextSentencePrediction.from_pretrained('bert-base-uncased')
            >>> # Update configuration during loading
...
@@ -75,7 +75,7 @@ class FlaxAutoModel(object):
        Examples::
            config = BertConfig.from_pretrained('bert-base-uncased')
-            # Download configuration from S3 and cache.
+            # Download configuration from huggingface.co and cache.
            model = FlaxAutoModel.from_config(config)
            # E.g. model was saved using `save_pretrained('./test/saved_model/')`
    """
@@ -109,10 +109,9 @@ class FlaxAutoModel(object):
        Args:
            pretrained_model_name_or_path: either:
-                - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.:
-                  ``bert-base-uncased``.
-                - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.:
-                  ``dbmdz/bert-base-german-cased``.
+                - a string, the `model id` of a pretrained model hosted inside a model repo on huggingface.co. Valid
+                  model ids can be located at the root-level, like ``bert-base-uncased``, or namespaced under a user or
+                  organization name, like ``dbmdz/bert-base-german-cased``.
                - a path to a `directory` containing model weights saved using
                  :func:`~transformers.FlaxPreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
                - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this
@@ -165,7 +164,7 @@ class FlaxAutoModel(object):
        Examples::
-            model = FlaxAutoModel.from_pretrained('bert-base-uncased')  # Download model and configuration from S3 and cache.
+            model = FlaxAutoModel.from_pretrained('bert-base-uncased')  # Download model and configuration from huggingface.co and cache.
            model = FlaxAutoModel.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
            assert model.config.output_attention == True
...